[*] Move GetSystemCpuSetInformation to AuProcAddresses.NT.cpp

[*] Fixed E-core awareness under modern NT targets
This commit is contained in:
Reece Wilson 2023-07-09 10:02:21 +01:00
parent 83fad7c538
commit d192e2529f
3 changed files with 161 additions and 75 deletions

View File

@ -57,6 +57,9 @@ namespace Aurora
ADD_GET_PROC_BI(Kernel32, KernelBase, MapViewOfFile3) ADD_GET_PROC_BI(Kernel32, KernelBase, MapViewOfFile3)
ADD_GET_PROC_BI(Kernel32, KernelBase, UnmapViewOfFile2) ADD_GET_PROC_BI(Kernel32, KernelBase, UnmapViewOfFile2)
ADD_GET_PROC(Kernel32, GetSystemCpuSetInformation)
ADD_GET_PROC(Kernel32, GetLogicalProcessorInformation)
ADD_GET_PROC(Sync, WaitOnAddress) ADD_GET_PROC(Sync, WaitOnAddress)
ADD_GET_PROC(Sync, WakeByAddressSingle) ADD_GET_PROC(Sync, WakeByAddressSingle)
ADD_GET_PROC(Sync, WakeByAddressAll) ADD_GET_PROC(Sync, WakeByAddressAll)

View File

@ -99,11 +99,24 @@ namespace Aurora
const LARGE_INTEGER * timeout); const LARGE_INTEGER * timeout);
#if defined(AURORA_PLATFORM_WIN32) #if defined(AURORA_PLATFORM_WIN32)
inline NTSTATUS(_stdcall *pRtlGetVersion)( inline NTSTATUS(__stdcall *pRtlGetVersion)(
PRTL_OSVERSIONINFOW lpVersionInformation PRTL_OSVERSIONINFOW lpVersionInformation
); );
#endif #endif
inline BOOL(__stdcall *pGetSystemCpuSetInformation)(
PSYSTEM_CPU_SET_INFORMATION Information,
ULONG BufferLength,
PULONG ReturnedLength,
HANDLE Process,
ULONG Flags
);
inline BOOL(__stdcall * pGetLogicalProcessorInformation)(
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION Buffer,
PDWORD ReturnedLength
);
inline bool gUseNativeWaitMutex {}; inline bool gUseNativeWaitMutex {};
inline bool gUseNativeWaitCondvar {}; inline bool gUseNativeWaitCondvar {};
inline bool gUseNativeWaitSemapahore {}; inline bool gUseNativeWaitSemapahore {};

View File

@ -1,7 +1,7 @@
/*** /***
Copyright (C) 2022 J Reece Wilson (a/k/a "Reece"). All rights reserved. Copyright (C) 2022-2023 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: AuCpuId.Nt.cpp File: AuCpuId.NT.cpp
Date: 2022-1-25 Date: 2022-1-25
Author: Reece Author: Reece
***/ ***/
@ -14,30 +14,20 @@
#include <VersionHelpers.h> #include <VersionHelpers.h>
#endif #endif
BOOL(__stdcall *GetSystemCpuSetInformation_f)(
PSYSTEM_CPU_SET_INFORMATION Information,
ULONG BufferLength,
PULONG ReturnedLength,
HANDLE Process,
ULONG Flags
);
namespace Aurora::HWInfo namespace Aurora::HWInfo
{ {
static bool TrySetNtCpuSetInfoSlowExtended() static bool SetWindows10CpuSetInfoSlow()
{ {
SYSTEM_CPU_SET_INFORMATION cpuSetInfo[128]; SYSTEM_CPU_SET_INFORMATION cpuSetInfo[128];
SYSTEM_LOGICAL_PROCESSOR_INFORMATION sysinfo[128]; SYSTEM_LOGICAL_PROCESSOR_INFORMATION sysinfo[128];
DWORD length = {}; DWORD dwLength {};
GetSystemCpuSetInformation_f = AuReinterpretCast<decltype(GetSystemCpuSetInformation_f)>(GetProcAddress(GetModuleHandleW(L"Kernel32.dll"), "GetSystemCpuSetInformation")); if (!pGetSystemCpuSetInformation)
if (!GetSystemCpuSetInformation_f)
{ {
return false; return false;
} }
if (!GetSystemCpuSetInformation_f(cpuSetInfo, sizeof(cpuSetInfo), &length, 0, 0)) if (!pGetSystemCpuSetInformation(cpuSetInfo, sizeof(cpuSetInfo), &dwLength, 0, 0))
{ {
return false; return false;
} }
@ -45,15 +35,47 @@ namespace Aurora::HWInfo
struct CpuInfo struct CpuInfo
{ {
AuList<AuUInt8> low; AuList<AuUInt8> low;
AuList<CpuBitId> server; AuList<AuPair<AuUInt8, CpuBitId>> server;
CpuBitId mask; CpuBitId mask;
}; };
AuBST<AuUInt8, CpuInfo> cpuThreads; AuBST<AuUInt8, CpuInfo> cpuThreads;
AuUInt8 cpuCount;
cpuCount = AuUInt8(length / sizeof(decltype(*cpuSetInfo))); AuUInt8 uThreadCount = AuUInt8(dwLength / sizeof(decltype(*cpuSetInfo)));
for (int i = 0; i < cpuCount; i++) AuUInt8 uBestClass {};
bool bHasBestClass {};
if (uThreadCount)
{
AuUInt8 countTable[255] {};
for (AU_ITERATE_N(i, uThreadCount))
{
countTable[cpuSetInfo[i].CpuSet.EfficiencyClass]++;
}
AuUInt8 uBestClassCounter {};
for (AU_ITERATE_N(i, AuArraySize(countTable)))
{
auto uCurrentCount = countTable[i];
if (uCurrentCount < uBestClassCounter)
{
continue;
}
if (!i)
{
continue;
}
uBestClassCounter = uCurrentCount;
bHasBestClass = true;
uBestClass = i;
}
}
for (AU_ITERATE_N(i, uThreadCount))
{ {
auto &idx = cpuThreads[cpuSetInfo[i].CpuSet.CoreIndex]; auto &idx = cpuThreads[cpuSetInfo[i].CpuSet.CoreIndex];
@ -80,7 +102,7 @@ namespace Aurora::HWInfo
SysAssert(sets.size() == 1); SysAssert(sets.size() == 1);
SysAssert(sets[0] == cpuSetInfo[i].CpuSet.Id); SysAssert(sets[0] == cpuSetInfo[i].CpuSet.Id);
idx.server.push_back(cpuId); idx.server.push_back(AuMakePair(cpuSetInfo[i].CpuSet.EfficiencyClass, cpuId));
idx.low.push_back(id); idx.low.push_back(id);
idx.mask.Add(cpuId); idx.mask.Add(cpuId);
gCpuInfo.maskAllCores.Add(cpuId); gCpuInfo.maskAllCores.Add(cpuId);
@ -89,10 +111,10 @@ namespace Aurora::HWInfo
for (const auto &[cpuId, coreIds] : cpuThreads) for (const auto &[cpuId, coreIds] : cpuThreads)
{ {
AuUInt64 shortMask {}; AuUInt64 shortMask {};
for (const auto &id : coreIds.server) for (const auto &[eClass, id] : coreIds.server)
{ {
// TODO (scar): if (bHasBestClass &&
if (false) eClass != uBestClass)
{ {
gCpuInfo.maskECores.Add(id); gCpuInfo.maskECores.Add(id);
} }
@ -113,18 +135,23 @@ namespace Aurora::HWInfo
} }
gCpuInfo.uSocket = 1; gCpuInfo.uSocket = 1;
gCpuInfo.uThreads = cpuCount; gCpuInfo.uThreads = uThreadCount;
gCpuInfo.uCores = AuUInt8(cpuThreads.size()); gCpuInfo.uCores = AuUInt8(cpuThreads.size());
if (!GetLogicalProcessorInformation(sysinfo, &length)) if (!pGetLogicalProcessorInformation)
{
return true;
}
if (!pGetLogicalProcessorInformation(sysinfo, &dwLength))
{ {
return true; return true;
} }
gCpuInfo.uSocket = 0; gCpuInfo.uSocket = 0;
length /= sizeof(*sysinfo); dwLength /= sizeof(*sysinfo);
for (auto i = 0u; i < length; i++) for (auto i = 0u; i < dwLength; i++)
{ {
if (sysinfo[i].Relationship == RelationProcessorPackage) if (sysinfo[i].Relationship == RelationProcessorPackage)
{ {
@ -152,30 +179,22 @@ namespace Aurora::HWInfo
return true; return true;
} }
void SetCpuTopologyNT() static bool SetWindowsXPSp3ExtendedInformation()
{ {
SYSTEM_LOGICAL_PROCESSOR_INFORMATION sysinfo[128]; SYSTEM_LOGICAL_PROCESSOR_INFORMATION sysinfo[128];
DWORD length = AuArraySize(sysinfo) * sizeof(*sysinfo); DWORD dwLength = AuArraySize(sysinfo) * sizeof(*sysinfo);
if (/*SWInfo::IsWindows10OrGreater() || IsWindowsServer() */ true) if (!pGetLogicalProcessorInformation)
{ {
if (TrySetNtCpuSetInfoSlowExtended()) return false;
{
return;
}
} }
if (!GetLogicalProcessorInformation(sysinfo, &length)) if (!pGetLogicalProcessorInformation(sysinfo, &dwLength))
{ {
SYSTEM_INFO sysinfo; return false;
GetSystemInfo(&sysinfo);
gCpuInfo.uSocket = 1;
gCpuInfo.uCores = 1;
gCpuInfo.uThreads = sysinfo.dwNumberOfProcessors;
return;
} }
length /= sizeof(*sysinfo); dwLength /= sizeof(*sysinfo);
gCpuInfo.uSocket = 0; gCpuInfo.uSocket = 0;
gCpuInfo.uCores = 0; gCpuInfo.uCores = 0;
@ -183,7 +202,7 @@ namespace Aurora::HWInfo
bool sparse = false; bool sparse = false;
bool hasHTCores = false; bool hasHTCores = false;
for (auto i = 0u; i < length; i++) for (auto i = 0u; i < dwLength; i++)
{ {
if (sysinfo[i].Relationship == RelationProcessorCore) if (sysinfo[i].Relationship == RelationProcessorCore)
{ {
@ -196,32 +215,12 @@ namespace Aurora::HWInfo
serverId.lower = mask; serverId.lower = mask;
gCpuInfo.coreTopology.push_back(serverId); gCpuInfo.coreTopology.push_back(serverId);
int counter {}; auto uThreadsInCore = AuPopCnt(mask);
unsigned long offset {}, tmp; gCpuInfo.uThreads += uThreadsInCore;
#if 0
while (offset != (sizeof(offset) * 8))
{
// Count the index to a 1
if (BitScanForward(&tmp, mask >> offset) == 0) break; // mask was zero, end of scan
offset += tmp;
// Count the 1's by inverting the bitmap and counting to 1 hasHTCores |= (uThreadsInCore == 2);
BitScanForward(&tmp, ~(mask >> offset));
offset += tmp;
if (counter++) sparse = true; if (hasHTCores && (uThreadsInCore == 1))
// Increment threads by the bits set in
gCpuInfo.uThreads += tmp;
}
#else
tmp = AuPopCnt(mask);
gCpuInfo.uThreads += tmp;
#endif
hasHTCores |= (tmp == 2);
if (hasHTCores && (tmp == 1))
{ {
AuUInt8 idx {}; AuUInt8 idx {};
while (serverId.CpuBitScanForward(idx, idx)) while (serverId.CpuBitScanForward(idx, idx))
@ -266,5 +265,76 @@ namespace Aurora::HWInfo
gCpuInfo.bMaskMTContig = !sparse; gCpuInfo.bMaskMTContig = !sparse;
gCpuInfo.bMaskMTHalf = sparse; gCpuInfo.bMaskMTHalf = sparse;
return true;
}
// Last-resort topology probe: fills gCpuInfo using nothing but GetSystemInfo().
//
// GetSystemInfo() only reports a logical processor count, so the core layout
// is a guess:
//  * odd count  -> every logical processor is modelled as its own core;
//  * even count -> adjacent logical processors (2k, 2k + 1) are paired into
//                  one two-thread core and bMaskMTHalf is set.
//
// Every logical processor is added to maskPCores; nothing is added to
// maskECores here. uThreads always holds the total logical processor count,
// matching what the other topology paths in this file store in it.
static void SetBasicWindowsXPInformation()
{
    SYSTEM_INFO sysinfo;
    GetSystemInfo(&sysinfo);

    gCpuInfo.uSocket = 1;
    gCpuInfo.uCores = 1;
    gCpuInfo.uThreads = sysinfo.dwNumberOfProcessors;

    if (sysinfo.dwNumberOfProcessors & 1)
    {
        // An odd count cannot be split into two-thread pairs.
        for (AU_ITERATE_N(i, gCpuInfo.uThreads))
        {
            // Shift in 64 bits: `1 << i` is a signed 32-bit shift that is
            // undefined/sign-extended once i reaches 31.
            // NOTE(review): assumes CpuBitId::lower and threadTopology entries are 64-bit wide - confirm.
            const auto uMask = AuUInt64(1) << i;

            gCpuInfo.maskPCores.SetBit(i);
            gCpuInfo.threadTopology.push_back(uMask);

            CpuBitId coreId;
            coreId.lower = uMask;
            gCpuInfo.coreTopology.push_back(coreId);
        }

        // One coreTopology entry was recorded per logical processor, so the
        // core count must agree with it.
        gCpuInfo.uCores = gCpuInfo.uThreads;
    }
    else
    {
        for (AU_ITERATE_N(i, gCpuInfo.uThreads))
        {
            // Only even indices start a (i, i + 1) pair.
            if (i & 1)
            {
                continue;
            }

            gCpuInfo.maskPCores.SetBit(i);
            gCpuInfo.maskPCores.SetBit(i + 1);

            const auto uMaskFirst  = AuUInt64(1) << i;
            const auto uMaskSecond = AuUInt64(1) << (i + 1);

            gCpuInfo.threadTopology.push_back(uMaskFirst);
            gCpuInfo.threadTopology.push_back(uMaskSecond);

            CpuBitId coreId;
            coreId.lower = uMaskFirst | uMaskSecond;
            gCpuInfo.coreTopology.push_back(coreId);
        }

        // uThreads stays at the total logical processor count; only the core
        // count is derived from the pairing.
        gCpuInfo.uCores = gCpuInfo.uThreads / 2;
        gCpuInfo.bMaskMTHalf = true;
    }
}
// Populates the CPU topology by trying each probe in turn and stopping at the
// first one that reports success:
//  1. SetWindows10CpuSetInfoSlow()         - returns false when pGetSystemCpuSetInformation is unavailable or fails
//  2. SetWindowsXPSp3ExtendedInformation() - returns false when pGetLogicalProcessorInformation is unavailable or fails
//  3. SetBasicWindowsXPInformation()       - unconditional fallback built on GetSystemInfo()
void SetCpuTopologyNT()
{
// Preferred path: CPU-set based enumeration.
if (SetWindows10CpuSetInfoSlow())
{
return;
}
// Second choice: logical-processor-information based enumeration.
if (SetWindowsXPSp3ExtendedInformation())
{
return;
}
// Neither API produced a result; fall back to the basic processor count.
SetBasicWindowsXPInformation();
} }
} }