AuroraRuntime/Source/HWInfo/AuCpuInfo.NT.cpp
Reece c42ebc9853 [*] Improved cache reporting under NT under various conditions
new hw: return upper bound of range at level index.
old sw: missing impl.
2022-12-14 05:25:05 +00:00

270 lines
8.5 KiB
C++

/***
Copyright (C) 2022 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: AuCpuInfo.NT.cpp
Date: 2022-1-25
Author: Reece
***/
#include <Source/RuntimeInternal.hpp>
#include "AuHWInfo.hpp"
#include "AuCpuInfo.hpp"
#include "AuCpuInfo.NT.hpp"
#if defined(AURORA_IS_MODERNNT_DERIVED)
#include <VersionHelpers.h>
#endif
// Runtime-resolved pointer to Kernel32's GetSystemCpuSetInformation export.
// It is assigned in TrySetNtCpuSetInfoSlowExtended() via GetProcAddress, and stays
// null when the export cannot be found.
// As used in this file: Information/BufferLength describe the caller's output array,
// ReturnedLength receives the number of bytes written, and Process/Flags are passed as 0.
BOOL(__stdcall *GetSystemCpuSetInformation_f)(
PSYSTEM_CPU_SET_INFORMATION Information,
ULONG BufferLength,
PULONG ReturnedLength,
HANDLE Process,
ULONG Flags
);
namespace Aurora::HWInfo
{
// Fills gCpuInfo (core/thread topology, P/E masks, socket count and cache sizes) using
// the CPU-set API, resolved dynamically from Kernel32.dll.
//
// Returns false, leaving the caller to fall back to another strategy, when the
// GetSystemCpuSetInformation export is missing or the query fails (for example when
// more than the 128 entries reserved here would be required).
// Returns true once the topology has been written; the socket/cache refinement via
// GetLogicalProcessorInformation is best effort and does not affect the return value.
static bool TrySetNtCpuSetInfoSlowExtended()
{
    SYSTEM_CPU_SET_INFORMATION cpuSetInfo[128];
    SYSTEM_LOGICAL_PROCESSOR_INFORMATION sysinfo[128];
    DWORD length = {};

    GetSystemCpuSetInformation_f = AuReinterpretCast<decltype(GetSystemCpuSetInformation_f)>(GetProcAddress(GetModuleHandleW(L"Kernel32.dll"), "GetSystemCpuSetInformation"));
    if (!GetSystemCpuSetInformation_f)
    {
        return false;
    }

    if (!GetSystemCpuSetInformation_f(cpuSetInfo, sizeof(cpuSetInfo), &length, 0, 0))
    {
        return false;
    }

    // Per-core scratch data, keyed by the CpuSet.CoreIndex reported for each logical processor
    struct CpuInfo
    {
        AuList<AuUInt8> low;     // flat logical processor ids (index + group * 64)
        AuList<CpuBitId> server; // the same ids as CpuBitId values
        CpuBitId mask;           // union of every id that belongs to this core
    };

    AuBST<AuUInt8, CpuInfo> cpuThreads;
    AuUInt8 cpuCount;

    // `length` is the byte count written by the API; convert it to an entry count
    cpuCount = AuUInt8(length / sizeof(decltype(*cpuSetInfo)));

    for (int i = 0; i < cpuCount; i++)
    {
        auto &idx = cpuThreads[cpuSetInfo[i].CpuSet.CoreIndex];

        // Win7 KAFFINITY = u64 affinity masks
        // Windows 10 + seems to be ((this->group + 1ul) * 0x100ul) + index
        // Windows internals says...
        // ULONG sets[] = { 0x100, 0x101, 0x102, 0x103 };
        // ::SetProcessDefaultCpuSets/SetThreadSelectedCpuSets(::GetCurrentProcess(), sets, _countof(sets));
        // (useless)
        // People generally isolate group and keep logical processors in a different set, kinda worthless for bitwise math
        SysAssert(cpuSetInfo[i].CpuSet.LogicalProcessorIndex < 64);

    #if defined(_AU_MASSIVE_CPUID)
        SysAssert(cpuSetInfo[i].CpuSet.Group < 4);
    #else
        SysAssert(cpuSetInfo[i].CpuSet.Group < 2);
    #endif

        // Flatten (group, index) into a single id; the asserts above keep it within 8 bits
        AuUInt8 id = AuUInt8(cpuSetInfo[i].CpuSet.LogicalProcessorIndex /*no greater than 64*/ + (cpuSetInfo[i].CpuSet.Group * 64));
        auto cpuId = CpuBitId(id);

        // Sanity check: our id must round-trip to exactly the CPU set id the OS reported
        auto sets = cpuId.ToCpuSets();
        SysAssert(sets.size() == 1);
        SysAssert(sets[0] == cpuSetInfo[i].CpuSet.Id);

        idx.server.push_back(cpuId);
        idx.low.push_back(id);
        idx.mask.Add(cpuId);
        gCpuInfo.maskAllCores.Add(cpuId);
    }

    for (const auto &[cpuId, coreIds] : cpuThreads)
    {
        AuUInt64 shortMask {};

        for (const auto &id : coreIds.server)
        {
            // TODO (scar):
            // NOTE(review): CpuSet.EfficiencyClass looks like the intended input for this split - confirm
            if (false)
            {
                gCpuInfo.maskECores.Add(id);
            }
            else
            {
                gCpuInfo.maskPCores.Add(id);
                gCpuInfo.pCoreTopology.push_back(id);
            }
        }

        for (const auto &id : coreIds.low)
        {
            // shortMask is only 64 bits wide. Ids from processor group 1 and above are >= 64;
            // shifting by them would be undefined behaviour (and alias onto group 0 bits on x86),
            // so they are left out of the short mask. coreIds.mask still carries them.
            if (id < 64)
            {
                shortMask |= AuUInt64(1) << AuUInt64(id);
            }
        }

        gCpuInfo.coreTopology.push_back(coreIds.mask);
        gCpuInfo.threadTopology.push_back(shortMask);
    }

    gCpuInfo.uSocket = 1;
    gCpuInfo.uThreads = cpuCount;
    gCpuInfo.uCores = AuUInt8(cpuThreads.size());

    // GetLogicalProcessorInformation expects the capacity of `sysinfo` in bytes on input.
    // `length` still holds the byte count returned by the CPU-set query, so it must be reset,
    // otherwise the call is told the wrong buffer size.
    length = sizeof(sysinfo);
    if (!GetLogicalProcessorInformation(sysinfo, &length))
    {
        // Topology is already in place; keep the single-socket default and skip cache data
        return true;
    }

    gCpuInfo.uSocket = 0;
    length /= sizeof(*sysinfo);

    for (auto i = 0u; i < length; i++)
    {
        if (sysinfo[i].Relationship == RelationProcessorPackage)
        {
            gCpuInfo.uSocket++;
        }

        if (sysinfo[i].Relationship == RelationCache)
        {
            // Keep the largest value seen for each cache level
            switch (sysinfo[i].Cache.Level)
            {
            case 1:
                gCpuInfo.dwCacheLine = AuMax<AuUInt32>(gCpuInfo.dwCacheLine, sysinfo[i].Cache.LineSize);
                gCpuInfo.dwCacheL1 = AuMax<AuUInt32>(gCpuInfo.dwCacheL1, sysinfo[i].Cache.Size);
                break;
            case 2:
                gCpuInfo.dwCacheL2 = AuMax<AuUInt32>(gCpuInfo.dwCacheL2, sysinfo[i].Cache.Size);
                break;
            case 3:
                gCpuInfo.dwCacheL3 = AuMax<AuUInt32>(gCpuInfo.dwCacheL3, sysinfo[i].Cache.Size);
                break;
            }
        }
    }

    return true;
}
void SetCpuTopologyNT()
{
SYSTEM_LOGICAL_PROCESSOR_INFORMATION sysinfo[128];
DWORD length = AuArraySize(sysinfo) * sizeof(*sysinfo);
if (/*SWInfo::IsWindows10OrGreater() || IsWindowsServer() */ true)
{
if (TrySetNtCpuSetInfoSlowExtended())
{
return;
}
}
if (!GetLogicalProcessorInformation(sysinfo, &length))
{
SYSTEM_INFO sysinfo;
GetSystemInfo(&sysinfo);
gCpuInfo.uSocket = 1;
gCpuInfo.uCores = 1;
gCpuInfo.uThreads = sysinfo.dwNumberOfProcessors;
return;
}
length /= sizeof(*sysinfo);
gCpuInfo.uSocket = 0;
gCpuInfo.uCores = 0;
gCpuInfo.uThreads = 0;
bool sparse = false;
bool hasHTCores = false;
for (auto i = 0u; i < length; i++)
{
if (sysinfo[i].Relationship == RelationProcessorCore)
{
auto mask = sysinfo[i].ProcessorMask;
gCpuInfo.uCores++;
gCpuInfo.threadTopology.push_back(mask);
CpuBitId serverId;
serverId.lower = mask;
gCpuInfo.coreTopology.push_back(serverId);
int counter {};
unsigned long offset {}, tmp;
#if 0
while (offset != (sizeof(offset) * 8))
{
// Count the index to a 1
if (BitScanForward(&tmp, mask >> offset) == 0) break; // mask was zero, end of scan
offset += tmp;
// Count the 1's by inverting the bitmap and counting to 1
BitScanForward(&tmp, ~(mask >> offset));
offset += tmp;
if (counter++) sparse = true;
// Increment threads by the bits set in
gCpuInfo.uThreads += tmp;
}
#else
tmp = AuPopCnt(mask);
gCpuInfo.uThreads += tmp;
#endif
hasHTCores |= (tmp == 2);
if (hasHTCores && (tmp == 1))
{
AuUInt8 idx {};
while (serverId.CpuBitScanForward(idx, idx))
{
gCpuInfo.maskECores.Add(idx);
idx++;
}
}
else
{
AuUInt8 idx {};
while (serverId.CpuBitScanForward(idx, idx))
{
gCpuInfo.maskPCores.Add(idx);
idx++;
}
gCpuInfo.pCoreTopology.push_back(mask);
}
}
else if (sysinfo[i].Relationship == RelationProcessorPackage)
{
gCpuInfo.uSocket++;
}
else if (sysinfo[i].Relationship == RelationCache)
{
switch (sysinfo[i].Cache.Level)
{
case 1:
gCpuInfo.dwCacheLine = AuMax<AuUInt32>(gCpuInfo.dwCacheLine, sysinfo[i].Cache.LineSize);
gCpuInfo.dwCacheL1 = AuMax<AuUInt32>(gCpuInfo.dwCacheL1, sysinfo[i].Cache.Size);
break;
case 2:
gCpuInfo.dwCacheL2 = AuMax<AuUInt32>(gCpuInfo.dwCacheL2, sysinfo[i].Cache.Size);
break;
case 3:
gCpuInfo.dwCacheL3 = AuMax<AuUInt32>(gCpuInfo.dwCacheL3, sysinfo[i].Cache.Size);
break;
}
}
}
gCpuInfo.bMaskMTContig = !sparse;
gCpuInfo.bMaskMTHalf = sparse;
}
}