[*] Improve CpuId awareness through affinity

[*] Clean up OSThread
[*] Tweak HWInfo/CpuInfo
This commit is contained in:
Reece Wilson 2022-03-16 17:12:08 +00:00
parent ab76f77871
commit 672915cd55
10 changed files with 234 additions and 88 deletions

View File

@ -0,0 +1,44 @@
/***
Copyright (C) 2022 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: CpuBitId.NT.inl
Date: 2022-3-16
Author: Reece
***/
#pragma once
namespace Aurora::HWInfo
{
/**
 * Converts every set bit of this CpuBitId into a Windows CPU Set ID.
 *
 * A bit index is interpreted as (group * 64) + logical processor index.
 * Each ID is produced as ((group + 1) * 0x100) + logical processor index.
 * NOTE(review): this formula is the pattern observed on Windows 10+, not a
 * documented contract; the authoritative IDs come from
 * SYSTEM_CPU_SET_INFORMATION::CpuSet.Id - confirm before relying on it elsewhere.
 *
 * @return One CPU Set ID per set bit, in the order CpuBitScanForward reports
 *         them; empty when no bit is set.
 */
AuList<unsigned long> CpuBitId::ToCpuSets() const
{
    AuList<unsigned long> ret;

    AuUInt8 index {};
    while (CpuBitScanForward(index, index))
    {
        const unsigned long logicalProcessorIndex = index % 64;
        const unsigned long groupIndex = index / 64;

        ret.push_back(((groupIndex + 1ul) * 0x100ul) + logicalProcessorIndex);

        // index is 8 bits wide: incrementing past 0xFF would wrap to 0 and
        // restart the scan from the first bit, looping (and appending) forever.
        if (index == 0xFF)
        {
            break;
        }

        index++;
    }

    return ret;
}
void CpuBitId::ToMsWin7GroupAffinity(void *ptr) const
{
auto &logicalProcessorQWord = *AuReinterpretCast<AuUInt64 *>(ptr);
auto &groupIndexU16 = *AuReinterpretCast<AuUInt16 *>(AuReinterpretCast<AuUInt8 *>(ptr) + 8);
AuUInt8 index {};
while (CpuBitScanForward(index, index))
{
AuUInt64 logicalProcessorIndex = index % 64;
AuUInt64 groupIndex = index / 64;
logicalProcessorQWord |= 1ull << logicalProcessorIndex;
groupIndexU16 = AuUInt16(groupIndex);
index++;
}
}
}

View File

@ -44,7 +44,16 @@ namespace Aurora::HWInfo
inline CpuBitId &operator=(const CpuBitId &id);
inline operator bool() const;
#if defined(AURORA_IS_MODERNNT_DERIVED)
inline AuList<unsigned long> ToCpuSets() const;
inline void ToMsWin7GroupAffinity(void *ptr) const;
#endif
};
}
#include "CpuBitId.inl"
#if defined(AURORA_IS_MODERNNT_DERIVED)
#include "CpuBitId.NT.inl"
#endif

View File

@ -54,19 +54,32 @@ namespace Aurora::HWInfo
}
else if (offset >= 128)
{
if (!AuBitScanForward(index, AuUInt64(upper2) >> AuUInt64(offset - 128))) return false;
if (!AuBitScanForward(index, AuUInt64(upper2) >> AuUInt64(offset - 128)))
{
return CpuBitScanForward(index, 128);
}
index += 128;
}
else
#endif
if (offset >= 64)
#else
if (offset >= 128)
{
if (!AuBitScanForward(index, AuUInt64(upper) >> AuUInt64(offset - 64))) return false;
return false;
}
#endif
else if (offset >= 64)
{
if (!AuBitScanForward(index, AuUInt64(upper) >> AuUInt64(offset - 64)))
{
return CpuBitScanForward(index, 128);
}
index += 64;
}
else
{
if (!AuBitScanForward(index, AuUInt64(lower) >> AuUInt64(0))) return false;
if (!AuBitScanForward(index, AuUInt64(lower) >> AuUInt64(offset)))
{
return CpuBitScanForward(index, 64);
}
}
return true;

View File

@ -23,6 +23,7 @@ namespace Aurora::HWInfo
AuList<AuInt64> threadTopology;
AuList<CpuBitId> serverTopology;
CpuBitId maskECores;
CpuBitId entireCpu;
bool bMaskMTContig;
bool bMaskMTHalf;

View File

@ -12,6 +12,11 @@ namespace Aurora::Loop
struct ILoopSource;
}
namespace Aurora::HWInfo
{
struct CpuBitId;
}
namespace Aurora::Threading::Threads
{
class TLSView;
@ -42,11 +47,11 @@ namespace Aurora::Threading::Threads
virtual void SendExitSignal() = 0;
virtual void SetPrio(EThreadPrio prio) = 0;
virtual void SetAffinity(AuUInt64 mask) = 0;
virtual void SetAffinity(const HWInfo::CpuBitId &mask) = 0;
virtual void SetName(const AuString &name) = 0;
virtual EThreadPrio GetPrio() = 0;
virtual AuUInt64 GetMask() = 0;
virtual HWInfo::CpuBitId GetMask() = 0;
virtual AuString GetName() = 0;
/// Registers a thread feature _not_ calling on init

View File

@ -48,12 +48,35 @@ namespace Aurora::HWInfo
for (int i = 0; i < cpuCount; i++)
{
auto &idx = cpuThreads[cpuSetInfo[i].CpuSet.CoreIndex + cpuSetInfo[i].CpuSet.Group];
AuUInt8 id = AuUInt8(cpuSetInfo[i].CpuSet.LogicalProcessorIndex + cpuSetInfo[i].CpuSet.Group);
auto &idx = cpuThreads[cpuSetInfo[i].CpuSet.CoreIndex];
// Win7 KAFFINITY = u64 affinity masks
// Windows 10 + seems to be ((this->group + 1ul) * 0x100ul) + index
// Windows internals says...
// ULONG sets[] = { 0x100, 0x101, 0x102, 0x103 };
// ::SetProcessDefaultCpuSets/SetThreadSelectedCpuSets(::GetCurrentProcess(), sets, _countof(sets));
// (useless)
// People generally isolate group and keep logical processors in a different set, kinda worthless for bitwise math
SysAssert(cpuSetInfo[i].CpuSet.LogicalProcessorIndex < 64);
#if defined(_AU_MASSIVE_CPUID)
SysAssert(cpuSetInfo[i].CpuSet.Group < 4);
#else
SysAssert(cpuSetInfo[i].CpuSet.Group < 2);
#endif
AuUInt8 id = AuUInt8(cpuSetInfo[i].CpuSet.LogicalProcessorIndex /*no greater than 64*/ + (cpuSetInfo[i].CpuSet.Group * 64));
auto cpuId = CpuBitId(id);
auto sets = cpuId.ToCpuSets();
SysAssert(sets.size() == 1);
SysAssert(sets[0] == cpuSetInfo[i].CpuSet.Id);
idx.server.push_back(cpuId);
idx.low.push_back(id);
idx.mask.Add(cpuId);
gCpuInfo.entireCpu.Add(cpuId);
}
for (const auto &[cpuId, coreIds] : cpuThreads)

View File

@ -65,6 +65,7 @@ namespace Aurora::HWInfo
mask.lower = shortMask | shortMask2;
gCpuInfo.serverTopology.push_back(mask);
gCpuInfo.threadTopology.push_back(shortMask);
gCpuInfo.entireCpu.Add(mask);
}
}
else
@ -76,6 +77,7 @@ namespace Aurora::HWInfo
mask.lower = shortMask;
gCpuInfo.serverTopology.push_back(mask);
gCpuInfo.threadTopology.push_back(shortMask);
gCpuInfo.entireCpu.Add(mask);
}
}
}

View File

@ -25,7 +25,7 @@ namespace Aurora::SWInfo
static bool IsWindowsEnterpriseBranch()
{
OSVERSIONINFOEXW osvi = {sizeof(osvi), 0, 0, 0, 0, {0}, 0, 0, VER_SUITE_ENTERPRISE, 0};
OSVERSIONINFOEXW osvi = {sizeof(osvi), 0, 0, 0, 0, {0}, 0, 0, VER_SUITE_ENTERPRISE, 0, 0};
DWORDLONG const dwlConditionMask = VerSetConditionMask(0, VER_SUITENAME, VER_EQUAL);
return !VerifyVersionInfoW(&osvi, VER_SUITENAME, dwlConditionMask);

View File

@ -32,29 +32,29 @@ namespace Aurora::Threading::Threads
OSThread::OSThread(const ThreadInfo &info) : info_(info)
{
name_ = info.name.value_or("Aurora Thread");
terminated_ = Primitives::EventShared(true, false);
this->name_ = info.name.value_or("Aurora Thread");
this->terminated_ = Primitives::EventShared(true, false);
// maybe we should atomic exchange compare these when needed frogthink
terminateSignal_ = Primitives::EventShared(true, false, true);
terminatedSignalLs_ = Loop::NewLSEvent(true, false);
terminateSignalLs_ = Loop::NewLSEvent(true, false, true);
this->terminateSignal_ = Primitives::EventShared(true, false, true);
this->terminatedSignalLs_ = Loop::NewLSEvent(true, false);
this->terminateSignalLs_ = Loop::NewLSEvent(true, false, true);
exitOnlyOnce_ = Primitives::CriticalSectionUnique();
SysAssert(terminated_ ? true : false, "out of memory");
this->exitOnlyOnce_ = Primitives::CriticalSectionUnique();
SysAssert(this->terminated_ ? true : false, "out of memory");
}
OSThread::OSThread() : info_(gDummyThreadInfo)
{
name_ = "Main Thread";
terminated_ = Primitives::EventShared(true, false);
exitOnlyOnce_ = Primitives::CriticalSectionUnique();
this->name_ = "Main Thread";
this->terminated_ = Primitives::EventShared(true, false);
this->exitOnlyOnce_ = Primitives::CriticalSectionUnique();
}
OSThread::OSThread(AuUInt64 id) : info_(gDummyThreadInfo)
{
name_ = "System Thread";
handle_ = reinterpret_cast<decltype(handle_)>(id);
this->name_ = "System Thread";
this->handle_ = reinterpret_cast<decltype(handle_)>(id);
}
OSThread::~OSThread()
@ -62,20 +62,20 @@ namespace Aurora::Threading::Threads
bool bDetached {};
bool bDetachedSuccess {};
if (contextUsed_)
if (this->contextUsed_)
{
if (detached_)
if (this->detached_)
{
bDetached = true;
}
else
{
Exit();
WaitFor(terminated_.get());
WaitFor(this->terminated_.get());
}
}
terminated_.reset();
this->terminated_.reset();
FreeOSContext();
if (bDetached)
@ -112,7 +112,7 @@ namespace Aurora::Threading::Threads
{
tls_.reset();
for (const auto &feature : threadFeatures_)
for (const auto &feature : this->threadFeatures_)
{
feature->Cleanup();
}
@ -125,22 +125,22 @@ namespace Aurora::Threading::Threads
{
return;
}
AuTryInsert(threadFeatures_, feature);
AuTryInsert(this->threadFeatures_, feature);
}
void OSThread::Detach()
{
detached_ = true;
this->detached_ = true;
}
AuSPtr<IWaitable> OSThread::AsWaitable()
{
return terminated_;
return this->terminated_;
}
void OSThread::SendExitSignal()
{
exiting_ = true;
this->exiting_ = true;
if (this->terminateSignalLs_)
{
@ -155,26 +155,26 @@ namespace Aurora::Threading::Threads
bool OSThread::Run()
{
if (!terminated_)
if (!this->terminated_)
{
SysPanic("::Run called on system thread");
}
if (AuExchange(contextUsed_, true))
if (AuExchange(this->contextUsed_, true))
{
return false;
}
terminated_->Reset();
this->terminated_->Reset();
return ExecuteNewOSContext([=]()
{
try
{
// these functional backends are being deprecated
if (info_.callbacks)
if (this->info_.callbacks)
{
info_.callbacks->OnEntry(this);
this->info_.callbacks->OnEntry(this);
}
}
catch (...)
@ -186,7 +186,7 @@ namespace Aurora::Threading::Threads
void OSThread::Exit()
{
while ((!terminated_) || (!terminated_->TryLock()))
while ((!this->terminated_) || (!this->terminated_->TryLock()))
{
if (Exit(false)) break;
}
@ -214,22 +214,22 @@ namespace Aurora::Threading::Threads
else
{
// exit signal
exiting_ = true;
this->exiting_ = true;
if (!terminated_)
if (!this->terminated_)
{
return true;
}
// attempt to join with the thread once it has exited, or timeout
if (WaitFor(terminated_.get(), 15 * 1000))
if (WaitFor(this->terminated_.get(), 15 * 1000))
{
return true;
}
// Do not force terminate if we're marked as dead and still running
// The thread must've requested suicide and got stuck in a lengthy clean up effort
if (!exitOnlyOnce_->TryLock())
if (!this->exitOnlyOnce_->TryLock())
{
AuLogWarn("Watchdog error - OS thread context didn't finish in 15 seconds, but he should exiting now.");
return false;
@ -246,41 +246,51 @@ namespace Aurora::Threading::Threads
this->InternalKill(true);
});
exitOnlyOnce_->Unlock();
this->exitOnlyOnce_->Unlock();
}
return true;
}
bool OSThread::Exiting()
{
return exiting_;
return this->exiting_;
}
void OSThread::SetName(const AuString &name)
{
name_ = name;
this->name_ = name;
}
EThreadPrio OSThread::GetPrio()
{
return prio_;
return this->prio_;
}
AuUInt64 OSThread::GetMask()
AuHwInfo::CpuBitId OSThread::GetMask()
{
return affinityProcessMask_;
return this->mask_;
}
AuString OSThread::GetName()
{
return name_;
return this->name_;
}
void OSThread::SetAffinity(AuUInt64 mask)
void OSThread::SetAffinity(const HWInfo::CpuBitId &mask)
{
if (mask == 0) mask = 0xFFFFFFFFFFFFFFFF;
affinityProcessMask_ = mask;
UpdateAffinity(mask);
auto zero = HWInfo::CpuBitId();
if (mask == zero ||
mask == zero.Not())
{
this->mask_ = HWInfo::GetCPUInfo().entireCpu;
}
else
{
this->mask_ = mask;
}
UpdateAffinity(this->mask_);
}
void OSThread::SetPrio(EThreadPrio prio)
@ -291,26 +301,26 @@ namespace Aurora::Threading::Threads
AuSPtr<TLSView> OSThread::GetTlsView()
{
if (!tls_)
if (!this->tls_)
{
tls_ = AuMakeShared<TLSViewImpl>();
this->tls_ = AuMakeShared<TLSViewImpl>();
}
return tls_;
return this->tls_;
}
bool OSThread::ExecuteNewOSContext(AuFunction<void()> task)
{
task_ = task;
this->task_ = task;
auto ret = SpawnThread([this]()
{
this->_ThreadEP();
}, GetName(), info_.stackSize);
}, GetName(), this->info_.stackSize);
if (ret.first)
{
handle_ = (decltype(handle_))ret.second;
this->handle_ = (decltype(handle_))ret.second;
}
return ret.first;
@ -399,6 +409,8 @@ namespace Aurora::Threading::Threads
info.dwThreadID = ::GetThreadId(this->handle_);
info.dwFlags = 0;
auto raise = AuStaticCast<void(__cdecl *)(THREADNAME_INFO &)>([](THREADNAME_INFO &info)
{
__try
{
RaiseException(kMSVCExceptionSetName, 0, sizeof(info) / sizeof(ULONG_PTR), (ULONG_PTR *)&info);
@ -406,6 +418,9 @@ namespace Aurora::Threading::Threads
__except (EXCEPTION_EXECUTE_HANDLER)
{
}
});
raise(info);
}
else
{
@ -423,7 +438,7 @@ namespace Aurora::Threading::Threads
#elif defined(AURORA_HAS_PTHREADS)
pthread_setname_np(handle_, name_.c_str());
pthread_setname_np(this->handle_, this->name_.c_str());
#endif
}
@ -448,8 +463,8 @@ namespace Aurora::Threading::Threads
#elif defined(AURORA_HAS_PTHREADS)
this->unixThreadId_ = 0; // !!!!
#endif
UpdatePrio(prio_);
SetAffinity(affinityProcessMask_);
UpdatePrio(this->prio_);
SetAffinity(this->mask_);
UpdateName();
}
@ -508,7 +523,7 @@ namespace Aurora::Threading::Threads
return;
}
SetThreadPriority(handle_, *val);
SetThreadPriority(this->handle_, *val);
#elif defined(AURORA_HAS_PTHREADS)
@ -522,14 +537,14 @@ namespace Aurora::Threading::Threads
sched_param param {};
param.sched_priority = sched_get_priority_min(SCHED_RR);
if (pthread_setschedparam(handle_, SCHED_RR, &param) == 0)
if (pthread_setschedparam(this->handle_, SCHED_RR, &param) == 0)
{
return;
}
// fall through on error
}
else if (prio_ == EThreadPrio::ePrioRT)
else if (this->prio_ == EThreadPrio::ePrioRT)
{
int policyNonRT =
#if defined(AURORA_IS_XNU_DERIVED)
@ -540,7 +555,7 @@ namespace Aurora::Threading::Threads
sched_param param {};
param.sched_priority = sched_get_priority_min(policyNonRT);
pthread_setschedparam(handle_, policyNonRT, &param);
pthread_setschedparam(this->handle_, policyNonRT, &param);
}
const int *val;
@ -555,18 +570,53 @@ namespace Aurora::Threading::Threads
}
#endif
prio_ = prio;
this->prio_ = prio;
}
void OSThread::UpdateAffinity(AuUInt64 mask)
void OSThread::UpdateAffinity(const HWInfo::CpuBitId &mask)
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
if (handle_ == INVALID_HANDLE_VALUE)
if (this->handle_ == INVALID_HANDLE_VALUE)
{
return;
}
if ((AuBuild::kCurrentPlatform != AuBuild::EPlatform::ePlatformWin32) || (AuSwInfo::IsWindows10OrGreater()))
{
static BOOL(WINAPI * SetThreadSelectedCpuSets_f)(HANDLE, const ULONG *, ULONG);
if (!SetThreadSelectedCpuSets_f)
{
SetThreadSelectedCpuSets_f = AuReinterpretCast<decltype(SetThreadSelectedCpuSets_f)>(GetProcAddress(GetModuleHandleW(L"Kernel32.dll"), "SetThreadSelectedCpuSets"));
}
auto sets = mask.ToCpuSets();
if (SetThreadSelectedCpuSets_f)
{
if (SetThreadSelectedCpuSets_f(this->handle_, sets.data(), sets.size()))
{
// happy days :D
return;
}
SysPushErrorUnavailableError("SetThreadSelectedCpuSets is expected on modern NT (CoreOS?) excluding Windows; or Win10+");
}
}
#if defined(AURORA_PLATFORM_WIN32)
GROUP_AFFINITY affinityGroup {0};
mask.ToMsWin7GroupAffinity(&affinityGroup);
if (SetThreadGroupAffinity(this->handle_, &affinityGroup, nullptr))
{
return;
}
SetThreadAffinityMask(handle_, static_cast<DWORD_PTR>(mask));
#endif
#endif
SysPushErrorUnavailableError("Couldn't set thread affinity");
}
void OSThread::OSDeatach()
@ -576,7 +626,7 @@ namespace Aurora::Threading::Threads
bool OSThread::InternalKill(bool locked)
{
if (terminated_)
if (this->terminated_)
{
if (!locked)
{
@ -628,7 +678,7 @@ namespace Aurora::Threading::Threads
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
if (handle_ == INVALID_HANDLE_VALUE)
if (this->handle_ == INVALID_HANDLE_VALUE)
{
return;
}
@ -639,7 +689,7 @@ namespace Aurora::Threading::Threads
}
else
{
TerminateThread(handle_, 0);
TerminateThread(this->handle_, 0);
}
#elif defined(AURORA_HAS_PTHREADS)
@ -656,10 +706,7 @@ namespace Aurora::Threading::Threads
void OSThread::FreeOSContext()
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
if (auto handle = AuExchange(handle_, {}))
{
CloseHandle(handle_);
}
AuWin32CloseHandle(this->handle_);
#endif
}
@ -680,6 +727,8 @@ namespace Aurora::Threading::Threads
void InitThreading()
{
#if defined(AURORA_PLATFORM_WIN32)
AuxUlibInitialize();
#endif
}
}

View File

@ -23,11 +23,11 @@ namespace Aurora::Threading::Threads
void SendExitSignal() override;
void SetPrio(EThreadPrio prio) override;
void SetAffinity(AuUInt64 mask) override;
void SetAffinity(const HWInfo::CpuBitId &mask) override;
void SetName(const AuString &name) override;
EThreadPrio GetPrio() override;
AuUInt64 GetMask() override;
HWInfo::CpuBitId GetMask() override;
AuString GetName() override;
AuSPtr<TLSView> GetTlsView() override;
@ -48,7 +48,7 @@ namespace Aurora::Threading::Threads
bool Exit(bool willReturnToOS);
bool ExecuteNewOSContext(AuFunction<void()> task);
void UpdatePrio(EThreadPrio prio);
void UpdateAffinity(AuUInt64 mask);
void UpdateAffinity(const HWInfo::CpuBitId &mask);
void UpdateName();
void OSAttach();
void OSDeatach();
@ -65,7 +65,7 @@ namespace Aurora::Threading::Threads
OSThread * tlsReferenceThread_ {};
AuString name_;
ThreadInfo info_;
AuUInt64 affinityProcessMask_ = 0xFFFFFFFFFFFFFFFF;
HWInfo::CpuBitId mask_ = HWInfo::CpuBitId().Not();
EThreadPrio prio_ = EThreadPrio::ePrioNormal;
bool exiting_{};