[+] Global adaptive spin

This commit is contained in:
Reece Wilson 2023-08-22 12:57:47 +01:00
parent ccfd0fafab
commit 7ad725ca04
4 changed files with 155 additions and 15 deletions

View File

@ -348,6 +348,7 @@ namespace Aurora
AuUInt8 uSpinLoopPowerA { 5 }; // Nudgable spinloop power. This is our local userland niceness factor; where 1 << n is the amount of smt-yield instructions to stall for
// This is comparable to Win32's SetCriticalSectionSpinCount applied across every single AuThreadPrimitives try-lock and lock.
// Adjust this value to compensate for longer critical sections when context switching isn't preferable.
AuUInt8 uSpinLoopLinearBit { 4 };
AuUInt64 bEnableAggressiveScheduling : 1 { false };
AuUInt64 bEnableAgrSchedulingRatelimit : 1 { true };
AuUInt64 bPreferNt51XpMutexesOver8 : 1 { false };
@ -362,8 +363,14 @@ namespace Aurora
AuUInt64 bPreferLinuxCondMutexSpinTryLock : 1 { true };
AuUInt64 bPreferEmulatedWakeOnAddress : 1 { false };
AuUInt64 bPreferRWLockReadLockSpin : 1 { true };
AuUInt64 bUWPNanosecondEmulationCheckFirst: 1 { false};
AuUInt64 uUWPNanosecondEmulationMaxYields : 7 { 12 };
AuUInt64 bUWPNanosecondEmulationCheckFirst: 1 { false };
AuUInt64 uUWPNanosecondEmulationMaxYields : 7 { 12 };
AuUInt64 bForceEnableAdaptiveSpin : 1 { false };
AuUInt64 bPreferLinuxAdaptiveSpin : 1 { true };
AuUInt64 uAdaptiveSpinCUCnt0 : 4 { 0 };
AuUInt64 uAdaptiveSpinCUCnt4 : 4 { 2 };
AuUInt64 uAdaptiveSpinCUCnt8 : 4 { 3 };
AuUInt64 uAdaptiveSpinCUCnt16 : 4 { 4 };
};
struct DummyConfig

View File

@ -36,6 +36,7 @@
#include "Grug/AuGrug.hpp"
#include "Threading/AuSleep.hpp"
#include "Memory/Cache.hpp"
#include "Threading/Primitives/SMTYield.hpp"
#include "AuProcAddresses.hpp"
@ -96,6 +97,7 @@ static void Init()
Aurora::Processes::Init();
Aurora::Hashing::InitHashing();
Aurora::Async::InitAsync();
Aurora::Threading::Primitives::InitAdaptiveThresholdFirstTime();
gRuntimeRunLevel = 2;
#if defined(AURORA_PLATFORM_WIN32)

View File

@ -50,11 +50,64 @@ namespace Aurora::Threading
}
gRuntimeConfig.threadingConfig = decltype(gRuntimeConfig.threadingConfig)(ThreadingConfig(*pUpdateConfig));
Primitives::InitAdaptiveThreshold();
}
}
namespace Aurora::Threading::Primitives
{
// Recomputes the global spin parameters from the current threading config.
//
// Always refreshes gSpinLinearPart from uSpinLoopLinearBit. When adaptive spin
// is not enabled (bForceEnableAdaptiveSpin is false) the threshold is set to 0.
// Otherwise the threshold becomes <hardware thread count> / <tier divisor>,
// where the tier is picked by hardware thread count (>= 16, >= 8, >= 4, any).
//
// A tier whose divisor is configured as 0 is skipped and the next lower tier
// is tried; if no tier has a non-zero divisor the threshold is 0.
void InitAdaptiveThreshold()
{
    const auto &config = gRuntimeConfig.threadingConfig;
    const auto uCores = AuHwInfo::GetCPUInfo().uThreads;

    gSpinLinearPart = config.uSpinLoopLinearBit;

    if (!config.bForceEnableAdaptiveSpin)
    {
        gSpinAdaptiveThreshold = 0;
        return;
    }

    // Every tier must check its divisor before dividing: these are user
    // supplied 4-bit values and 0 is a legal setting (it is the default for
    // uAdaptiveSpinCUCnt0). The 16+ tier previously divided unconditionally,
    // which is an integer divide-by-zero when uAdaptiveSpinCUCnt16 is 0.
    if (uCores >= 16 && config.uAdaptiveSpinCUCnt16)
    {
        gSpinAdaptiveThreshold = uCores / config.uAdaptiveSpinCUCnt16;
    }
    else if (uCores >= 8 && config.uAdaptiveSpinCUCnt8)
    {
        gSpinAdaptiveThreshold = uCores / config.uAdaptiveSpinCUCnt8;
    }
    else if (uCores >= 4 && config.uAdaptiveSpinCUCnt4)
    {
        gSpinAdaptiveThreshold = uCores / config.uAdaptiveSpinCUCnt4;
    }
    else if (config.uAdaptiveSpinCUCnt0)
    {
        // Catch-all tier: applies to any thread count not handled above.
        gSpinAdaptiveThreshold = uCores / config.uAdaptiveSpinCUCnt0;
    }
    else
    {
        gSpinAdaptiveThreshold = 0;
    }
}
// First-time setup of adaptive spin.
//
// If the config has not already enabled adaptive spin, enable it when the
// platform default says so:
//   - Linux-derived builds: when bPreferLinuxAdaptiveSpin is set.
//   - All other builds:     when AuSwInfo::IsWindows10OrGreater() is true.
// The flag is only ever switched on here, never off. Afterwards the threshold
// is (re)computed via InitAdaptiveThreshold().
void InitAdaptiveThresholdFirstTime()
{
    auto &threadingConfig = gRuntimeConfig.threadingConfig;

    if (!threadingConfig.bForceEnableAdaptiveSpin)
    {
        // The platform query is evaluated only when the flag is not already set.
    #if defined(AURORA_IS_LINUX_DERIVED)
        const bool bPlatformWantsAdaptiveSpin = threadingConfig.bPreferLinuxAdaptiveSpin;
    #else
        const bool bPlatformWantsAdaptiveSpin = AuSwInfo::IsWindows10OrGreater();
    #endif

        if (bPlatformWantsAdaptiveSpin)
        {
            threadingConfig.bForceEnableAdaptiveSpin = true;
        }
    }

    InitAdaptiveThreshold();
}
}

View File

@ -15,6 +15,13 @@ namespace Aurora::Threading
namespace Aurora::Threading::Primitives
{
// Upper bound compared against the spinner count in the spin routine.
// Written by InitAdaptiveThreshold(); 0 selects the non-adaptive spin path.
inline AuUInt32 gSpinAdaptiveThreshold {};
// Global counter incremented with AuAtomicAdd on entry to the adaptive spin
// path and decremented with AuAtomicSub on exit.
// NOTE(review): presumably the number of threads currently spinning - confirm.
inline AuUInt32 gSpinAdaptiveCurrentCount {};
// Number of SMPPause() calls issued per spin-loop iteration.
// Written by InitAdaptiveThreshold() from threadingConfig.uSpinLoopLinearBit.
inline AuUInt32 gSpinLinearPart {};
// Recomputes gSpinLinearPart and gSpinAdaptiveThreshold from gRuntimeConfig.
void InitAdaptiveThreshold();
// Applies the platform default for adaptive spin, then calls InitAdaptiveThreshold().
void InitAdaptiveThresholdFirstTime();
static auline void SMPPause()
{
#if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
@ -39,27 +46,80 @@ namespace Aurora::Threading::Primitives
return true;
}
int loops = (1 << spin);
while (loops > 0)
if (gSpinAdaptiveThreshold)
{
SMPPause();
auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u);
loops -= 1;
if (callback())
if (uNow <= gSpinAdaptiveThreshold)
{
return true;
int loops = (1 << spin);
auto uLinear = gSpinLinearPart;
while (loops > 0)
{
for (AU_ITERATE_N(i, uLinear))
{
SMPPause();
}
loops -= 1;
if (callback())
{
AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
return true;
}
}
if (gHasThreadLocalTimeout)
{
auto uCount = tlsSpinCountLocal;
int loops = (1 << uCount);
while (loops > 0)
{
SMPPause();
loops -= 1;
if (callback())
{
AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
return true;
}
}
}
AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
}
else
{
int loops = (1 << spin) / 3;
while (loops > 0)
{
SMPPause();
loops -= 1;
if (callback())
{
AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
return true;
}
}
}
AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
}
if (gHasThreadLocalTimeout)
else
{
auto uCount = tlsSpinCountLocal;
int loops = (1 << uCount);
int loops = (1 << spin);
auto uLinear = gSpinLinearPart;
while (loops > 0)
{
SMPPause();
for (AU_ITERATE_N(i, uLinear))
{
SMPPause();
}
loops -= 1;
@ -68,6 +128,24 @@ namespace Aurora::Threading::Primitives
return true;
}
}
if (gHasThreadLocalTimeout)
{
auto uCount = tlsSpinCountLocal;
int loops = (1 << uCount);
while (loops > 0)
{
SMPPause();
loops -= 1;
if (callback())
{
return true;
}
}
}
}
return callback();