AuroraRuntime/Source/Threading/Primitives/SMTYield.cpp
2024-05-05 19:42:10 +01:00

209 lines
8.5 KiB
C++

/***
Copyright (C) 2023-2024 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: SMTYield.cpp
Date: 2023-3-12
Author: Reece
***/
#include <Source/RuntimeInternal.hpp>
#include "SMTYield.hpp"
#include <Source/HWInfo/AuCpuId.hpp>
namespace Aurora::Threading
{
/**
 * Stores the process-wide spin count in the runtime threading configuration.
 *
 * @param uTimeout value written to threadingConfig.uSpinLoopPowerA
 */
AUKN_SYM void SetSpinCountTimeout(AuUInt32 uTimeout)
{
    auto &threadingCfg = gRuntimeConfig.threadingConfig;
    threadingCfg.uSpinLoopPowerA = uTimeout;
}
/**
 * Reads back the process-wide spin count from the runtime threading configuration.
 *
 * @return the current value of threadingConfig.uSpinLoopPowerA
 */
AUKN_SYM AuUInt32 GetSpinCountTimeout()
{
    const auto &threadingCfg = gRuntimeConfig.threadingConfig;
    return threadingCfg.uSpinLoopPowerA;
}
/**
 * Registers an additional spin count that GetTotalSpinCountTime() adds on top of
 * the global spin count.
 *
 * @param uTimeout extra spin count stored in tlsSpinCountLocal
 */
AUKN_SYM void SetThreadLocalAdditionalSpinCountTimeout(AuUInt32 uTimeout)
{
// Raise the flag that makes GetTotalSpinCountTime() consult tlsSpinCountLocal.
// Nothing in this file ever clears it again.
gHasThreadLocalTimeout = 1;
// NOTE(review): the "tls" prefix suggests this is thread-local storage while the
// flag above is shared - confirm against the declarations.
tlsSpinCountLocal = uTimeout;
}
/**
 * Computes the effective spin count: the global spin count plus the additional
 * count registered via SetThreadLocalAdditionalSpinCountTimeout(), if any.
 *
 * @return 0 when ThrdCfg::gPlatformIsSMPProcessorOptimized is false; otherwise the
 *         sum described above (unsigned 32-bit arithmetic).
 */
AUKN_SYM AuUInt32 GetTotalSpinCountTime()
{
    // Spinning is disabled outright when the SMP-optimized flag is off.
    if (!Primitives::ThrdCfg::gPlatformIsSMPProcessorOptimized)
    {
        return 0;
    }

    const AuUInt32 uGlobalSpin = AuUInt32(gRuntimeConfig.threadingConfig.uSpinLoopPowerA);
    const AuUInt32 uExtraSpin  = gHasThreadLocalTimeout ? AuUInt32(tlsSpinCountLocal) : AuUInt32(0);

    // Design note: an x86/x64-only "multiply by 4" scale factor was considered here
    // and intentionally left out. Intel recommends pacing spins by RDTSC ticks rather
    // than assuming each pause sleeps for some unspecified micro-op period.
    return uGlobalSpin + uExtraSpin;
}
/**
 * Exposes the live threading configuration held inside gRuntimeConfig.
 *
 * @return read-only pointer to gRuntimeConfig.threadingConfig (never null)
 */
AUKN_SYM const ThreadingConfig *GetThreadingConfig()
{
    const ThreadingConfig *pActiveConfig = &gRuntimeConfig.threadingConfig;
    return pActiveConfig;
}
/**
 * Replaces the runtime threading configuration and re-derives the cached ThrdCfg
 * state from it.
 *
 * Three preferences are not taken from the caller: bPreferFutexRWLock,
 * bPreferFutexEvent and bPreferEmulatedWakeOnAddress are overwritten with the values
 * currently cached in Primitives::ThrdCfg before the new configuration is stored.
 *
 * @param pUpdateConfig new configuration; a null pointer is ignored.
 */
AUKN_SYM void SetThreadingConfig(const ThreadingConfig *pUpdateConfig)
{
    if (pUpdateConfig)
    {
        using ConfigStorage_t = decltype(gRuntimeConfig.threadingConfig);

        ThreadingConfig pending(*pUpdateConfig);

        // Pin these to the currently cached values, discarding what the caller passed.
        pending.bPreferFutexRWLock           = Primitives::ThrdCfg::gPreferFutexRWLock;
        pending.bPreferFutexEvent            = Primitives::ThrdCfg::gPreferFutexEvent;
        pending.bPreferEmulatedWakeOnAddress = Primitives::ThrdCfg::gPreferEmulatedWakeOnAddress;

        gRuntimeConfig.threadingConfig = ConfigStorage_t(pending);

        // Refresh the cached globals, then recompute the adaptive spin threshold.
        Primitives::InitCfg();
        Primitives::InitAdaptiveThreshold();
    }
}
// Forward declaration; the definition is not in the visible part of this file.
// Primitives::InitAdaptiveThreshold() uses the result to decide whether the
// futex-backed RW lock and event preferences may take effect.
bool IsNativeWaitOnSupported();
}
namespace Aurora::Threading::Primitives
{
/**
 * Recomputes the hardware-dependent threading state from the CPU info and the
 * cached ThrdCfg values:
 *
 *  - ThrdCfg::gCountOfPCores, gUseFutexRWLock, gPreferFutexEvent and
 *    gSpinAdaptiveThreadCount;
 *  - on x86/x64, the ThrdCfg::gIsIntelAlderLakeOrGreater / gIsZen3OrGreater flags;
 *  - gSpinAdaptiveThreshold, as (hardware thread count / per-tier divisor).
 *
 * A tier whose divisor is zero is skipped and the next lower tier is tried. If no
 * tier applies, or adaptive spinning is not force-enabled, the threshold is 0.
 *
 * With exactly one hardware thread, spinning is disabled entirely:
 * ThrdCfg::gPlatformIsSMPProcessorOptimized is cleared and the threshold is 0.
 */
void InitAdaptiveThreshold()
{
    auto uCores = AuHwInfo::GetCPUInfo().uThreads;

    ThrdCfg::gCountOfPCores = AuHwInfo::GetCPUInfo().maskPCores.CpuBitCount();

    // The futex-backed primitives are only allowed when a native wait-on-address
    // facility is reported as available.
    bool bPermitWOAInternal = IsNativeWaitOnSupported();

    gUseFutexRWLock = ThrdCfg::gPreferFutexRWLock &&
                      bPermitWOAInternal;

    gPreferFutexEvent = ThrdCfg::gPreferFutexEvent &&
                        bPermitWOAInternal;

    gSpinAdaptiveThreadCount = uCores;

    if (uCores == 1)
    {
        // Single hardware thread: GetTotalSpinCountTime() returns 0 once this flag is cleared.
        gSpinAdaptiveThreshold = 0;
        ThrdCfg::gPlatformIsSMPProcessorOptimized = false;
        return;
    }

#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
    {
    #if 0
        auto cpuId = AuHwInfo::cpuid(7);
        ThrdCfg::gIsIntelAlderLakeOrGreater = (cpuId.ecx >> 5) & 1;
    #else
        auto &cpuId = AuHwInfo::GetCPUInfo().cpuId;
        // NOTE(review): presumably leaf 7 ECX bit 5 is WAITPKG and leaf 0x80000001 ECX
        // bit 29 is MONITORX, used as proxies for the CPU generation - confirm against
        // the vendor manuals.
        ThrdCfg::gIsIntelAlderLakeOrGreater = AuBitTest(cpuId.f_7_ECX, 5);
        ThrdCfg::gIsZen3OrGreater = AuBitTest(cpuId.f_81_ECX, 29);
    #endif
    }
#endif

    if (!ThrdCfg::gForceEnableAdaptiveSpin)
    {
        gSpinAdaptiveThreshold = 0;
        return;
    }

    // Every tier checks its divisor for zero. The 16+ tier previously divided
    // unconditionally, so a zero gAdaptiveSpinCUCnt16 was a division by zero.
    // It now falls through to the next tier, like the others.
    if (uCores >= 16 && ThrdCfg::gAdaptiveSpinCUCnt16)
    {
        gSpinAdaptiveThreshold = uCores / ThrdCfg::gAdaptiveSpinCUCnt16;
    }
    else if (uCores >= 8 && ThrdCfg::gAdaptiveSpinCUCnt8)
    {
        gSpinAdaptiveThreshold = uCores / ThrdCfg::gAdaptiveSpinCUCnt8;
    }
    else if (uCores >= 4 && ThrdCfg::gAdaptiveSpinCUCnt4)
    {
        gSpinAdaptiveThreshold = uCores / ThrdCfg::gAdaptiveSpinCUCnt4;
    }
    else if (uCores >= 0 && ThrdCfg::gAdaptiveSpinCUCnt0)
    {
        gSpinAdaptiveThreshold = uCores / ThrdCfg::gAdaptiveSpinCUCnt0;
    }
    else
    {
        gSpinAdaptiveThreshold = 0;
    }
}
/**
 * Variant of InitAdaptiveThreshold() that first resolves the "prefer adaptive spin"
 * setting into the "force" flag for the current platform.
 *
 * The resolution only runs when adaptive spin is preferred but not already forced:
 *  - Linux-derived builds: force is set when gPreferLinuxAdaptiveSpin is set.
 *  - All other builds: force takes gPreferNewWin32AdaptiveSpin when
 *    AuSwInfo::IsWindows10OrGreater() is true, else gPreferOldWin32AdaptiveSpin.
 */
void InitAdaptiveThresholdFirstTime()
{
    const bool bResolvePreference = !ThrdCfg::gForceEnableAdaptiveSpin &&
                                    ThrdCfg::gPreferEnableAdaptiveSpin;

    if (bResolvePreference)
    {
    #if defined(AURORA_IS_LINUX_DERIVED)
        if (ThrdCfg::gPreferLinuxAdaptiveSpin)
        {
            ThrdCfg::gForceEnableAdaptiveSpin = true;
        }
    #else
        ThrdCfg::gForceEnableAdaptiveSpin = AuSwInfo::IsWindows10OrGreater()
                                          ? ThrdCfg::gPreferNewWin32AdaptiveSpin
                                          : ThrdCfg::gPreferOldWin32AdaptiveSpin;
    #endif
    }

    InitAdaptiveThreshold();
}
/**
 * Copies every threading option from gRuntimeConfig.threadingConfig into its
 * matching ThrdCfg global. It is a plain field-by-field copy with no validation
 * or derivation.
 */
void InitCfg()
{
    const auto &cfg = gRuntimeConfig.threadingConfig;

    ThrdCfg::gPlatformIsSMPProcessorOptimized       = cfg.bPlatformIsSMPProcessorOptimized;
    ThrdCfg::gEnableAggressiveScheduling            = cfg.bEnableAggressiveScheduling;
    ThrdCfg::gEnableAgrSchedulingRatelimit          = cfg.bEnableAgrSchedulingRatelimit;

    ThrdCfg::gPreferNtCondvarModernWinSpin          = cfg.bPreferNtCondvarModernWinSpin;
    ThrdCfg::gPreferNtCondvarOlderWinSpin           = cfg.bPreferNtCondvarOlderWinSpin;
    ThrdCfg::gPreferNtSemaphoreSpinTryLock          = cfg.bPreferNtSemaphoreSpinTryLock;
    ThrdCfg::gPreferNtMutexSpinTryLock              = cfg.bPreferNtMutexSpinTryLock;
    ThrdCfg::gPreferNtCondMutexSpinTryLock          = cfg.bPreferNtCondMutexSpinTryLock;

    ThrdCfg::gPreferLinuxSemaphoreSpinTryLock       = cfg.bPreferLinuxSemaphoreSpinTryLock;
    ThrdCfg::gPreferLinuxMutexSpinTryLock           = cfg.bPreferLinuxMutexSpinTryLock;
    ThrdCfg::gPreferLinuxCondMutexSpinTryLock       = cfg.bPreferLinuxCondMutexSpinTryLock;

    ThrdCfg::gPreferEmulatedWakeOnAddress           = cfg.bPreferEmulatedWakeOnAddress;
    ThrdCfg::gPreferWaitOnAddressAlwaysSpin         = cfg.bPreferWaitOnAddressAlwaysSpin;
    ThrdCfg::gPreferWaitOnAddressAlwaysSpinNative   = cfg.bPreferWaitOnAddressAlwaysSpinNative;
    ThrdCfg::gPreferRWLockReadLockSpin              = cfg.bPreferRWLockReadLockSpin;

    ThrdCfg::gUWPNanosecondEmulationCheckFirst      = cfg.bUWPNanosecondEmulationCheckFirst;
    ThrdCfg::gUWPNanosecondEmulationMaxYields       = cfg.uUWPNanosecondEmulationMaxYields;

    ThrdCfg::gForceEnableAdaptiveSpin               = cfg.bForceEnableAdaptiveSpin;
    ThrdCfg::gPreferEnableAdaptiveSpin              = cfg.bPreferEnableAdaptiveSpin;
    ThrdCfg::gPreferLinuxAdaptiveSpin               = cfg.bPreferLinuxAdaptiveSpin;
    ThrdCfg::gPreferOldWin32AdaptiveSpin            = cfg.bPreferOldWin32AdaptiveSpin;
    ThrdCfg::gPreferNewWin32AdaptiveSpin            = cfg.bPreferNewWin32AdaptiveSpin;

    // Per-tier divisors consumed by InitAdaptiveThreshold().
    ThrdCfg::gAdaptiveSpinCUCnt0                    = cfg.uAdaptiveSpinCUCnt0;
    ThrdCfg::gAdaptiveSpinCUCnt4                    = cfg.uAdaptiveSpinCUCnt4;
    ThrdCfg::gAdaptiveSpinCUCnt8                    = cfg.uAdaptiveSpinCUCnt8;
    ThrdCfg::gAdaptiveSpinCUCnt16                   = cfg.uAdaptiveSpinCUCnt16;

    ThrdCfg::gPreferFutexRWLock                     = cfg.bPreferFutexRWLock;
    ThrdCfg::gWinXpThrough7BlazeOptimizerPower      = cfg.bWinXpThrough7BlazeOptimizerPower;
    ThrdCfg::gPreferLinuxPrimitivesFutexNoSpin      = cfg.bPreferLinuxPrimitivesFutexNoSpin;
    ThrdCfg::gPreferUnixPrimitivesNoSpin            = cfg.bPreferUnixPrimitivesNoSpin;
    ThrdCfg::gAlwaysRWLockWriteBiasOnReadLock       = cfg.bAlwaysRWLockWriteBiasOnReadLock;
    ThrdCfg::gEnableRWLockWriteBiasOnReadLock       = cfg.bEnableRWLockWriteBiasOnReadLock;
    ThrdCfg::gPreferFutexEvent                      = cfg.bPreferFutexEvent;
}
}
/**
 * C-linkage helper returning the top byte (bits 31..24) of EBX from
 * AuHwInfo::cpuid(1).
 *
 * NOTE(review): on x86, CPUID leaf 1 EBX[31:24] is documented as the initial APIC ID
 * of the executing logical processor - confirm AuHwInfo::cpuid maps directly to the
 * CPUID instruction.
 */
extern "C" AuUInt32 SMTGetAPICNumber(void)
{
    const auto leaf1 = AuHwInfo::cpuid(1);
    return leaf1.ebx >> 24;
}