209 lines
8.5 KiB
C++
209 lines
8.5 KiB
C++
/***
    Copyright (C) 2023-2024 J Reece Wilson (a/k/a "Reece"). All rights reserved.

    File: SMTYield.cpp
    Date: 2023-3-12
    Author: Reece
***/
|
|
#include <Source/RuntimeInternal.hpp>
|
|
#include "SMTYield.hpp"
|
|
#include <Source/HWInfo/AuCpuId.hpp>
|
|
|
|
namespace Aurora::Threading
|
|
{
|
|
// Stores uTimeout as the process-wide spin value (threadingConfig.uSpinLoopPowerA)
// in the live runtime configuration. No validation or clamping is performed here.
AUKN_SYM void SetSpinCountTimeout(AuUInt32 uTimeout)
{
    auto &threadingCfg = gRuntimeConfig.threadingConfig;
    threadingCfg.uSpinLoopPowerA = uTimeout;
}
|
|
|
|
// Returns the process-wide spin value currently held in the live runtime
// configuration (threadingConfig.uSpinLoopPowerA). Unlike GetTotalSpinCountTime(),
// no thread-local addition or platform check is applied.
AUKN_SYM AuUInt32 GetSpinCountTimeout()
{
    const auto &threadingCfg = gRuntimeConfig.threadingConfig;
    return threadingCfg.uSpinLoopPowerA;
}
|
|
|
|
// Registers an additional spin amount that GetTotalSpinCountTime() adds on top of
// the process-wide value.
//
// uTimeout: the extra amount, written to tlsSpinCountLocal.
//
// NOTE(review): judging by the names, the "has a local timeout" flag is a shared
// global while the value itself is thread-local — confirm against their declarations.
AUKN_SYM void SetThreadLocalAdditionalSpinCountTimeout(AuUInt32 uTimeout)
{
    // Raise the flag first, then record the value (same store order as before).
    gHasThreadLocalTimeout = 1;
    tlsSpinCountLocal      = uTimeout;
}
|
|
|
|
// Computes the effective spin amount for the caller:
//   * 0 when the platform is not flagged as SMP-processor-optimized;
//   * otherwise the process-wide uSpinLoopPowerA, plus tlsSpinCountLocal when
//     gHasThreadLocalTimeout is set.
AUKN_SYM AuUInt32 GetTotalSpinCountTime()
{
    // No spinning at all unless the SMP optimization flag is enabled.
    if (!Primitives::ThrdCfg::gPlatformIsSMPProcessorOptimized)
    {
        return 0;
    }

    AuUInt32 uTotalSpin = static_cast<AuUInt32>(gRuntimeConfig.threadingConfig.uSpinLoopPowerA);

    if (gHasThreadLocalTimeout)
    {
        uTotalSpin += tlsSpinCountLocal;
    }

    // Deliberately left disabled:
    //
    //   #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
    //       uCount *= 4;
    //   #endif
    //
    // Intel recommends pacing by rdtsc ticks rather than assuming that a spin
    // hint eventually sleeps the core for some unspecified period, so no
    // architecture-specific multiplier is applied.
    return uTotalSpin;
}
|
|
|
|
// Returns a read-only pointer to the live threading configuration stored inside
// gRuntimeConfig. The pointer refers to the global itself, not to a snapshot.
AUKN_SYM const ThreadingConfig *GetThreadingConfig()
{
    const ThreadingConfig *pLiveConfig = &gRuntimeConfig.threadingConfig;
    return pLiveConfig;
}
|
|
|
|
// Replaces the live threading configuration with *pUpdateConfig and re-derives the
// cached Primitives::ThrdCfg globals from it.
//
// pUpdateConfig: new configuration; a null pointer is ignored (no-op).
//
// Three fields are never taken from the caller. bPreferFutexRWLock, bPreferFutexEvent
// and bPreferEmulatedWakeOnAddress are overwritten with the values currently cached in
// Primitives::ThrdCfg, so whatever was in effect before this call stays in effect.
AUKN_SYM void SetThreadingConfig(const ThreadingConfig *pUpdateConfig)
{
    if (pUpdateConfig == nullptr)
    {
        return;
    }

    // Work on a private copy so the caller's structure is never modified.
    ThreadingConfig pinnedConfig(*pUpdateConfig);

    pinnedConfig.bPreferFutexRWLock           = Primitives::ThrdCfg::gPreferFutexRWLock;
    pinnedConfig.bPreferFutexEvent            = Primitives::ThrdCfg::gPreferFutexEvent;
    pinnedConfig.bPreferEmulatedWakeOnAddress = Primitives::ThrdCfg::gPreferEmulatedWakeOnAddress;

    gRuntimeConfig.threadingConfig = decltype(gRuntimeConfig.threadingConfig)(pinnedConfig);

    // Refresh the cached globals, then recompute the CPU-dependent thresholds.
    // NOTE(review): InitCfg() resets gForceEnableAdaptiveSpin from the new config and
    // the platform selection in InitAdaptiveThresholdFirstTime() is not re-run here —
    // confirm that is intended.
    Primitives::InitCfg();
    Primitives::InitAdaptiveThreshold();
}
|
|
|
|
// Forward declaration only; the definition is not part of this file's visible code.
// Primitives::InitAdaptiveThreshold() calls it to decide whether the futex-backed
// RW lock and event preferences may be honoured.
bool IsNativeWaitOnSupported();
|
|
}
|
|
|
|
namespace Aurora::Threading::Primitives
|
|
{
|
|
void InitAdaptiveThreshold()
|
|
{
|
|
auto uCores = AuHwInfo::GetCPUInfo().uThreads;
|
|
|
|
ThrdCfg::gCountOfPCores = AuHwInfo::GetCPUInfo().maskPCores.CpuBitCount();
|
|
|
|
bool bPermitWOAInternal = IsNativeWaitOnSupported();
|
|
|
|
gUseFutexRWLock = ThrdCfg::gPreferFutexRWLock &&
|
|
bPermitWOAInternal;
|
|
|
|
gPreferFutexEvent = ThrdCfg::gPreferFutexEvent &&
|
|
bPermitWOAInternal;
|
|
|
|
gSpinAdaptiveThreadCount = uCores;
|
|
|
|
if (uCores == 1)
|
|
{
|
|
gSpinAdaptiveThreshold = 0;
|
|
ThrdCfg::gPlatformIsSMPProcessorOptimized = false;
|
|
return;
|
|
}
|
|
|
|
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
|
|
{
|
|
#if 0
|
|
auto cpuId = AuHwInfo::cpuid(7);
|
|
ThrdCfg::gIsIntelAlderLakeOrGreater = (cpuId.ecx >> 5) & 1;
|
|
#else
|
|
auto &cpuId = AuHwInfo::GetCPUInfo().cpuId;
|
|
ThrdCfg::gIsIntelAlderLakeOrGreater = AuBitTest(cpuId.f_7_ECX, 5);
|
|
ThrdCfg::gIsZen3OrGreater = AuBitTest(cpuId.f_81_ECX, 29);
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
if (!ThrdCfg::gForceEnableAdaptiveSpin)
|
|
{
|
|
gSpinAdaptiveThreshold = 0;
|
|
return;
|
|
}
|
|
|
|
if (uCores >= 16)
|
|
{
|
|
gSpinAdaptiveThreshold = uCores / ThrdCfg::gAdaptiveSpinCUCnt16;
|
|
}
|
|
else if (uCores >= 8 && ThrdCfg::gAdaptiveSpinCUCnt8)
|
|
{
|
|
gSpinAdaptiveThreshold = uCores / ThrdCfg::gAdaptiveSpinCUCnt8;
|
|
}
|
|
else if (uCores >= 4 && ThrdCfg::gAdaptiveSpinCUCnt4)
|
|
{
|
|
gSpinAdaptiveThreshold = uCores / ThrdCfg::gAdaptiveSpinCUCnt4;
|
|
}
|
|
else if (uCores >= 0 && ThrdCfg::gAdaptiveSpinCUCnt0)
|
|
{
|
|
gSpinAdaptiveThreshold = uCores / ThrdCfg::gAdaptiveSpinCUCnt0;
|
|
}
|
|
else
|
|
{
|
|
gSpinAdaptiveThreshold = 0;
|
|
}
|
|
}
|
|
|
|
void InitAdaptiveThresholdFirstTime()
|
|
{
|
|
if (!ThrdCfg::gForceEnableAdaptiveSpin &&
|
|
ThrdCfg::gPreferEnableAdaptiveSpin)
|
|
{
|
|
#if defined(AURORA_IS_LINUX_DERIVED)
|
|
if (ThrdCfg::gPreferLinuxAdaptiveSpin)
|
|
{
|
|
ThrdCfg::gForceEnableAdaptiveSpin = true;
|
|
}
|
|
#else
|
|
if (AuSwInfo::IsWindows10OrGreater())
|
|
{
|
|
ThrdCfg::gForceEnableAdaptiveSpin = ThrdCfg::gPreferNewWin32AdaptiveSpin;
|
|
}
|
|
else
|
|
{
|
|
ThrdCfg::gForceEnableAdaptiveSpin = ThrdCfg::gPreferOldWin32AdaptiveSpin;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
InitAdaptiveThreshold();
|
|
}
|
|
|
|
void InitCfg()
|
|
{
|
|
ThrdCfg::gPlatformIsSMPProcessorOptimized = gRuntimeConfig.threadingConfig.bPlatformIsSMPProcessorOptimized;
|
|
ThrdCfg::gEnableAggressiveScheduling = gRuntimeConfig.threadingConfig.bEnableAggressiveScheduling;
|
|
ThrdCfg::gEnableAgrSchedulingRatelimit = gRuntimeConfig.threadingConfig.bEnableAgrSchedulingRatelimit;
|
|
ThrdCfg::gPreferNtCondvarModernWinSpin = gRuntimeConfig.threadingConfig.bPreferNtCondvarModernWinSpin;
|
|
ThrdCfg::gPreferNtCondvarOlderWinSpin = gRuntimeConfig.threadingConfig.bPreferNtCondvarOlderWinSpin;
|
|
ThrdCfg::gPreferNtSemaphoreSpinTryLock = gRuntimeConfig.threadingConfig.bPreferNtSemaphoreSpinTryLock;
|
|
ThrdCfg::gPreferNtMutexSpinTryLock = gRuntimeConfig.threadingConfig.bPreferNtMutexSpinTryLock;
|
|
ThrdCfg::gPreferNtCondMutexSpinTryLock = gRuntimeConfig.threadingConfig.bPreferNtCondMutexSpinTryLock;
|
|
ThrdCfg::gPreferLinuxSemaphoreSpinTryLock = gRuntimeConfig.threadingConfig.bPreferLinuxSemaphoreSpinTryLock;
|
|
ThrdCfg::gPreferLinuxMutexSpinTryLock = gRuntimeConfig.threadingConfig.bPreferLinuxMutexSpinTryLock;
|
|
ThrdCfg::gPreferLinuxCondMutexSpinTryLock = gRuntimeConfig.threadingConfig.bPreferLinuxCondMutexSpinTryLock;
|
|
ThrdCfg::gPreferEmulatedWakeOnAddress = gRuntimeConfig.threadingConfig.bPreferEmulatedWakeOnAddress;
|
|
ThrdCfg::gPreferWaitOnAddressAlwaysSpin = gRuntimeConfig.threadingConfig.bPreferWaitOnAddressAlwaysSpin;
|
|
ThrdCfg::gPreferWaitOnAddressAlwaysSpinNative = gRuntimeConfig.threadingConfig.bPreferWaitOnAddressAlwaysSpinNative;
|
|
ThrdCfg::gPreferRWLockReadLockSpin = gRuntimeConfig.threadingConfig.bPreferRWLockReadLockSpin;
|
|
ThrdCfg::gUWPNanosecondEmulationCheckFirst = gRuntimeConfig.threadingConfig.bUWPNanosecondEmulationCheckFirst;
|
|
ThrdCfg::gUWPNanosecondEmulationMaxYields = gRuntimeConfig.threadingConfig.uUWPNanosecondEmulationMaxYields;
|
|
ThrdCfg::gForceEnableAdaptiveSpin = gRuntimeConfig.threadingConfig.bForceEnableAdaptiveSpin;
|
|
ThrdCfg::gPreferEnableAdaptiveSpin = gRuntimeConfig.threadingConfig.bPreferEnableAdaptiveSpin;
|
|
ThrdCfg::gPreferLinuxAdaptiveSpin = gRuntimeConfig.threadingConfig.bPreferLinuxAdaptiveSpin;
|
|
ThrdCfg::gPreferOldWin32AdaptiveSpin = gRuntimeConfig.threadingConfig.bPreferOldWin32AdaptiveSpin;
|
|
ThrdCfg::gPreferNewWin32AdaptiveSpin = gRuntimeConfig.threadingConfig.bPreferNewWin32AdaptiveSpin;
|
|
ThrdCfg::gAdaptiveSpinCUCnt0 = gRuntimeConfig.threadingConfig.uAdaptiveSpinCUCnt0;
|
|
ThrdCfg::gAdaptiveSpinCUCnt4 = gRuntimeConfig.threadingConfig.uAdaptiveSpinCUCnt4;
|
|
ThrdCfg::gAdaptiveSpinCUCnt8 = gRuntimeConfig.threadingConfig.uAdaptiveSpinCUCnt8;
|
|
ThrdCfg::gAdaptiveSpinCUCnt16 = gRuntimeConfig.threadingConfig.uAdaptiveSpinCUCnt16;
|
|
ThrdCfg::gPreferFutexRWLock = gRuntimeConfig.threadingConfig.bPreferFutexRWLock;
|
|
ThrdCfg::gWinXpThrough7BlazeOptimizerPower = gRuntimeConfig.threadingConfig.bWinXpThrough7BlazeOptimizerPower;
|
|
ThrdCfg::gPreferLinuxPrimitivesFutexNoSpin = gRuntimeConfig.threadingConfig.bPreferLinuxPrimitivesFutexNoSpin;
|
|
ThrdCfg::gPreferUnixPrimitivesNoSpin = gRuntimeConfig.threadingConfig.bPreferUnixPrimitivesNoSpin;
|
|
ThrdCfg::gAlwaysRWLockWriteBiasOnReadLock = gRuntimeConfig.threadingConfig.bAlwaysRWLockWriteBiasOnReadLock;
|
|
ThrdCfg::gEnableRWLockWriteBiasOnReadLock = gRuntimeConfig.threadingConfig.bEnableRWLockWriteBiasOnReadLock;
|
|
ThrdCfg::gPreferFutexEvent = gRuntimeConfig.threadingConfig.bPreferFutexEvent;
|
|
}
|
|
}
|
|
|
|
// C-linkage helper: queries CPUID leaf 1 through AuHwInfo::cpuid and returns the
// top 8 bits of EBX (bits 31..24).
// NOTE(review): per the Intel SDM that field is the initial APIC ID of the executing
// logical processor — confirm AuHwInfo::cpuid's behaviour on non-x86 targets.
extern "C" AuUInt32 SMTGetAPICNumber(void)
{
    const auto leaf1 = AuHwInfo::cpuid(1);
    return leaf1.ebx >> 24;
}