/*** Copyright (C) 2023-2024 J Reece Wilson (a/k/a "Reece"). All rights reserved. File: SMTYield.cpp Date: 2023-3-12 Author: Reece ***/ #include #include "SMTYield.hpp" #include namespace Aurora::Threading { AUKN_SYM void SetSpinCountTimeout(AuUInt32 uTimeout) { gRuntimeConfig.threadingConfig.uSpinLoopPowerA = uTimeout; } AUKN_SYM AuUInt32 GetSpinCountTimeout() { return gRuntimeConfig.threadingConfig.uSpinLoopPowerA; } AUKN_SYM void SetThreadLocalAdditionalSpinCountTimeout(AuUInt32 uTimeout) { gHasThreadLocalTimeout = 1; tlsSpinCountLocal = uTimeout; } AUKN_SYM AuUInt32 GetTotalSpinCountTime() { AuUInt32 uCount {}; if (!Primitives::ThrdCfg::gPlatformIsSMPProcessorOptimized) { return 0; } uCount = AuUInt32(gRuntimeConfig.threadingConfig.uSpinLoopPowerA); if (gHasThreadLocalTimeout) { uCount += tlsSpinCountLocal; } //#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) // uCount *= 4; //#endif // well, i guess not because intel recommends going by rdtsc ticks, and not to assume eventual uop sleep of an unspecified period return uCount; } AUKN_SYM const ThreadingConfig *GetThreadingConfig() { return &gRuntimeConfig.threadingConfig; } AUKN_SYM void SetThreadingConfig(const ThreadingConfig *pUpdateConfig) { if (!pUpdateConfig) { return; } ThreadingConfig cpy(*pUpdateConfig); cpy.bPreferFutexRWLock = Primitives::ThrdCfg::gPreferFutexRWLock; cpy.bPreferFutexEvent = Primitives::ThrdCfg::gPreferFutexEvent; cpy.bPreferEmulatedWakeOnAddress = Primitives::ThrdCfg::gPreferEmulatedWakeOnAddress; gRuntimeConfig.threadingConfig = decltype(gRuntimeConfig.threadingConfig)(cpy); Primitives::InitCfg(); Primitives::InitAdaptiveThreshold(); } bool IsNativeWaitOnSupported(); } namespace Aurora::Threading::Primitives { void InitAdaptiveThreshold() { auto uCores = AuHwInfo::GetCPUInfo().uThreads; ThrdCfg::gCountOfPCores = AuHwInfo::GetCPUInfo().maskPCores.CpuBitCount(); bool bPermitWOAInternal = IsNativeWaitOnSupported(); gUseFutexRWLock = ThrdCfg::gPreferFutexRWLock && bPermitWOAInternal; gPreferFutexEvent = ThrdCfg::gPreferFutexEvent && bPermitWOAInternal; gSpinAdaptiveThreadCount = uCores; if (uCores == 1) { gSpinAdaptiveThreshold = 0; ThrdCfg::gPlatformIsSMPProcessorOptimized = false; return; } #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) { #if 0 auto cpuId = AuHwInfo::cpuid(7); ThrdCfg::gIsIntelAlderLakeOrGreater = (cpuId.ecx >> 5) & 1; #else auto &cpuId = AuHwInfo::GetCPUInfo().cpuId; ThrdCfg::gIsIntelAlderLakeOrGreater = AuBitTest(cpuId.f_7_ECX, 5); ThrdCfg::gIsZen3OrGreater = AuBitTest(cpuId.f_81_ECX, 29); #endif } #endif if (!ThrdCfg::gForceEnableAdaptiveSpin) { gSpinAdaptiveThreshold = 0; return; } if (uCores >= 16) { gSpinAdaptiveThreshold = uCores / ThrdCfg::gAdaptiveSpinCUCnt16; } else if (uCores >= 8 && ThrdCfg::gAdaptiveSpinCUCnt8) { gSpinAdaptiveThreshold = uCores / ThrdCfg::gAdaptiveSpinCUCnt8; } else if (uCores >= 4 && ThrdCfg::gAdaptiveSpinCUCnt4) { gSpinAdaptiveThreshold = uCores / ThrdCfg::gAdaptiveSpinCUCnt4; } else if (uCores >= 0 && ThrdCfg::gAdaptiveSpinCUCnt0) { gSpinAdaptiveThreshold = uCores / ThrdCfg::gAdaptiveSpinCUCnt0; } else { gSpinAdaptiveThreshold = 0; } } void InitAdaptiveThresholdFirstTime() { if (!ThrdCfg::gForceEnableAdaptiveSpin && ThrdCfg::gPreferEnableAdaptiveSpin) { #if defined(AURORA_IS_LINUX_DERIVED) if (ThrdCfg::gPreferLinuxAdaptiveSpin) { ThrdCfg::gForceEnableAdaptiveSpin = true; } #else if (AuSwInfo::IsWindows10OrGreater()) { ThrdCfg::gForceEnableAdaptiveSpin = ThrdCfg::gPreferNewWin32AdaptiveSpin; } else { ThrdCfg::gForceEnableAdaptiveSpin = ThrdCfg::gPreferOldWin32AdaptiveSpin; } #endif } InitAdaptiveThreshold(); } void InitCfg() { ThrdCfg::gPlatformIsSMPProcessorOptimized = gRuntimeConfig.threadingConfig.bPlatformIsSMPProcessorOptimized; ThrdCfg::gEnableAggressiveScheduling = gRuntimeConfig.threadingConfig.bEnableAggressiveScheduling; ThrdCfg::gEnableAgrSchedulingRatelimit = gRuntimeConfig.threadingConfig.bEnableAgrSchedulingRatelimit; ThrdCfg::gPreferNtCondvarModernWinSpin = gRuntimeConfig.threadingConfig.bPreferNtCondvarModernWinSpin; ThrdCfg::gPreferNtCondvarOlderWinSpin = gRuntimeConfig.threadingConfig.bPreferNtCondvarOlderWinSpin; ThrdCfg::gPreferNtSemaphoreSpinTryLock = gRuntimeConfig.threadingConfig.bPreferNtSemaphoreSpinTryLock; ThrdCfg::gPreferNtMutexSpinTryLock = gRuntimeConfig.threadingConfig.bPreferNtMutexSpinTryLock; ThrdCfg::gPreferNtCondMutexSpinTryLock = gRuntimeConfig.threadingConfig.bPreferNtCondMutexSpinTryLock; ThrdCfg::gPreferLinuxSemaphoreSpinTryLock = gRuntimeConfig.threadingConfig.bPreferLinuxSemaphoreSpinTryLock; ThrdCfg::gPreferLinuxMutexSpinTryLock = gRuntimeConfig.threadingConfig.bPreferLinuxMutexSpinTryLock; ThrdCfg::gPreferLinuxCondMutexSpinTryLock = gRuntimeConfig.threadingConfig.bPreferLinuxCondMutexSpinTryLock; ThrdCfg::gPreferEmulatedWakeOnAddress = gRuntimeConfig.threadingConfig.bPreferEmulatedWakeOnAddress; ThrdCfg::gPreferWaitOnAddressAlwaysSpin = gRuntimeConfig.threadingConfig.bPreferWaitOnAddressAlwaysSpin; ThrdCfg::gPreferWaitOnAddressAlwaysSpinNative = gRuntimeConfig.threadingConfig.bPreferWaitOnAddressAlwaysSpinNative; ThrdCfg::gPreferRWLockReadLockSpin = gRuntimeConfig.threadingConfig.bPreferRWLockReadLockSpin; ThrdCfg::gUWPNanosecondEmulationCheckFirst = gRuntimeConfig.threadingConfig.bUWPNanosecondEmulationCheckFirst; ThrdCfg::gUWPNanosecondEmulationMaxYields = gRuntimeConfig.threadingConfig.uUWPNanosecondEmulationMaxYields; ThrdCfg::gForceEnableAdaptiveSpin = gRuntimeConfig.threadingConfig.bForceEnableAdaptiveSpin; ThrdCfg::gPreferEnableAdaptiveSpin = gRuntimeConfig.threadingConfig.bPreferEnableAdaptiveSpin; ThrdCfg::gPreferLinuxAdaptiveSpin = gRuntimeConfig.threadingConfig.bPreferLinuxAdaptiveSpin; ThrdCfg::gPreferOldWin32AdaptiveSpin = gRuntimeConfig.threadingConfig.bPreferOldWin32AdaptiveSpin; ThrdCfg::gPreferNewWin32AdaptiveSpin = gRuntimeConfig.threadingConfig.bPreferNewWin32AdaptiveSpin; ThrdCfg::gAdaptiveSpinCUCnt0 = gRuntimeConfig.threadingConfig.uAdaptiveSpinCUCnt0; ThrdCfg::gAdaptiveSpinCUCnt4 = gRuntimeConfig.threadingConfig.uAdaptiveSpinCUCnt4; ThrdCfg::gAdaptiveSpinCUCnt8 = gRuntimeConfig.threadingConfig.uAdaptiveSpinCUCnt8; ThrdCfg::gAdaptiveSpinCUCnt16 = gRuntimeConfig.threadingConfig.uAdaptiveSpinCUCnt16; ThrdCfg::gPreferFutexRWLock = gRuntimeConfig.threadingConfig.bPreferFutexRWLock; ThrdCfg::gWinXpThrough7BlazeOptimizerPower = gRuntimeConfig.threadingConfig.bWinXpThrough7BlazeOptimizerPower; ThrdCfg::gPreferLinuxPrimitivesFutexNoSpin = gRuntimeConfig.threadingConfig.bPreferLinuxPrimitivesFutexNoSpin; ThrdCfg::gPreferUnixPrimitivesNoSpin = gRuntimeConfig.threadingConfig.bPreferUnixPrimitivesNoSpin; ThrdCfg::gAlwaysRWLockWriteBiasOnReadLock = gRuntimeConfig.threadingConfig.bAlwaysRWLockWriteBiasOnReadLock; ThrdCfg::gEnableRWLockWriteBiasOnReadLock = gRuntimeConfig.threadingConfig.bEnableRWLockWriteBiasOnReadLock; ThrdCfg::gPreferFutexEvent = gRuntimeConfig.threadingConfig.bPreferFutexEvent; } } extern "C" AuUInt32 SMTGetAPICNumber(void) { return AuHwInfo::cpuid(1).ebx >> 24; }