/*** Copyright (C) 2023 J Reece Wilson (a/k/a "Reece"). All rights reserved. File: SMTYield.hpp Date: 2023-3-12 Author: Reece ***/ #pragma once namespace Aurora::Threading { inline AuUInt32 gHasThreadLocalTimeout {}; inline thread_local AuUInt32 tlsSpinCountLocal {}; } #define SPIN_FOUR 1 namespace Aurora::Threading::Primitives { namespace ThrdCfg { inline bool gPlatformIsSMPProcessorOptimized {}; // to include or not to include 🤔 inline bool gEnableAggressiveScheduling {}; inline bool gEnableAgrSchedulingRatelimit {}; inline bool gPreferNtCondvarModernWinSpin {}; inline bool gPreferNtCondvarOlderWinSpin {}; inline bool gPreferNtSemaphoreSpinTryLock {}; inline bool gPreferNtMutexSpinTryLock {}; inline bool gPreferNtCondMutexSpinTryLock {}; inline bool gPreferLinuxSemaphoreSpinTryLock {}; inline bool gPreferLinuxMutexSpinTryLock {}; inline bool gPreferLinuxCondMutexSpinTryLock {}; inline bool gPreferEmulatedWakeOnAddress {}; inline bool gPreferWaitOnAddressAlwaysSpin {}; inline bool gPreferRWLockReadLockSpin {}; inline bool gUWPNanosecondEmulationCheckFirst {}; inline AuUInt32 gUWPNanosecondEmulationMaxYields {}; inline bool gForceEnableAdaptiveSpin {}; inline bool gPreferEnableAdaptiveSpin {}; inline bool gPreferLinuxAdaptiveSpin {}; inline bool gPreferOldWin32AdaptiveSpin {}; inline bool gPreferNewWin32AdaptiveSpin {}; inline AuUInt32 gAdaptiveSpinCUCnt0 {}; inline AuUInt32 gAdaptiveSpinCUCnt4 {}; inline AuUInt32 gAdaptiveSpinCUCnt8 {}; inline AuUInt32 gAdaptiveSpinCUCnt16 {}; inline bool gPreferFutexRWLock {}; inline bool gPreferFutexEvent {}; inline bool gWinXpThrough7BlazeOptimizerPower {}; inline bool gPreferLinuxPrimitivesFutexNoSpin {}; inline bool gPreferUnixPrimitivesNoSpin {}; inline bool gAlwaysRWLockWriteBiasOnReadLock {}; inline bool gEnableRWLockWriteBiasOnReadLock {}; } inline AuUInt32 gSpinAdaptiveThreshold {}; inline AuUInt32 gSpinAdaptiveCurrentCount {}; inline AuUInt32 gSpinAdaptiveThreadCount {}; inline AuUInt32 gUseFutexRWLock {}; inline AuUInt32 gPreferFutexEvent {}; void InitAdaptiveThreshold(); void InitAdaptiveThresholdFirstTime(); void InitCfg(); static auline void SMPPause() { #if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)) _mm_pause(); #elif defined(AURORA_ARCH_ARM) #if defined(AURORA_COMPILER_GCC) asm volatile("yield"); #else __yield(); #endif #else // TODO: your platform here AuThreading::ContextYield(); #endif } template bool auline YieldToSharedCore(long spin, T callback) { if (callback()) { return true; } if (gSpinAdaptiveThreshold) { auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u); if (uNow <= gSpinAdaptiveThreshold) { auto uCount = spin; #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) auto perfCounter = __rdtsc() + uCount; while (__rdtsc() < perfCounter) #else while (uCount > 0) #endif { if (callback()) { AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u); return true; } else { #if defined(SPIN_FOUR) && SPIN_FOUR == 1 SMPPause(); SMPPause(); SMPPause(); SMPPause(); uCount -= 4; #else SMPPause(); uCount -= 1; #endif } } if (gHasThreadLocalTimeout) { auto uCount = tlsSpinCountLocal; #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) auto perfCounter = __rdtsc() + uCount; while (__rdtsc() < perfCounter) #else while (uCount > 0) #endif { if (callback()) { AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u); return true; } else { SMPPause(); uCount--; } } } AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u); } else if (uNow <= (gSpinAdaptiveThreadCount / 4 * 3)) { auto uCount = (spin) / 3; #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) auto perfCounter = __rdtsc() + uCount; while (__rdtsc() < perfCounter) #else while (uCount > 0) #endif { if (callback()) { AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u); return true; } else { SMPPause(); uCount--; } } } AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u); } else { auto uCount = spin; #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) auto perfCounter = __rdtsc() + uCount; while (__rdtsc() < perfCounter) #else while (uCount > 0) #endif { if (callback()) { return true; } else { #if defined(SPIN_FOUR) && SPIN_FOUR == 1 SMPPause(); SMPPause(); SMPPause(); SMPPause(); uCount -= 4; #else SMPPause(); uCount -= 1; #endif } } if (gHasThreadLocalTimeout) { auto uCount = tlsSpinCountLocal; #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) auto perfCounter = __rdtsc() + uCount; while (__rdtsc() < perfCounter) #else while (uCount > 0) #endif { if (callback()) { return true; } else { SMPPause(); uCount--; } } } } return callback(); } template bool auline DoTryIf(T callback) { if (ThrdCfg::gPlatformIsSMPProcessorOptimized) { return YieldToSharedCore(gRuntimeConfig.threadingConfig.uSpinLoopPowerA, callback); } else { return callback(); } } }