AuroraRuntime/Source/Threading/Primitives/SMTYield.hpp
2023-09-10 14:50:59 +01:00

242 lines
7.6 KiB
C++

/***
Copyright (C) 2023 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: SMTYield.hpp
Date: 2023-3-12
Author: Reece
***/
#pragma once
namespace Aurora::Threading
{
// When non-zero, YieldToSharedCore runs an additional spin phase after its main spin loop;
// the length of that phase is taken from tlsSpinCountLocal.
// Value-initialized to 0 here; nothing in this header assigns it.
inline AuUInt32 gHasThreadLocalTimeout {};
// Per-thread (thread_local) budget for the additional spin phase described above.
// On x86/x64 it is added to __rdtsc() to form a deadline; on other architectures it is
// counted down once per SMPPause(). Value-initialized to 0 here.
inline thread_local AuUInt32 tlsSpinCountLocal {};
}
// When defined as 1, the main spin loops in YieldToSharedCore issue four SMPPause() calls
// (and subtract 4 from the spin budget) between callback polls, instead of one.
#define SPIN_FOUR 1
namespace Aurora::Threading::Primitives
{
// Threading configuration switches and tunables.
// Every entry is value-initialized (false / 0) here and nothing in this header assigns them.
// NOTE(review): presumably populated by InitCfg() from the runtime configuration - confirm.
// Only gPlatformIsSMPProcessorOptimized is read in this header (by DoTryIf); the remaining
// entries are consumed elsewhere, so their exact semantics cannot be stated from this file.
namespace ThrdCfg
{
// Read by DoTryIf: when true the callback is retried through YieldToSharedCore,
// otherwise it is invoked exactly once.
inline bool gPlatformIsSMPProcessorOptimized {}; // to include or not to include 🤔
inline bool gEnableAggressiveScheduling {};
inline bool gEnableAgrSchedulingRatelimit {};
inline bool gPreferNtCondvarModernWinSpin {};
inline bool gPreferNtCondvarOlderWinSpin {};
inline bool gPreferNtSemaphoreSpinTryLock {};
inline bool gPreferNtMutexSpinTryLock {};
inline bool gPreferNtCondMutexSpinTryLock {};
inline bool gPreferLinuxSemaphoreSpinTryLock {};
inline bool gPreferLinuxMutexSpinTryLock {};
inline bool gPreferLinuxCondMutexSpinTryLock {};
inline bool gPreferEmulatedWakeOnAddress {};
inline bool gPreferWaitOnAddressAlwaysSpin {};
inline bool gPreferRWLockReadLockSpin {};
inline bool gUWPNanosecondEmulationCheckFirst {};
inline AuUInt32 gUWPNanosecondEmulationMaxYields {};
inline bool gForceEnableAdaptiveSpin {};
inline bool gPreferEnableAdaptiveSpin {};
inline bool gPreferLinuxAdaptiveSpin {};
inline bool gPreferOldWin32AdaptiveSpin {};
inline bool gPreferNewWin32AdaptiveSpin {};
// NOTE(review): the CUCnt0/4/8/16 suffixes look like compute-unit (core count) buckets for
// the adaptive spin threshold - confirm against InitAdaptiveThreshold().
inline AuUInt32 gAdaptiveSpinCUCnt0 {};
inline AuUInt32 gAdaptiveSpinCUCnt4 {};
inline AuUInt32 gAdaptiveSpinCUCnt8 {};
inline AuUInt32 gAdaptiveSpinCUCnt16 {};
inline bool gPreferFutexRWLock {};
// Note: a distinct AuUInt32 variable with the same name also exists one scope up
// (Primitives::gPreferFutexEvent); inside ThrdCfg the unqualified name resolves to this bool.
inline bool gPreferFutexEvent {};
inline bool gWinXpThrough7BlazeOptimizerPower {};
inline bool gPreferLinuxPrimitivesFutexNoSpin {};
inline bool gPreferUnixPrimitivesNoSpin {};
inline bool gAlwaysRWLockWriteBiasOnReadLock {};
inline bool gEnableRWLockWriteBiasOnReadLock {};
}
// Adaptive spin state consumed by YieldToSharedCore. All value-initialized to 0 here.

// First-tier limit. When zero, YieldToSharedCore skips the adaptive accounting entirely.
// When non-zero, callers whose AuAtomicAdd result on gSpinAdaptiveCurrentCount is
// <= this value spin for the full budget.
inline AuUInt32 gSpinAdaptiveThreshold {};
// Shared counter incremented on entry to, and decremented on exit from, the adaptive
// section of YieldToSharedCore.
inline AuUInt32 gSpinAdaptiveCurrentCount {};
// Second-tier limit: callers past gSpinAdaptiveThreshold but <= this value spin for spin / 3.
inline AuUInt32 gSpinAdaptiveThreadCount {};
// The next two are not referenced in this header.
// NOTE(review): presumably the resolved forms of ThrdCfg::gPreferFutexRWLock and
// ThrdCfg::gPreferFutexEvent - confirm against InitCfg().
inline AuUInt32 gUseFutexRWLock {};
inline AuUInt32 gPreferFutexEvent {};
// Declared here, defined elsewhere.
// NOTE(review): presumably these compute the adaptive spin globals above and load ThrdCfg - confirm.
void InitAdaptiveThreshold();
void InitAdaptiveThresholdFirstTime();
void InitCfg();
// Emits a single architecture-specific spin-wait hint:
//   x86 / x64        -> _mm_pause()
//   ARM, GCC         -> inline "yield" instruction
//   ARM, other tools -> __yield() intrinsic
//   anything else    -> falls back to AuThreading::ContextYield()
static auline void SMPPause()
{
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
    _mm_pause();
#elif defined(AURORA_ARCH_ARM) && defined(AURORA_COMPILER_GCC)
    asm volatile("yield");
#elif defined(AURORA_ARCH_ARM)
    __yield();
#else
    // No dedicated pause hint is wired up for this architecture yet.
    AuThreading::ContextYield();
#endif
}
// Polls `callback` while busy-waiting with SMPPause(), returning true as soon as a poll succeeds.
//
// spin     - spin budget. On x86/x64 it is added to __rdtsc() and the loop runs until the
//            timestamp counter passes that value; on other architectures it is counted down
//            per SMPPause() issued.
// callback - nullary callable returning something convertible to bool; true means "done".
//
// Returns true if any poll of `callback` succeeded, otherwise the result of one final poll
// performed after all spin phases have expired.
//
// When gSpinAdaptiveThreshold is non-zero, the caller registers itself in
// gSpinAdaptiveCurrentCount and the spin length is tiered by the value AuAtomicAdd returns:
//   <= gSpinAdaptiveThreshold   : full budget, plus the optional thread-local phase
//   <= gSpinAdaptiveThreadCount : a third of the budget
//   otherwise                   : no spinning
// The counter is released exactly once on every exit path (immediately before each early
// `return true`, or at the single shared release point once spinning has timed out).
template <typename T>
bool auline YieldToSharedCore(long spin, T callback)
{
    // Fast path: no spinning or accounting needed if the condition already holds.
    if (callback())
    {
        return true;
    }

    if (gSpinAdaptiveThreshold)
    {
        auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u);

        if (uNow <= gSpinAdaptiveThreshold)
        {
            // Tier 1: full spin budget.
            auto uCount = spin;
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
            auto perfCounter = __rdtsc() + uCount;
            while (__rdtsc() < perfCounter)
#else
            while (uCount > 0)
#endif
            {
                if (callback())
                {
                    AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
                    return true;
                }
                else
                {
#if defined(SPIN_FOUR) && SPIN_FOUR == 1
                    // Four pause hints between polls.
                    SMPPause();
                    SMPPause();
                    SMPPause();
                    SMPPause();
                    uCount -= 4;
#else
                    SMPPause();
                    uCount -= 1;
#endif
                }
            }

            if (gHasThreadLocalTimeout)
            {
                // Optional extra phase sized by the calling thread's tlsSpinCountLocal.
                auto uCount = tlsSpinCountLocal;
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
                auto perfCounter = __rdtsc() + uCount;
                while (__rdtsc() < perfCounter)
#else
                while (uCount > 0)
#endif
                {
                    if (callback())
                    {
                        AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
                        return true;
                    }
                    else
                    {
                        SMPPause();
                        uCount--;
                    }
                }
            }

            // Intentionally no AuAtomicSub here: the timeout path falls through to the shared
            // release below. Releasing in both places would subtract twice for a single
            // AuAtomicAdd and make the unsigned counter drift/wrap.
        }
        else if (uNow <= gSpinAdaptiveThreadCount)
        {
            // Tier 2: a third of the budget, one pause hint between polls.
            auto uCount = (spin) / 3;
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
            auto perfCounter = __rdtsc() + uCount;
            while (__rdtsc() < perfCounter)
#else
            while (uCount > 0)
#endif
            {
                if (callback())
                {
                    AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
                    return true;
                }
                else
                {
                    SMPPause();
                    uCount--;
                }
            }
        }

        // Single release point for every path that did not return early
        // (tier 1 timeout, tier 2 timeout, or no spinning at all).
        AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
    }
    else
    {
        // Adaptive limiting disabled: every caller spins the full budget, no shared accounting.
        auto uCount = spin;
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
        auto perfCounter = __rdtsc() + uCount;
        while (__rdtsc() < perfCounter)
#else
        while (uCount > 0)
#endif
        {
            if (callback())
            {
                return true;
            }
            else
            {
#if defined(SPIN_FOUR) && SPIN_FOUR == 1
                SMPPause();
                SMPPause();
                SMPPause();
                SMPPause();
                uCount -= 4;
#else
                SMPPause();
                uCount -= 1;
#endif
            }
        }

        if (gHasThreadLocalTimeout)
        {
            // Optional extra phase sized by the calling thread's tlsSpinCountLocal.
            auto uCount = tlsSpinCountLocal;
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
            auto perfCounter = __rdtsc() + uCount;
            while (__rdtsc() < perfCounter)
#else
            while (uCount > 0)
#endif
            {
                if (callback())
                {
                    return true;
                }
                else
                {
                    SMPPause();
                    uCount--;
                }
            }
        }
    }

    // Last chance after all spin phases have expired.
    return callback();
}
// Attempts `callback`, optionally retrying it in a spin loop.
//
// If ThrdCfg::gPlatformIsSMPProcessorOptimized is false, `callback` is invoked exactly once
// and its result is returned. Otherwise the attempt is delegated to YieldToSharedCore with
// gRuntimeConfig.threadingConfig.uSpinLoopPowerA as the spin argument.
template <typename T>
bool auline DoTryIf(T callback)
{
    if (!ThrdCfg::gPlatformIsSMPProcessorOptimized)
    {
        return callback();
    }

    return YieldToSharedCore(gRuntimeConfig.threadingConfig.uSpinLoopPowerA, callback);
}
}