/***
    Copyright (C) 2023-2024 J Reece Wilson (a/k/a "Reece"). All rights reserved.

    File: SMTYield.hpp
    Date: 2023-3-12
    Author: Reece
***/
# pragma once
2023-08-19 17:14:28 +00:00
namespace Aurora : : Threading
{
inline AuUInt32 gHasThreadLocalTimeout { } ;
2023-09-06 15:24:43 +00:00
inline thread_local AuUInt32 tlsSpinCountLocal { } ;
2023-08-19 17:14:28 +00:00
}
2024-01-02 02:49:23 +00:00
extern "C"
{
    // Returns the calling thread's APIC number (implemented in platform code
    // elsewhere); used below to index gCoreTable and find the SMT sibling.
    AuUInt32 SMTGetAPICNumber(void);
}

#include <Source/Extensions/Clocks.aarch64.hpp>

// When 1, the un-throttled spin loops issue four pauses per poll.
#define SPIN_FOUR 1
namespace Aurora : : Threading : : Primitives
{
2023-09-09 16:37:14 +00:00
namespace ThrdCfg
{
inline bool gPlatformIsSMPProcessorOptimized { } ; // to include or not to include 🤔
inline bool gEnableAggressiveScheduling { } ;
inline bool gEnableAgrSchedulingRatelimit { } ;
inline bool gPreferNtCondvarModernWinSpin { } ;
inline bool gPreferNtCondvarOlderWinSpin { } ;
inline bool gPreferNtSemaphoreSpinTryLock { } ;
inline bool gPreferNtMutexSpinTryLock { } ;
inline bool gPreferNtCondMutexSpinTryLock { } ;
inline bool gPreferLinuxSemaphoreSpinTryLock { } ;
inline bool gPreferLinuxMutexSpinTryLock { } ;
inline bool gPreferLinuxCondMutexSpinTryLock { } ;
inline bool gPreferEmulatedWakeOnAddress { } ;
inline bool gPreferWaitOnAddressAlwaysSpin { } ;
2023-10-30 14:50:28 +00:00
inline bool gPreferWaitOnAddressAlwaysSpinNative { } ;
2023-09-09 16:37:14 +00:00
inline bool gPreferRWLockReadLockSpin { } ;
inline bool gUWPNanosecondEmulationCheckFirst { } ;
inline AuUInt32 gUWPNanosecondEmulationMaxYields { } ;
inline bool gForceEnableAdaptiveSpin { } ;
inline bool gPreferEnableAdaptiveSpin { } ;
inline bool gPreferLinuxAdaptiveSpin { } ;
inline bool gPreferOldWin32AdaptiveSpin { } ;
inline bool gPreferNewWin32AdaptiveSpin { } ;
inline AuUInt32 gAdaptiveSpinCUCnt0 { } ;
inline AuUInt32 gAdaptiveSpinCUCnt4 { } ;
inline AuUInt32 gAdaptiveSpinCUCnt8 { } ;
inline AuUInt32 gAdaptiveSpinCUCnt16 { } ;
inline bool gPreferFutexRWLock { } ;
2023-09-10 13:50:59 +00:00
inline bool gPreferFutexEvent { } ;
2023-09-09 16:37:14 +00:00
inline bool gWinXpThrough7BlazeOptimizerPower { } ;
inline bool gPreferLinuxPrimitivesFutexNoSpin { } ;
inline bool gPreferUnixPrimitivesNoSpin { } ;
inline bool gAlwaysRWLockWriteBiasOnReadLock { } ;
inline bool gEnableRWLockWriteBiasOnReadLock { } ;
2024-01-02 02:49:23 +00:00
inline AuUInt8 gCountOfPCores { } ;
2023-09-09 16:37:14 +00:00
}
2024-01-02 02:49:23 +00:00
# if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
using SMTAtomic_t = AuUInt8 ;
# else
using SMTAtomic_t = AuUInt32 ;
# endif
inline SMTAtomic_t gCoreTable [ 256 ] { } ;
2023-09-09 16:37:14 +00:00
2023-08-22 11:57:47 +00:00
inline AuUInt32 gSpinAdaptiveThreshold { } ;
inline AuUInt32 gSpinAdaptiveCurrentCount { } ;
2023-09-09 17:09:22 +00:00
inline AuUInt32 gSpinAdaptiveThreadCount { } ;
2023-08-22 11:57:47 +00:00
2023-08-23 13:52:47 +00:00
inline AuUInt32 gUseFutexRWLock { } ;
2023-09-10 13:50:59 +00:00
inline AuUInt32 gPreferFutexEvent { } ;
2023-08-23 13:52:47 +00:00
2023-08-22 11:57:47 +00:00
void InitAdaptiveThreshold ( ) ;
void InitAdaptiveThresholdFirstTime ( ) ;
2023-09-09 16:37:14 +00:00
void InitCfg ( ) ;
2023-08-22 11:57:47 +00:00
2024-01-02 02:49:23 +00:00
static const bool kEnableSmartScheduling =
# if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
true ;
# else
// tbd by arch and os
false ;
# endif
2023-03-12 15:27:28 +00:00
static auline void SMPPause ( )
{
# if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
_mm_pause ( ) ;
# elif defined(AURORA_ARCH_ARM)
# if defined(AURORA_COMPILER_GCC)
asm volatile ( " yield " ) ;
# else
__yield ( ) ;
# endif
# else
// TODO: your platform here
AuThreading : : ContextYield ( ) ;
# endif
}
2024-01-02 02:49:23 +00:00
# if defined(AURORA_ARCH_ARM)
static AuUInt64 ConvertArmTicks ( AuUInt64 uCounter , AuUInt64 uFreq )
{
if ( uFreq = = 10000000 )
{
return uCounter * 100ull ;
}
else if ( uFreq = = 1000000 )
{
return uCounter * 1000ull ;
}
else if ( uFreq = = 100000 )
{
return uCounter * 10000ull ;
}
else if ( uFreq = = 100000000ull )
{
return uCounter * 10ull ;
}
else if ( uFreq = = 1000000000ull )
{
return uCounter ;
}
else
{
const long long uWhole = ( uCounter / uFreq ) * 1'000'000'000ull ;
const long long uPart = ( uCounter % uFreq ) * 1'000'000'000ull / uFreq ;
return uWhole + uPart ;
}
}
static AuUInt64 RdtscArmEmulated ( AuUInt64 uClockFreq )
{
return ConvertArmTicks ( ArmQueryClockCounter ( ) , uClockFreq ) * 4 ;
// context:
// Intel recommends we spin, considering the potential for exponential back-offs later on, with a coefficient based on the CPUID brand of the processor.
// Under most processors, RDTSC is not that of the instruction counter. That'd be worthless; modern processors are ingesting hundreds of instructions to speculate on.
// Instead, RDTSC reads back a steady system-wide clock (*). It doesn't scale per core, nor can you overclock it.
// Back to Intels recommentation, instead of spamming your processes execution pipeline with mm_pauses in a loop, you should query RDTSC to solve the ABA problem and normalize for changes in the micro-architecture.
// This does allow Intel to decrease this potentially-NOP mm_pause sleep period by changing the stated base clock.
// On the aarch side of things, we should be able to match the exact Intel behaviour by:
// * Reading the system wide clock (CNTVCT_EL0)
// * Normalizing to nanoseconds with the given frequency (CNTFRQ_EL0)
// * Divide by approx "3.6 Ghz" ops/ns
// *: Ok, techincally you can/need to verify Invariant TSC: CPUID.80000007H:EDX[8], but who actually cares?
}
# define __rdtsc() RdtscArmEmulated(uClockFreq)
# define ALT_RDT
# endif
2023-03-12 15:27:28 +00:00
template < typename T >
bool auline YieldToSharedCore ( long spin , T callback )
{
if ( callback ( ) )
{
return true ;
}
2024-01-02 02:49:23 +00:00
# if defined(AURORA_ARCH_ARM)
AuUInt64 uClockFreq { ArmQueryClockFrequency ( ) } ;
# endif
if ( kEnableSmartScheduling )
{
bool bRet { false } ;
auto uWord = SMTGetAPICNumber ( ) ;
if ( uWord < AuArraySize ( gCoreTable ) & &
uWord < ThrdCfg : : gCountOfPCores )
{
AuAtomicStore < SMTAtomic_t > ( & gCoreTable [ uWord ] , 1u ) ;
auto uNow = AuAtomicAdd ( & gSpinAdaptiveCurrentCount , 1u ) ;
if ( uNow < = gSpinAdaptiveThreshold )
{
auto uCount = spin ;
if ( AuAtomicLoad ( & gCoreTable [ uWord ^ 1 ] ) )
{
uCount / = 5 ;
}
else if ( gHasThreadLocalTimeout )
{
uCount + = tlsSpinCountLocal ;
}
# if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
auto perfCounter = __rdtsc ( ) + uCount ;
while ( __rdtsc ( ) < perfCounter )
# else
while ( uCount > 0 )
# endif
{
if ( callback ( ) )
{
bRet = true ;
break ;
}
else
{
SMPPause ( ) ;
uCount - - ;
}
}
}
AuAtomicStore < SMTAtomic_t > ( & gCoreTable [ uWord ] , 0u ) ;
AuAtomicSub ( & gSpinAdaptiveCurrentCount , 1u ) ;
}
2023-03-12 15:27:28 +00:00
2024-01-02 02:49:23 +00:00
return bRet ;
}
else if ( gSpinAdaptiveThreshold )
2023-03-12 15:27:28 +00:00
{
2023-08-22 11:57:47 +00:00
auto uNow = AuAtomicAdd ( & gSpinAdaptiveCurrentCount , 1u ) ;
if ( uNow < = gSpinAdaptiveThreshold )
{
2023-09-06 15:24:43 +00:00
auto uCount = spin ;
2024-01-02 02:49:23 +00:00
# if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
2023-09-06 15:24:43 +00:00
auto perfCounter = __rdtsc ( ) + uCount ;
2023-09-02 13:29:55 +00:00
while ( __rdtsc ( ) < perfCounter )
# else
2023-09-06 15:24:43 +00:00
while ( uCount > 0 )
2023-09-02 13:29:55 +00:00
# endif
2023-08-22 11:57:47 +00:00
{
if ( callback ( ) )
{
AuAtomicSub ( & gSpinAdaptiveCurrentCount , 1u ) ;
return true ;
}
2023-08-27 20:27:49 +00:00
else
{
2023-09-02 13:29:55 +00:00
# if defined(SPIN_FOUR) && SPIN_FOUR == 1
2023-08-27 20:27:49 +00:00
SMPPause ( ) ;
SMPPause ( ) ;
SMPPause ( ) ;
SMPPause ( ) ;
2023-09-06 15:24:43 +00:00
uCount - = 4 ;
2023-09-02 13:29:55 +00:00
# else
SMPPause ( ) ;
2023-09-06 15:24:43 +00:00
uCount - = 1 ;
2023-09-02 13:29:55 +00:00
# endif
2023-08-27 20:27:49 +00:00
}
2023-08-22 11:57:47 +00:00
}
2023-03-12 15:27:28 +00:00
2023-08-22 11:57:47 +00:00
if ( gHasThreadLocalTimeout )
{
2023-09-06 15:24:43 +00:00
auto uCount = tlsSpinCountLocal ;
2023-09-04 22:03:08 +00:00
# if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
2023-09-06 15:24:43 +00:00
auto perfCounter = __rdtsc ( ) + uCount ;
2023-09-04 22:03:08 +00:00
while ( __rdtsc ( ) < perfCounter )
# else
2023-09-06 15:24:43 +00:00
while ( uCount > 0 )
2023-09-04 22:03:08 +00:00
# endif
2023-08-22 11:57:47 +00:00
{
if ( callback ( ) )
{
AuAtomicSub ( & gSpinAdaptiveCurrentCount , 1u ) ;
return true ;
}
2023-08-27 20:27:49 +00:00
else
{
SMPPause ( ) ;
2023-09-06 15:24:43 +00:00
uCount - - ;
2023-08-27 20:27:49 +00:00
}
2023-08-22 11:57:47 +00:00
}
}
2023-03-12 15:27:28 +00:00
2023-08-22 11:57:47 +00:00
AuAtomicSub ( & gSpinAdaptiveCurrentCount , 1u ) ;
}
2023-09-19 00:38:16 +00:00
else if ( uNow < = ( gSpinAdaptiveThreadCount / 4 * 3 ) )
2023-03-12 15:27:28 +00:00
{
2023-09-06 15:24:43 +00:00
auto uCount = ( spin ) / 3 ;
2023-09-02 13:29:55 +00:00
# if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
2023-09-06 15:24:43 +00:00
auto perfCounter = __rdtsc ( ) + uCount ;
2023-09-02 13:29:55 +00:00
while ( __rdtsc ( ) < perfCounter )
# else
2023-09-06 15:24:43 +00:00
while ( uCount > 0 )
2023-09-02 13:29:55 +00:00
# endif
2023-08-22 11:57:47 +00:00
{
if ( callback ( ) )
{
AuAtomicSub ( & gSpinAdaptiveCurrentCount , 1u ) ;
return true ;
}
2023-08-27 20:27:49 +00:00
else
{
SMPPause ( ) ;
2023-09-06 15:24:43 +00:00
uCount - - ;
2023-08-27 20:27:49 +00:00
}
2023-08-22 11:57:47 +00:00
}
2023-03-12 15:27:28 +00:00
}
2023-08-22 11:57:47 +00:00
AuAtomicSub ( & gSpinAdaptiveCurrentCount , 1u ) ;
}
else
2023-08-19 17:14:28 +00:00
{
2023-09-06 15:24:43 +00:00
auto uCount = spin ;
2024-01-02 02:49:23 +00:00
# if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
2023-09-06 15:24:43 +00:00
auto perfCounter = __rdtsc ( ) + uCount ;
2023-09-02 13:29:55 +00:00
while ( __rdtsc ( ) < perfCounter )
# else
2023-09-06 15:24:43 +00:00
while ( uCount > 0 )
2023-09-02 13:29:55 +00:00
# endif
2023-08-19 17:14:28 +00:00
{
if ( callback ( ) )
{
return true ;
}
2023-08-27 20:27:49 +00:00
else
{
2023-09-02 13:29:55 +00:00
# if defined(SPIN_FOUR) && SPIN_FOUR == 1
2023-08-27 20:27:49 +00:00
SMPPause ( ) ;
SMPPause ( ) ;
SMPPause ( ) ;
SMPPause ( ) ;
2023-09-06 15:24:43 +00:00
uCount - = 4 ;
2023-09-02 13:29:55 +00:00
# else
SMPPause ( ) ;
2023-09-06 15:24:43 +00:00
uCount - = 1 ;
2023-09-02 13:29:55 +00:00
# endif
2023-08-27 20:27:49 +00:00
}
2023-08-19 17:14:28 +00:00
}
2023-08-22 11:57:47 +00:00
if ( gHasThreadLocalTimeout )
{
auto uCount = tlsSpinCountLocal ;
2024-01-02 02:49:23 +00:00
# if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
2023-09-06 15:24:43 +00:00
auto perfCounter = __rdtsc ( ) + uCount ;
2023-09-04 22:03:08 +00:00
while ( __rdtsc ( ) < perfCounter )
# else
2023-09-06 15:24:43 +00:00
while ( uCount > 0 )
2023-09-04 22:03:08 +00:00
# endif
2023-08-22 11:57:47 +00:00
{
if ( callback ( ) )
{
return true ;
}
2023-08-27 20:27:49 +00:00
else
{
SMPPause ( ) ;
2023-09-06 15:24:43 +00:00
uCount - - ;
2023-08-27 20:27:49 +00:00
}
2023-08-22 11:57:47 +00:00
}
}
2023-08-19 17:14:28 +00:00
}
2023-03-12 15:27:28 +00:00
return callback ( ) ;
}
template < typename T >
bool auline DoTryIf ( T callback )
{
2023-09-09 16:37:14 +00:00
if ( ThrdCfg : : gPlatformIsSMPProcessorOptimized )
2023-03-12 15:27:28 +00:00
{
return YieldToSharedCore ( gRuntimeConfig . threadingConfig . uSpinLoopPowerA , callback ) ;
}
else
{
return callback ( ) ;
}
}
}