/***
    Copyright (C) 2023 - 2024 J Reece Wilson (a/k/a "Reece"). All rights reserved.

    File: SMTYield.hpp
    Date: 2023-3-12
    Author: Reece
***/
#pragma once

// Whatever, I'll use this header to bleed these flags in.
// It's the easiest way to reach all of the translation units compiling thread primitives.
// ...not required
#if defined(AURORA_COMPILER_MSVC)
    #pragma strict_gs_check(off)
    #pragma check_stack(off)
#endif
// The compiler is emitting stack checks under a non-zero amount of my thread primitives.
// "Overcautious" doesn't quite cover it when a spinlock-lock built on an atomic
// bit-test-and-set is deemed worth a stack check.

#if defined(AURORA_COMPILER_CLANG) || defined(AURORA_COMPILER_GCC)
    #pragma GCC optimize("no-stack-protector")
#endif

#if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)) && \
    !defined(AURORA_COMPILER_MSVC) && \
    !defined(AURORA_COMPILER_INTEL) && \
    !defined(AURORA_A_GOOD_COMPILER_PLS)

// Even if Clang (and GCC) have these intrinsics available, you must enable them
// globally, unlike SSE for some reason. We can do runtime branching around SSE4
// paths no problem; why, all of a sudden, are we gated out of the intrinsics we
// elect to use by hand? No, compiler, you may not use these in your baseline
// feature set (or in STL locks); yes, we still want them. I will not raise our
// baseline requirements above Ivy Bridge, and I will not expose feature macros
// to the STL (et al) that would bump those requirements up to the latest Intel
// and AMD parts. If these encodings end up being wrong, blame the Clang and GNU
// toolchains, not me.
static auline void __mm_monitorx(void *__p, unsigned __extensions, unsigned __hints)
{
    // MONITORX (0F 01 FA): arm the AMD address-range monitor on the address in rAX.
    asm volatile(".byte 0x0f, 0x01, 0xfa;" :
                 : "a"(__p),
                   "c"(__extensions),
                   "d"(__hints));
}

static auline void __mm_mwaitx(unsigned __extensions, unsigned __hints, unsigned __clock)
{
    // MWAITX (0F 01 FB): wait on the armed monitor; EBX carries the optional
    // TSC-relative timeout when ECX bit 1 (kMWAITXUseTSC) is set.
    asm volatile(".byte 0x0f, 0x01, 0xfb;" :
                 : "a"(__hints),
                   "b"(__clock),
                   "c"(__extensions));
}
static auline void __umonitor(void *__address)
{
    // UMONITOR rax (F3 0F AE F0): arm the user-mode monitor on the address in rAX.
    // (ModRM must be 0xF0 for the rAX register form of F3 0F AE /6.)
    __asm__ volatile(".byte 0xF3, 0x0F, 0xAE, 0xF0;" :
                     : "a"(__address)
                     :);
}
static auline unsigned char __umwait(unsigned int __control, unsigned long long __counter)
{
    AuUInt32 uTimeHi = AuUInt32(__counter >> 32);
    AuUInt32 uTimeLo = AuUInt32(__counter & 0xffffffff);
    char flag;
    // UMWAIT rcx (F2 0F AE F1): wait for a store to the monitored line or the TSC
    // deadline in EDX:EAX; ECX selects the requested C-state. SETB captures CF,
    // which reports how the wait ended.
    __asm__ volatile(".byte 0xF2, 0x0F, 0xAE, 0xF1 \n"
                     "setb %0"
                     : "=r"(flag)
                     : "a"(uTimeLo),
                       "d"(uTimeHi),
                       "c"(__control)
                     :);
    return flag;
}

static auline unsigned char __tpause(unsigned int __control, unsigned long long __counter)
{
    AuUInt32 uTimeHi = AuUInt32(__counter >> 32);
    AuUInt32 uTimeLo = AuUInt32(__counter & 0xffffffff);
    char flag;
    // TPAUSE rcx (66 0F AE F1): pause until the TSC deadline in EDX:EAX, in the
    // C-state requested by ECX; CF (via SETB) reports how the wait ended.
    __asm__ volatile(".byte 0x66, 0x0F, 0xAE, 0xF1 \n"
                     "setb %0"
                     : "=r"(flag)
                     : "a"(uTimeLo),
                       "d"(uTimeHi),
                       "c"(__control)
                     :);
    return flag;
}

#define _mm_monitorx __mm_monitorx
#define _mm_mwaitx __mm_mwaitx
#define _umonitor __umonitor
#define _umwait __umwait
#define _tpause __tpause

#endif
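
// Usage sketch (illustrative only, not part of this header's API): a bounded,
// monitor-armed wait on a flag word. Assumes WAITPKG support has already been
// verified at runtime; `IsSet` is a hypothetical predicate over the word.
//
//  static bool WaitOnWordBriefly(const AuUInt32 &word)
//  {
//      _umonitor((void *)&word);                       // arm the monitor on word's cache line
//      if (IsSet(word))
//      {
//          return true;                                // already signalled; skip the wait
//      }
//      _umwait(1u /* C0.1 */, __rdtsc() + 100000ull);  // wake on store or TSC deadline
//      return IsSet(word);
//  }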

namespace Aurora::Threading
{
    inline AuUInt32 gHasThreadLocalTimeout {};
    inline thread_local AuUInt32 tlsSpinCountLocal {};
}

extern "C"
{
    AuUInt32 SMTGetAPICNumber(void);
}

#include <Source/Extensions/Clocks.aarch64.hpp>

#define SPIN_FOUR 1

#define while_bc___(exp, ex) \
    AuUInt32 __wsc##ex {}; \
    while ((exp) && ((__wsc##ex++) < 2))
#define while_bc__(exp, ex) while_bc___(exp, ex)
#define while_bc_(exp, ex) while_bc__(exp, AU_WHAT(ex))

#if defined(AURORA_RUNTIME_ALWAYS_SPIN_ON_BROADCAST)
    #define while_bc(exp) while (exp)
#elif defined(AURORA_RUNTIME_ALWAYS_CHECK_ONCE_ON_BROADCAST)
    #define while_bc(exp) if (exp)
#elif defined(__COUNTER__)
    #define while_bc(exp) while_bc_(exp, __COUNTER__)
#else
    #define while_bc(exp) while_bc_(exp, __LINE__)
#endif

// while_bc replaces condition-variable broadcast loops of the form
// `if/while (waiters) { signal(); }`. It attempts to rebroadcast if another
// sleeper turns up. On unicore systems, depending on the scheduler, spinning in
// this fashion can deadlock in a tight enough wait loop. On other systems,
// `while (waiters) { signal(); }` can improve performance when other threads are
// tying up the system scheduler on the wake side: some threads can arrive late,
// and we don't have to worry so much about a wake-up spin lock under real-world
// use. To strike a balance between the two conditions, we accept a little extra
// branching overhead to ensure we don't spin more than 2-3 times.
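//
// Example (illustrative; uSleepers and SignalAll are hypothetical members of a
// condvar-like primitive): wake sleepers, rebroadcasting a bounded number of
// times if new waiters race in:
//
//  while_bc (AuAtomicLoad(&this->uSleepers))
//  {
//      this->SignalAll();
//  }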

namespace Aurora::Threading::Primitives
{
    namespace ThrdCfg
    {
        inline bool gPlatformIsSMPProcessorOptimized {}; // to include or not to include 🤔
        inline bool gEnableAggressiveScheduling {};
        inline bool gEnableAgrSchedulingRatelimit {};
        inline bool gPreferNtCondvarModernWinSpin {};
        inline bool gPreferNtCondvarOlderWinSpin {};
        inline bool gPreferNtSemaphoreSpinTryLock {};
        inline bool gPreferNtMutexSpinTryLock {};
        inline bool gPreferNtCondMutexSpinTryLock {};
        inline bool gPreferLinuxSemaphoreSpinTryLock {};
        inline bool gPreferLinuxMutexSpinTryLock {};
        inline bool gPreferLinuxCondMutexSpinTryLock {};
        inline bool gPreferEmulatedWakeOnAddress {};
        inline bool gPreferWaitOnAddressAlwaysSpin {};
        inline bool gPreferWaitOnAddressAlwaysSpinNative {};
        inline bool gPreferRWLockReadLockSpin {};
        inline bool gUWPNanosecondEmulationCheckFirst {};
        inline AuUInt32 gUWPNanosecondEmulationMaxYields {};
        inline bool gForceEnableAdaptiveSpin {};
        inline bool gPreferEnableAdaptiveSpin {};
        inline bool gPreferLinuxAdaptiveSpin {};
        inline bool gPreferOldWin32AdaptiveSpin {};
        inline bool gPreferNewWin32AdaptiveSpin {};
        inline AuUInt32 gAdaptiveSpinCUCnt0 {};
        inline AuUInt32 gAdaptiveSpinCUCnt4 {};
        inline AuUInt32 gAdaptiveSpinCUCnt8 {};
        inline AuUInt32 gAdaptiveSpinCUCnt16 {};
        inline bool gPreferFutexRWLock {};
        inline bool gPreferFutexEvent {};
        inline bool gWinXpThrough7BlazeOptimizerPower {};
        inline bool gPreferLinuxPrimitivesFutexNoSpin {};
        inline bool gPreferUnixPrimitivesNoSpin {};
        inline bool gAlwaysRWLockWriteBiasOnReadLock {};
        inline bool gEnableRWLockWriteBiasOnReadLock {};
        inline AuUInt32 gIsIntelAlderLakeOrGreater {};
        inline AuUInt32 gIsZen3OrGreater {};
        inline AuUInt8 gCountOfPCores {};
    }

    #if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
    using SMTAtomic_t = AuUInt8;
    #else
    using SMTAtomic_t = AuUInt32;
    #endif

    inline SMTAtomic_t gCoreTable[256] {};
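
    // A gCoreTable slot is set to 1 while a thread spins on that logical core
    // (indexed by APIC number). The `uWord ^ 1` lookups below assume SMT siblings
    // occupy adjacent APIC IDs, so a set neighbour slot means our hyperthread
    // twin is busy spinning and our own spin budget should be cut down.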

    inline AuUInt32 gSpinAdaptiveThreshold {};
    inline AuUInt32 gSpinAdaptiveCurrentCount {};
    inline AuUInt32 gSpinAdaptiveThreadCount {};

    inline AuUInt32 gUseFutexRWLock {};
    inline AuUInt32 gPreferFutexEvent {};
    static constexpr AuUInt32 kMWAITXUseTSC = (1 << 1);          // ECX bit 1: honour the EBX TSC-relative timeout
    static constexpr AuUInt32 kMWAITXAllowInterrupts = (1 << 0); // ECX bit 0: allow interrupts to wake us
    static constexpr AuUInt32 kMWAITXFaultGP0 = (1u << 31);      // reserved bit: setting it raises #GP(0)
    static constexpr AuUInt32 kMWAITXWaitOnStore = 0;
    static constexpr AuUInt32 kMWAITXWaitOnStoreTimed = kMWAITXWaitOnStore | kMWAITXUseTSC;

    // A dummy, aligned cache line to park MONITORX on when there is no meaningful
    // address to watch.
    struct Blocky
    {
        SMTAtomic_t a;
    };
    inline const AuAlignTo<16, Blocky> kMassiveBlock;

    void InitAdaptiveThreshold();
    void InitAdaptiveThresholdFirstTime();
    void InitCfg();
    static const bool kEnableSmartScheduling =
    #if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
        true;
    #else
        // TBD by arch and OS
        false;
    #endif

    static auline void SMPPause()
    {
    #if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
        _mm_pause();
    #elif defined(AURORA_ARCH_ARM)
        #if defined(AURORA_COMPILER_GCC)
        asm volatile("yield");
        #else
        __yield();
        #endif
    #else
        // TODO: your platform here
        AuThreading::ContextYield();
    #endif
    }

    #if defined(AURORA_ARCH_ARM)
    static AuUInt64 ConvertArmTicks(AuUInt64 uCounter, AuUInt64 uFreq)
    {
        if (uFreq == 10000000)
        {
            return uCounter * 100ull;
        }
        else if (uFreq == 1000000)
        {
            return uCounter * 1000ull;
        }
        else if (uFreq == 100000)
        {
            return uCounter * 10000ull;
        }
        else if (uFreq == 100000000ull)
        {
            return uCounter * 10ull;
        }
        else if (uFreq == 1000000000ull)
        {
            return uCounter;
        }
        else
        {
            const long long uWhole = (uCounter / uFreq) * 1'000'000'000ull;
            const long long uPart = (uCounter % uFreq) * 1'000'000'000ull / uFreq;
            return uWhole + uPart;
        }
    }
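
    // For example, a 24 MHz CNTFRQ_EL0 (a common consumer aarch64 configuration)
    // takes the generic path: 24'000'000 ticks -> uWhole = 1'000'000'000 ns, uPart = 0.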

    static AuUInt64 RdtscArmEmulated(AuUInt64 uClockFreq)
    {
        return ConvertArmTicks(ArmQueryClockCounter(), uClockFreq) * 4;
        // Context:
        // Intel recommends we spin, considering the potential for exponential
        // back-offs later on, with a coefficient based on the CPUID brand of the
        // processor. On most processors, RDTSC is not an instruction counter;
        // that'd be worthless, given that modern processors ingest hundreds of
        // instructions to speculate on. Instead, RDTSC reads back a steady
        // system-wide clock (*). It doesn't scale per core, nor can you overclock it.
        // Back to Intel's recommendation: instead of spamming the execution
        // pipeline with _mm_pauses in a loop, you should query RDTSC to solve the
        // ABA problem and normalize for changes in the micro-architecture. This
        // also allows Intel to shorten this potentially-NOP _mm_pause sleep
        // period by changing the stated base clock.
        // On the aarch64 side of things, we should be able to match the exact
        // Intel behaviour by:
        // * Reading the system-wide clock (CNTVCT_EL0)
        // * Normalizing to nanoseconds with the given frequency (CNTFRQ_EL0)
        // * Dividing by approx "3.6 GHz" ops/ns
        // (*): Ok, technically you can/need to verify Invariant TSC
        //      (CPUID.80000007H:EDX[8]), but who actually cares?
    }
    // NB: this macro expands against a local uClockFreq in scope at the call site.
    #define __rdtsc() RdtscArmEmulated(uClockFreq)
    #define ALT_RDT
    #endif
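
    // Spins on `callback` for up to roughly `spin` iterations (a TSC deadline on
    // x86), cutting the budget when the SMT sibling core is already spinning and
    // backing off as gSpinAdaptiveThreshold is approached or exceeded.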
    template <typename T>
    bool auline YieldToSharedCore(long spin, T callback)
    {
        if (callback())
        {
            return true;
        }

    #if defined(AURORA_ARCH_ARM)
        AuUInt64 uClockFreq { ArmQueryClockFrequency() };
    #endif

        if (kEnableSmartScheduling)
        {
            bool bRet { false };
            auto uWord = SMTGetAPICNumber();
            if (uWord < AuArraySize(gCoreTable) &&
                uWord < ThrdCfg::gCountOfPCores)
            {
                AuAtomicStore<SMTAtomic_t>(&gCoreTable[uWord], 1u);
                auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u);

                if (!gSpinAdaptiveThreshold || uNow <= gSpinAdaptiveThreshold)
                {
                    auto uCount = spin;
                    if (AuAtomicLoad(&gCoreTable[uWord ^ 1]))
                    {
                        uCount /= 5;
                    }
                    else if (gHasThreadLocalTimeout)
                    {
                        uCount += tlsSpinCountLocal;
                    }
                #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
                    if (ThrdCfg::gIsIntelAlderLakeOrGreater)
                    {
                        if (callback())
                        {
                            bRet = true;
                        }
                        else
                        {
                            _tpause(0, __rdtsc() + uCount);
                            bRet = callback();
                        }
                    }
                    else if (ThrdCfg::gIsZen3OrGreater)
                    {
                        if (callback())
                        {
                            bRet = true;
                        }
                        else
                        {
                            _mm_monitorx((void *)&kMassiveBlock, 0U, 0U);
                            _mm_mwaitx(kMWAITXUseTSC, 0, uCount);
                            bRet = callback();
                        }
                    }
                    else
                #endif
                    {
                    #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
                        auto perfCounter = __rdtsc() + uCount;
                        while (__rdtsc() < perfCounter)
                    #else
                        while (uCount > 0)
                    #endif
                        {
                            if (callback())
                            {
                                bRet = true;
                                break;
                            }
                            else
                            {
                                SMPPause();
                                uCount--;
                            }
                        }
                    }
                }

                AuAtomicStore<SMTAtomic_t>(&gCoreTable[uWord], 0u);
                AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
            }

            return bRet;
        }
        else if (gSpinAdaptiveThreshold)
        {
            auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u);
            if (uNow <= gSpinAdaptiveThreshold)
            {
                auto uCount = spin;
            #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
                auto perfCounter = __rdtsc() + uCount;
                while (__rdtsc() < perfCounter)
            #else
                while (uCount > 0)
            #endif
                {
                    if (callback())
                    {
                        AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
                        return true;
                    }
                    else
                    {
                    #if defined(SPIN_FOUR) && SPIN_FOUR == 1
                        SMPPause();
                        SMPPause();
                        SMPPause();
                        SMPPause();
                        uCount -= 4;
                    #else
                        SMPPause();
                        uCount -= 1;
                    #endif
                    }
                }

                if (gHasThreadLocalTimeout)
                {
                    auto uCount = tlsSpinCountLocal;
                #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
                    auto perfCounter = __rdtsc() + uCount;
                    while (__rdtsc() < perfCounter)
                #else
                    while (uCount > 0)
                #endif
                    {
                        if (callback())
                        {
                            AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
                            return true;
                        }
                        else
                        {
                            SMPPause();
                            uCount--;
                        }
                    }
                }

                AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
            }
            else if (uNow <= (gSpinAdaptiveThreadCount / 4 * 3))
            {
                auto uCount = (spin) / 3;
            #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
                auto perfCounter = __rdtsc() + uCount;
                while (__rdtsc() < perfCounter)
            #else
                while (uCount > 0)
            #endif
                {
                    if (callback())
                    {
                        AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
                        return true;
                    }
                    else
                    {
                        SMPPause();
                        uCount--;
                    }
                }
            }

            AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
        }
        else
        {
            auto uCount = spin;
        #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
            auto perfCounter = __rdtsc() + uCount;
            while (__rdtsc() < perfCounter)
        #else
            while (uCount > 0)
        #endif
            {
                if (callback())
                {
                    return true;
                }
                else
                {
                #if defined(SPIN_FOUR) && SPIN_FOUR == 1
                    SMPPause();
                    SMPPause();
                    SMPPause();
                    SMPPause();
                    uCount -= 4;
                #else
                    SMPPause();
                    uCount -= 1;
                #endif
                }
            }

            if (gHasThreadLocalTimeout)
            {
                auto uCount = tlsSpinCountLocal;
            #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
                auto perfCounter = __rdtsc() + uCount;
                while (__rdtsc() < perfCounter)
            #else
                while (uCount > 0)
            #endif
                {
                    if (callback())
                    {
                        return true;
                    }
                    else
                    {
                        SMPPause();
                        uCount--;
                    }
                }
            }
        }

        return callback();
    }
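
    // As YieldToSharedCore, but arms a hardware monitor on pWord where available
    // (UMONITOR on Alder Lake+, MONITORX on Zen 3+), so the wait can end early on
    // a store to the watched cache line rather than only on the TSC deadline.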
    template <typename T>
    bool auline YieldToSharedCoreAlderLake(long spin, T callback, const void *pWord)
    {
        if (callback())
        {
            return true;
        }

    #if defined(AURORA_ARCH_ARM)
        AuUInt64 uClockFreq { ArmQueryClockFrequency() };
    #endif

        if (kEnableSmartScheduling)
        {
            bool bRet { false };
            auto uWord = SMTGetAPICNumber();
            if (uWord < AuArraySize(gCoreTable) &&
                uWord < ThrdCfg::gCountOfPCores)
            {
                AuAtomicStore<SMTAtomic_t>(&gCoreTable[uWord], 1u);
                auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u);

                if (!gSpinAdaptiveThreshold || uNow <= gSpinAdaptiveThreshold)
                {
                    auto uCount = spin;
                    bool bSMTProbablyHit {};
                    if (AuAtomicLoad(&gCoreTable[uWord ^ 1]))
                    {
                        uCount /= 5;
                        bSMTProbablyHit = true;
                    }
                    else if (gHasThreadLocalTimeout)
                    {
                        uCount += tlsSpinCountLocal;
                    }
                #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
                    if (ThrdCfg::gIsIntelAlderLakeOrGreater)
                    {
                        _umonitor((void *)AuPageRound<AuUInt>(AuUInt(pWord), AuHWInfo::GetCPUInfo().dwCacheLine));
                        if (callback())
                        {
                            bRet = true;
                        }
                        else
                        {
                            // Control bit 0: 1 requests the lighter C0.1 state, 0
                            // the deeper C0.2. Sleep deeper when the SMT sibling
                            // is busy.
                            _umwait(bSMTProbablyHit ? 0 : 1, __rdtsc() + uCount);
                            bRet = callback();
                        }
                    }
2024-05-05 18:42:10 +00:00
else if ( ThrdCfg : : gIsZen3OrGreater )
{
2024-05-07 13:41:16 +00:00
_mm_monitorx ( ( void * ) pWord , 0U , 0U ) ;
2024-05-05 18:42:10 +00:00
if ( callback ( ) )
{
bRet = true ;
}
else
{
2024-05-06 21:04:26 +00:00
_mm_mwaitx ( kMWAITXWaitOnStoreTimed , 0 , uCount ) ;
2024-05-05 18:42:10 +00:00
bRet = callback ( ) ;
}
}
2024-05-03 11:14:52 +00:00
else
# endif
                    {
                    #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
                        auto perfCounter = __rdtsc() + uCount;
                        while (__rdtsc() < perfCounter)
                    #else
                        while (uCount > 0)
                    #endif
                        {
                            if (callback())
                            {
                                bRet = true;
                                break;
                            }
                            else
                            {
                                SMPPause();
                                uCount--;
                            }
                        }
                    }
                }

                AuAtomicStore<SMTAtomic_t>(&gCoreTable[uWord], 0u);
                AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
            }

            return bRet;
        }
        else if (gSpinAdaptiveThreshold)
        {
            auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u);
            if (uNow <= gSpinAdaptiveThreshold)
            {
                auto uCount = spin;
            #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
                auto perfCounter = __rdtsc() + uCount;
                while (__rdtsc() < perfCounter)
            #else
                while (uCount > 0)
            #endif
                {
                    if (callback())
                    {
                        AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
                        return true;
                    }
                    else
                    {
                    #if defined(SPIN_FOUR) && SPIN_FOUR == 1
                        SMPPause();
                        SMPPause();
                        SMPPause();
                        SMPPause();
                        uCount -= 4;
                    #else
                        SMPPause();
                        uCount -= 1;
                    #endif
                    }
                }

                if (gHasThreadLocalTimeout)
                {
                    auto uCount = tlsSpinCountLocal;
                #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
                    auto perfCounter = __rdtsc() + uCount;
                    while (__rdtsc() < perfCounter)
                #else
                    while (uCount > 0)
                #endif
                    {
                        if (callback())
                        {
                            AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
                            return true;
                        }
                        else
                        {
                            SMPPause();
                            uCount--;
                        }
                    }
                }

                AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
            }
            else if (uNow <= (gSpinAdaptiveThreadCount / 4 * 3))
            {
                auto uCount = (spin) / 3;
            #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
                auto perfCounter = __rdtsc() + uCount;
                while (__rdtsc() < perfCounter)
            #else
                while (uCount > 0)
            #endif
                {
                    if (callback())
                    {
                        AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
                        return true;
                    }
                    else
                    {
                        SMPPause();
                        uCount--;
                    }
                }
            }

            AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
        }
        else
        {
            auto uCount = spin;
        #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
            auto perfCounter = __rdtsc() + uCount;
            while (__rdtsc() < perfCounter)
        #else
            while (uCount > 0)
        #endif
            {
                if (callback())
                {
                    return true;
                }
                else
                {
                #if defined(SPIN_FOUR) && SPIN_FOUR == 1
                    SMPPause();
                    SMPPause();
                    SMPPause();
                    SMPPause();
                    uCount -= 4;
                #else
                    SMPPause();
                    uCount -= 1;
                #endif
                }
            }

            if (gHasThreadLocalTimeout)
            {
                auto uCount = tlsSpinCountLocal;
            #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
                auto perfCounter = __rdtsc() + uCount;
                while (__rdtsc() < perfCounter)
            #else
                while (uCount > 0)
            #endif
                {
                    if (callback())
                    {
                        return true;
                    }
                    else
                    {
                        SMPPause();
                        uCount--;
                    }
                }
            }
        }

        return callback();
    }

    template <typename T>
    bool auline DoTryIf(T callback)
    {
        if (ThrdCfg::gPlatformIsSMPProcessorOptimized)
        {
            return YieldToSharedCore(gRuntimeConfig.threadingConfig.uSpinLoopPowerA, callback);
        }
        else
        {
            return callback();
        }
    }

    template <typename T>
    bool auline DoTryIfAlderLake(T callback, const void *pWord)
    {
        if (ThrdCfg::gPlatformIsSMPProcessorOptimized)
        {
            return YieldToSharedCoreAlderLake(gRuntimeConfig.threadingConfig.uSpinLoopPowerA, callback, pWord);
        }
        else
        {
            return callback();
        }
    }

    template <typename T>
    bool auline DoTryIfAlderLake(T callback, const volatile void *pWord)
    {
        if (ThrdCfg::gPlatformIsSMPProcessorOptimized)
        {
            return YieldToSharedCoreAlderLake(gRuntimeConfig.threadingConfig.uSpinLoopPowerA, callback, (const void *)pWord);
        }
        else
        {
            return callback();
        }
    }
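
    // Usage sketch (illustrative only; std::atomic keeps the example self-contained):
    // a spin-then-give-up acquire over a lock word, of the sort a futex-backed
    // primitive would attempt before sleeping in the kernel.
    //
    //  static bool TrySpinAcquire(std::atomic<AuUInt32> &uLockWord)
    //  {
    //      return DoTryIfAlderLake([&]()
    //      {
    //          return uLockWord.exchange(1u, std::memory_order_acquire) == 0u;
    //      }, &uLockWord);
    //  }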
}