diff --git a/Source/IO/Loop/LSLocalEvent.cpp b/Source/IO/Loop/LSLocalEvent.cpp index 2b150484..8ae2c9b6 100644 --- a/Source/IO/Loop/LSLocalEvent.cpp +++ b/Source/IO/Loop/LSLocalEvent.cpp @@ -218,10 +218,10 @@ namespace Aurora::IO::Loop bool LSLocalEvent::TryTakeSpin() { - return Threading::Primitives::DoTryIf([&] + return Threading::Primitives::DoTryIfAlderLake([&] { return this->TryTakeNoSpin(); - }); + }, &this->state_); } bool LSLocalEvent::IsSignaledNoSpinIfUserland() diff --git a/Source/IO/Loop/LSLocalMutex.cpp b/Source/IO/Loop/LSLocalMutex.cpp index e585db28..f8c30694 100644 --- a/Source/IO/Loop/LSLocalMutex.cpp +++ b/Source/IO/Loop/LSLocalMutex.cpp @@ -108,10 +108,10 @@ namespace Aurora::IO::Loop bool LSLocalMutex::TryTakeSpin() { - return Threading::Primitives::DoTryIf([&] + return Threading::Primitives::DoTryIfAlderLake([&] { return this->TryTakeNoSpin(); - }); + }, &this->uAtomicWord); } bool LSLocalMutex::TryTake() diff --git a/Source/IO/Loop/LSLocalSemaphore.cpp b/Source/IO/Loop/LSLocalSemaphore.cpp index fad5b932..968cfaa0 100644 --- a/Source/IO/Loop/LSLocalSemaphore.cpp +++ b/Source/IO/Loop/LSLocalSemaphore.cpp @@ -150,10 +150,10 @@ namespace Aurora::IO::Loop bool LSLocalSemaphore::TryTakeSpin() { - return Threading::Primitives::DoTryIf([&] + return Threading::Primitives::DoTryIfAlderLake([&] { return this->TryTakeNoSpin(); - }); + }, &this->uAtomicSemaphore); } bool LSLocalSemaphore::TryTake() diff --git a/Source/Threading/AuWakeOnAddress.cpp b/Source/Threading/AuWakeOnAddress.cpp index dab10d0b..ddcfecf2 100644 --- a/Source/Threading/AuWakeOnAddress.cpp +++ b/Source/Threading/AuWakeOnAddress.cpp @@ -58,20 +58,20 @@ namespace Aurora::Threading { if (gShouldSpinOnlyInCPU == 0) { - while (!Primitives::DoTryIf([&]() + while (!Primitives::DoTryIfAlderLake([&]() { return AuAtomicTestAndSet(uPointer, 0) == 0; - })) + }, uPointer)) { } } else if (gShouldSpinOnlyInCPU == 1) { - while (!Primitives::DoTryIf([&]() + while (!Primitives::DoTryIfAlderLake([&]() { 
return AuAtomicTestAndSet(uPointer, 0) == 0; - })) + }, uPointer)) { ContextYield(); } @@ -920,10 +920,10 @@ namespace Aurora::Threading { uMS = AuNSToMS(uAbsTimeSteadyClock - uNow); - if (Primitives::DoTryIf([&]() + if (Primitives::DoTryIfAlderLake([&]() { return !WaitBuffer::Compare2(pTargetAddress, uWordSize, pCompareAddress); - })) + }, pTargetAddress)) { // hit it within the span of 1 << SpinLoopPowerA SMT stalls return true; @@ -1175,10 +1175,10 @@ namespace Aurora::Threading const void *pCompareAddress, AuUInt8 uWordSize) { - return Primitives::DoTryIf([&]() + return Primitives::DoTryIfAlderLake([&]() { return !WaitBuffer::Compare2(pTargetAddress, uWordSize, pCompareAddress); - }); + }, pTargetAddress); } WOAFASTPUB bool TryWaitOnAddress(const void *pTargetAddress, @@ -1208,7 +1208,7 @@ namespace Aurora::Threading return TryWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize); } - return Primitives::DoTryIf([&]() + return Primitives::DoTryIfAlderLake([&]() { if (WaitBuffer::Compare2(pTargetAddress, uWordSize, pCompareAddress)) { @@ -1216,7 +1216,7 @@ namespace Aurora::Threading } return check(pTargetAddress, pCompareAddress, uWordSize); - }); + }, pTargetAddress); } template @@ -1225,7 +1225,7 @@ namespace Aurora::Threading AuUInt8 uWordSize, const AuFunction &check) { - return Primitives::DoTryIf([&]() + return Primitives::DoTryIfAlderLake([&]() { if (WaitBuffer::Compare2(pTargetAddress, uWordSize, pCompareAddress)) { @@ -1233,7 +1233,7 @@ namespace Aurora::Threading } return check(pTargetAddress, pCompareAddress, uWordSize); - }); + }, pTargetAddress); } WOAFASTPUB bool TryWaitOnAddressSpecialEx(EWaitMethod eMethod, diff --git a/Source/Threading/Primitives/AuConditionMutex.Generic.cpp b/Source/Threading/Primitives/AuConditionMutex.Generic.cpp index e542e68f..60bac372 100644 --- a/Source/Threading/Primitives/AuConditionMutex.Generic.cpp +++ b/Source/Threading/Primitives/AuConditionMutex.Generic.cpp @@ -79,10 +79,10 @@ namespace 
Aurora::Threading::Primitives bool GenericConditionMutex::TryLockHeavy() { - return DoTryIf([=]() + return DoTryIfAlderLake([=]() { return this->TryLockNoSpin(); - }); + }, &this->uState_); } bool GenericConditionMutex::LockAbsNS(AuUInt64 uEndTime) diff --git a/Source/Threading/Primitives/AuConditionMutex.Linux.cpp b/Source/Threading/Primitives/AuConditionMutex.Linux.cpp index d122f4ad..afe7391e 100644 --- a/Source/Threading/Primitives/AuConditionMutex.Linux.cpp +++ b/Source/Threading/Primitives/AuConditionMutex.Linux.cpp @@ -134,10 +134,10 @@ namespace Aurora::Threading::Primitives bool LinuxConditionMutex::TryLockHeavy() { - return DoTryIf([=]() + return DoTryIfAlderLake([=]() { return TryLockNoSpin(); - }); + }, &this->uState_); } void LinuxConditionMutex::Lock() diff --git a/Source/Threading/Primitives/AuConditionMutex.NT.cpp b/Source/Threading/Primitives/AuConditionMutex.NT.cpp index 23b68393..22009410 100644 --- a/Source/Threading/Primitives/AuConditionMutex.NT.cpp +++ b/Source/Threading/Primitives/AuConditionMutex.NT.cpp @@ -46,10 +46,10 @@ namespace Aurora::Threading::Primitives bool Win32ConditionMutex::TryLockHeavy() { - return DoTryIf([=]() + return DoTryIfAlderLake([=]() { return this->TryLockNoSpin(); - }); + }, &this->lock_); } bool Win32ConditionMutex::TryLockNoSpin() diff --git a/Source/Threading/Primitives/AuConditionVariable.Generic.cpp b/Source/Threading/Primitives/AuConditionVariable.Generic.cpp index c67bf4ae..1495bc3d 100644 --- a/Source/Threading/Primitives/AuConditionVariable.Generic.cpp +++ b/Source/Threading/Primitives/AuConditionVariable.Generic.cpp @@ -162,10 +162,10 @@ namespace Aurora::Threading::Primitives return this->TryTakeOneNoSpin(); } - return DoTryIf([=]() + return DoTryIfAlderLake([=]() { return this->TryTakeOneNoSpin(); - }); + }, &this->uState_); } AUKN_SYM IConditionVariable *ConditionVariableNew(const AuSPtr &pMutex) diff --git a/Source/Threading/Primitives/AuConditionVariable.Linux.cpp 
b/Source/Threading/Primitives/AuConditionVariable.Linux.cpp index d270d098..ba67869b 100644 --- a/Source/Threading/Primitives/AuConditionVariable.Linux.cpp +++ b/Source/Threading/Primitives/AuConditionVariable.Linux.cpp @@ -32,15 +32,20 @@ namespace Aurora::Threading::Primitives bool ConditionVariableLinux::TryTakeOneSpin() { - if (ThrdCfg::gPreferLinuxPrimitivesFutexNoSpin) + if (this->TryTakeOneNoSpin()) { - return this->TryTakeOneNoSpin(); + return true; } - return DoTryIf([=]() + if (ThrdCfg::gPreferLinuxPrimitivesFutexNoSpin) + { + return false; + } + + return DoTryIfAlderLake([=]() { return this->TryTakeOneNoSpin(); - }); + }, &this->uState_); } bool ConditionVariableLinux::WaitOne(AuUInt64 qwTimeoutRelative, diff --git a/Source/Threading/Primitives/AuConditionVariable.NT.cpp b/Source/Threading/Primitives/AuConditionVariable.NT.cpp index 5a9901c1..652886b9 100644 --- a/Source/Threading/Primitives/AuConditionVariable.NT.cpp +++ b/Source/Threading/Primitives/AuConditionVariable.NT.cpp @@ -343,10 +343,10 @@ namespace Aurora::Threading::Primitives #if defined(AURORA_FORCE_SRW_LOCKS) return false; #else - return DoTryIf([&]() + return DoTryIfAlderLake([&]() { return this->CheckOutNoSpin(); - }); + }, &this->signalCount); #endif } diff --git a/Source/Threading/Primitives/AuMutex.Generic.cpp b/Source/Threading/Primitives/AuMutex.Generic.cpp index ce00231a..c544bd90 100644 --- a/Source/Threading/Primitives/AuMutex.Generic.cpp +++ b/Source/Threading/Primitives/AuMutex.Generic.cpp @@ -53,10 +53,10 @@ namespace Aurora::Threading::Primitives bool MutexGenericImpl::TryLockHeavy() { - return DoTryIf([=]() + return DoTryIfAlderLake([=]() { return this->TryLockNoSpin(); - }); + }, &this->state_); } bool MutexGenericImpl::LockMS(AuUInt64 uTimeout) diff --git a/Source/Threading/Primitives/AuMutex.Linux.cpp b/Source/Threading/Primitives/AuMutex.Linux.cpp index 395d9a11..2fcdc2e5 100755 --- a/Source/Threading/Primitives/AuMutex.Linux.cpp +++ 
b/Source/Threading/Primitives/AuMutex.Linux.cpp @@ -36,14 +36,17 @@ namespace Aurora::Threading::Primitives bool MutexImpl::TryLock() { - if (ThrdCfg::gPreferLinuxMutexSpinTryLock) + if (this->TryLockNoSpin()) { - return this->TryLockHeavy(); + return true; } - else + + if (!ThrdCfg::gPreferLinuxMutexSpinTryLock) { - return this->TryLockNoSpin(); + return false; } + + return this->TryLockHeavy(); } bool MutexImpl::TryLockNoSpin() @@ -53,10 +56,10 @@ namespace Aurora::Threading::Primitives bool MutexImpl::TryLockHeavy() { - return DoTryIf([=]() + return DoTryIfAlderLake([=]() { return this->TryLockNoSpin(); - }); + }, &this->state_); } bool MutexImpl::LockMS(AuUInt64 uTimeout) diff --git a/Source/Threading/Primitives/AuMutex.NT.cpp b/Source/Threading/Primitives/AuMutex.NT.cpp index 5ab962a2..ee2f0b04 100644 --- a/Source/Threading/Primitives/AuMutex.NT.cpp +++ b/Source/Threading/Primitives/AuMutex.NT.cpp @@ -46,10 +46,10 @@ namespace Aurora::Threading::Primitives bool MutexImpl::TryLockHeavy() { - return DoTryIf([=]() + return DoTryIfAlderLake([=]() { return this->TryLockNoSpin(); - }); + }, &this->state_); } bool MutexImpl::TryLock() diff --git a/Source/Threading/Primitives/AuRWLock.cpp b/Source/Threading/Primitives/AuRWLock.cpp index 3d1329af..61f64dfd 100644 --- a/Source/Threading/Primitives/AuRWLock.cpp +++ b/Source/Threading/Primitives/AuRWLock.cpp @@ -314,10 +314,10 @@ namespace Aurora::Threading::Primitives if (gUseFutexRWLock) { - if (DoTryIf([=]() + if (DoTryIfAlderLake([=]() { return this->TryLockWriteNoSpin(); - })) + }, &this->iState_)) { return true; } @@ -615,10 +615,10 @@ namespace Aurora::Threading::Primitives if (ThrdCfg::gPreferRWLockReadLockSpin && AuAtomicLoad(&this->dwWritersPending_) == 0) { - return DoTryIf([=]() + return DoTryIfAlderLake([=]() { return this->TryLockReadNoSpin(); - }); + }, &this->iState_); } return false; diff --git a/Source/Threading/Primitives/AuSemaphore.Generic.cpp b/Source/Threading/Primitives/AuSemaphore.Generic.cpp 
index ed0bc25d..25efc4a1 100644 --- a/Source/Threading/Primitives/AuSemaphore.Generic.cpp +++ b/Source/Threading/Primitives/AuSemaphore.Generic.cpp @@ -32,10 +32,10 @@ namespace Aurora::Threading::Primitives bool SemaphoreGeneric::TryLockHeavy() { - return DoTryIf([=]() + return DoTryIfAlderLake([=]() { return this->TryLockNoSpin(); - }); + }, &this->uAtomicState); } bool SemaphoreGeneric::TryLock() diff --git a/Source/Threading/Primitives/AuSemaphore.Linux.cpp b/Source/Threading/Primitives/AuSemaphore.Linux.cpp index e6c2e946..9832423e 100644 --- a/Source/Threading/Primitives/AuSemaphore.Linux.cpp +++ b/Source/Threading/Primitives/AuSemaphore.Linux.cpp @@ -52,10 +52,10 @@ namespace Aurora::Threading::Primitives bool SemaphoreImpl::TryLockHeavy() { - return DoTryIf([=]() + return DoTryIfAlderLake([=]() { return this->TryLockNoSpin(); - }); + }, &this->dwState_); } bool SemaphoreImpl::LockMS(AuUInt64 uTimeout) diff --git a/Source/Threading/Primitives/AuSemaphore.NT.cpp b/Source/Threading/Primitives/AuSemaphore.NT.cpp index e3c1e7d7..da7ecaf6 100644 --- a/Source/Threading/Primitives/AuSemaphore.NT.cpp +++ b/Source/Threading/Primitives/AuSemaphore.NT.cpp @@ -50,10 +50,10 @@ namespace Aurora::Threading::Primitives bool SemaphoreImpl::TryLockHeavy() { - return DoTryIf([=]() + return DoTryIfAlderLake([=]() { return this->TryLockNoSpin(); - }); + }, &this->dwState_); } bool SemaphoreImpl::TryLock() diff --git a/Source/Threading/Primitives/SMTYield.cpp b/Source/Threading/Primitives/SMTYield.cpp index 24dc3cae..c5041689 100644 --- a/Source/Threading/Primitives/SMTYield.cpp +++ b/Source/Threading/Primitives/SMTYield.cpp @@ -99,6 +99,14 @@ namespace Aurora::Threading::Primitives return; } + #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) + { + /* CPUID leaf 7 ECX bit 5 is the WAITPKG flag (UMONITOR/UMWAIT/TPAUSE) in the Intel SDM; assumes cpuid(7) queries sub-leaf 0; TODO confirm */ + auto cpuId = AuHwInfo::cpuid(7); + ThrdCfg::gIsIntelAlderLakeOrGreater = (cpuId.ecx >> 5) & 1; + } + #endif + if (!ThrdCfg::gForceEnableAdaptiveSpin) { gSpinAdaptiveThreshold = 0; diff --git 
a/Source/Threading/Primitives/SMTYield.hpp b/Source/Threading/Primitives/SMTYield.hpp index b7891aef..4611312f 100644 --- a/Source/Threading/Primitives/SMTYield.hpp +++ b/Source/Threading/Primitives/SMTYield.hpp @@ -84,6 +84,7 @@ namespace Aurora::Threading::Primitives inline bool gPreferUnixPrimitivesNoSpin {}; inline bool gAlwaysRWLockWriteBiasOnReadLock {}; inline bool gEnableRWLockWriteBiasOnReadLock {}; + inline AuUInt32 gIsIntelAlderLakeOrGreater {}; inline AuUInt8 gCountOfPCores {}; } @@ -379,6 +380,236 @@ namespace Aurora::Threading::Primitives return callback(); } + + /** + * Spin-waits on callback() within a bounded budget. On the smart-scheduling path, when + * ThrdCfg::gIsIntelAlderLakeOrGreater is set, it arms _umonitor on pWord rounded to the CPU + * cache-line size and waits in _umwait rather than polling with SMPPause(). + * @param spin spin budget; on x86/x64 it is added to __rdtsc() to form a deadline, otherwise it is a loop count + * @param callback predicate to poll; a true result ends the wait immediately + * @param pWord address of the word the caller associates with the predicate + * @return true once some callback() invocation succeeded, false otherwise + */ + template <typename T> + bool auline YieldToSharedCoreAlderLake(long spin, T callback, const void *pWord) + { + if (callback()) + { + return true; + } + + #if defined(AURORA_ARCH_ARM) + AuUInt64 uClockFreq { ArmQueryClockFrequency() }; + #endif + + if (kEnableSmartScheduling) + { + bool bRet { false }; + auto uWord = SMTGetAPICNumber(); + if (uWord < AuArraySize(gCoreTable) && + uWord < ThrdCfg::gCountOfPCores) + { + AuAtomicStore(&gCoreTable[uWord], 1u); + + auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u); + if (uNow <= gSpinAdaptiveThreshold) + { + auto uCount = spin; + bool bSMTProbablyHit {}; + + if (AuAtomicLoad(&gCoreTable[uWord ^ 1])) + { + uCount /= 5; + bSMTProbablyHit = true; + } + else if (gHasThreadLocalTimeout) + { + uCount += tlsSpinCountLocal; + } + + #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) + if (ThrdCfg::gIsIntelAlderLakeOrGreater) + { + _umonitor((void *)AuPageRound(AuUInt(pWord), AuHWInfo::GetCPUInfo().dwCacheLine)); + + /* poll once more after arming the monitor and before waiting, so a state change that already happened is seen without entering _umwait */ + if (callback()) + { + bRet = true; + } + else + { + _umwait(/*0*/ /*1*/ bSMTProbablyHit ? 
1 : 0, __rdtsc() + uCount); + bRet = callback(); + } + } + else + #endif + { + #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT) + auto perfCounter = __rdtsc() + uCount; + while (__rdtsc() < perfCounter) + #else + while (uCount > 0) + #endif + { + if (callback()) + { + bRet = true; + break; + } + else + { + SMPPause(); + uCount--; + } + } + } + } + AuAtomicStore(&gCoreTable[uWord], 0u); + AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u); + } + + return bRet; + } + else if (gSpinAdaptiveThreshold) + { + auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u); + + if (uNow <= gSpinAdaptiveThreshold) + { + auto uCount = spin; + #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT) + auto perfCounter = __rdtsc() + uCount; + while (__rdtsc() < perfCounter) + #else + while (uCount > 0) + #endif + { + if (callback()) + { + AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u); + return true; + } + else + { + #if defined(SPIN_FOUR) && SPIN_FOUR == 1 + SMPPause(); + SMPPause(); + SMPPause(); + SMPPause(); + uCount -= 4; + #else + SMPPause(); + uCount -= 1; + #endif + } + } + + if (gHasThreadLocalTimeout) + { + auto uCount = tlsSpinCountLocal; + #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) + auto perfCounter = __rdtsc() + uCount; + while (__rdtsc() < perfCounter) + #else + while (uCount > 0) + #endif + { + if (callback()) + { + AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u); + return true; + } + else + { + SMPPause(); + uCount--; + } + } + } + + /* no decrement here: the AuAtomicSub after this if/else-if chain undoes the AuAtomicAdd exactly once; a second one here would unbalance gSpinAdaptiveCurrentCount */ + } + else if (uNow <= (gSpinAdaptiveThreadCount / 4 * 3)) + { + auto uCount = (spin) / 3; + #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) + auto perfCounter = __rdtsc() + uCount; + while (__rdtsc() < perfCounter) + #else + while (uCount > 0) + #endif + { + if (callback()) + { + AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u); + return true; + } + else + { + SMPPause(); + uCount--; + } + } + } + + AuAtomicSub(&gSpinAdaptiveCurrentCount, 
1u); + } + else + { + auto uCount = spin; + #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT) + auto perfCounter = __rdtsc() + uCount; + while (__rdtsc() < perfCounter) + #else + while (uCount > 0) + #endif + { + if (callback()) + { + return true; + } + else + { + #if defined(SPIN_FOUR) && SPIN_FOUR == 1 + SMPPause(); + SMPPause(); + SMPPause(); + SMPPause(); + uCount -= 4; + #else + SMPPause(); + uCount -= 1; + #endif + } + } + + if (gHasThreadLocalTimeout) + { + auto uCount = tlsSpinCountLocal; + #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT) + auto perfCounter = __rdtsc() + uCount; + while (__rdtsc() < perfCounter) + #else + while (uCount > 0) + #endif + { + if (callback()) + { + return true; + } + else + { + SMPPause(); + uCount--; + } + } + } + } + + return callback(); + } template bool auline DoTryIf(T callback) @@ -392,4 +623,38 @@ namespace Aurora::Threading::Primitives return callback(); } } + + /** + * Runs callback through YieldToSharedCoreAlderLake with the uSpinLoopPowerA budget when + * ThrdCfg::gPlatformIsSMPProcessorOptimized is set; otherwise calls it exactly once. + * @param callback predicate to evaluate + * @param pWord address forwarded to the spin helper as the word to monitor + * @return the spin helper's result, or callback() directly on the fallback path + */ + template <typename T> + bool auline DoTryIfAlderLake(T callback, const void *pWord) + { + if (ThrdCfg::gPlatformIsSMPProcessorOptimized) + { + return YieldToSharedCoreAlderLake(gRuntimeConfig.threadingConfig.uSpinLoopPowerA, callback, pWord); + } + else + { + return callback(); + } + } + + /** Overload for volatile-qualified words; casts the qualifier away and otherwise matches the const void * overload. */ + template <typename T> + bool auline DoTryIfAlderLake(T callback, const volatile void *pWord) + { + if (ThrdCfg::gPlatformIsSMPProcessorOptimized) + { + return YieldToSharedCoreAlderLake(gRuntimeConfig.threadingConfig.uSpinLoopPowerA, callback, (const void *)pWord); + } + else + { + return callback(); + } + } } \ No newline at end of file