[*] Optimize primitives SMTYield for Alderlake+ user-space, BIOS-ring mwait, and AARCH
This commit is contained in:
parent
a35c1f165a
commit
134816e128
@ -218,10 +218,10 @@ namespace Aurora::IO::Loop
|
||||
|
||||
bool LSLocalEvent::TryTakeSpin()
|
||||
{
|
||||
return Threading::Primitives::DoTryIf([&]
|
||||
return Threading::Primitives::DoTryIfAlderLake([&]
|
||||
{
|
||||
return this->TryTakeNoSpin();
|
||||
});
|
||||
}, &this->state_);
|
||||
}
|
||||
|
||||
bool LSLocalEvent::IsSignaledNoSpinIfUserland()
|
||||
|
@ -108,10 +108,10 @@ namespace Aurora::IO::Loop
|
||||
|
||||
bool LSLocalMutex::TryTakeSpin()
|
||||
{
|
||||
return Threading::Primitives::DoTryIf([&]
|
||||
return Threading::Primitives::DoTryIfAlderLake([&]
|
||||
{
|
||||
return this->TryTakeNoSpin();
|
||||
});
|
||||
}, &this->uAtomicWord);
|
||||
}
|
||||
|
||||
bool LSLocalMutex::TryTake()
|
||||
|
@ -150,10 +150,10 @@ namespace Aurora::IO::Loop
|
||||
|
||||
bool LSLocalSemaphore::TryTakeSpin()
|
||||
{
|
||||
return Threading::Primitives::DoTryIf([&]
|
||||
return Threading::Primitives::DoTryIfAlderLake([&]
|
||||
{
|
||||
return this->TryTakeNoSpin();
|
||||
});
|
||||
}, &this->uAtomicSemaphore);
|
||||
}
|
||||
|
||||
bool LSLocalSemaphore::TryTake()
|
||||
|
@ -58,20 +58,20 @@ namespace Aurora::Threading
|
||||
{
|
||||
if (gShouldSpinOnlyInCPU == 0)
|
||||
{
|
||||
while (!Primitives::DoTryIf([&]()
|
||||
while (!Primitives::DoTryIfAlderLake([&]()
|
||||
{
|
||||
return AuAtomicTestAndSet(uPointer, 0) == 0;
|
||||
}))
|
||||
}, uPointer))
|
||||
{
|
||||
|
||||
}
|
||||
}
|
||||
else if (gShouldSpinOnlyInCPU == 1)
|
||||
{
|
||||
while (!Primitives::DoTryIf([&]()
|
||||
while (!Primitives::DoTryIfAlderLake([&]()
|
||||
{
|
||||
return AuAtomicTestAndSet(uPointer, 0) == 0;
|
||||
}))
|
||||
}, uPointer))
|
||||
{
|
||||
ContextYield();
|
||||
}
|
||||
@ -920,10 +920,10 @@ namespace Aurora::Threading
|
||||
{
|
||||
uMS = AuNSToMS<AuUInt32>(uAbsTimeSteadyClock - uNow);
|
||||
|
||||
if (Primitives::DoTryIf([&]()
|
||||
if (Primitives::DoTryIfAlderLake([&]()
|
||||
{
|
||||
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress);
|
||||
}))
|
||||
}, pTargetAddress))
|
||||
{
|
||||
// hit it within the span of 1 << SpinLoopPowerA SMT stalls
|
||||
return true;
|
||||
@ -1175,10 +1175,10 @@ namespace Aurora::Threading
|
||||
const void *pCompareAddress,
|
||||
AuUInt8 uWordSize)
|
||||
{
|
||||
return Primitives::DoTryIf([&]()
|
||||
return Primitives::DoTryIfAlderLake([&]()
|
||||
{
|
||||
return !WaitBuffer::Compare2<T, true>(pTargetAddress, uWordSize, pCompareAddress);
|
||||
});
|
||||
}, pTargetAddress);
|
||||
}
|
||||
|
||||
WOAFASTPUB bool TryWaitOnAddress(const void *pTargetAddress,
|
||||
@ -1208,7 +1208,7 @@ namespace Aurora::Threading
|
||||
return TryWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize);
|
||||
}
|
||||
|
||||
return Primitives::DoTryIf([&]()
|
||||
return Primitives::DoTryIfAlderLake([&]()
|
||||
{
|
||||
if (WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress))
|
||||
{
|
||||
@ -1216,7 +1216,7 @@ namespace Aurora::Threading
|
||||
}
|
||||
|
||||
return check(pTargetAddress, pCompareAddress, uWordSize);
|
||||
});
|
||||
}, pTargetAddress);
|
||||
}
|
||||
|
||||
template <EWaitMethod T>
|
||||
@ -1225,7 +1225,7 @@ namespace Aurora::Threading
|
||||
AuUInt8 uWordSize,
|
||||
const AuFunction<bool(const void *, const void *, AuUInt8)> &check)
|
||||
{
|
||||
return Primitives::DoTryIf([&]()
|
||||
return Primitives::DoTryIfAlderLake([&]()
|
||||
{
|
||||
if (WaitBuffer::Compare2<T, true>(pTargetAddress, uWordSize, pCompareAddress))
|
||||
{
|
||||
@ -1233,7 +1233,7 @@ namespace Aurora::Threading
|
||||
}
|
||||
|
||||
return check(pTargetAddress, pCompareAddress, uWordSize);
|
||||
});
|
||||
}, pTargetAddress);
|
||||
}
|
||||
|
||||
WOAFASTPUB bool TryWaitOnAddressSpecialEx(EWaitMethod eMethod,
|
||||
|
@ -79,10 +79,10 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
bool GenericConditionMutex::TryLockHeavy()
|
||||
{
|
||||
return DoTryIf([=]()
|
||||
return DoTryIfAlderLake([=]()
|
||||
{
|
||||
return this->TryLockNoSpin();
|
||||
});
|
||||
}, &this->uState_);
|
||||
}
|
||||
|
||||
bool GenericConditionMutex::LockAbsNS(AuUInt64 uEndTime)
|
||||
|
@ -134,10 +134,10 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
bool LinuxConditionMutex::TryLockHeavy()
|
||||
{
|
||||
return DoTryIf([=]()
|
||||
return DoTryIfAlderLake([=]()
|
||||
{
|
||||
return TryLockNoSpin();
|
||||
});
|
||||
}, &this->uState_);
|
||||
}
|
||||
|
||||
void LinuxConditionMutex::Lock()
|
||||
|
@ -46,10 +46,10 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
bool Win32ConditionMutex::TryLockHeavy()
|
||||
{
|
||||
return DoTryIf([=]()
|
||||
return DoTryIfAlderLake([=]()
|
||||
{
|
||||
return this->TryLockNoSpin();
|
||||
});
|
||||
}, &this->lock_);
|
||||
}
|
||||
|
||||
bool Win32ConditionMutex::TryLockNoSpin()
|
||||
|
@ -162,10 +162,10 @@ namespace Aurora::Threading::Primitives
|
||||
return this->TryTakeOneNoSpin();
|
||||
}
|
||||
|
||||
return DoTryIf([=]()
|
||||
return DoTryIfAlderLake([=]()
|
||||
{
|
||||
return this->TryTakeOneNoSpin();
|
||||
});
|
||||
}, &this->uState_);
|
||||
}
|
||||
|
||||
AUKN_SYM IConditionVariable *ConditionVariableNew(const AuSPtr<IConditionMutex> &pMutex)
|
||||
|
@ -32,15 +32,20 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
bool ConditionVariableLinux::TryTakeOneSpin()
|
||||
{
|
||||
if (ThrdCfg::gPreferLinuxPrimitivesFutexNoSpin)
|
||||
if (this->TryTakeOneNoSpin())
|
||||
{
|
||||
return this->TryTakeOneNoSpin();
|
||||
return true;
|
||||
}
|
||||
|
||||
return DoTryIf([=]()
|
||||
if (ThrdCfg::gPreferLinuxPrimitivesFutexNoSpin)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return DoTryIfAlderLake([=]()
|
||||
{
|
||||
return this->TryTakeOneNoSpin();
|
||||
});
|
||||
}, &this->uState_);
|
||||
}
|
||||
|
||||
bool ConditionVariableLinux::WaitOne(AuUInt64 qwTimeoutRelative,
|
||||
|
@ -343,10 +343,10 @@ namespace Aurora::Threading::Primitives
|
||||
#if defined(AURORA_FORCE_SRW_LOCKS)
|
||||
return false;
|
||||
#else
|
||||
return DoTryIf([&]()
|
||||
return DoTryIfAlderLake([&]()
|
||||
{
|
||||
return this->CheckOutNoSpin();
|
||||
});
|
||||
}, &this->signalCount);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -53,10 +53,10 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
bool MutexGenericImpl::TryLockHeavy()
|
||||
{
|
||||
return DoTryIf([=]()
|
||||
return DoTryIfAlderLake([=]()
|
||||
{
|
||||
return this->TryLockNoSpin();
|
||||
});
|
||||
}, &this->state_);
|
||||
}
|
||||
|
||||
bool MutexGenericImpl::LockMS(AuUInt64 uTimeout)
|
||||
|
@ -36,14 +36,17 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
bool MutexImpl::TryLock()
|
||||
{
|
||||
if (ThrdCfg::gPreferLinuxMutexSpinTryLock)
|
||||
if (this->TryLockNoSpin())
|
||||
{
|
||||
return this->TryLockHeavy();
|
||||
return true;
|
||||
}
|
||||
else
|
||||
|
||||
if (!ThrdCfg::gPreferLinuxMutexSpinTryLock)
|
||||
{
|
||||
return this->TryLockNoSpin();
|
||||
return false;
|
||||
}
|
||||
|
||||
return this->TryLockHeavy();
|
||||
}
|
||||
|
||||
bool MutexImpl::TryLockNoSpin()
|
||||
@ -53,10 +56,10 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
bool MutexImpl::TryLockHeavy()
|
||||
{
|
||||
return DoTryIf([=]()
|
||||
return DoTryIfAlderLake([=]()
|
||||
{
|
||||
return this->TryLockNoSpin();
|
||||
});
|
||||
}, &this->state_);
|
||||
}
|
||||
|
||||
bool MutexImpl::LockMS(AuUInt64 uTimeout)
|
||||
|
@ -46,10 +46,10 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
bool MutexImpl::TryLockHeavy()
|
||||
{
|
||||
return DoTryIf([=]()
|
||||
return DoTryIfAlderLake([=]()
|
||||
{
|
||||
return this->TryLockNoSpin();
|
||||
});
|
||||
}, &this->state_);
|
||||
}
|
||||
|
||||
bool MutexImpl::TryLock()
|
||||
|
@ -314,10 +314,10 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
if (gUseFutexRWLock)
|
||||
{
|
||||
if (DoTryIf([=]()
|
||||
if (DoTryIfAlderLake([=]()
|
||||
{
|
||||
return this->TryLockWriteNoSpin();
|
||||
}))
|
||||
}, &this->iState_))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
@ -615,10 +615,10 @@ namespace Aurora::Threading::Primitives
|
||||
if (ThrdCfg::gPreferRWLockReadLockSpin &&
|
||||
AuAtomicLoad(&this->dwWritersPending_) == 0)
|
||||
{
|
||||
return DoTryIf([=]()
|
||||
return DoTryIfAlderLake([=]()
|
||||
{
|
||||
return this->TryLockReadNoSpin<true>();
|
||||
});
|
||||
}, &this->iState_);
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -32,10 +32,10 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
bool SemaphoreGeneric::TryLockHeavy()
|
||||
{
|
||||
return DoTryIf([=]()
|
||||
return DoTryIfAlderLake([=]()
|
||||
{
|
||||
return this->TryLockNoSpin();
|
||||
});
|
||||
}, &this->uAtomicState);
|
||||
}
|
||||
|
||||
bool SemaphoreGeneric::TryLock()
|
||||
|
@ -52,10 +52,10 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
bool SemaphoreImpl::TryLockHeavy()
|
||||
{
|
||||
return DoTryIf([=]()
|
||||
return DoTryIfAlderLake([=]()
|
||||
{
|
||||
return this->TryLockNoSpin();
|
||||
});
|
||||
}, &this->dwState_);
|
||||
}
|
||||
|
||||
bool SemaphoreImpl::LockMS(AuUInt64 uTimeout)
|
||||
|
@ -50,10 +50,10 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
bool SemaphoreImpl::TryLockHeavy()
|
||||
{
|
||||
return DoTryIf([=]()
|
||||
return DoTryIfAlderLake([=]()
|
||||
{
|
||||
return this->TryLockNoSpin();
|
||||
});
|
||||
}, &this->dwState_);
|
||||
}
|
||||
|
||||
bool SemaphoreImpl::TryLock()
|
||||
|
@ -99,6 +99,13 @@ namespace Aurora::Threading::Primitives
|
||||
return;
|
||||
}
|
||||
|
||||
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
|
||||
{
|
||||
auto cpuId = AuHwInfo::cpuid(7);
|
||||
ThrdCfg::gIsIntelAlderLakeOrGreater = (cpuId.ecx >> 5) & 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!ThrdCfg::gForceEnableAdaptiveSpin)
|
||||
{
|
||||
gSpinAdaptiveThreshold = 0;
|
||||
|
@ -84,6 +84,7 @@ namespace Aurora::Threading::Primitives
|
||||
inline bool gPreferUnixPrimitivesNoSpin {};
|
||||
inline bool gAlwaysRWLockWriteBiasOnReadLock {};
|
||||
inline bool gEnableRWLockWriteBiasOnReadLock {};
|
||||
inline AuUInt32 gIsIntelAlderLakeOrGreater {};
|
||||
inline AuUInt8 gCountOfPCores {};
|
||||
}
|
||||
|
||||
@ -379,6 +380,226 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
return callback();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool auline YieldToSharedCoreAlderLake(long spin, T callback, const void *pWord)
|
||||
{
|
||||
if (callback())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
#if defined(AURORA_ARCH_ARM)
|
||||
AuUInt64 uClockFreq { ArmQueryClockFrequency() };
|
||||
#endif
|
||||
|
||||
if (kEnableSmartScheduling)
|
||||
{
|
||||
bool bRet { false };
|
||||
auto uWord = SMTGetAPICNumber();
|
||||
if (uWord < AuArraySize(gCoreTable) &&
|
||||
uWord < ThrdCfg::gCountOfPCores)
|
||||
{
|
||||
AuAtomicStore<SMTAtomic_t>(&gCoreTable[uWord], 1u);
|
||||
|
||||
auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u);
|
||||
if (uNow <= gSpinAdaptiveThreshold)
|
||||
{
|
||||
auto uCount = spin;
|
||||
bool bSMTProbablyHit {};
|
||||
|
||||
if (AuAtomicLoad(&gCoreTable[uWord ^ 1]))
|
||||
{
|
||||
uCount /= 5;
|
||||
bSMTProbablyHit = true;
|
||||
}
|
||||
else if (gHasThreadLocalTimeout)
|
||||
{
|
||||
uCount += tlsSpinCountLocal;
|
||||
}
|
||||
|
||||
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
|
||||
if (ThrdCfg::gIsIntelAlderLakeOrGreater)
|
||||
{
|
||||
_umonitor((void *)AuPageRound<AuUInt>(AuUInt(pWord), AuHWInfo::GetCPUInfo().dwCacheLine));
|
||||
|
||||
if (callback())
|
||||
{
|
||||
bRet = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
_umwait(/*0*/ /*1*/ bSMTProbablyHit ? 1 : 0, __rdtsc() + uCount);
|
||||
bRet = callback();
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
|
||||
auto perfCounter = __rdtsc() + uCount;
|
||||
while (__rdtsc() < perfCounter)
|
||||
#else
|
||||
while (uCount > 0)
|
||||
#endif
|
||||
{
|
||||
if (callback())
|
||||
{
|
||||
bRet = true;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
SMPPause();
|
||||
uCount--;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
AuAtomicStore<SMTAtomic_t>(&gCoreTable[uWord], 0u);
|
||||
AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
|
||||
}
|
||||
|
||||
return bRet;
|
||||
}
|
||||
else if (gSpinAdaptiveThreshold)
|
||||
{
|
||||
auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u);
|
||||
|
||||
if (uNow <= gSpinAdaptiveThreshold)
|
||||
{
|
||||
auto uCount = spin;
|
||||
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
|
||||
auto perfCounter = __rdtsc() + uCount;
|
||||
while (__rdtsc() < perfCounter)
|
||||
#else
|
||||
while (uCount > 0)
|
||||
#endif
|
||||
{
|
||||
if (callback())
|
||||
{
|
||||
AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined(SPIN_FOUR) && SPIN_FOUR == 1
|
||||
SMPPause();
|
||||
SMPPause();
|
||||
SMPPause();
|
||||
SMPPause();
|
||||
uCount -= 4;
|
||||
#else
|
||||
SMPPause();
|
||||
uCount -= 1;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
if (gHasThreadLocalTimeout)
|
||||
{
|
||||
auto uCount = tlsSpinCountLocal;
|
||||
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
|
||||
auto perfCounter = __rdtsc() + uCount;
|
||||
while (__rdtsc() < perfCounter)
|
||||
#else
|
||||
while (uCount > 0)
|
||||
#endif
|
||||
{
|
||||
if (callback())
|
||||
{
|
||||
AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
SMPPause();
|
||||
uCount--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
|
||||
}
|
||||
else if (uNow <= (gSpinAdaptiveThreadCount / 4 * 3))
|
||||
{
|
||||
auto uCount = (spin) / 3;
|
||||
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
|
||||
auto perfCounter = __rdtsc() + uCount;
|
||||
while (__rdtsc() < perfCounter)
|
||||
#else
|
||||
while (uCount > 0)
|
||||
#endif
|
||||
{
|
||||
if (callback())
|
||||
{
|
||||
AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
SMPPause();
|
||||
uCount--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
AuAtomicSub(&gSpinAdaptiveCurrentCount, 1u);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto uCount = spin;
|
||||
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
|
||||
auto perfCounter = __rdtsc() + uCount;
|
||||
while (__rdtsc() < perfCounter)
|
||||
#else
|
||||
while (uCount > 0)
|
||||
#endif
|
||||
{
|
||||
if (callback())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined(SPIN_FOUR) && SPIN_FOUR == 1
|
||||
SMPPause();
|
||||
SMPPause();
|
||||
SMPPause();
|
||||
SMPPause();
|
||||
uCount -= 4;
|
||||
#else
|
||||
SMPPause();
|
||||
uCount -= 1;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
if (gHasThreadLocalTimeout)
|
||||
{
|
||||
auto uCount = tlsSpinCountLocal;
|
||||
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
|
||||
auto perfCounter = __rdtsc() + uCount;
|
||||
while (__rdtsc() < perfCounter)
|
||||
#else
|
||||
while (uCount > 0)
|
||||
#endif
|
||||
{
|
||||
if (callback())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
SMPPause();
|
||||
uCount--;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return callback();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool auline DoTryIf(T callback)
|
||||
@ -392,4 +613,30 @@ namespace Aurora::Threading::Primitives
|
||||
return callback();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool auline DoTryIfAlderLake(T callback, const void *pWord)
|
||||
{
|
||||
if (ThrdCfg::gPlatformIsSMPProcessorOptimized)
|
||||
{
|
||||
return YieldToSharedCoreAlderLake(gRuntimeConfig.threadingConfig.uSpinLoopPowerA, callback, pWord);
|
||||
}
|
||||
else
|
||||
{
|
||||
return callback();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool auline DoTryIfAlderLake(T callback, const volatile void *pWord)
|
||||
{
|
||||
if (ThrdCfg::gPlatformIsSMPProcessorOptimized)
|
||||
{
|
||||
return YieldToSharedCoreAlderLake(gRuntimeConfig.threadingConfig.uSpinLoopPowerA, callback, (const void *)pWord);
|
||||
}
|
||||
else
|
||||
{
|
||||
return callback();
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user