[+] WaitOnAddress[Steady](..., AuOptional<bool> optAlreadySpun = {}) arguments
[+] ...slight UWP optimization? [*] Lift WoA limitation
This commit is contained in:
parent
77253a7654
commit
e071b3d509
@ -380,7 +380,8 @@ namespace Aurora
|
|||||||
#else
|
#else
|
||||||
AuUInt64 bPreferEmulatedWakeOnAddress : 1 { !AuBuild::kIsNtDerived /*everybody else requires us to hit the kernel. */ };
|
AuUInt64 bPreferEmulatedWakeOnAddress : 1 { !AuBuild::kIsNtDerived /*everybody else requires us to hit the kernel. */ };
|
||||||
#endif
|
#endif
|
||||||
AuUInt64 bPreferWaitOnAddressAlwaysSpin : 1 { true }; // ..., if emulated! if double-spinning under higher level locks, disable me.
|
AuUInt64 bPreferWaitOnAddressAlwaysSpin : 1 { true }; // ..., if emulated! if double-spinning under higher level locks, disable me.
|
||||||
|
AuUInt64 bPreferWaitOnAddressAlwaysSpinNative : 1 { false }; // ..., if not emulated! noting that most kernels and user-schedulers will spin for you
|
||||||
AuUInt64 bPreferRWLockReadLockSpin : 1 { true };
|
AuUInt64 bPreferRWLockReadLockSpin : 1 { true };
|
||||||
AuUInt64 bUWPNanosecondEmulationCheckFirst : 1 { false };
|
AuUInt64 bUWPNanosecondEmulationCheckFirst : 1 { false };
|
||||||
AuUInt64 uUWPNanosecondEmulationMaxYields : 7 { 12 };
|
AuUInt64 uUWPNanosecondEmulationMaxYields : 7 { 12 };
|
||||||
|
@ -184,37 +184,12 @@ namespace Aurora::Threading::Waitables
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if (defined(AURORA_ARCH_X86) || defined(AURORA_ARCH_X64)) && !defined(AURORA_RUNTIME_FORCE_ADAPTIVE_FUTEX)
|
|
||||||
AuUInt uCount(GetTotalSpinCountTime());
|
|
||||||
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
|
|
||||||
auto uPerfCounter = __rdtsc() + uCount;
|
|
||||||
while (__rdtsc() < uPerfCounter)
|
|
||||||
#else
|
|
||||||
for (AU_ITERATE_N(i, uCount))
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
|
|
||||||
_mm_pause();
|
|
||||||
_mm_pause();
|
|
||||||
_mm_pause();
|
|
||||||
_mm_pause();
|
|
||||||
#else
|
|
||||||
// TBD
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (TryLock3())
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static const AuUInt32 kRef { 0 };
|
static const AuUInt32 kRef { 0 };
|
||||||
|
|
||||||
if (TryWaitOnAddress(&this->uAtomicState, &kRef, sizeof(kRef)))
|
if (TryWaitOnAddress((const void *)&this->uAtomicState, &kRef, sizeof(kRef)))
|
||||||
{
|
{
|
||||||
return TryLock3();
|
return TryLock3();
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -227,7 +202,7 @@ namespace Aurora::Threading::Waitables
|
|||||||
{
|
{
|
||||||
bool bStatus {};
|
bool bStatus {};
|
||||||
|
|
||||||
bStatus = WaitOnAddressSteady((void *)&this->uAtomicState, &kRef, sizeof(kRef), qwTimeout);
|
bStatus = WaitOnAddressSteady((const void *)&this->uAtomicState, &kRef, sizeof(kRef), qwTimeout, true);
|
||||||
|
|
||||||
if (!bStatus)
|
if (!bStatus)
|
||||||
{
|
{
|
||||||
|
@ -32,36 +32,11 @@ namespace Aurora::Threading::Waitables
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if (defined(AURORA_ARCH_X86) || defined(AURORA_ARCH_X64)) && !defined(AURORA_RUNTIME_FORCE_ADAPTIVE_FUTEX)
|
|
||||||
AuUInt uCount(GetTotalSpinCountTime());
|
|
||||||
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
|
|
||||||
auto uPerfCounter = __rdtsc() + uCount;
|
|
||||||
while (__rdtsc() < uPerfCounter)
|
|
||||||
#else
|
|
||||||
for (AU_ITERATE_N(i, uCount))
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
|
|
||||||
_mm_pause();
|
|
||||||
_mm_pause();
|
|
||||||
_mm_pause();
|
|
||||||
_mm_pause();
|
|
||||||
#else
|
|
||||||
// TBD
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (TryLockNoSpin())
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static const AuUInt32 kRef { 0 };
|
static const AuUInt32 kRef { 0 };
|
||||||
if (TryWaitOnAddress(&this->uAtomicState, &kRef, sizeof(kRef)))
|
if (TryWaitOnAddress((const void *)&this->uAtomicState, &kRef, sizeof(kRef)))
|
||||||
{
|
{
|
||||||
return TryLockNoSpin();
|
return TryLockNoSpin();
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -108,7 +83,7 @@ namespace Aurora::Threading::Waitables
|
|||||||
while (!TryLockNoSpin())
|
while (!TryLockNoSpin())
|
||||||
{
|
{
|
||||||
AuAtomicAdd(&this->uAtomicSleeping, 1u);
|
AuAtomicAdd(&this->uAtomicSleeping, 1u);
|
||||||
WaitOnAddress((void *)&this->uAtomicState, &kRef, sizeof(kRef), 0);
|
WaitOnAddress((const void *)&this->uAtomicState, &kRef, sizeof(kRef), 0, true);
|
||||||
AuAtomicSub(&this->uAtomicSleeping, 1u);
|
AuAtomicSub(&this->uAtomicSleeping, 1u);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -144,7 +119,7 @@ namespace Aurora::Threading::Waitables
|
|||||||
bool bStatus {};
|
bool bStatus {};
|
||||||
|
|
||||||
AuAtomicAdd(&this->uAtomicSleeping, 1u);
|
AuAtomicAdd(&this->uAtomicSleeping, 1u);
|
||||||
bStatus = WaitOnAddressSteady((void *)&this->uAtomicState, &kRef, sizeof(kRef), qwEndTime);
|
bStatus = WaitOnAddressSteady((const void *)&this->uAtomicState, &kRef, sizeof(kRef), qwEndTime, true);
|
||||||
AuAtomicSub(&this->uAtomicSleeping, 1u);
|
AuAtomicSub(&this->uAtomicSleeping, 1u);
|
||||||
|
|
||||||
if (!bStatus)
|
if (!bStatus)
|
||||||
@ -165,7 +140,7 @@ namespace Aurora::Threading::Waitables
|
|||||||
bool bStatus {};
|
bool bStatus {};
|
||||||
|
|
||||||
AuAtomicAdd(&this->uAtomicSleeping, 1u);
|
AuAtomicAdd(&this->uAtomicSleeping, 1u);
|
||||||
bStatus = WaitOnAddressSteady((void *)&this->uAtomicState, &kRef, sizeof(kRef), qwTimeoutAbs);
|
bStatus = WaitOnAddressSteady((const void *)&this->uAtomicState, &kRef, sizeof(kRef), qwTimeoutAbs, true);
|
||||||
AuAtomicSub(&this->uAtomicSleeping, 1u);
|
AuAtomicSub(&this->uAtomicSleeping, 1u);
|
||||||
|
|
||||||
if (!bStatus)
|
if (!bStatus)
|
||||||
|
@ -31,39 +31,14 @@ namespace Aurora::Threading::Waitables
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if (defined(AURORA_ARCH_X86) || defined(AURORA_ARCH_X64)) && !defined(AURORA_RUNTIME_FORCE_ADAPTIVE_FUTEX)
|
|
||||||
AuUInt uCount(GetTotalSpinCountTime());
|
|
||||||
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
|
|
||||||
auto uPerfCounter = __rdtsc() + uCount;
|
|
||||||
while (__rdtsc() < uPerfCounter)
|
|
||||||
#else
|
|
||||||
for (AU_ITERATE_N(i, uCount))
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
|
|
||||||
_mm_pause();
|
|
||||||
_mm_pause();
|
|
||||||
_mm_pause();
|
|
||||||
_mm_pause();
|
|
||||||
#else
|
|
||||||
// TBD
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (TryLockNoSpin())
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static const AuUInt32 kRef { 1 };
|
static const AuUInt32 kRef { 1 };
|
||||||
if (TryWaitOnAddress((void *)&this->uAtomicState, &kRef, sizeof(kRef)))
|
if (TryWaitOnAddress((const void *)&this->uAtomicState, &kRef, sizeof(kRef)))
|
||||||
{
|
{
|
||||||
if (TryLockNoSpin())
|
if (TryLockNoSpin())
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -100,7 +75,7 @@ namespace Aurora::Threading::Waitables
|
|||||||
while (!TryLockNoSpin())
|
while (!TryLockNoSpin())
|
||||||
{
|
{
|
||||||
AuAtomicAdd(&this->uAtomicSleeping, 1u);
|
AuAtomicAdd(&this->uAtomicSleeping, 1u);
|
||||||
WaitOnAddress((void *)&this->uAtomicState, &kRef, sizeof(kRef), 0);
|
WaitOnAddress((const void *)&this->uAtomicState, &kRef, sizeof(kRef), 0, true);
|
||||||
AuAtomicSub(&this->uAtomicSleeping, 1u);
|
AuAtomicSub(&this->uAtomicSleeping, 1u);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -136,7 +111,7 @@ namespace Aurora::Threading::Waitables
|
|||||||
bool bStatus {};
|
bool bStatus {};
|
||||||
|
|
||||||
AuAtomicAdd(&this->uAtomicSleeping, 1u);
|
AuAtomicAdd(&this->uAtomicSleeping, 1u);
|
||||||
bStatus = WaitOnAddressSteady((void *)&this->uAtomicState, &kRef, sizeof(kRef), qwEndTime);
|
bStatus = WaitOnAddressSteady((const void *)&this->uAtomicState, &kRef, sizeof(kRef), qwEndTime, true);
|
||||||
AuAtomicSub(&this->uAtomicSleeping, 1u);
|
AuAtomicSub(&this->uAtomicSleeping, 1u);
|
||||||
|
|
||||||
if (!bStatus)
|
if (!bStatus)
|
||||||
@ -162,7 +137,7 @@ namespace Aurora::Threading::Waitables
|
|||||||
bool bStatus {};
|
bool bStatus {};
|
||||||
|
|
||||||
AuAtomicAdd(&this->uAtomicSleeping, 1u);
|
AuAtomicAdd(&this->uAtomicSleeping, 1u);
|
||||||
bStatus = WaitOnAddressSteady((void *)&this->uAtomicState, &kRef, sizeof(kRef), qwTimeoutAbs);
|
bStatus = WaitOnAddressSteady((const void *)&this->uAtomicState, &kRef, sizeof(kRef), qwTimeoutAbs, true);
|
||||||
AuAtomicSub(&this->uAtomicSleeping, 1u);
|
AuAtomicSub(&this->uAtomicSleeping, 1u);
|
||||||
|
|
||||||
if (!bStatus)
|
if (!bStatus)
|
||||||
|
@ -4,6 +4,25 @@
|
|||||||
File: WakeOnAddress.hpp
|
File: WakeOnAddress.hpp
|
||||||
Date: 2023-3-11
|
Date: 2023-3-11
|
||||||
Author: Reece
|
Author: Reece
|
||||||
|
Note: In emulation mode (*):
|
||||||
|
1: Wakes occur in FIFO order
|
||||||
|
2: uWordSize can be any length not exceeding 32 bytes
|
||||||
|
otherwise
|
||||||
|
1: Wakes are orderless
|
||||||
|
2: uWordSize must be less than or equal to 8 bytes
|
||||||
|
3: only the least significant 32 bits are guaranteed to be used as wake signals
|
||||||
|
|
||||||
|
* By default: UNIXes and targets up to and including Windows 7 will be in userland emulation mode for performance reasons.
|
||||||
|
* Linux and other targets can directly interface with their futex interface under a smaller wrapper;
|
||||||
|
* however, these applications are limited to internal synchronization primitives. The added bloat
|
||||||
|
* of the WaitOnAddress/FUTEX/atomic wait emulation layer improves performance in real world dumb
|
||||||
|
* code with spurious wakes, odd word sizes, and pointer alignments. Not to mention some targets
|
||||||
|
* are stuck with semaphores or condition variables to start off with, and therefore need this
|
||||||
|
* for the sake of porting modern applications. The aforementioned synchronization primitives
|
||||||
|
* are written with OS specific optimizations in mind, and therefore consider emulation bloat.
|
||||||
|
* bPreferEmulatedWakeOnAddress disables the emulation layer, if there's a reasonable native
|
||||||
|
* interface available.
|
||||||
|
* Defer to ThreadingConfig::bPreferEmulatedWakeOnAddress = !AuBuild::kIsNtDerived
|
||||||
***/
|
***/
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
@ -20,15 +39,17 @@ namespace Aurora::Threading
|
|||||||
const void *pCompareAddress,
|
const void *pCompareAddress,
|
||||||
AuUInt8 uWordSize);
|
AuUInt8 uWordSize);
|
||||||
|
|
||||||
// Relative timeout variant of nanosecond resolution WoA. nanoseconds in steady clock time. 0 = indefinite
|
// Relative timeout variant of nanosecond resolution WoA. 0 = indefinite
|
||||||
AUKN_SYM bool WaitOnAddress(const void *pTargetAddress,
|
AUKN_SYM bool WaitOnAddress(const void *pTargetAddress,
|
||||||
const void *pCompareAddress,
|
const void *pCompareAddress,
|
||||||
AuUInt8 uWordSize,
|
AuUInt8 uWordSize,
|
||||||
AuUInt64 qwNanoseconds);
|
AuUInt64 qwNanoseconds,
|
||||||
|
AuOptional<bool> optAlreadySpun = {} /*hint: do not spin before switching. subject to global config.*/);
|
||||||
|
|
||||||
// Absolute timeout variant of nanosecond resolution WoA. Nanoseconds are in steady clock time. 0 = indefinite
|
// Absolute timeout variant of nanosecond resolution WoA. Nanoseconds are in steady clock time. 0 = indefinite
|
||||||
AUKN_SYM bool WaitOnAddressSteady(const void *pTargetAddress,
|
AUKN_SYM bool WaitOnAddressSteady(const void *pTargetAddress,
|
||||||
const void *pCompareAddress,
|
const void *pCompareAddress,
|
||||||
AuUInt8 uWordSize,
|
AuUInt8 uWordSize,
|
||||||
AuUInt64 qwNanoseconds);
|
AuUInt64 qwNanoseconds,
|
||||||
|
AuOptional<bool> optAlreadySpun = {} /*hint: do not spin before switching. subject to global config.*/);
|
||||||
}
|
}
|
@ -112,7 +112,7 @@ namespace Aurora::Threading
|
|||||||
|
|
||||||
if (state.qwNanosecondsAbs)
|
if (state.qwNanosecondsAbs)
|
||||||
{
|
{
|
||||||
if (!WaitBuffer::From(this->pAddress, this->uSize).Compare(state))
|
if (!WaitBuffer::Compare(this->pAddress, this->uSize, state))
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -122,7 +122,7 @@ namespace Aurora::Threading
|
|||||||
|
|
||||||
while (uNow < uEndTime)
|
while (uNow < uEndTime)
|
||||||
{
|
{
|
||||||
if (!WaitBuffer::From(this->pAddress, this->uSize).Compare(state))
|
if (!WaitBuffer::Compare(this->pAddress, this->uSize, state))
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -138,11 +138,11 @@ namespace Aurora::Threading
|
|||||||
uNow = AuTime::SteadyClockNS();
|
uNow = AuTime::SteadyClockNS();
|
||||||
}
|
}
|
||||||
|
|
||||||
return !WaitBuffer::From(this->pAddress, this->uSize).Compare(state);
|
return !WaitBuffer::Compare(this->pAddress, this->uSize, state);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
while (WaitBuffer::From(this->pAddress, this->uSize).Compare(state))
|
while (WaitBuffer::Compare(this->pAddress, this->uSize, state))
|
||||||
{
|
{
|
||||||
this->variable.WaitForSignalNsEx(&this->mutex, 0);
|
this->variable.WaitForSignalNsEx(&this->mutex, 0);
|
||||||
}
|
}
|
||||||
@ -215,6 +215,28 @@ namespace Aurora::Threading
|
|||||||
return AuMove(wait);
|
return AuMove(wait);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Static comparison helper: reports whether the memory at pBuf matches the expected
// value held in state.compare (returns true when they are equal).
//  - state.uDownsizeMask empty: byte-wise AuMemcmp over min(uSize, state.compare.uSize) bytes.
//  - state.uDownsizeMask set:   both buffers are read as a single 32-bit word and only the
//                               bits selected by the mask are compared; uSize is not
//                               consulted on this path.
// NOTE(review): the masked path dereferences pBuf as a const AuUInt32 regardless of uSize;
// presumably callers only set a mask after widening the wait to a 4-byte window - confirm.
bool WaitBuffer::Compare(const void *pBuf, AuUInt8 uSize, WaitState &state)
|
||||||
|
{
|
||||||
|
if (!state.uDownsizeMask)
|
||||||
|
{
|
||||||
|
return AuMemcmp(pBuf, state.compare.buffer, AuMin(uSize, state.compare.uSize)) == 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto uMask = state.uDownsizeMask.value();
|
||||||
|
|
||||||
|
auto &uSrcWord = *AuReinterpretCast<const AuUInt32 *>(pBuf);
|
||||||
|
auto &uCmpWord = *AuReinterpretCast<const AuUInt32 *>(state.compare.buffer);
|
||||||
|
|
||||||
|
return (uSrcWord & uMask) == (uCmpWord & uMask);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Static byte-wise comparison of two raw buffers: returns true when the first uSize
// bytes at pBuf and pBuf2 are identical (AuMemcmp result of 0). Operates directly on
// the caller's pointers; no WaitBuffer instance is involved.
bool WaitBuffer::Compare(const void *pBuf, AuUInt8 uSize, const void *pBuf2)
|
||||||
|
{
|
||||||
|
return AuMemcmp(pBuf, pBuf2, uSize) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
bool WaitBuffer::Compare(const void *pBuf)
|
bool WaitBuffer::Compare(const void *pBuf)
|
||||||
{
|
{
|
||||||
return AuMemcmp(this->buffer, pBuf, this->uSize) == 0;
|
return AuMemcmp(this->buffer, pBuf, this->uSize) == 0;
|
||||||
@ -230,8 +252,8 @@ namespace Aurora::Threading
|
|||||||
{
|
{
|
||||||
auto uMask = state.uDownsizeMask.value();
|
auto uMask = state.uDownsizeMask.value();
|
||||||
|
|
||||||
auto &uSrcWord = *AuReinterpretCast<AuUInt32 *>(this->buffer);
|
auto &uSrcWord = *AuReinterpretCast<const AuUInt32 *>(this->buffer);
|
||||||
auto &uCmpWord = *AuReinterpretCast<AuUInt32 *>(state.compare.buffer);
|
auto &uCmpWord = *AuReinterpretCast<const AuUInt32 *>(state.compare.buffer);
|
||||||
|
|
||||||
return (uSrcWord & uMask) == (uCmpWord & uMask);
|
return (uSrcWord & uMask) == (uCmpWord & uMask);
|
||||||
}
|
}
|
||||||
@ -406,7 +428,7 @@ namespace Aurora::Threading
|
|||||||
)
|
)
|
||||||
{
|
{
|
||||||
WaitState state;
|
WaitState state;
|
||||||
SysAssertDbg(uWordSize <= 8);
|
SysAssertDbg(uWordSize <= 32);
|
||||||
auto pWaitEntry = gProcessWaitables.WaitBufferFrom(pTargetAddress, uWordSize);
|
auto pWaitEntry = gProcessWaitables.WaitBufferFrom(pTargetAddress, uWordSize);
|
||||||
state.compare = WaitBuffer::From(pCompareAddress, uWordSize);
|
state.compare = WaitBuffer::From(pCompareAddress, uWordSize);
|
||||||
|
|
||||||
@ -524,7 +546,8 @@ namespace Aurora::Threading
|
|||||||
AuUInt8 uWordSize,
|
AuUInt8 uWordSize,
|
||||||
AuUInt64 uAbsTimeSteadyClock,
|
AuUInt64 uAbsTimeSteadyClock,
|
||||||
AuUInt64 uRelativeNanoseconds,
|
AuUInt64 uRelativeNanoseconds,
|
||||||
AuOptional<AuUInt64> uAbsTimeAltClock /* hint */)
|
AuOptional<AuUInt64> uAbsTimeAltClock /* hint */,
|
||||||
|
bool bSpun = false)
|
||||||
{
|
{
|
||||||
#if defined(AURORA_IS_MODERNNT_DERIVED)
|
#if defined(AURORA_IS_MODERNNT_DERIVED)
|
||||||
|
|
||||||
@ -547,7 +570,7 @@ namespace Aurora::Threading
|
|||||||
{
|
{
|
||||||
if (uAbsTimeSteadyClock <= uNow)
|
if (uAbsTimeSteadyClock <= uNow)
|
||||||
{
|
{
|
||||||
return !WaitBuffer::From(pCompareAddress, uWordSize).Compare(pTargetAddress);
|
return !expect.Compare(pTargetAddress);
|
||||||
}
|
}
|
||||||
|
|
||||||
word.QuadPart = -(AuInt64(uAbsTimeSteadyClock - uNow) / 100ull);
|
word.QuadPart = -(AuInt64(uAbsTimeSteadyClock - uNow) / 100ull);
|
||||||
@ -589,7 +612,7 @@ namespace Aurora::Threading
|
|||||||
|
|
||||||
if (iDelta <= 0)
|
if (iDelta <= 0)
|
||||||
{
|
{
|
||||||
return !WaitBuffer::From(pCompareAddress, uWordSize).Compare(pTargetAddress);
|
return !WaitBuffer::Compare(pCompareAddress, uWordSize, pTargetAddress);
|
||||||
}
|
}
|
||||||
|
|
||||||
uRelativeNanoseconds = iDelta;
|
uRelativeNanoseconds = iDelta;
|
||||||
@ -604,11 +627,14 @@ namespace Aurora::Threading
|
|||||||
{
|
{
|
||||||
// take a copy
|
// take a copy
|
||||||
auto expect = WaitBuffer::From(pCompareAddress, uWordSize);
|
auto expect = WaitBuffer::From(pCompareAddress, uWordSize);
|
||||||
|
|
||||||
// first: cpu spin to avoid the kernel all together
|
// first: cpu spin to avoid the kernel all together
|
||||||
if (TryWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, uWordSize))
|
if (!bSpun)
|
||||||
{
|
{
|
||||||
return true;
|
if (TryWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, uWordSize))
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// second: yield
|
// second: yield
|
||||||
@ -637,8 +663,11 @@ namespace Aurora::Threading
|
|||||||
// first: wait on the address with an ms scale timeout
|
// first: wait on the address with an ms scale timeout
|
||||||
(void)pWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, uWordSize, uMS);
|
(void)pWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, uWordSize, uMS);
|
||||||
|
|
||||||
|
// take a copy
|
||||||
|
auto expect = WaitBuffer::From(pCompareAddress, uWordSize);
|
||||||
|
|
||||||
// never trust the error value/status provided by wait addresses - instead, do a quick compare
|
// never trust the error value/status provided by wait addresses - instead, do a quick compare
|
||||||
if (!WaitBuffer::From(pCompareAddress, uWordSize).Compare(pTargetAddress))
|
if (!expect.Compare(pTargetAddress))
|
||||||
{
|
{
|
||||||
// best case: we woke up during the ms-res waitonaddress
|
// best case: we woke up during the ms-res waitonaddress
|
||||||
return true;
|
return true;
|
||||||
@ -651,9 +680,9 @@ namespace Aurora::Threading
|
|||||||
{
|
{
|
||||||
uMS = AuNSToMS<AuUInt32>(uAbsTimeSteadyClock - uNow);
|
uMS = AuNSToMS<AuUInt32>(uAbsTimeSteadyClock - uNow);
|
||||||
|
|
||||||
if (Primitives::DoTryIf([=]()
|
if (Primitives::DoTryIf([&]()
|
||||||
{
|
{
|
||||||
return !WaitBuffer::From(pCompareAddress, uWordSize).Compare(pTargetAddress);
|
return !expect.Compare(pTargetAddress);
|
||||||
}))
|
}))
|
||||||
{
|
{
|
||||||
// hit it within the span of 1 << SpinLoopPowerA SMT stalls
|
// hit it within the span of 1 << SpinLoopPowerA SMT stalls
|
||||||
@ -722,14 +751,14 @@ namespace Aurora::Threading
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return !WaitBuffer::From(pCompareAddress, uWordSize).Compare(pTargetAddress);
|
return !WaitBuffer::Compare(pCompareAddress, uWordSize, pTargetAddress);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void RunOSWaitOnAddressNoTimedNoErrors(const void *pTargetAddress,
|
static void RunOSWaitOnAddressNoTimedNoErrors(const void *pTargetAddress,
|
||||||
const void *pCompareAddress,
|
const void *pCompareAddress,
|
||||||
WaitState &state)
|
WaitState &state)
|
||||||
{
|
{
|
||||||
while (WaitBuffer::From(pTargetAddress, state.uWordSize).Compare(state))
|
while (WaitBuffer::Compare(pTargetAddress, state.uWordSize, state))
|
||||||
{
|
{
|
||||||
if (!RunOSWaitOnAddressNoTimed(pTargetAddress, pCompareAddress, state.uWordSize))
|
if (!RunOSWaitOnAddressNoTimed(pTargetAddress, pCompareAddress, state.uWordSize))
|
||||||
{
|
{
|
||||||
@ -739,16 +768,17 @@ namespace Aurora::Threading
|
|||||||
}
|
}
|
||||||
|
|
||||||
static bool RunOSWaitOnAddressTimedSteady(const void *pTargetAddress,
|
static bool RunOSWaitOnAddressTimedSteady(const void *pTargetAddress,
|
||||||
const void *pCompareAddress,
|
const void *pCompareAddress,
|
||||||
WaitState &state)
|
WaitState &state,
|
||||||
|
bool bSpun = false)
|
||||||
{
|
{
|
||||||
if (!WaitBuffer::From(pTargetAddress, state.uWordSize).Compare(state))
|
if (!WaitBuffer::Compare(pTargetAddress, state.uWordSize, state))
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
(void)RunOSWaitOnAddressTimed(pTargetAddress, pCompareAddress, state.uWordSize, state.qwNanosecondsAbs.value(), { }, { });
|
(void)RunOSWaitOnAddressTimed(pTargetAddress, pCompareAddress, state.uWordSize, state.qwNanosecondsAbs.value(), { }, { }, bSpun);
|
||||||
return !WaitBuffer::From(pTargetAddress, state.uWordSize).Compare(state);
|
return !WaitBuffer::Compare(pTargetAddress, state.uWordSize, state);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void RunOSWakeNOnAddress(const void *pAddress,
|
static void RunOSWakeNOnAddress(const void *pAddress,
|
||||||
@ -828,7 +858,7 @@ namespace Aurora::Threading
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
state.qwNanosecondsAbs = qwNanosecondsAbs;
|
state.qwNanosecondsAbs = qwNanosecondsAbs;
|
||||||
return RunOSWaitOnAddressTimedSteady(pWaitAddress, pCompareAddress2, state);
|
return RunOSWaitOnAddressTimedSteady(pWaitAddress, pCompareAddress2, state, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -853,55 +883,30 @@ namespace Aurora::Threading
|
|||||||
AUKN_SYM bool WaitOnAddress(const void *pTargetAddress,
|
AUKN_SYM bool WaitOnAddress(const void *pTargetAddress,
|
||||||
const void *pCompareAddress,
|
const void *pCompareAddress,
|
||||||
AuUInt8 uWordSize,
|
AuUInt8 uWordSize,
|
||||||
AuUInt64 qwNanoseconds)
|
AuUInt64 qwNanoseconds,
|
||||||
|
AuOptional<bool> optAlreadySpun)
|
||||||
{
|
{
|
||||||
bool bWaitOnAddress = IsWaitOnRecommended();
|
// Avoid SteadyTime syscall in the event of HAL retardation (missing KUSER QPC, Linux vDSO, etc)
|
||||||
if (bWaitOnAddress)
|
if (!WaitBuffer::Compare(pTargetAddress, uWordSize, pCompareAddress))
|
||||||
{
|
{
|
||||||
auto [pWaitAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, uWordSize);
|
return true;
|
||||||
auto pCompareAddress2 = AuReinterpretCast<const char *>(pCompareAddress) - uDelta;
|
|
||||||
|
|
||||||
WaitState state;
|
|
||||||
state.uDownsizeMask = uMask;
|
|
||||||
state.compare = uMask ?
|
|
||||||
WaitBuffer::From(pCompareAddress2, 4) :
|
|
||||||
WaitBuffer::From(pCompareAddress2, uWordSize);
|
|
||||||
state.uWordSize = uMask ? 4 : uWordSize;
|
|
||||||
|
|
||||||
if (!qwNanoseconds)
|
|
||||||
{
|
|
||||||
RunOSWaitOnAddressNoTimedNoErrors(pWaitAddress, pCompareAddress2, state);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
state.qwNanosecondsAbs = qwNanoseconds + AuTime::SteadyClockNS();
|
|
||||||
return RunOSWaitOnAddressTimedSteady(pWaitAddress, pCompareAddress2, state);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (Primitives::ThrdCfg::gPreferWaitOnAddressAlwaysSpin)
|
|
||||||
{
|
|
||||||
if (TryWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize))
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return WaitOnAddressWide(pTargetAddress, pCompareAddress, uWordSize, qwNanoseconds, {}, false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return WaitOnAddressSteady(pTargetAddress,
|
||||||
|
pCompareAddress,
|
||||||
|
uWordSize,
|
||||||
|
qwNanoseconds ? qwNanoseconds + AuTime::SteadyClockNS() : 0,
|
||||||
|
optAlreadySpun);
|
||||||
}
|
}
|
||||||
|
|
||||||
AUKN_SYM bool TryWaitOnAddress(const void *pTargetAddress,
|
AUKN_SYM bool TryWaitOnAddress(const void *pTargetAddress,
|
||||||
const void *pCompareAddress,
|
const void *pCompareAddress,
|
||||||
AuUInt8 uWordSize)
|
AuUInt8 uWordSize)
|
||||||
{
|
{
|
||||||
return Primitives::DoTryIf([=]()
|
auto expect = WaitBuffer::From(pCompareAddress, uWordSize);
|
||||||
|
return Primitives::DoTryIf([&]()
|
||||||
{
|
{
|
||||||
return !WaitBuffer::From(pCompareAddress, uWordSize).Compare(pTargetAddress);
|
return !expect.Compare(pTargetAddress);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -959,7 +964,8 @@ namespace Aurora::Threading
|
|||||||
AUKN_SYM bool WaitOnAddressSteady(const void *pTargetAddress,
|
AUKN_SYM bool WaitOnAddressSteady(const void *pTargetAddress,
|
||||||
const void *pCompareAddress,
|
const void *pCompareAddress,
|
||||||
AuUInt8 uWordSize,
|
AuUInt8 uWordSize,
|
||||||
AuUInt64 qwNanoseconds)
|
AuUInt64 qwNanoseconds,
|
||||||
|
AuOptional<bool> optAlreadySpun)
|
||||||
{
|
{
|
||||||
bool bWaitOnAddress = IsWaitOnRecommended();
|
bool bWaitOnAddress = IsWaitOnRecommended();
|
||||||
if (bWaitOnAddress)
|
if (bWaitOnAddress)
|
||||||
@ -974,6 +980,18 @@ namespace Aurora::Threading
|
|||||||
WaitBuffer::From(pCompareAddress2, uWordSize);
|
WaitBuffer::From(pCompareAddress2, uWordSize);
|
||||||
state.uWordSize = uMask ? 4 : uWordSize;
|
state.uWordSize = uMask ? 4 : uWordSize;
|
||||||
|
|
||||||
|
bool bSpun {};
|
||||||
|
if (Primitives::ThrdCfg::gPreferWaitOnAddressAlwaysSpinNative &&
|
||||||
|
optAlreadySpun.value_or(false))
|
||||||
|
{
|
||||||
|
if (TryWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize))
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bSpun = true;
|
||||||
|
}
|
||||||
|
|
||||||
if (!qwNanoseconds)
|
if (!qwNanoseconds)
|
||||||
{
|
{
|
||||||
RunOSWaitOnAddressNoTimedNoErrors(pWaitAddress, pCompareAddress2, state);
|
RunOSWaitOnAddressNoTimedNoErrors(pWaitAddress, pCompareAddress2, state);
|
||||||
@ -982,12 +1000,13 @@ namespace Aurora::Threading
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
state.qwNanosecondsAbs = qwNanoseconds;
|
state.qwNanosecondsAbs = qwNanoseconds;
|
||||||
return RunOSWaitOnAddressTimedSteady(pWaitAddress, pCompareAddress2, state);
|
return RunOSWaitOnAddressTimedSteady(pWaitAddress, pCompareAddress2, state, bSpun);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (Primitives::ThrdCfg::gPreferWaitOnAddressAlwaysSpin)
|
if (Primitives::ThrdCfg::gPreferWaitOnAddressAlwaysSpin &&
|
||||||
|
optAlreadySpun.value_or(false))
|
||||||
{
|
{
|
||||||
if (TryWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize))
|
if (TryWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize))
|
||||||
{
|
{
|
||||||
|
@ -18,10 +18,13 @@ namespace Aurora::Threading
|
|||||||
|
|
||||||
struct WaitBuffer
|
struct WaitBuffer
|
||||||
{
|
{
|
||||||
char buffer[8];
|
char buffer[32];
|
||||||
AuUInt8 uSize;
|
AuUInt8 uSize;
|
||||||
|
|
||||||
static WaitBuffer From(const void *pBuf, AuUInt8 uSize);
|
static WaitBuffer From(const void *pBuf, AuUInt8 uSize);
|
||||||
|
static bool Compare(const void *pBuf, AuUInt8 uSize, WaitState &state);
|
||||||
|
static bool Compare(const void *pBuf, AuUInt8 uSize, const void *pBuf2);
|
||||||
|
|
||||||
bool Compare(const void *pBuf);
|
bool Compare(const void *pBuf);
|
||||||
bool Compare(WaitState &state);
|
bool Compare(WaitState &state);
|
||||||
};
|
};
|
||||||
|
@ -164,6 +164,7 @@ namespace Aurora::Threading::Primitives
|
|||||||
ThrdCfg::gPreferLinuxCondMutexSpinTryLock = gRuntimeConfig.threadingConfig.bPreferLinuxCondMutexSpinTryLock;
|
ThrdCfg::gPreferLinuxCondMutexSpinTryLock = gRuntimeConfig.threadingConfig.bPreferLinuxCondMutexSpinTryLock;
|
||||||
ThrdCfg::gPreferEmulatedWakeOnAddress = gRuntimeConfig.threadingConfig.bPreferEmulatedWakeOnAddress;
|
ThrdCfg::gPreferEmulatedWakeOnAddress = gRuntimeConfig.threadingConfig.bPreferEmulatedWakeOnAddress;
|
||||||
ThrdCfg::gPreferWaitOnAddressAlwaysSpin = gRuntimeConfig.threadingConfig.bPreferWaitOnAddressAlwaysSpin;
|
ThrdCfg::gPreferWaitOnAddressAlwaysSpin = gRuntimeConfig.threadingConfig.bPreferWaitOnAddressAlwaysSpin;
|
||||||
|
ThrdCfg::gPreferWaitOnAddressAlwaysSpinNative = gRuntimeConfig.threadingConfig.bPreferWaitOnAddressAlwaysSpinNative;
|
||||||
ThrdCfg::gPreferRWLockReadLockSpin = gRuntimeConfig.threadingConfig.bPreferRWLockReadLockSpin;
|
ThrdCfg::gPreferRWLockReadLockSpin = gRuntimeConfig.threadingConfig.bPreferRWLockReadLockSpin;
|
||||||
ThrdCfg::gUWPNanosecondEmulationCheckFirst = gRuntimeConfig.threadingConfig.bUWPNanosecondEmulationCheckFirst;
|
ThrdCfg::gUWPNanosecondEmulationCheckFirst = gRuntimeConfig.threadingConfig.bUWPNanosecondEmulationCheckFirst;
|
||||||
ThrdCfg::gUWPNanosecondEmulationMaxYields = gRuntimeConfig.threadingConfig.uUWPNanosecondEmulationMaxYields;
|
ThrdCfg::gUWPNanosecondEmulationMaxYields = gRuntimeConfig.threadingConfig.uUWPNanosecondEmulationMaxYields;
|
||||||
|
@ -32,6 +32,7 @@ namespace Aurora::Threading::Primitives
|
|||||||
inline bool gPreferLinuxCondMutexSpinTryLock {};
|
inline bool gPreferLinuxCondMutexSpinTryLock {};
|
||||||
inline bool gPreferEmulatedWakeOnAddress {};
|
inline bool gPreferEmulatedWakeOnAddress {};
|
||||||
inline bool gPreferWaitOnAddressAlwaysSpin {};
|
inline bool gPreferWaitOnAddressAlwaysSpin {};
|
||||||
|
inline bool gPreferWaitOnAddressAlwaysSpinNative {};
|
||||||
inline bool gPreferRWLockReadLockSpin {};
|
inline bool gPreferRWLockReadLockSpin {};
|
||||||
inline bool gUWPNanosecondEmulationCheckFirst {};
|
inline bool gUWPNanosecondEmulationCheckFirst {};
|
||||||
inline AuUInt32 gUWPNanosecondEmulationMaxYields {};
|
inline AuUInt32 gUWPNanosecondEmulationMaxYields {};
|
||||||
|
Loading…
Reference in New Issue
Block a user