/***
Copyright (C) 2023 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: AuWakeOnAddress.cpp
Date: 2023-3-10
Author: Reece
***/
#include <Source/RuntimeInternal.hpp>
#include "AuWakeOnAddress.hpp"
#include "Primitives/SMTYield.hpp"
#if defined(AURORA_PLATFORM_WIN32)
#include <timeapi.h>
#endif
#include <Time/Time.hpp>
namespace Aurora::Threading
{
static thread_local WaitEntry tlsWaitEntry;
#if defined(AURORA_IS_LINUX_DERIVED)
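// These helpers assume a thin futex(2) wrapper (e.g. syscall(SYS_futex, ...)) plus the FUTEX_*
// constants and errno are made visible through the runtime headers included above. Semantics
// relied upon below:
//  - FUTEX_WAIT takes a *relative* timeout (unused here; nullptr blocks indefinitely).
//  - FUTEX_WAIT_BITSET takes an *absolute* CLOCK_MONOTONIC deadline, which is why the timed
//    paths convert the steady-clock deadline straight into a timespec.
//  - FUTEX_BITSET_MATCH_ANY makes the bitset wait behave like a plain wait for wake purposes.
// A minimal sketch of the assumed wrapper (not part of this file):
//   static long futex(uint32_t *uaddr, int op, uint32_t val,
//                     const struct timespec *timeout, uint32_t *uaddr2, uint32_t val3)
//   {
//       return ::syscall(SYS_futex, uaddr, op, val, timeout, uaddr2, val3);
//   }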
static int futex_wait(uint32_t *addr, uint32_t expected, const struct timespec *timeout)
{
if (timeout)
{
return futex(addr, FUTEX_WAIT_BITSET, expected, timeout, 0, FUTEX_BITSET_MATCH_ANY);
}
else
{
return futex(addr, FUTEX_WAIT, expected, timeout, 0, 0);
}
}
static int futex_wake(uint32_t *addr, uint32_t nthreads)
{
return futex(addr, FUTEX_WAKE, nthreads, 0, 0, 0);
}
#endif
static ProcessWaitContainer gProcessWaitables;
static int gShouldSpinOnlyInCPU = 1; // TODO: haven't decided
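// gShouldSpinOnlyInCPU selects the spin strategy used by DoSpinLockOnVar below:
//   0 - pure test-and-set spin via Primitives::DoTryIf, no yielding
//   1 - as above, but ContextYield() between failed rounds (current setting)
//   2 - test-and-test-and-set: spin-read (with yields) until the word looks free, then retry the TAS
// The lock it implements is a one-word spinlock; callers release it with a plain store:
//   DoSpinLockOnVar(&this->uAtomic);   // acquire
//   /* ... critical section ... */
//   this->uAtomic = 0;                 // release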
template<typename T>
static void DoSpinLockOnVar(T *uPointer)
{
if (gShouldSpinOnlyInCPU == 0)
{
while (!Primitives::DoTryIf([&]()
{
return AuAtomicTestAndSet(uPointer, 0) == 0;
}))
{
}
}
else if (gShouldSpinOnlyInCPU == 1)
{
while (!Primitives::DoTryIf([&]()
{
return AuAtomicTestAndSet(uPointer, 0) == 0;
}))
{
ContextYield();
}
}
else if (gShouldSpinOnlyInCPU == 2)
{
while (AuAtomicTestAndSet(uPointer, 0))
{
while (*uPointer)
{
ContextYield();
}
}
}
else
{
SysUnreachable();
}
}
bool WaitEntry::TryAcquire(const void *pAddress, AuUInt8 uSize)
{
DoSpinLockOnVar(&this->uAtomic);
//AU_LOCK_GUARD(this->mutex);
if (this->pAddress)
{
this->uAtomic = 0;
return false;
}
this->pAddress = pAddress;
this->uSize = uSize;
this->uAtomic = 0;
return true;
}
void WaitEntry::Release()
{
#if 0
if (this->bOverflow)
{
gProcessWaitables.Remove(this);
this->bOverflow = false;
}
#endif
AuResetMember(this->uSize);
AuResetMember(this->pAddress);
}
WaitEntry::WaitEntry() :
variable(AuUnsafeRaiiToShared(&this->mutex))
{
}
WaitEntry::~WaitEntry()
{
this->Release();
}
bool WaitEntry::SleepOn(WaitState &state)
{
AU_LOCK_GUARD(this->mutex);
if (state.qwNanosecondsAbs)
{
if (!WaitBuffer::From(this->pAddress, this->uSize).Compare(state))
{
return true;
}
auto uNow = AuTime::SteadyClockNS();
auto uEndTime = state.qwNanosecondsAbs.value();
#if defined(AURORA_IS_POSIX_DERIVED)
struct timespec tspec;
Time::auabsns2ts(&tspec, uEndTime);
#endif
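// Note: feeding the steady-clock deadline straight into pthread_cond_timedwait assumes the
// condition variable was created with a CLOCK_MONOTONIC condattr (or that auabsns2ts performs
// the conversion); otherwise the timespec would be measured against CLOCK_REALTIME.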
while (uNow < uEndTime)
{
if (!WaitBuffer::From(this->pAddress, this->uSize).Compare(state))
{
return true;
}
auto uTimeRemNS = uEndTime - uNow;
#if defined(AURORA_IS_POSIX_DERIVED)
auto pCond = reinterpret_cast<pthread_cond_t *>(&this->variable.pthreadCv_);
auto mutex = reinterpret_cast<pthread_mutex_t *>(this->mutex->GetOSHandle());
int ret {};
do
{
ret = ::pthread_cond_timedwait(pCond, mutex, &tspec);
if (ret == 0)
{
break;
}
if (ret == ETIMEDOUT)
{
return !WaitBuffer::From(this->pAddress, this->uSize).Compare(state);
}
}
while (ret == EINTR);
#else
#if defined(AURORA_PLATFORM_WIN32)
if (::timeBeginPeriod(0) == TIMERR_NOCANDO)
{
::timeBeginPeriod(1);
}
#endif
this->variable.WaitForSignalNS(uTimeRemNS);
#endif
uNow = AuTime::SteadyClockNS();
}
return !WaitBuffer::From(this->pAddress, this->uSize).Compare(state);
}
else
{
while (WaitBuffer::From(this->pAddress, this->uSize).Compare(state))
{
this->variable.WaitForSignal(0);
}
return true;
}
return false;
}
bool WaitEntry::TryWake(const void *pAddress)
{
DoSpinLockOnVar(&this->uAtomic);
auto bRet = TryWakeNoLock(pAddress);
if (!bRet)
{
this->uAtomic = 0;
}
return bRet;
}
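// TryWakeNoLockNoReallyNoLock and TryWakeNoLock below only signal the entry if the woken
// address falls inside the registered range [this->pAddress, this->pAddress + this->uSize);
// otherwise they report false so the caller can keep scanning the wait list.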
bool WaitEntry::TryWakeNoLockNoReallyNoLock(const void *pAddress)
{
if (AuReinterpretCast<const char *>(this->pAddress) > AuReinterpretCast<const char *>(pAddress) ||
AuReinterpretCast<const char *>(this->pAddress) + this->uSize <= AuReinterpretCast<const char *>(pAddress))
{
return false;
}
this->uAtomic = 0;
this->variable.Signal();
return true;
}
bool WaitEntry::TryWakeNoLock(const void *pAddress)
{
if (AuReinterpretCast<const char *>(this->pAddress) > AuReinterpretCast<const char *>(pAddress) ||
AuReinterpretCast<const char *>(this->pAddress) + this->uSize <= AuReinterpretCast<const char *>(pAddress))
{
return false;
}
AU_LOCK_GUARD(this->mutex);
this->uAtomic = 0;
this->variable.Signal();
return true;
}
WaitBuffer WaitBuffer::From(const void *pBuf, AuUInt8 uSize)
{
WaitBuffer wait;
AuMemcpy(wait.buffer, pBuf, uSize);
wait.uSize = uSize;
return AuMove(wait);
}
bool WaitBuffer::Compare(const void *pBuf)
{
return AuMemcmp(this->buffer, pBuf, this->uSize) == 0;
}
bool WaitBuffer::Compare(WaitState &state)
{
if (!state.uDownsizeMask)
{
return AuMemcmp(this->buffer, state.compare.buffer, AuMin(this->uSize, state.compare.uSize)) == 0;
}
else
{
auto uMask = state.uDownsizeMask.value();
auto &uSrcWord = *AuReinterpretCast<AuUInt32 *>(this->buffer);
auto &uCmpWord = *AuReinterpretCast<AuUInt32 *>(state.compare.buffer);
return (uSrcWord & uMask) == (uCmpWord & uMask);
}
}
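// Worked example of the masked path: a 2-byte wait registered at offset 2 of its aligned
// 32-bit word receives uDownsizeMask = 0xFFFF0000 from DecodeAddress, so only
// (uSrcWord & 0xFFFF0000) == (uCmpWord & 0xFFFF0000) is tested - i.e. just the two bytes the
// caller actually asked about, with the neighbouring bytes ignored.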
WaitEntry *ProcessWaitContainer::WaitBufferFrom(void *pAddress, AuUInt8 uSize)
{
#if defined(WOA_ENABLE_OLD_SHORT_LIST)
for (AU_ITERATE_N(i, kDefaultWaitPerProcess))
{
if (this->entries[i].TryAcquire(pAddress, uSize))
{
return &this->entries[i];
}
}
#endif
auto pReturn = &tlsWaitEntry;
pReturn->bReleaseOnWake = true;
pReturn->pAddress = pAddress;
pReturn->uSize = uSize;
pReturn->uAtomic = 0;
{
Lock();
if (auto pLoadFromMemory = this->waitList.pHead)
{
pReturn->pNext = pLoadFromMemory;
pLoadFromMemory->pBefore = pReturn;
}
else
{
this->waitList.pTail = pReturn;
}
this->waitList.pHead = pReturn;
Unlock();
}
return pReturn;
}
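// Wait list layout (as maintained by the insertion above): pHead is the most recently
// registered waiter, pTail the oldest; pNext walks from head towards tail and pBefore walks
// back towards the head. Wakers therefore start at pTail and follow pBefore for FIFO order.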
template <typename T>
bool ProcessWaitContainer::IterateAll(T callback)
{
#if defined(WOA_ENABLE_OLD_SHORT_LIST)
for (AU_ITERATE_N(i, kDefaultWaitPerProcess))
{
auto &entry = this->entries[i];
{
DoSpinLockOnVar(&entry.uAtomic);
if (entry.pAddress)
{
// Intentional lock/unlock order:
AU_LOCK_GUARD(entry.mutex);
entry.uAtomic = 0;
if (!callback(entry))
{
return false;
}
}
else
{
entry.uAtomic = 0;
}
}
}
#endif
{
Lock();
auto pCurrentHead = this->waitList.pHead;
while (pCurrentHead)
{
AU_LOCK_GUARD(pCurrentHead->mutex);
if (!callback(*pCurrentHead))
{
Unlock();
return false;
}
pCurrentHead = pCurrentHead->pNext;
}
Unlock();
}
return true;
}
template <typename T>
bool ProcessWaitContainer::IterateForceNoCreateDuringOp(T callback)
{
bool bRetStatus { true };
#if defined(WOA_ENABLE_OLD_SHORT_LIST)
for (AU_ITERATE_N(i, kDefaultWaitPerProcess))
{
auto &entry = this->entries[i];
{
DoSpinLockOnVar(&entry.uAtomic);
if (entry.pAddress)
{
AU_LOCK_GUARD(entry.mutex);
if (!callback(entry))
{
for (AU_ITERATE_N(z, i + 1))
{
this->entries[z].uAtomic = 0;
}
return false;
}
}
}
}
#endif
{
Lock();
auto pCurrentHead = this->waitList.pHead;
while (pCurrentHead)
{
AU_LOCK_GUARD(pCurrentHead->mutex);
if (!callback(*pCurrentHead))
{
bRetStatus = false;
break;
}
pCurrentHead = pCurrentHead->pNext;
}
Unlock();
}
#if defined(WOA_ENABLE_OLD_SHORT_LIST)
for (AU_ITERATE_N(i, kDefaultWaitPerProcess))
{
auto &entry = this->entries[i];
{
entry.uAtomic = 0;
}
}
#endif
return bRetStatus;
}
template <typename T>
bool ProcessWaitContainer::IterateWake(T callback)
{
bool bRetStatus { true };
Lock();
{
// FIFO
auto pCurrentHead = this->waitList.pTail;
while (pCurrentHead)
{
AU_LOCK_GUARD(pCurrentHead->mutex);
auto [bCont, bRemove] = callback(*pCurrentHead);
if (bRemove)
{
// unlink the head-side neighbour (pBefore points towards pHead)
if (pCurrentHead->pBefore)
{
pCurrentHead->pBefore->pNext = pCurrentHead->pNext;
}
if (this->waitList.pHead == pCurrentHead)
{
this->waitList.pHead = pCurrentHead->pNext;
}
if (pCurrentHead->pNext)
{
pCurrentHead->pNext->pBefore = pCurrentHead->pBefore;
}
if (this->waitList.pTail == pCurrentHead)
{
this->waitList.pTail = pCurrentHead->pBefore;
}
}
if (!bCont)
{
bRetStatus = false;
break;
}
pCurrentHead = pCurrentHead->pBefore;
}
}
Unlock();
// meh - just so i can experiment with changes
#if defined(WOA_ENABLE_OLD_SHORT_LIST)
for (AU_ITERATE_N(i, kDefaultWaitPerProcess))
{
auto &entry = this->entries[i];
{
DoSpinLockOnVar(&entry.uAtomic);
if (entry.pAddress)
{
AU_LOCK_GUARD(entry.mutex);
auto [bCont, bRemove] = callback(entry);
if (!bCont)
{
for (AU_ITERATE_N(z, i + 1))
{
this->entries[z].uAtomic = 0;
}
return false;
}
}
}
}
for (AU_ITERATE_N(i, kDefaultWaitPerProcess))
{
auto &entry = this->entries[i];
{
entry.uAtomic = 0;
}
}
#endif
return bRetStatus;
}
void ProcessWaitContainer::Lock()
{
DoSpinLockOnVar(&this->uAtomic);
}
void ProcessWaitContainer::Unlock()
{
this->uAtomic = 0;
}
void ProcessWaitContainer::Remove(WaitEntry *pParent)
{
Lock();
{
auto pCurrent = this->waitList.pTail;
while (pCurrent)
{
if (pCurrent == pParent)
{
// unlink from both neighbours and fix up the head/tail pointers
if (pCurrent->pBefore)
{
pCurrent->pBefore->pNext = pCurrent->pNext;
}
if (this->waitList.pHead == pCurrent)
{
this->waitList.pHead = pCurrent->pNext;
}
if (pCurrent->pNext)
{
pCurrent->pNext->pBefore = pCurrent->pBefore;
}
if (this->waitList.pTail == pCurrent)
{
this->waitList.pTail = pCurrent->pBefore;
}
break;
}
pCurrent = pCurrent->pBefore;
}
}
Unlock();
}
AUKN_SYM bool IsWaitOnRecommended()
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
return pWaitOnAddress &&
AuSwInfo::IsWindows8Point1OrGreater();
#elif defined(AURORA_PLATFORM_LINUX)
return true;
#endif
return false;
}
/// @deprecated
AUKN_SYM const AuList<AuUInt8> &GetValidWordSizes()
{
static const AuList<AuUInt8> kArray =
#if defined(AURORA_IS_MODERNNT_DERIVED)
{ 1, 2, 4, 8 };
#else
{ 4 };
#endif
return kArray;
}
bool WaitOnAddressWide(void *pTargetAddress,
void *pCompareAddress,
AuUInt8 uWordSize,
AuOptional<AuUInt64> qwNanoseconds,
AuOptional<AuUInt64> qwNanosecondsAbs,
bool bOSSupportsWait
)
{
WaitState state;
SysAssertDbg(uWordSize <= 8);
auto pWaitEntry = gProcessWaitables.WaitBufferFrom(pTargetAddress, uWordSize);
state.compare = WaitBuffer::From(pCompareAddress, uWordSize);
if (qwNanoseconds)
{
state.qwNanosecondsAbs = AuTime::SteadyClockNS() + qwNanoseconds.value();
}
else if (qwNanosecondsAbs)
{
state.qwNanosecondsAbs = qwNanosecondsAbs.value();
}
auto bResult = pWaitEntry->SleepOn(state);
#if defined(WOA_USE_DEFERRED_REL)
pWaitEntry->Release();
#endif
return bResult;
}
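// Emulated slow-path summary: the waiter claims its thread-local WaitEntry, links it into the
// global wait list, and parks on the entry's own condition variable; WakeNOnAddress and
// WakeAllOnAddress walk the same list and signal any entry whose registered range covers the
// woken address.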
AuTuple<const void *, AuUInt8, AuOptionalEx<AuUInt32>> DecodeAddress(const void *pAddress,
AuUInt32 uWordSize)
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
return AuMakeTuple(pAddress, 0, AuOptionalEx<AuUInt32> {});
#endif
if (uWordSize == 8)
{
// watch the low 32 bits of the 8-byte word in place; the big-endian +4 adjustment is applied in the futex paths
return AuMakeTuple(pAddress, 0, 0xFFFFFFFFu);
}
auto pRounded = AuPageRound(AuUInt(pAddress), AuUInt(4));
auto uDelta = (AuUInt)(pAddress) - (AuUInt)(pRounded);
AuUInt32 uSizeMask = AuUInt32((1ull << (uWordSize * 8ull)) - 1ull);
// the wait must target the aligned dword; the mask selects the caller's bytes within it
switch (uDelta)
{
case 0:
return AuMakeTuple(AuReinterpretCast<const void *>(pRounded), 0, 0xFFFFFFFF & (uSizeMask << 0));
case 1:
return AuMakeTuple(AuReinterpretCast<const void *>(pRounded), 1, 0xFFFFFF00 & (uSizeMask << 8));
case 2:
return AuMakeTuple(AuReinterpretCast<const void *>(pRounded), 2, 0xFFFF0000 & (uSizeMask << 16));
case 3:
return AuMakeTuple(AuReinterpretCast<const void *>(pRounded), 3, 0xFF000000 & (uSizeMask << 24));
default:
SysPanic("Invalid Branch");
}
}
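// Worked example (little-endian): a 2-byte wait on address 0x1002 rounds down to the aligned
// word 0x1000, giving uDelta = 2, uSizeMask = 0xFFFF and a returned mask of
// 0xFFFF0000 & (0xFFFF << 16) = 0xFFFF0000. The caller then waits on the aligned word and
// rebases the compare pointer by -uDelta so both buffers line up byte-for-byte.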
static bool RunOSWaitOnAddressNoTimed(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 dwWordSize)
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
return pWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, dwWordSize, INFINITE);
#endif
#if defined(AURORA_IS_LINUX_DERIVED)
int ret {};
#if defined(AU_CPU_ENDIAN_BIG)
if (dwWordSize == 8)
{
pTargetAddress = AuReinterpretCast<const char *>(pTargetAddress) + 4;
pCompareAddress = AuReinterpretCast<const char *>(pCompareAddress) + 4;
}
#endif
auto uCurrent = *(AuUInt32 *)pCompareAddress;
auto expect = WaitBuffer::From(pCompareAddress, dwWordSize);
do
{
ret = futex_wait((AuUInt32 *)pTargetAddress, uCurrent, nullptr);
if (ret == 0)
{
continue;
}
if (ret == EAGAIN || errno == EAGAIN)
{
continue;
}
if (ret == ETIMEDOUT || errno == ETIMEDOUT)
{
return false;
}
}
while (ret == EINTR);
return !expect.Compare(pTargetAddress);
#endif
return false;
}
static bool RunOSWaitOnAddressTimed(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 uAbsTimeSteadyClock,
AuUInt64 uRelativeNanoseconds,
AuOptional<AuUInt64> uAbsTimeAltClock /* hint */)
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
if (pRtlWaitOnAddress)
{
auto expect = WaitBuffer::From(pCompareAddress, uWordSize);
AuUInt64 uNow {};
while (uAbsTimeSteadyClock ?
(uAbsTimeSteadyClock > (uNow = AuTime::SteadyClockNS())) :
true)
{
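// NT timeout encoding: a negative QuadPart is a relative timeout in 100 ns units, a positive
// QuadPart is an absolute system time; hence the direct use of the converted alternate-clock
// timestamp and the -(delta / 100) fallback below.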
LARGE_INTEGER word {};
if (uAbsTimeAltClock)
{
word.QuadPart = AuTime::ConvertTimestampNs(uAbsTimeAltClock.value());
}
else if (uAbsTimeSteadyClock)
{
if (uAbsTimeSteadyClock <= uNow)
{
return !WaitBuffer::From(pCompareAddress, uWordSize).Compare(pTargetAddress);
}
word.QuadPart = -(AuInt64(uAbsTimeSteadyClock - uNow) / 100ull);
if (!word.QuadPart)
{
word.QuadPart = 1;
}
}
if (expect.Compare(pTargetAddress))
{
pRtlWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize, &word);
if (!expect.Compare(pTargetAddress))
{
return true;
}
else if (!uAbsTimeSteadyClock)
{
return false;
}
}
else
{
return true;
}
}
return false;
}
else
{
// ~~some paths might miss the uRelativeNanoseconds, like cas loops.~~
// most paths will now skimp on the relative values
if (uAbsTimeSteadyClock && !uRelativeNanoseconds)
{
AuInt64 iDelta = uAbsTimeSteadyClock;
iDelta -= AuTime::SteadyClockNS();
if (iDelta <= 0)
{
return !WaitBuffer::From(pCompareAddress, uWordSize).Compare(pTargetAddress);
}
uRelativeNanoseconds = iDelta;
}
// LockN(<1MS) on a platform without that resolution of yielding... damn
auto uMS = AuNSToMS<AuUInt32>(uRelativeNanoseconds);
if (!uMS)
{
// take a copy
auto expect = WaitBuffer::From(pCompareAddress, uWordSize);
// first: cpu spin to avoid the kernel altogether
if (TryWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, uWordSize))
{
return true;
}
// second: yield
do
{
if (!expect.Compare(pTargetAddress))
{
break;
}
AuThreading::ContextYield();
}
while (uAbsTimeSteadyClock > AuTime::SteadyClockNS()); // ...until times up
}
else // high level lock function was called with ms scale resolution
{
// first: wait on the address with an ms scale timeout
(void)pWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, uWordSize, uMS);
// never trust the error value/status returned by the wait-on-address call - instead, do a quick compare
if (!WaitBuffer::From(pCompareAddress, uWordSize).Compare(pTargetAddress))
{
// best case: we woke up during the ms-res waitonaddress
return true;
}
// attempt to yield again, potentially context switching a few times to hit any NS remainder
AuUInt64 uNow {};
unsigned uLimit {};
while (uAbsTimeSteadyClock > (uNow = AuTime::SteadyClockNS()))
{
uMS = AuNSToMS<AuUInt32>(uAbsTimeSteadyClock - uNow);
if (Primitives::DoTryIf([=]()
{
return !WaitBuffer::From(pCompareAddress, uWordSize).Compare(pTargetAddress);
}))
{
// hit it within the span of 1 << SpinLoopPowerA SMT stalls
return true;
}
if (!uMS)
{
// burn off any remainder cycles by switching contexts (this isn't a very long time, usually)
if (uLimit++ < 4)
{
AuThreading::ContextYield();
}
else
{
// do not burn the cpu to meet the timeout. we'll just undershoot.
return false;
}
}
else
{
(void)pWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, uWordSize, uMS);
}
}
}
}
#endif
#if defined(AURORA_IS_LINUX_DERIVED)
int ret {};
#if defined(AU_CPU_ENDIAN_BIG)
if (uWordSize == 8)
{
pTargetAddress = AuReinterpretCast<const char *>(pTargetAddress) + 4;
pCompareAddress = AuReinterpretCast<const char *>(pCompareAddress) + 4;
}
#endif
auto uCurrent = *(AuUInt32 *)pCompareAddress;
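// FUTEX_WAIT_BITSET interprets the timespec as an absolute CLOCK_MONOTONIC deadline, so the
// absolute steady-clock value is converted directly instead of computing a relative delta.
// (The alt-clock hint is only meaningful here if it shares that clock base.)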
struct timespec tspec;
Time::auabsns2ts(&tspec, uAbsTimeAltClock ? uAbsTimeAltClock.value() : uAbsTimeSteadyClock);
do
{
ret = futex_wait((AuUInt32 *)pTargetAddress, uCurrent, &tspec);
if (ret == 0)
{
continue;
}
if (ret == EAGAIN || errno == EAGAIN)
{
continue;
}
if (ret == ETIMEDOUT || errno == ETIMEDOUT)
{
return false;
}
}
while (ret == EINTR);
#endif
return !WaitBuffer::From(pCompareAddress, uWordSize).Compare(pTargetAddress);
}
static void RunOSWaitOnAddressNoTimedNoErrors(const void *pTargetAddress,
const void *pCompareAddress,
WaitState &state)
{
while (WaitBuffer::From(pTargetAddress, state.uWordSize).Compare(state))
{
if (!RunOSWaitOnAddressNoTimed(pTargetAddress, pCompareAddress, state.uWordSize))
{
AuThreading::ContextYield();
}
}
}
static bool RunOSWaitOnAddressTimedSteady(const void *pTargetAddress,
const void *pCompareAddress,
WaitState &state)
{
if (!WaitBuffer::From(pTargetAddress, state.uWordSize).Compare(state))
{
return true;
}
(void)RunOSWaitOnAddressTimed(pTargetAddress, pCompareAddress, state.uWordSize, state.qwNanosecondsAbs.value(), { }, { });
return !WaitBuffer::From(pTargetAddress, state.uWordSize).Compare(state);
}
static void RunOSWakeNOnAddress(const void *pAddress,
AuUInt32 dwCount)
{
#if defined(AURORA_IS_LINUX_DERIVED)
futex_wake((AuUInt32 *)pAddress, dwCount);
#endif
#if defined(AURORA_IS_MODERNNT_DERIVED)
for (AuUInt i = 0; i < dwCount; i++)
{
pWakeByAddressSingle((void *)pAddress);
}
#endif
}
static void RunOSWakeAllOnAddress(const void *pAddress)
{
#if defined(AURORA_IS_LINUX_DERIVED)
futex_wake((AuUInt32 *)pAddress, INT_MAX);
#endif
#if defined(AURORA_IS_MODERNNT_DERIVED)
pWakeByAddressAll((void *)pAddress);
#endif
}
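// On Windows there is no native "wake N" primitive, so RunOSWakeNOnAddress loops
// WakeByAddressSingle; on Linux a single FUTEX_WAKE with the requested count (or INT_MAX to
// wake everyone, as above) does the job in one syscall.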
// Windows 8+ thread primitives might use me instead of the public API
// it does work on Linux and Windows 8+
// it does not, however, work on emulated platforms
// this is intentional
bool InternalLTSWaitOnAddressHighRes(void *pTargetAddress,
void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanosecondsAbs)
{
auto [pWaitAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, uWordSize);
auto pCompareAddress2 = AuReinterpretCast<char *>(pCompareAddress) - uDelta;
WaitState state;
state.uDownsizeMask = uMask;
state.compare = uMask ?
WaitBuffer::From(pCompareAddress2, 4) :
WaitBuffer::From(pCompareAddress2, uWordSize);
state.uWordSize = uMask ? 4 : uWordSize;
if (!qwNanosecondsAbs)
{
RunOSWaitOnAddressNoTimedNoErrors(pWaitAddress, pCompareAddress2, state);
return true;
}
else
{
state.qwNanosecondsAbs = qwNanosecondsAbs;
return RunOSWaitOnAddressTimedSteady(pWaitAddress, pCompareAddress2, state);
}
}
AUKN_SYM bool WaitOnAddress(void *pTargetAddress,
void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds)
{
bool bWaitOnAddress = IsWaitOnRecommended();
if (bWaitOnAddress)
{
auto [pWaitAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, uWordSize);
auto pCompareAddress2 = AuReinterpretCast<char *>(pCompareAddress) - uDelta;
WaitState state;
state.uDownsizeMask = uMask;
state.compare = uMask ?
WaitBuffer::From(pCompareAddress2, 4) :
WaitBuffer::From(pCompareAddress2, uWordSize);
state.uWordSize = uMask ? 4 : uWordSize;
if (!qwNanoseconds)
{
RunOSWaitOnAddressNoTimedNoErrors(pWaitAddress, pCompareAddress2, state);
return true;
}
else
{
state.qwNanosecondsAbs = qwNanoseconds + AuTime::SteadyClockNS();
return RunOSWaitOnAddressTimedSteady(pWaitAddress, pCompareAddress2, state);
}
}
else
{
if (TryWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize))
{
return true;
}
return WaitOnAddressWide(pTargetAddress, pCompareAddress, uWordSize, qwNanoseconds, {}, false);
}
return false;
}
AUKN_SYM bool TryWaitOnAddress(void *pTargetAddress,
void *pCompareAddress,
AuUInt8 uWordSize)
{
return Primitives::DoTryIf([=]()
{
return !WaitBuffer::From(pCompareAddress, uWordSize).Compare(pTargetAddress);
});
}
AUKN_SYM void WakeNOnAddress(void *pTargetAddress,
AuUInt8 uNMaximumThreads)
{
if (IsWaitOnRecommended())
{
RunOSWakeNOnAddress(pTargetAddress, uNMaximumThreads);
}
else
{
#if defined(WOA_USE_DEFERRED_REL)
(void)gProcessWaitables.IterateForceNoCreateDuringOp([&](WaitEntry &entry) -> bool
#else
(void)gProcessWaitables.IterateWake([&](WaitEntry &entry) -> AuPair<bool, bool>
#endif
{
if (!uNMaximumThreads)
{
#if defined(WOA_USE_DEFERRED_REL)
return false;
#else
return AuMakePair(false, false);
#endif
}
bool bWake {};
if (entry.TryWakeNoLockNoReallyNoLock(pTargetAddress))
{
bWake = true;
uNMaximumThreads--;
}
bool bCont = uNMaximumThreads != 0;
#if defined(WOA_USE_DEFERRED_REL)
return bCont;
#else
return AuMakePair(bCont, bWake);
#endif
});
}
}
AUKN_SYM void WakeOnAddress(void *pTargetAddress)
{
WakeNOnAddress(pTargetAddress, 1);
}
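// Illustrative caller pattern (sketch only; real callers would publish the new value with the
// runtime's atomic helpers rather than a plain store):
//   AuUInt32 uValue { 0 };
//   AuUInt32 uUndesired { 0 };
//   // consumer: sleeps for as long as uValue still equals uUndesired
//   WaitOnAddress(&uValue, &uUndesired, sizeof(uValue), 0 /* no timeout */);
//   // producer:
//   uValue = 1;
//   WakeOnAddress(&uValue);   // or WakeAllOnAddress(&uValue) to release every waiter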
AUKN_SYM void WakeAllOnAddress(void *pTargetAddress)
{
if (IsWaitOnRecommended())
{
RunOSWakeAllOnAddress(pTargetAddress);
}
else
{
#if defined(WOA_USE_DEFERRED_REL)
(void)gProcessWaitables.IterateForceNoCreateDuringOp([&](WaitEntry &entry) -> bool
#else
(void)gProcessWaitables.IterateWake([&](WaitEntry &entry) -> AuPair<bool, bool>
#endif
{
#if defined(WOA_USE_DEFERRED_REL)
entry.TryWakeNoLockNoReallyNoLock(pTargetAddress);
return true;
#else
return AuMakePair(true, entry.TryWakeNoLockNoReallyNoLock(pTargetAddress));
#endif
});
}
}
AUKN_SYM bool WaitOnAddressSteady(void *pTargetAddress,
void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds)
{
bool bWaitOnAddress = IsWaitOnRecommended();
if (bWaitOnAddress)
{
auto [pWaitAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, uWordSize);
auto pCompareAddress2 = AuReinterpretCast<char *>(pCompareAddress) - uDelta;
WaitState state;
state.uDownsizeMask = uMask;
state.compare = uMask ?
WaitBuffer::From(pCompareAddress2, 4) :
WaitBuffer::From(pCompareAddress2, uWordSize);
state.uWordSize = uMask ? 4 : uWordSize;
if (!qwNanoseconds)
{
RunOSWaitOnAddressNoTimedNoErrors(pWaitAddress, pCompareAddress2, state);
return true;
}
else
{
state.qwNanosecondsAbs = qwNanoseconds;
return RunOSWaitOnAddressTimedSteady(pWaitAddress, pCompareAddress2, state);
}
}
else
{
if (TryWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize))
{
return true;
}
return WaitOnAddressWide(pTargetAddress, pCompareAddress, uWordSize, {}, qwNanoseconds, false);
}
return false;
}
// Future (Reece): AuThread aware (safe force-terminate)
// There are three ways we can go about this:
// Shared pointers
// Shared pointers such that we don't need to remove the raw pointer optimization
// Callback on thread death
//
// 1st would increase overhead for a case I don't want to condone
// 2nd would work but would probably require a callback on death
// 3rd would work.
//
// to be addressed later
}