AuroraRuntime/Source/Threading/AuWakeOnAddress.cpp

1582 lines
56 KiB
C++
Raw Normal View History

/***
Copyright (C) 2023 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: AuWakeOnAddress.cpp
Date: 2023-3-10
Author: Reece
***/
#if defined(AURORA_COMPILER_MSVC)
#pragma strict_gs_check(off)
#pragma check_stack(off)
#endif
#include <Source/RuntimeInternal.hpp>
#include "AuWakeOnAddress.hpp"
#include "Primitives/SMTYield.hpp"
#include <Time/Time.hpp>
#define HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD
2023-12-21 15:54:05 +00:00
// WOA_ALWAYS_DUMB_OS_TARGET -> iOS, notarized MacOS, Win9x, Xbox 360, etc
2024-11-21 17:17:09 +00:00
// This will tank the average case:
// > fast paths will be disabled,
// > lock barriers turn into full lock guards,
// > and every sleep will require a condvar or semaphore wakeup.
// Every perf trick will be bypassed.
// Update: partially removed. its not ready yet
2024-11-21 17:17:09 +00:00
//#define WOA_STRICTER_FIFO
namespace Aurora::Threading
{
#if defined(HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD)
static thread_local AuSPtr<WaitEntry> tlsWaitEntry = AuMakeSharedPanic<WaitEntry>();
#else
static thread_local WaitEntry tlsWaitEntry;
#endif
#define DO_OF_METHOD_TYPE(preface, DoOfMethodType, ...) \
switch (eMethod) \
{ \
case EWaitMethod::eNotEqual: \
preface DoOfMethodType<EWaitMethod::eNotEqual>(__VA_ARGS__); \
break; \
case EWaitMethod::eEqual: \
preface DoOfMethodType<EWaitMethod::eEqual>(__VA_ARGS__); \
break; \
case EWaitMethod::eGreaterThanCompare: \
preface DoOfMethodType<EWaitMethod::eGreaterThanCompare>(__VA_ARGS__); \
break; \
case EWaitMethod::eGreaterThanOrEqualsCompare: \
preface DoOfMethodType<EWaitMethod::eGreaterThanOrEqualsCompare>(__VA_ARGS__); \
break; \
case EWaitMethod::eLessThanCompare: \
preface DoOfMethodType<EWaitMethod::eLessThanCompare>(__VA_ARGS__); \
break; \
case EWaitMethod::eLessThanOrEqualsCompare: \
preface DoOfMethodType<EWaitMethod::eLessThanOrEqualsCompare>(__VA_ARGS__); \
break; \
case EWaitMethod::eAnd: \
preface DoOfMethodType<EWaitMethod::eAnd>(__VA_ARGS__); \
break; \
case EWaitMethod::eNotAnd: \
preface DoOfMethodType<EWaitMethod::eNotAnd>(__VA_ARGS__); \
break; \
}
static const int gShouldSpinOnlyInCPU = 1; // TODO: havent decided
// UPDATE: 1 paranoia just in case we get preempted (rare).
template<typename T>
static void DoSpinLockOnVar(T *uPointer)
{
if (gShouldSpinOnlyInCPU == 0)
{
while (!Primitives::DoTryIfAlderLake([&]()
{
return AuAtomicTestAndSet(uPointer, 0) == 0;
}, uPointer))
{
}
}
else if (gShouldSpinOnlyInCPU == 1)
{
while (!Primitives::DoTryIfAlderLake([&]()
{
return AuAtomicTestAndSet(uPointer, 0) == 0;
}, uPointer))
{
ContextYield();
}
}
else if (gShouldSpinOnlyInCPU == 2)
{
while (AuAtomicTestAndSet(uPointer, 0))
{
while (*uPointer)
{
ContextYield();
}
}
}
else
{
SysUnreachable();
}
}
void WaitEntry::Release()
{
AuResetMember(this->uSize);
AuResetMember(this->pAddress);
}
2023-08-21 16:34:24 +00:00
WaitEntry::WaitEntry()
{
}
WaitEntry::~WaitEntry()
{
this->Release();
}
template <EWaitMethod eMethod>
bool WaitEntry::SleepOn(WaitState &state)
{
2023-12-10 19:25:31 +00:00
#if !defined(WOA_SEMAPHORE_MODE)
AU_LOCK_GUARD(this->mutex);
2023-12-10 19:25:31 +00:00
#endif
if (state.qwNanosecondsAbs)
{
if (!WaitBuffer::Compare2<eMethod, true>(this->pAddress, this->uSize, state.compare.buffer, state.uDownsizeMask))
{
return true;
}
auto uNow = AuTime::SteadyClockNS();
auto uEndTime = state.qwNanosecondsAbs.value();
while (uNow < uEndTime)
{
if (!WaitBuffer::Compare2<eMethod, true>(this->pAddress, this->uSize, state.compare.buffer, state.uDownsizeMask))
{
return true;
}
#if defined(AURORA_PLATFORM_WIN32)
Win32DropSchedulerResolution();
#endif
// potentially release/acquire-less by virtue of the lockless semaphore mode
if (!AuAtomicLoad(&this->bAlive))
2023-12-10 18:21:35 +00:00
{
#if !defined(WOA_SEMAPHORE_MODE)
this->mutex.Unlock();
#endif
(void)gProcessWaitables.WaitBufferFrom(this->pAddress, this->uSize, false, state.pCompare2, eMethod);
#if !defined(WOA_SEMAPHORE_MODE)
this->mutex.Lock();
#endif
2024-11-21 17:17:09 +00:00
#if defined(WOA_STRICTER_FIFO)
if (!WaitBuffer::Compare2<eMethod, true>(this->pAddress, this->uSize, state.compare.buffer, state.uDownsizeMask))
{
return true;
}
#endif
continue;
2023-12-10 18:21:35 +00:00
}
2024-11-21 17:17:09 +00:00
2023-12-10 18:21:35 +00:00
{
2023-12-10 19:25:31 +00:00
#if defined(WOA_SEMAPHORE_MODE)
this->semaphore->LockAbsNS(uEndTime);
#else
auto uTimeRemNS = uEndTime - uNow;
this->variable.WaitForSignalNsEx(&this->mutex, uTimeRemNS, false);
2023-12-10 19:25:31 +00:00
#endif
2023-12-10 18:21:35 +00:00
}
uNow = AuTime::SteadyClockNS();
}
return !WaitBuffer::Compare2<eMethod, true>(this->pAddress, this->uSize, state.compare.buffer, state.uDownsizeMask);
}
else
{
while (WaitBuffer::Compare2<eMethod, true>(this->pAddress, this->uSize, state.compare.buffer, state.uDownsizeMask))
{
if (!AuAtomicLoad(&this->bAlive))
2023-12-10 18:21:35 +00:00
{
#if !defined(WOA_SEMAPHORE_MODE)
this->mutex.Unlock();
#endif
(void)gProcessWaitables.WaitBufferFrom(this->pAddress, this->uSize, false, state.pCompare2, eMethod);
#if !defined(WOA_SEMAPHORE_MODE)
this->mutex.Lock();
#endif
2024-11-21 17:17:09 +00:00
continue;
2023-12-10 18:21:35 +00:00
}
2024-11-21 17:17:09 +00:00
2023-12-10 18:21:35 +00:00
{
2023-12-10 19:25:31 +00:00
#if defined(WOA_SEMAPHORE_MODE)
this->semaphore->Lock();
#else
this->variable.WaitForSignalNsEx(&this->mutex, 0, false);
2023-12-10 19:25:31 +00:00
#endif
2023-12-10 18:21:35 +00:00
}
}
return true;
}
return false;
}
bool WaitEntry::TrySignalAddress(const void *pAddress)
{
if (this->pAddress != pAddress)
{
return false;
}
if (this->pCompareAddress)
{
if (WaitBuffer::Compare(pAddress, this->uSize, this->pCompareAddress, kMax64, this->eWaitMethod))
{
return false;
}
}
2023-12-10 19:25:31 +00:00
#if defined(WOA_SEMAPHORE_MODE)
this->semaphore->Unlock(1);
#else
this->variable.Signal();
2023-12-10 19:25:31 +00:00
#endif
return true;
}
WaitBuffer WaitBuffer::From(const void *pBuf, AuUInt8 uSize)
{
WaitBuffer wait;
AuMemcpy(wait.buffer, pBuf, uSize);
wait.uSize = uSize;
return AuMove(wait);
}
bool WaitBuffer::Compare(const void *pHotAddress, AuUInt8 uSize, WaitState &state)
{
auto eMethod = state.eWaitMethod;
return WaitBuffer::Compare(pHotAddress, uSize, state.compare.buffer, state.uDownsizeMask, eMethod);
}
bool WaitBuffer::Compare(const void *pHotAddress, AuUInt8 uSize, const void *pCompare, AuUInt64 uMask, EWaitMethod eMethod)
{
bool bRet {};
2024-03-21 00:28:50 +00:00
AURORA_COMPILER_VOLATILE_BARRIER();
#if 0
switch (eMethod)
{
case EWaitMethod::eEqual:
case EWaitMethod::eNotEqual:
{
auto &uSrcWord = *AuReinterpretCast<const AuUInt32 *>(pHotAddress);
auto &uCmpWord = *AuReinterpretCast<const AuUInt32 *>(pCompare);
bRet = (uSrcWord & uMask) == (uCmpWord & uMask);
bRet ^= bool(eMethod == EWaitMethod::eEqual);
break;
};
default:
{
DO_OF_METHOD_TYPE(return, Compare2, pHotAddress, uSize, pCompare)
}
}
#else
DO_OF_METHOD_TYPE(return, Compare2, pHotAddress, uSize, pCompare)
#endif
return bRet;
}
template <EWaitMethod eMethod, bool bFast>
bool WaitBuffer::Compare2(const void *pHot, AuUInt8 uSize, const void *pBuf2, AuUInt64 uMask)
{
return Compare2<eMethod, bFast>((const volatile void *)pHot, uSize, pBuf2, uMask);
}
template <EWaitMethod eMethod, bool bFast>
bool WaitBuffer::Compare2(const volatile void *pHot, AuUInt8 uSize, const void *pBuf2, AuUInt64 uMask)
{
#if !defined(AURORA_COMPILER_CLANG) && !defined(AURORA_COMPILER_MSVC)
2024-03-21 00:28:50 +00:00
AURORA_COMPILER_VOLATILE_BARRIER();
#endif
if constexpr (!bFast)
2023-11-29 05:34:57 +00:00
{
if constexpr (eMethod == EWaitMethod::eNotEqual)
{
switch (uSize)
{
case 1:
return (AuReadU8(pHot, 0) & uMask) == (AuReadU8(pBuf2, 0) & uMask);
case 2:
return (AuReadU16(pHot, 0) & uMask) == (AuReadU16(pBuf2, 0) & uMask);
case 4:
return (AuReadU32(pHot, 0) & uMask) == (AuReadU32(pBuf2, 0) & uMask);
case 8:
return (AuReadU64(pHot, 0) & uMask) == (AuReadU64(pBuf2, 0) & uMask);
default:
return (AuMemcmp((const void *)pHot, pBuf2, uSize) == 0);
}
}
if constexpr (eMethod == EWaitMethod::eEqual)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0) & uMask) == (AuReadU8(pBuf2, 0) & uMask));
case 2:
return !((AuReadU16(pHot, 0) & uMask) == (AuReadU16(pBuf2, 0) & uMask));
case 4:
return !((AuReadU32(pHot, 0) & uMask) == (AuReadU32(pBuf2, 0) & uMask));
case 8:
return !((AuReadU64(pHot, 0) & uMask) == (AuReadU64(pBuf2, 0) & uMask));
default:
return !(AuMemcmp((const void *)pHot, pBuf2, uSize) == 0);
}
}
if constexpr (eMethod == EWaitMethod::eGreaterThanCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0) & uMask) > (AuReadU8(pBuf2, 0) & uMask));
case 2:
return !((AuReadU16(pHot, 0) & uMask) > (AuReadU16(pBuf2, 0) & uMask));
case 4:
return !((AuReadU32(pHot, 0) & uMask) > (AuReadU32(pBuf2, 0) & uMask));
case 8:
return !((AuReadU64(pHot, 0) & uMask) > (AuReadU64(pBuf2, 0) & uMask));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eGreaterThanOrEqualsCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0) & uMask) >= (AuReadU8(pBuf2, 0) & uMask));
case 2:
return !((AuReadU16(pHot, 0) & uMask) >= (AuReadU16(pBuf2, 0) & uMask));
case 4:
return !((AuReadU32(pHot, 0) & uMask) >= (AuReadU32(pBuf2, 0) & uMask));
case 8:
return !((AuReadU64(pHot, 0) & uMask) >= (AuReadU64(pBuf2, 0) & uMask));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eLessThanCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0) & uMask) < (AuReadU8(pBuf2, 0) & uMask));
case 2:
return !((AuReadU16(pHot, 0) & uMask) < (AuReadU16(pBuf2, 0) & uMask));
case 4:
return !((AuReadU32(pHot, 0) & uMask) < (AuReadU32(pBuf2, 0) & uMask));
case 8:
return !((AuReadU64(pHot, 0) & uMask) < (AuReadU64(pBuf2, 0) & uMask));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eLessThanOrEqualsCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0) & uMask) <= (AuReadU8(pBuf2, 0) & uMask));
case 2:
return !((AuReadU16(pHot, 0) & uMask) <= (AuReadU16(pBuf2, 0) & uMask));
case 4:
return !((AuReadU32(pHot, 0) & uMask) <= (AuReadU32(pBuf2, 0) & uMask));
case 8:
return !((AuReadU64(pHot, 0) & uMask) <= (AuReadU64(pBuf2, 0) & uMask));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eAnd)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0) & uMask) & (AuReadU8(pBuf2, 0) & uMask));
case 2:
return !((AuReadU16(pHot, 0) & uMask) & (AuReadU16(pBuf2, 0) & uMask));
case 4:
return !((AuReadU32(pHot, 0) & uMask) & (AuReadU32(pBuf2, 0) & uMask));
case 8:
return !((AuReadU64(pHot, 0) & uMask) & (AuReadU64(pBuf2, 0) & uMask));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eNotAnd)
{
switch (uSize)
{
case 1:
return ((AuReadU8(pHot, 0) & uMask) & (AuReadU8(pBuf2, 0) & uMask));
case 2:
return ((AuReadU16(pHot, 0) & uMask) & (AuReadU16(pBuf2, 0) & uMask));
case 4:
return ((AuReadU32(pHot, 0) & uMask) & (AuReadU32(pBuf2, 0) & uMask));
case 8:
return ((AuReadU64(pHot, 0) & uMask) & (AuReadU64(pBuf2, 0) & uMask));
default:
return false;
}
}
}
else
{
if constexpr (eMethod == EWaitMethod::eNotEqual)
{
switch (uSize)
{
case 1:
return (AuReadU8(pHot, 0)) == (AuReadU8(pBuf2, 0));
case 2:
return (AuReadU16(pHot, 0)) == (AuReadU16(pBuf2, 0));
case 4:
return (AuReadU32(pHot, 0)) == (AuReadU32(pBuf2, 0));
case 8:
return (AuReadU64(pHot, 0)) == (AuReadU64(pBuf2, 0));
default:
return (AuMemcmp((const void *)pHot, pBuf2, uSize) == 0);
}
}
if constexpr (eMethod == EWaitMethod::eEqual)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0)) == (AuReadU8(pBuf2, 0)));
case 2:
return !((AuReadU16(pHot, 0)) == (AuReadU16(pBuf2, 0)));
case 4:
return !((AuReadU32(pHot, 0)) == (AuReadU32(pBuf2, 0)));
case 8:
return !((AuReadU64(pHot, 0)) == (AuReadU64(pBuf2, 0)));
default:
return !(AuMemcmp((const void *)pHot, pBuf2, uSize) == 0);
}
}
if constexpr (eMethod == EWaitMethod::eGreaterThanCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0)) > (AuReadU8(pBuf2, 0)));
case 2:
return !((AuReadU16(pHot, 0)) > (AuReadU16(pBuf2, 0)));
case 4:
return !((AuReadU32(pHot, 0)) > (AuReadU32(pBuf2, 0)));
case 8:
return !((AuReadU64(pHot, 0)) > (AuReadU64(pBuf2, 0)));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eGreaterThanOrEqualsCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0)) >= (AuReadU8(pBuf2, 0)));
case 2:
return !((AuReadU16(pHot, 0)) >= (AuReadU16(pBuf2, 0)));
case 4:
return !((AuReadU32(pHot, 0)) >= (AuReadU32(pBuf2, 0)));
case 8:
return !((AuReadU64(pHot, 0)) >= (AuReadU64(pBuf2, 0)));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eLessThanCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0)) < (AuReadU8(pBuf2, 0)));
case 2:
return !((AuReadU16(pHot, 0)) < (AuReadU16(pBuf2, 0)));
case 4:
return !((AuReadU32(pHot, 0)) < (AuReadU32(pBuf2, 0)));
case 8:
return !((AuReadU64(pHot, 0)) < (AuReadU64(pBuf2, 0)));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eLessThanOrEqualsCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0)) <= (AuReadU8(pBuf2, 0)));
case 2:
return !((AuReadU16(pHot, 0)) <= (AuReadU16(pBuf2, 0)));
case 4:
return !((AuReadU32(pHot, 0)) <= (AuReadU32(pBuf2, 0)));
case 8:
return !((AuReadU64(pHot, 0)) <= (AuReadU64(pBuf2, 0)));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eAnd)
{
switch (uSize)
{
case 1:
return !(AuReadU8(pHot, 0) & AuReadU8(pBuf2, 0));
case 2:
return !(AuReadU16(pHot, 0) & AuReadU16(pBuf2, 0));
case 4:
return !(AuReadU32(pHot, 0) & AuReadU32(pBuf2, 0));
case 8:
return !(AuReadU64(pHot, 0) & AuReadU64(pBuf2, 0));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eNotAnd)
{
switch (uSize)
{
case 1:
return (AuReadU8(pHot, 0) & AuReadU8(pBuf2, 0));
case 2:
return (AuReadU16(pHot, 0) & AuReadU16(pBuf2, 0));
case 4:
return (AuReadU32(pHot, 0) & AuReadU32(pBuf2, 0));
case 8:
return (AuReadU64(pHot, 0) & AuReadU64(pBuf2, 0));
default:
return false;
}
}
}
return false;
}
WaitEntry *ProcessWaitNodeContainer::WaitBufferFrom(const void *pAddress, AuUInt8 uSize, bool bScheduleFirst, const void *pCompareAddress, EWaitMethod eWaitMethod)
{
#if defined(HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD)
auto pReturn = tlsWaitEntry.get();
#else
auto pReturn = &tlsWaitEntry;
#endif
pReturn->pAddress = pAddress;
pReturn->uSize = uSize;
pReturn->pCompareAddress = pCompareAddress;
pReturn->eWaitMethod = eWaitMethod;
2023-12-11 18:22:38 +00:00
if (bScheduleFirst /*First in, First Out*/)
{
Lock();
2023-12-11 20:29:45 +00:00
if (!pReturn->bAlive)
{
2023-12-11 20:29:45 +00:00
pReturn->bAlive = true;
2023-12-11 18:22:38 +00:00
if (auto pLoadFromMemory = this->waitList.pHead)
{
pLoadFromMemory->pBefore = pReturn;
pReturn->pNext = pLoadFromMemory;
}
else
{
this->waitList.pTail = pReturn;
}
this->waitList.pHead = pReturn;
}
2023-12-11 18:22:38 +00:00
Unlock();
}
else /*Last In, First Out*/
{
Lock();
if (!pReturn->bAlive)
{
2023-12-11 20:29:45 +00:00
pReturn->bAlive = true;
2023-12-11 18:22:38 +00:00
if (auto pLoadFromMemory = this->waitList.pTail)
{
pLoadFromMemory->pNext = pReturn;
pReturn->pBefore = pLoadFromMemory;
}
else
{
this->waitList.pHead = pReturn;
}
this->waitList.pTail = pReturn;
}
Unlock();
}
return pReturn;
}
template <typename T>
bool ProcessWaitNodeContainer::IterateWake(T callback)
{
bool bRetStatus { true };
if (AuAtomicLoad((AuUInt *)&this->waitList.pTail) == 0)
{
return true;
}
Lock();
{
// FIFO
auto pCurrentHead = this->waitList.pTail;
while (pCurrentHead)
{
2023-12-21 15:54:05 +00:00
decltype(pCurrentHead) pBefore {};
#if !defined(WOA_SEMAPHORE_MODE) && defined(WOA_STRICTER_FIFO)
// do { ... | bAlive consumer | } while (...) lock guard
AU_LOCK_GUARD(pCurrentHead->mutex);
#elif !defined(WOA_SEMAPHORE_MODE) && !defined(WOA_STRICTER_FIFO)
// Condvar wait-list insertion barrier required for binary-semaphore-like conditions.
// We only need to lock against the { lock() if (should Sleep) { *** waitList++; *** unlock(); yield(); lock() } else { ... } unlocks() } condvar pattern.
// I often only care about the order of ->Signal() after the CondVar::waitList++ to ensure one state change is paired with one waitList signal.
// Don't block during the wakeup check, we only care about observing the condvar or semaphore waitlist, no codeguarding mutex required.
2023-12-05 07:15:54 +00:00
{
AU_LOCK_GUARD(pCurrentHead->mutex);
}
2023-12-10 19:25:31 +00:00
#endif
AuAtomicStore<AuUInt8>(&pCurrentHead->bAlive, 0);
auto [bCont, bRemove] = callback(*pCurrentHead);
2023-12-21 15:54:05 +00:00
pBefore = pCurrentHead->pBefore;
if (bRemove)
{
this->RemoveEntry<true>(pCurrentHead);
}
else
{
AuAtomicStore<AuUInt8>(&pCurrentHead->bAlive, 1);
}
if (!bCont)
{
bRetStatus = false;
break;
}
2023-12-21 15:54:05 +00:00
if (pBefore == pCurrentHead)
{
break;
}
2023-12-21 15:54:05 +00:00
pCurrentHead = pBefore;
}
}
Unlock();
return bRetStatus;
}
template <bool bAllUnderLock>
void ProcessWaitNodeContainer::RemoveEntry(WaitEntry *pEntry)
{
2023-12-21 15:54:05 +00:00
if (this->waitList.pHead == pEntry)
{
2023-12-21 15:54:05 +00:00
this->waitList.pHead = pEntry->pNext;
}
2023-12-21 15:54:05 +00:00
if (this->waitList.pTail == pEntry)
{
this->waitList.pTail = pEntry->pBefore;
}
if (auto pBefore = pEntry->pBefore)
2023-12-21 15:54:05 +00:00
{
pBefore->pNext = pEntry->pNext;
2023-12-21 15:54:05 +00:00
}
if (auto pNext = pEntry->pNext)
2023-12-21 15:54:05 +00:00
{
pNext->pBefore = pEntry->pBefore;
2023-12-21 15:54:05 +00:00
}
2023-12-21 15:54:05 +00:00
if (bAllUnderLock)
{
pEntry->pBefore = nullptr;
pEntry->pNext = nullptr;
//pEntry->bAlive = false; - redundant
2023-12-21 15:54:05 +00:00
}
}
2023-12-21 15:54:05 +00:00
void ProcessWaitNodeContainer::RemoveSelf(WaitEntry *pSelf)
{
{
this->Lock();
this->RemoveEntry<false>(pSelf);
2023-12-21 15:54:05 +00:00
this->Unlock();
}
2023-12-21 15:54:05 +00:00
pSelf->pBefore = nullptr;
pSelf->pNext = nullptr;
pSelf->bAlive = false;
}
void ProcessWaitNodeContainer::Lock()
{
DoSpinLockOnVar(&this->uAtomic);
}
void ProcessWaitNodeContainer::Unlock()
{
AuAtomicClearU8Lock(&this->uAtomic);
}
#define AddressToIndex AuHashCode(pAddress) & (AuArraySize(this->list) - 1)
WaitEntry *ProcessWaitContainer::WaitBufferFrom(const void *pAddress, AuUInt8 uSize, bool bScheduleFirst, const void *pCompareAddress, EWaitMethod eWaitMethod)
{
return this->list[AddressToIndex].WaitBufferFrom(pAddress, uSize, bScheduleFirst, pCompareAddress, eWaitMethod);
}
template <typename T>
bool ProcessWaitContainer::IterateWake(const void *pAddress, T callback)
{
return this->list[AddressToIndex].IterateWake(callback);
}
void ProcessWaitContainer::RemoveSelf(const void *pAddress, WaitEntry *pSelf)
{
return this->list[AddressToIndex].RemoveSelf(pSelf);
}
bool IsNativeWaitOnSupported()
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
return pWaitOnAddress &&
AuSwInfo::IsWindows8Point1OrGreater();
#elif defined(AURORA_PLATFORM_LINUX)
return true;
#else
return SysNativeWaitOnAddressFutexSupported();
#endif
}
2024-03-12 23:32:32 +00:00
AUKN_SYM bool IsWaitOnRecommended()
{
2023-12-21 15:54:05 +00:00
#if defined(WOA_ALWAYS_DUMB_OS_TARGET)
return false;
#endif
2024-11-21 17:17:09 +00:00
#if defined(WOA_STRICTER_FIFO)
return false;
#endif
2023-08-19 17:49:16 +00:00
static AuOptionalEx<bool> gIsWaitOnRecommendedCache {};
if (gIsWaitOnRecommendedCache)
{
return gIsWaitOnRecommendedCache.value();
}
if (Primitives::ThrdCfg::gPreferEmulatedWakeOnAddress)
{
return false;
}
bool bState = IsNativeWaitOnSupported();
2023-08-19 17:49:16 +00:00
gIsWaitOnRecommendedCache = bState;
return bState;
}
/// @deprecated
AUKN_SYM const AuList<AuUInt8> &GetValidWordSizes()
{
static const AuList<AuUInt8> kArray =
#if defined(AURORA_IS_MODERNNT_DERIVED)
{ 1, 2, 4, 8 };
#else
{ 4 };
#endif
return kArray;
}
template <EWaitMethod T>
bool WaitOnAddressWide(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuOptional<AuUInt64> qwNanoseconds,
AuOptional<AuUInt64> qwNanosecondsAbs,
bool bOSSupportsWait,
const void *pCompareAddress2)
{
WaitState state;
SysAssertDbg(uWordSize <= 32);
auto pWaitEntry = gProcessWaitables.WaitBufferFrom(pTargetAddress, uWordSize, true, pCompareAddress2, T);
// Unlocked update to a safer comparison address; hardens against bad code
{
state.compare = WaitBuffer::From(pCompareAddress, uWordSize);
// Replace from pCompareAddress2 to our own memory to harden against bad volatile comparison pointers
pWaitEntry->pCompareAddress = state.pCompare2 =
pCompareAddress2 ? state.compare.buffer : nullptr;
}
if (qwNanoseconds)
{
state.qwNanosecondsAbs = AuTime::SteadyClockNS() + qwNanoseconds.value();
}
else if (qwNanosecondsAbs)
{
state.qwNanosecondsAbs = qwNanosecondsAbs.value();
}
#if defined(HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD)
auto pTempHoldMe = tlsWaitEntry;
#endif
auto bResult = pWaitEntry->SleepOn<T>(state);
#if defined(HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD)
pTempHoldMe.reset();
#endif
if (!bResult)
{
gProcessWaitables.RemoveSelf(pTargetAddress, pWaitEntry);
}
return bResult;
}
AuTuple<const void *, AuUInt8, AuUInt64> DecodeAddress(const void *pAddress,
AuUInt32 uWordSize)
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
return AuMakeTuple(pAddress, 0, kMax64);
#endif
auto pRounded = AuPageRound(AuUInt(pAddress), AuUInt(4));
auto uDelta = (AuUInt)pAddress - pRounded;
if (uWordSize == 8)
{
return AuMakeTuple((const void *)pRounded, uDelta, kMax64);
}
AuUInt32 uSizeMask = (1ull << (uWordSize * 8)) - 1ull;
switch (uDelta)
{
case 0:
return AuMakeTuple(pAddress, 0, 0xFFFFFFFF & (uSizeMask << 0));
case 1:
return AuMakeTuple(pAddress, 1, 0xFFFFFF00 & (uSizeMask << 8));
case 2:
return AuMakeTuple(pAddress, 2, 0xFFFF0000 & (uSizeMask << 16));
case 3:
return AuMakeTuple(pAddress, 3, 0xFF000000 & (uSizeMask << 24));
default:
SysPanic("Invalid Branch");
}
}
static bool RunOSWaitOnAddressTimed(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 uAbsTimeSteadyClock,
AuUInt64 uRelativeNanoseconds,
AuOptional<AuUInt64> uAbsTimeAltClock /* hint */,
bool bSpun = false)
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
if (pRtlWaitOnAddress)
{
AuUInt64 uNow {};
while (uAbsTimeSteadyClock ?
(uAbsTimeSteadyClock > (uNow = AuTime::SteadyClockNS())) :
true)
{
LARGE_INTEGER word {};
if (uAbsTimeAltClock)
{
word.QuadPart = AuTime::ConvertTimestampNs(uAbsTimeAltClock.value());
}
else if (uAbsTimeSteadyClock)
{
if (uAbsTimeSteadyClock <= uNow)
{
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress);
}
word.QuadPart = -(AuInt64(uAbsTimeSteadyClock - uNow) / 100ull);
if (!word.QuadPart)
{
word.QuadPart = 1;
}
}
if (WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress))
{
if (pRtlWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize, &word))
{
return true;
}
else if (!uAbsTimeSteadyClock)
{
return false;
}
}
else
{
return true;
}
}
return false;
}
else
{
// ~~some paths might miss the uRelativeNanoseconds, like cas loops.~~
// most paths will now skimp on the relative values
if (uAbsTimeSteadyClock && !uRelativeNanoseconds)
{
AuInt64 iDelta = uAbsTimeSteadyClock;
iDelta -= AuTime::SteadyClockNS();
if (iDelta <= 0)
{
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress);
}
uRelativeNanoseconds = iDelta;
}
auto uMaxSwitches = gRuntimeConfig.threadingConfig.uUWPNanosecondEmulationMaxYields;
auto bUWPNanosecondEmulationCheckFirst = Primitives::ThrdCfg::gUWPNanosecondEmulationCheckFirst;
// LockN(<1MS) on a platform without that resolution of yielding... damn
auto uMS = AuNSToMS<AuUInt32>(uRelativeNanoseconds);
if (!uMS)
{
// first: cpu spin to avoid the kernel all together
if (!bSpun)
{
if (TryWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, uWordSize))
{
return true;
}
}
// second: yield
unsigned uLimit {};
do
{
if (!WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress))
{
break;
}
AuThreading::ContextYield();
if (bUWPNanosecondEmulationCheckFirst)
{
if (uLimit++ > uMaxSwitches)
{
break;
}
}
}
while (uAbsTimeSteadyClock > AuTime::SteadyClockNS()); // ...until times up
}
else // high level lock function was called with ms scale resolution
{
// first: wait on the address with an ms scale timeout
(void)pWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, uWordSize, uMS);
// take a copy
auto expect = WaitBuffer::From(pCompareAddress, uWordSize);
// never trust the error value/status provided by wait addresses - instead, do a quick compare
if (!WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress))
{
// best case: we woke up during the ms-res waitonaddress
return true;
}
// attempt to yield again, potentially context switching a few times to hit any NS remainder
AuUInt64 uNow {};
unsigned uLimit {};
while (uAbsTimeSteadyClock > (uNow = AuTime::SteadyClockNS()))
{
uMS = AuNSToMS<AuUInt32>(uAbsTimeSteadyClock - uNow);
if (Primitives::DoTryIfAlderLake([&]()
{
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress);
}, pTargetAddress))
{
// hit it within the span of 1 << SpinLoopPowerA SMT stalls
return true;
}
if (!uMS)
{
// burn off any remainder cycles by switching contexts (this isnt a very long time usually)
if (uLimit++ < uMaxSwitches)
{
AuThreading::ContextYield();
}
else
{
// do not burn the cpu to meet the timeout. we'll just undershoot.
return false;
}
}
else
{
(void)pWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, uWordSize, uMS);
}
}
}
}
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress);
#else
return SysWaitOnAddressTimed(pTargetAddress,
pCompareAddress,
uWordSize,
uAbsTimeSteadyClock,
uRelativeNanoseconds,
uAbsTimeAltClock,
bSpun);
#endif
}
static void RunOSWaitOnAddressNoTimedNoErrors(const void *pTargetAddress,
const void *pCompareAddress,
WaitState &state)
{
while (WaitBuffer::Compare2<EWaitMethod::eNotEqual, kPlatformFutexNoForcedAlignedU32>(pTargetAddress, state.uWordSize, pCompareAddress, state.uDownsizeMask))
{
if (!SysWaitOnAddressNoTimed(pTargetAddress, pCompareAddress, state.uWordSize))
{
//AuThreading::ContextYield();
}
}
}
static bool RunOSWaitOnAddressTimedSteady(const void *pTargetAddress,
const void *pCompareAddress,
WaitState &state,
bool bSpun = false)
{
#if 1
if (!WaitBuffer::Compare2<EWaitMethod::eNotEqual, kPlatformFutexNoForcedAlignedU32>(pTargetAddress, state.uWordSize, pCompareAddress, state.uDownsizeMask))
{
return true;
}
(void)RunOSWaitOnAddressTimed(pTargetAddress, pCompareAddress, state.uWordSize, state.qwNanosecondsAbs.value(), { }, { }, bSpun);
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, kPlatformFutexNoForcedAlignedU32>(pTargetAddress, state.uWordSize, pCompareAddress, state.uDownsizeMask);
#else
return RunOSWaitOnAddressTimed(pTargetAddress, pCompareAddress, state.uWordSize, state.qwNanosecondsAbs.value(), { }, { }, bSpun);
#endif
}
template <EWaitMethod T>
static void RunOSWaitOnAddressEQNoTimedNoErrors(const void *pTargetAddress,
const void *pCompareAddress,
WaitState &state)
{
while (true)
{
WaitBuffer wb = WaitBuffer::From(pTargetAddress, state.uWordSize);
if (!WaitBuffer::Compare2<T, kPlatformFutexNoForcedAlignedU32>(wb.buffer, state.uWordSize, pCompareAddress, state.uDownsizeMask))
{
return;
}
(void)SysWaitOnAddressNoTimed(pTargetAddress, wb.buffer, state.uWordSize);
if (WaitBuffer::Compare2<T, kPlatformFutexNoForcedAlignedU32>(pTargetAddress, state.uWordSize, pCompareAddress, state.uDownsizeMask))
{
SysWakeOneOnAddress(pTargetAddress);
}
else
{
return;
}
}
}
template <EWaitMethod T>
static bool RunOSWaitOnAddressEQTimedSteady(const void *pTargetAddress,
const void *pCompareAddress,
WaitState &state,
bool bSpun = false)
{
while (true)
{
WaitBuffer wb = WaitBuffer::From(pTargetAddress, state.uWordSize);
if (!WaitBuffer::Compare2<T, kPlatformFutexNoForcedAlignedU32>(wb.buffer, state.uWordSize, pCompareAddress, state.uDownsizeMask))
{
return true;
}
bool bResult = RunOSWaitOnAddressTimed(pTargetAddress, wb.buffer, state.uWordSize, state.qwNanosecondsAbs.value(), { }, { }, bSpun);
if (WaitBuffer::Compare2<T, kPlatformFutexNoForcedAlignedU32>(pTargetAddress, state.uWordSize, pCompareAddress, state.uDownsizeMask))
{
SysWakeOneOnAddress(pTargetAddress);
if (!bResult)
{
return false;
}
}
else
{
return true;
}
}
}
// Windows 8+ thread primitives might use me instead of the public API
// it does work on Linux and Windows 8+
// it does not, however, work on emulated platforms
// this is intentional
bool InternalLTSWaitOnAddressHighRes(const void *pTargetAddress,
2023-09-02 03:55:43 +00:00
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanosecondsAbs)
{
auto [pWaitAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, uWordSize);
auto pCompareAddress2 = AuReinterpretCast<const char *>(pCompareAddress) - uDelta;
WaitState state;
state.uDownsizeMask = uMask;
state.compare = uMask != kMax64 ?
WaitBuffer::From(pCompareAddress2, 4) :
WaitBuffer::From(pCompareAddress2, uWordSize);
state.uWordSize = uMask != kMax64 ? 4 : uWordSize;
if (!qwNanosecondsAbs)
{
RunOSWaitOnAddressNoTimedNoErrors(pWaitAddress, pCompareAddress2, state);
return true;
}
else
{
state.qwNanosecondsAbs = qwNanosecondsAbs;
return RunOSWaitOnAddressTimedSteady(pWaitAddress, pCompareAddress2, state, true);
}
}
void InternalLTSWakeAll(const void *pTargetAddress)
{
2023-12-21 15:54:05 +00:00
#if defined(WOA_ALWAYS_DUMB_OS_TARGET)
WakeAllOnAddress(pTargetAddress);
#else
auto [pWakeAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, 1);
SysWakeAllOnAddress(pWakeAddress);
2023-12-21 15:54:05 +00:00
#endif
}
void InternalLTSWakeOne(const void *pTargetAddress)
{
2023-12-21 15:54:05 +00:00
#if defined(WOA_ALWAYS_DUMB_OS_TARGET)
WakeOnAddress(pTargetAddress);
#else
auto [pWakeAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, 1);
if (uDelta)
{
SysWakeAllOnAddress(pWakeAddress);
}
else
{
SysWakeNOnAddress(pWakeAddress, 1);
}
2023-12-21 15:54:05 +00:00
#endif
}
void InternalLTSWakeCount(const void *pTargetAddress, AuUInt32 uCount)
{
2023-12-21 15:54:05 +00:00
#if defined(WOA_ALWAYS_DUMB_OS_TARGET)
WakeNOnAddress(pTargetAddress, uCount);
#else
auto [pWakeAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, 1);
if (uDelta)
{
SysWakeAllOnAddress(pWakeAddress);
}
else
{
SysWakeNOnAddress(pWakeAddress, uCount);
}
2023-12-21 15:54:05 +00:00
#endif
}
WOAFASTPUB bool WaitOnAddress(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds,
AuOptional<bool> optAlreadySpun)
{
2024-11-21 17:17:09 +00:00
#if !defined(WOA_STRICTER_FIFO)
// Avoid SteadyTime syscall in the event of HAL retardation (missing KUSER QPC, Linux vDSO, etc)
if (!WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress, kMax64))
{
return true;
}
2024-11-21 17:17:09 +00:00
#endif
return WaitOnAddressSteady(pTargetAddress,
pCompareAddress,
uWordSize,
qwNanoseconds ? qwNanoseconds + AuTime::SteadyClockNS() : 0,
optAlreadySpun);
}
WOAFASTPUB bool WaitOnAddressSpecial(EWaitMethod eMethod,
const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds,
AuOptional<bool> optAlreadySpun)
{
2024-11-21 17:17:09 +00:00
#if !defined(WOA_STRICTER_FIFO)
// Avoid SteadyTime syscall in the event of HAL retardation (missing KUSER QPC, Linux vDSO, etc)
if (!WaitBuffer::Compare(pTargetAddress, uWordSize, pCompareAddress, kMax64, eMethod))
{
return true;
}
2024-11-21 17:17:09 +00:00
#endif
return WaitOnAddressSpecialSteady(eMethod,
pTargetAddress,
pCompareAddress,
uWordSize,
qwNanoseconds ? qwNanoseconds + AuTime::SteadyClockNS() : 0,
optAlreadySpun);
}
template <EWaitMethod T>
auline bool TryWaitOnAddressSpecialTmpl(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize)
{
2024-11-21 17:17:09 +00:00
#if !defined(WOA_STRICTER_FIFO)
return Primitives::DoTryIfAlderLake([&]()
{
return !WaitBuffer::Compare2<T, true>(pTargetAddress, uWordSize, pCompareAddress);
}, pTargetAddress);
2024-11-21 17:17:09 +00:00
#else
return false;
#endif
}
WOAFASTPUB bool TryWaitOnAddress(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize)
{
return TryWaitOnAddressSpecialTmpl<EWaitMethod::eNotEqual>(pTargetAddress, pCompareAddress, uWordSize);
}
WOAFASTPUB bool TryWaitOnAddressSpecial(EWaitMethod eMethod,
const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize)
{
2024-11-21 17:17:09 +00:00
#if !defined(WOA_STRICTER_FIFO)
DO_OF_METHOD_TYPE(return, TryWaitOnAddressSpecialTmpl, pTargetAddress, pCompareAddress, uWordSize);
2024-11-21 17:17:09 +00:00
#endif
return false;
}
WOAFASTPUB bool TryWaitOnAddressEx(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
const AuFunction<bool(const void *, const void *, AuUInt8)> &check)
{
if (!check)
{
return TryWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize);
}
return Primitives::DoTryIfAlderLake([&]()
{
if (WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress))
{
return false;
}
return check(pTargetAddress, pCompareAddress, uWordSize);
}, pTargetAddress);
}
template <EWaitMethod T>
bool TryWaitOnAddressSpecialExTmpl(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
const AuFunction<bool(const void *, const void *, AuUInt8)> &check)
{
2024-11-21 17:17:09 +00:00
#if !defined(WOA_STRICTER_FIFO)
return Primitives::DoTryIfAlderLake([&]()
{
if (WaitBuffer::Compare2<T, true>(pTargetAddress, uWordSize, pCompareAddress))
{
return false;
}
return check(pTargetAddress, pCompareAddress, uWordSize);
}, pTargetAddress);
2024-11-21 17:17:09 +00:00
#else
return false;
#endif
}
WOAFASTPUB bool TryWaitOnAddressSpecialEx(EWaitMethod eMethod,
const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
const AuFunction<bool(const void *, const void *, AuUInt8)> &check)
{
if (!check)
{
return TryWaitOnAddressSpecial(eMethod, pTargetAddress, pCompareAddress, uWordSize);
}
2024-11-21 17:17:09 +00:00
#if !defined(WOA_STRICTER_FIFO)
DO_OF_METHOD_TYPE(return, TryWaitOnAddressSpecialExTmpl, pTargetAddress, pCompareAddress, uWordSize, check);
2024-11-21 17:17:09 +00:00
#endif
return false;
}
WOAFASTPUB void WakeNOnAddress(const void *pTargetAddress,
AuUInt8 uNMaximumThreads)
{
if (IsWaitOnRecommended())
{
2023-08-30 23:50:54 +00:00
auto [pWakeAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, 1);
if (uDelta)
{
SysWakeAllOnAddress(pWakeAddress);
}
else
{
SysWakeNOnAddress(pWakeAddress, uNMaximumThreads);
}
}
else
{
(void)gProcessWaitables.IterateWake(pTargetAddress, [&](WaitEntry &entry) -> AuPair<bool, bool>
{
if (!uNMaximumThreads)
{
return AuMakePair(false, false);
}
bool bWake {};
if (entry.TrySignalAddress(pTargetAddress))
{
bWake = true;
uNMaximumThreads--;
}
bool bCont = uNMaximumThreads != 0;
return AuMakePair(bCont, bWake);
});
}
}
WOAFASTPUB void WakeOnAddress(const void *pTargetAddress)
{
WakeNOnAddress(pTargetAddress, 1);
}
WOAFASTPUB void WakeAllOnAddress(const void *pTargetAddress)
{
if (IsWaitOnRecommended())
{
2023-08-30 23:50:54 +00:00
auto [pWakeAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, 1);
SysWakeAllOnAddress(pWakeAddress);
}
else
{
(void)gProcessWaitables.IterateWake(pTargetAddress, [&](WaitEntry &entry) -> AuPair<bool, bool>
{
return AuMakePair(true, entry.TrySignalAddress(pTargetAddress));
});
}
}
WOAFASTPUB bool WaitOnAddressSteady(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds,
AuOptional<bool> optAlreadySpun)
2024-11-21 17:17:09 +00:00
{
#if !defined(WOA_STRICTER_FIFO)
2023-12-10 18:21:35 +00:00
// Avoid emulated path dynamic TLS fetch without TLS section
// or various security checks
// or other such bloated thunks
if (!WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress, kMax64))
2023-12-10 18:21:35 +00:00
{
return true;
}
2024-11-21 17:17:09 +00:00
#endif
2023-12-10 18:21:35 +00:00
bool bWaitOnAddress = IsWaitOnRecommended();
if (bWaitOnAddress)
{
auto [pWaitAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, uWordSize);
auto pCompareAddress2 = AuReinterpretCast<const char *>(pCompareAddress) - uDelta;
WaitState state;
state.uDownsizeMask = uMask;
state.compare = uMask != kMax64 ?
WaitBuffer::From(pCompareAddress2, 4) :
WaitBuffer::From(pCompareAddress2, uWordSize);
state.uWordSize = uMask != kMax64 ? 4 : uWordSize;
bool bSpun {};
if (Primitives::ThrdCfg::gPreferWaitOnAddressAlwaysSpinNative &&
optAlreadySpun.value_or(false))
{
if (TryWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize))
{
return true;
}
bSpun = true;
}
if (!qwNanoseconds)
{
RunOSWaitOnAddressNoTimedNoErrors(pWaitAddress, pCompareAddress2, state);
return true;
}
else
{
state.qwNanosecondsAbs = qwNanoseconds;
return RunOSWaitOnAddressTimedSteady(pWaitAddress, pCompareAddress2, state, bSpun);
}
}
else
{
if (Primitives::ThrdCfg::gPreferWaitOnAddressAlwaysSpin &&
optAlreadySpun.value_or(false))
{
if (TryWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize))
{
return true;
}
}
return WaitOnAddressWide<EWaitMethod::eNotEqual>(pTargetAddress, pCompareAddress, uWordSize, {}, qwNanoseconds ? qwNanoseconds : AuOptional<AuUInt64>{}, false, nullptr);
}
return false;
}
WOAFASTPUB bool WaitOnAddressSpecialSteady(EWaitMethod eMethod,
const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds,
AuOptional<bool> optAlreadySpun)
{
2024-11-21 17:17:09 +00:00
#if !defined(WOA_STRICTER_FIFO)
// Avoid emulated path dynamic TLS fetch without TLS section
// or various security checks
// or other such bloated thunks
if (!WaitBuffer::Compare(pTargetAddress, uWordSize, pCompareAddress, kMax64, eMethod))
{
return true;
}
2024-11-21 17:17:09 +00:00
#endif
bool bWaitOnAddress = IsWaitOnRecommended();
if (bWaitOnAddress)
{
auto [pWaitAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, uWordSize);
auto pCompareAddress2 = AuReinterpretCast<const char *>(pCompareAddress) - uDelta;
WaitState state;
state.uDownsizeMask = uMask;
state.compare = uMask != kMax64 ?
WaitBuffer::From(pCompareAddress2, 4) :
WaitBuffer::From(pCompareAddress2, uWordSize);
state.uWordSize = uMask != kMax64 ? 4 : uWordSize;
state.pCompare2 = pCompareAddress;
state.eWaitMethod = eMethod;
bool bSpun {};
if (Primitives::ThrdCfg::gPreferWaitOnAddressAlwaysSpinNative &&
optAlreadySpun.value_or(false))
{
if (TryWaitOnAddressSpecial(eMethod, pTargetAddress, pCompareAddress, uWordSize))
{
return true;
}
bSpun = true;
}
if (!qwNanoseconds)
{
DO_OF_METHOD_TYPE(, RunOSWaitOnAddressEQNoTimedNoErrors, pWaitAddress, pCompareAddress2, state);
return true;
}
else
{
state.qwNanosecondsAbs = qwNanoseconds;
DO_OF_METHOD_TYPE(return, RunOSWaitOnAddressEQTimedSteady, pWaitAddress, pCompareAddress2, state, bSpun);
}
}
else
{
if (Primitives::ThrdCfg::gPreferWaitOnAddressAlwaysSpin &&
optAlreadySpun.value_or(false))
{
if (TryWaitOnAddressSpecial(eMethod, pTargetAddress, pCompareAddress, uWordSize))
{
return true;
}
}
DO_OF_METHOD_TYPE(return, WaitOnAddressWide, pTargetAddress, pCompareAddress, uWordSize, {}, qwNanoseconds ? qwNanoseconds : AuOptional<AuUInt64> {}, false, pCompareAddress);
}
return false;
}
}