2363 lines
79 KiB
C++
2363 lines
79 KiB
C++
/***
|
|
Copyright (C) 2023 J Reece Wilson (a/k/a "Reece"). All rights reserved.
|
|
|
|
File: AuWakeOnAddress.cpp
|
|
Date: 2023-3-10
|
|
Author: Reece
|
|
***/
|
|
|
|
#if defined(AURORA_COMPILER_MSVC)
|
|
#pragma strict_gs_check(off)
|
|
#pragma check_stack(off)
|
|
#endif
|
|
|
|
#include <Source/RuntimeInternal.hpp>
|
|
#include "AuWakeOnAddress.hpp"
|
|
#include "Primitives/SMTYield.hpp"
|
|
|
|
#include <Time/Time.hpp>
|
|
#define HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD
|
|
// WOA_ALWAYS_DUMB_OS_TARGET -> iOS, notarized MacOS, Win9x, Xbox 360, etc
|
|
|
|
// This will tank the average case:
|
|
// > fast paths will be disabled,
|
|
// > lock barriers turn into full lock guards,
|
|
// > and every sleep will require a condvar or semaphore wakeup.
|
|
// Every perf trick will be bypassed.
|
|
// Update: partially removed. It's not ready yet.
|
|
//#define WOA_STRICTER_FIFO
|
|
|
|
//#define WOA_WAIT_SIDE_NO_ABSORB
|
|
|
|
namespace Aurora::Threading
|
|
{
|
|
static WaitEntry **GetPBeforeFromContainer(const WaitMulipleContainer *pContainer, const void *pAddress);
|
|
static WaitEntry **GetPNextFromContainer(const WaitMulipleContainer *pContainer, const void *pAddress);
|
|
static WaitEntry **GetPLastFromContainer(const WaitMulipleContainer *pContainer, const void *pAddress);
|
|
static WaitEntry **GetPFirstFromContainer(const WaitMulipleContainer *pContainer, const void *pAddress);
|
|
static const void *GetPCompareFromContainer(const WaitMulipleContainer *pContainer, const void *pAddress);
|
|
|
|
#if defined(HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD)
|
|
static thread_local AuSPtr<WaitEntry> tlsWaitEntry = AuMakeSharedPanic<WaitEntry>();
|
|
#else
|
|
static thread_local WaitEntry tlsWaitEntry;
|
|
#endif
|
|
|
|
// Expands to a switch over the in-scope `eMethod` (an EWaitMethod value) that invokes
// `Callee<EWaitMethod::X>(args...)` for the matching enumerator.
// `stmtPrefix` is pasted verbatim in front of the call (for example `return`).
// A value of `eMethod` that matches none of the cases executes nothing.
#define DO_OF_METHOD_TYPE(stmtPrefix, Callee, ...)                                      \
    switch (eMethod)                                                                    \
    {                                                                                   \
        case EWaitMethod::eNotEqual:                                                    \
            stmtPrefix Callee<EWaitMethod::eNotEqual>(__VA_ARGS__);                     \
            break;                                                                      \
        case EWaitMethod::eEqual:                                                       \
            stmtPrefix Callee<EWaitMethod::eEqual>(__VA_ARGS__);                        \
            break;                                                                      \
        case EWaitMethod::eGreaterThanCompare:                                          \
            stmtPrefix Callee<EWaitMethod::eGreaterThanCompare>(__VA_ARGS__);           \
            break;                                                                      \
        case EWaitMethod::eGreaterThanOrEqualsCompare:                                  \
            stmtPrefix Callee<EWaitMethod::eGreaterThanOrEqualsCompare>(__VA_ARGS__);   \
            break;                                                                      \
        case EWaitMethod::eLessThanCompare:                                             \
            stmtPrefix Callee<EWaitMethod::eLessThanCompare>(__VA_ARGS__);              \
            break;                                                                      \
        case EWaitMethod::eLessThanOrEqualsCompare:                                     \
            stmtPrefix Callee<EWaitMethod::eLessThanOrEqualsCompare>(__VA_ARGS__);      \
            break;                                                                      \
        case EWaitMethod::eAnd:                                                         \
            stmtPrefix Callee<EWaitMethod::eAnd>(__VA_ARGS__);                          \
            break;                                                                      \
        case EWaitMethod::eNotAnd:                                                      \
            stmtPrefix Callee<EWaitMethod::eNotAnd>(__VA_ARGS__);                       \
            break;                                                                      \
    }
|
|
|
|
static const int gShouldSpinOnlyInCPU = 1; // TODO: havent decided
|
|
// UPDATE: 1 paranoia just in case we get preempted (rare).
|
|
template<typename T>
|
|
static void DoSpinLockOnVar(T *uPointer)
|
|
{
|
|
if (gShouldSpinOnlyInCPU == 0)
|
|
{
|
|
while (!Primitives::DoTryIfAlderLake([&]()
|
|
{
|
|
return AuAtomicTestAndSet(uPointer, 0) == 0;
|
|
}, uPointer))
|
|
{
|
|
|
|
}
|
|
}
|
|
else if (gShouldSpinOnlyInCPU == 1)
|
|
{
|
|
while (!Primitives::DoTryIfAlderLake([&]()
|
|
{
|
|
return AuAtomicTestAndSet(uPointer, 0) == 0;
|
|
}, uPointer))
|
|
{
|
|
ContextYield();
|
|
}
|
|
}
|
|
else if (gShouldSpinOnlyInCPU == 2)
|
|
{
|
|
while (AuAtomicTestAndSet(uPointer, 0))
|
|
{
|
|
while (*uPointer)
|
|
{
|
|
ContextYield();
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
SysUnreachable();
|
|
}
|
|
}
|
|
|
|
void WaitEntry::Release()
|
|
{
|
|
AuResetMember(this->uSize);
|
|
AuResetMember(this->pAddress);
|
|
}
|
|
|
|
// Nothing to do beyond default member initialization.
WaitEntry::WaitEntry() = default;
|
|
|
|
// Clears the entry's address/size bookkeeping on destruction.
WaitEntry::~WaitEntry()
{
    Release();
}
|
|
|
|
template <EWaitMethod eMethod>
|
|
bool WaitEntry::SleepOn(WaitState &state)
|
|
{
|
|
if (state.qwNanosecondsAbs)
|
|
{
|
|
#if !defined(WOA_STRICTER_FIFO)
|
|
if (!WaitBuffer::Compare2<eMethod, true>(this->pAddress, this->uSize, state.pCompare2))
|
|
{
|
|
return true;
|
|
}
|
|
#endif
|
|
|
|
#if !defined(WOA_SEMAPHORE_MODE)
|
|
this->mutex.Lock();
|
|
#endif
|
|
|
|
auto uNow = AuTime::SteadyClockNS();
|
|
auto uEndTime = state.qwNanosecondsAbs.value();
|
|
|
|
while (uNow < uEndTime)
|
|
{
|
|
bool bStatus {};
|
|
|
|
#if defined(AURORA_PLATFORM_WIN32)
|
|
Win32DropSchedulerResolution();
|
|
#endif
|
|
|
|
if (!AuAtomicLoad(&this->bAlive))
|
|
{
|
|
#if !defined(WOA_SEMAPHORE_MODE)
|
|
this->mutex.Unlock();
|
|
#endif
|
|
|
|
(void)gProcessWaitables.WaitBufferFrom<eMethod>(this->pAddress, this->uSize, false, state.pCompare2);
|
|
|
|
#if !defined(WOA_SEMAPHORE_MODE)
|
|
this->mutex.Lock();
|
|
#endif
|
|
|
|
continue;
|
|
}
|
|
|
|
{
|
|
#if defined(WOA_SEMAPHORE_MODE)
|
|
|
|
bStatus = this->semaphore->LockAbsNS(uEndTime);
|
|
|
|
#if defined(WOA_WAIT_SIDE_NO_ABSORB) || defined(WOA_STRICTER_FIFO)
|
|
if (bStatus)
|
|
{
|
|
return true;
|
|
}
|
|
#else
|
|
if (!bStatus && !WaitBuffer::Compare2<eMethod, true>(this->pAddress, this->uSize, state.pCompare2))
|
|
{
|
|
return true;
|
|
}
|
|
#endif
|
|
|
|
#else
|
|
|
|
if (!WaitBuffer::Compare2<eMethod, true>(this->pAddress, this->uSize, state.pCompare2))
|
|
{
|
|
this->mutex.Unlock();
|
|
return true;
|
|
}
|
|
else
|
|
{
|
|
|
|
auto uTimeRemNS = uEndTime - uNow;
|
|
bStatus = this->variable.WaitForSignalNsEx(&this->mutex, uTimeRemNS, false);
|
|
|
|
#if defined(WOA_WAIT_SIDE_NO_ABSORB)
|
|
if (bStatus)
|
|
{
|
|
return true;
|
|
}
|
|
#endif
|
|
|
|
}
|
|
|
|
#endif
|
|
}
|
|
|
|
uNow = AuTime::SteadyClockNS();
|
|
}
|
|
|
|
#if !defined(WOA_SEMAPHORE_MODE)
|
|
this->mutex.Unlock();
|
|
#endif
|
|
|
|
return !WaitBuffer::Compare2<eMethod, true>(this->pAddress, this->uSize, state.pCompare2);
|
|
}
|
|
else
|
|
{
|
|
while (true)
|
|
{
|
|
#if !defined(WOA_SEMAPHORE_MODE)
|
|
this->mutex.Lock();
|
|
#endif
|
|
|
|
if (!AuAtomicLoad(&this->bAlive))
|
|
{
|
|
#if !defined(WOA_SEMAPHORE_MODE)
|
|
this->mutex.Unlock();
|
|
#endif
|
|
return true;
|
|
}
|
|
|
|
{
|
|
#if defined(WOA_SEMAPHORE_MODE)
|
|
this->semaphore->Lock();
|
|
#else
|
|
this->variable.WaitForSignalNsEx(&this->mutex, 0, false);
|
|
#endif
|
|
}
|
|
|
|
#if !defined(WOA_SEMAPHORE_MODE)
|
|
this->mutex.Unlock();
|
|
#endif
|
|
|
|
#if defined(WOA_WAIT_SIDE_NO_ABSORB)
|
|
|
|
return true;
|
|
|
|
#else
|
|
|
|
if (!WaitBuffer::Compare2<eMethod, true>(this->pAddress, this->uSize, state.pCompare2))
|
|
{
|
|
return true;
|
|
}
|
|
else
|
|
{
|
|
(void)gProcessWaitables.WaitBufferFrom<eMethod>(this->pAddress, this->uSize, false, state.pCompare2);
|
|
}
|
|
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
|
|
// Sleeps on this entry without re-evaluating any comparison.
// qwNanosecondsAbs == 0 means "no deadline"; otherwise it is compared against AuTime::SteadyClockNS().
// Returns true when the entry was already marked dead or a wake was received (the untimed path
// always returns true), and false when the deadline passed first.
bool WaitEntry::SleepLossy(AuUInt64 qwNanosecondsAbs)
{
#if !defined(WOA_SEMAPHORE_MODE)
    AU_LOCK_GUARD(this->mutex);
#endif

    if (!qwNanosecondsAbs)
    {
        // Untimed: block once unless a waker already got to us.
        if (AuAtomicLoad(&this->bAlive))
        {
        #if defined(WOA_SEMAPHORE_MODE)
            this->semaphore->Lock();
        #else
            this->variable.WaitForSignalNsEx(&this->mutex, 0, false);
        #endif
        }

        return true;
    }

#if defined(WOA_SEMAPHORE_MODE)
    return this->semaphore->LockAbsNS(qwNanosecondsAbs);
#else
    for (auto uCurrent = AuTime::SteadyClockNS();
         uCurrent < qwNanosecondsAbs;
         uCurrent = AuTime::SteadyClockNS())
    {
        if (!AuAtomicLoad(&this->bAlive))
        {
            return true;
        }

        const auto uRemainingNS = qwNanosecondsAbs - uCurrent;
        if (this->variable.WaitForSignalNsEx(&this->mutex, uRemainingNS, false))
        {
            return true;
        }
    }

    return false;
#endif
}
|
|
|
|
// Returns the entry linked before this one for pAddress.
// For a wait-multiple entry (pSpecial set) the link lives in the container and is looked up per
// address; otherwise the entry's own pBefore is used.
// Returns nullptr when the container has no link slot for pAddress. SetBefore() and
// GetSimilarFirstItr() already treat the container lookup as nullable, so the result is checked
// here instead of being dereferenced blindly.
WaitEntry *WaitEntry::GetBefore(const void *pAddress)
{
    if (auto pSpecial = this->pSpecial)
    {
        if (auto ppBefore = GetPBeforeFromContainer(pSpecial, pAddress))
        {
            return *ppBefore;
        }

        return nullptr;
    }

    return this->pBefore;
}
|
|
|
|
// Backwards-iteration step used while relinking the list tail.
// Without a wait-multiple container this is simply pBefore (old logic: traverse backwards).
// With one, the link is recovered from the container's "first" slot for pAddress
// (non-TLS doubly-linked list recovery); nullptr when the container has no such slot.
WaitEntry *WaitEntry::GetSimilarFirstItr(const void *pAddress)
{
    auto pContainer = this->pSpecial;
    if (!pContainer)
    {
        return this->pBefore;
    }

    auto ppFirst = GetPFirstFromContainer(pContainer, pAddress);
    return ppFirst ? *ppFirst : nullptr;
}
|
|
|
|
void WaitEntry::SetBefore(const void *pAddress, WaitEntry *pNext)
|
|
{
|
|
if (auto pSpecial = this->pSpecial)
|
|
{
|
|
if (auto pNextEntry = GetPBeforeFromContainer(pSpecial, pAddress))
|
|
{
|
|
*pNextEntry = pNext;
|
|
}
|
|
else
|
|
{
|
|
SysUnreachable();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
this->pBefore = pNext;
|
|
}
|
|
}
|
|
|
|
// Returns the entry linked after this one for pAddress.
// For a wait-multiple entry (pSpecial set) the link lives in the container and is looked up per
// address; otherwise the entry's own pNext is used.
// Returns nullptr when the container has no link slot for pAddress. SetNext() and
// GetSimilarLastItr() already treat the container lookup as nullable, so the result is checked
// here instead of being dereferenced blindly.
WaitEntry *WaitEntry::GetNext(const void *pAddress)
{
    if (auto pSpecial = this->pSpecial)
    {
        if (auto ppNext = GetPNextFromContainer(pSpecial, pAddress))
        {
            return *ppNext;
        }

        return nullptr;
    }

    return this->pNext;
}
|
|
|
|
// Forwards-iteration step used while relinking the list head.
// Without a wait-multiple container this is simply pNext. With one, the link is read from the
// container's "last" slot for pAddress; nullptr when the container has no such slot.
WaitEntry *WaitEntry::GetSimilarLastItr(const void *pAddress)
{
    auto pContainer = this->pSpecial;
    if (!pContainer)
    {
        return this->pNext;
    }

    auto ppLast = GetPLastFromContainer(pContainer, pAddress);
    return ppLast ? *ppLast : nullptr;
}
|
|
|
|
void WaitEntry::SetNext(const void *pAddress, WaitEntry *pNext)
|
|
{
|
|
if (auto pSpecial = this->pSpecial)
|
|
{
|
|
if (auto pNextEntry = GetPNextFromContainer(pSpecial, pAddress))
|
|
{
|
|
*pNextEntry = pNext;
|
|
}
|
|
else
|
|
{
|
|
SysUnreachable();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
this->pNext = pNext;
|
|
}
|
|
}
|
|
|
|
bool WaitEntry::TrySignalAddress(const void *pAddress)
|
|
{
|
|
if (auto &pSpecial = this->pSpecial) AU_EOB_UNLIKELY
|
|
{
|
|
if (auto pCompare = GetPCompareFromContainer(pSpecial, pAddress))
|
|
{
|
|
if (WaitBuffer::Compare(pAddress, this->uSize, pCompare, kMax64, this->eWaitMethod))
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if (this->pSpecial->waitArray && gRuntimeConfig.threadingConfig.bEnableWaitOnAddressAndSurpression)
|
|
{
|
|
auto pFirst = AuReinterpretCast<MultipleInternalContext>(this->pSpecial->waitArray.Begin<WaitMultipleEntry>()->internalContext);
|
|
|
|
if (AuAtomicAdd(&pFirst->uCounter, 1u) < pFirst->uMinTrigger)
|
|
{
|
|
// "Successful" - do not dequeue, but consume -1 from "N" of WakeNOnAddress
|
|
return true;
|
|
}
|
|
}
|
|
|
|
AuAtomicSet(&this->bAlive, 0u);
|
|
|
|
if (this->bSemaphoreActive &&
|
|
this->pSemaphore)
|
|
{
|
|
this->pSemaphore->AddOne();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (this->pAddress != pAddress)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
if (WaitBuffer::Compare(pAddress, this->uSize, this->pCompareAddress, kMax64, this->eWaitMethod))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
AuAtomicSet(&this->bAlive, 0u);
|
|
}
|
|
|
|
#if defined(WOA_SEMAPHORE_MODE)
|
|
this->semaphore->Unlock(1);
|
|
#else
|
|
this->variable.Signal();
|
|
#endif
|
|
return true;
|
|
}
|
|
|
|
// Snapshots up to AuArraySize(buffer) bytes from pBuf into a new WaitBufferLegacy.
// NOTE(review): uSize is stored as passed in, even when it exceeds the number of bytes copied -
// confirm that consumers never read uSize bytes out of `buffer`.
WaitBufferLegacy WaitBufferLegacy::From(const void *pBuf, AuUInt8 uSize)
{
    WaitBufferLegacy wait;
    AuMemcpy(wait.buffer, pBuf, AuMin(uSize, AuArraySize(wait.buffer)));
    wait.uSize = uSize;
    // Return the local by name: wrapping it in AuMove() would inhibit copy elision (NRVO) and the
    // local is already treated as an rvalue in a return statement.
    return wait;
}
|
|
|
|
bool WaitBuffer::Compare(const void *pHotAddress, AuUInt8 uSize, WaitState &state)
|
|
{
|
|
return WaitBuffer::Compare(pHotAddress, uSize, state.pCompare2, state.uDownsizeMask, state.eWaitMethod);
|
|
}
|
|
|
|
bool WaitBuffer::Compare(const void *pHotAddress, AuUInt8 uSize, const void *pCompare, AuUInt64 uMask, EWaitMethod eMethod)
|
|
{
|
|
bool bRet {};
|
|
AURORA_COMPILER_VOLATILE_BARRIER();
|
|
DO_OF_METHOD_TYPE(return, Compare2, pHotAddress, uSize, pCompare)
|
|
return bRet;
|
|
}
|
|
|
|
template <EWaitMethod eMethod, bool bFast>
|
|
bool WaitBuffer::Compare2(const void *pHot, AuUInt8 uSize, const void *pBuf2, AuUInt64 uMask)
|
|
{
|
|
return Compare2<eMethod, bFast>((const volatile void *)pHot, uSize, pBuf2, uMask);
|
|
}
|
|
|
|
template <EWaitMethod eMethod, bool bFast>
|
|
bool WaitBuffer::Compare2(const volatile void *pHot, AuUInt8 uSize, const void *pBuf2, AuUInt64 uMask)
|
|
{
|
|
#if !defined(AURORA_COMPILER_CLANG) && !defined(AURORA_COMPILER_MSVC)
|
|
AURORA_COMPILER_VOLATILE_BARRIER();
|
|
#endif
|
|
|
|
if constexpr (!bFast)
|
|
{
|
|
|
|
if constexpr (eMethod == EWaitMethod::eNotEqual)
|
|
{
|
|
switch (uSize)
|
|
{
|
|
case 1:
|
|
return (AuReadU8(pHot, 0) & uMask) == (AuReadU8(pBuf2, 0) & uMask);
|
|
case 2:
|
|
return (AuReadU16(pHot, 0) & uMask) == (AuReadU16(pBuf2, 0) & uMask);
|
|
case 4:
|
|
return (AuReadU32(pHot, 0) & uMask) == (AuReadU32(pBuf2, 0) & uMask);
|
|
case 8:
|
|
return (AuReadU64(pHot, 0) & uMask) == (AuReadU64(pBuf2, 0) & uMask);
|
|
default:
|
|
return (AuMemcmp((const void *)pHot, pBuf2, uSize) == 0);
|
|
}
|
|
}
|
|
|
|
if constexpr (eMethod == EWaitMethod::eEqual)
|
|
{
|
|
switch (uSize)
|
|
{
|
|
case 1:
|
|
return !((AuReadU8(pHot, 0) & uMask) == (AuReadU8(pBuf2, 0) & uMask));
|
|
case 2:
|
|
return !((AuReadU16(pHot, 0) & uMask) == (AuReadU16(pBuf2, 0) & uMask));
|
|
case 4:
|
|
return !((AuReadU32(pHot, 0) & uMask) == (AuReadU32(pBuf2, 0) & uMask));
|
|
case 8:
|
|
return !((AuReadU64(pHot, 0) & uMask) == (AuReadU64(pBuf2, 0) & uMask));
|
|
default:
|
|
return !(AuMemcmp((const void *)pHot, pBuf2, uSize) == 0);
|
|
}
|
|
}
|
|
|
|
if constexpr (eMethod == EWaitMethod::eGreaterThanCompare)
|
|
{
|
|
switch (uSize)
|
|
{
|
|
case 1:
|
|
return !((AuReadU8(pHot, 0) & uMask) > (AuReadU8(pBuf2, 0) & uMask));
|
|
case 2:
|
|
return !((AuReadU16(pHot, 0) & uMask) > (AuReadU16(pBuf2, 0) & uMask));
|
|
case 4:
|
|
return !((AuReadU32(pHot, 0) & uMask) > (AuReadU32(pBuf2, 0) & uMask));
|
|
case 8:
|
|
return !((AuReadU64(pHot, 0) & uMask) > (AuReadU64(pBuf2, 0) & uMask));
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if constexpr (eMethod == EWaitMethod::eGreaterThanOrEqualsCompare)
|
|
{
|
|
switch (uSize)
|
|
{
|
|
case 1:
|
|
return !((AuReadU8(pHot, 0) & uMask) >= (AuReadU8(pBuf2, 0) & uMask));
|
|
case 2:
|
|
return !((AuReadU16(pHot, 0) & uMask) >= (AuReadU16(pBuf2, 0) & uMask));
|
|
case 4:
|
|
return !((AuReadU32(pHot, 0) & uMask) >= (AuReadU32(pBuf2, 0) & uMask));
|
|
case 8:
|
|
return !((AuReadU64(pHot, 0) & uMask) >= (AuReadU64(pBuf2, 0) & uMask));
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if constexpr (eMethod == EWaitMethod::eLessThanCompare)
|
|
{
|
|
switch (uSize)
|
|
{
|
|
case 1:
|
|
return !((AuReadU8(pHot, 0) & uMask) < (AuReadU8(pBuf2, 0) & uMask));
|
|
case 2:
|
|
return !((AuReadU16(pHot, 0) & uMask) < (AuReadU16(pBuf2, 0) & uMask));
|
|
case 4:
|
|
return !((AuReadU32(pHot, 0) & uMask) < (AuReadU32(pBuf2, 0) & uMask));
|
|
case 8:
|
|
return !((AuReadU64(pHot, 0) & uMask) < (AuReadU64(pBuf2, 0) & uMask));
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if constexpr (eMethod == EWaitMethod::eLessThanOrEqualsCompare)
|
|
{
|
|
switch (uSize)
|
|
{
|
|
case 1:
|
|
return !((AuReadU8(pHot, 0) & uMask) <= (AuReadU8(pBuf2, 0) & uMask));
|
|
case 2:
|
|
return !((AuReadU16(pHot, 0) & uMask) <= (AuReadU16(pBuf2, 0) & uMask));
|
|
case 4:
|
|
return !((AuReadU32(pHot, 0) & uMask) <= (AuReadU32(pBuf2, 0) & uMask));
|
|
case 8:
|
|
return !((AuReadU64(pHot, 0) & uMask) <= (AuReadU64(pBuf2, 0) & uMask));
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if constexpr (eMethod == EWaitMethod::eAnd)
|
|
{
|
|
switch (uSize)
|
|
{
|
|
case 1:
|
|
return !((AuReadU8(pHot, 0) & uMask) & (AuReadU8(pBuf2, 0) & uMask));
|
|
case 2:
|
|
return !((AuReadU16(pHot, 0) & uMask) & (AuReadU16(pBuf2, 0) & uMask));
|
|
case 4:
|
|
return !((AuReadU32(pHot, 0) & uMask) & (AuReadU32(pBuf2, 0) & uMask));
|
|
case 8:
|
|
return !((AuReadU64(pHot, 0) & uMask) & (AuReadU64(pBuf2, 0) & uMask));
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if constexpr (eMethod == EWaitMethod::eNotAnd)
|
|
{
|
|
switch (uSize)
|
|
{
|
|
case 1:
|
|
return ((AuReadU8(pHot, 0) & uMask) & (AuReadU8(pBuf2, 0) & uMask));
|
|
case 2:
|
|
return ((AuReadU16(pHot, 0) & uMask) & (AuReadU16(pBuf2, 0) & uMask));
|
|
case 4:
|
|
return ((AuReadU32(pHot, 0) & uMask) & (AuReadU32(pBuf2, 0) & uMask));
|
|
case 8:
|
|
return ((AuReadU64(pHot, 0) & uMask) & (AuReadU64(pBuf2, 0) & uMask));
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
|
|
if constexpr (eMethod == EWaitMethod::eNotEqual)
|
|
{
|
|
switch (uSize)
|
|
{
|
|
case 1:
|
|
return (AuReadU8(pHot, 0)) == (AuReadU8(pBuf2, 0));
|
|
case 2:
|
|
return (AuReadU16(pHot, 0)) == (AuReadU16(pBuf2, 0));
|
|
case 4:
|
|
return (AuReadU32(pHot, 0)) == (AuReadU32(pBuf2, 0));
|
|
case 8:
|
|
return (AuReadU64(pHot, 0)) == (AuReadU64(pBuf2, 0));
|
|
default:
|
|
return (AuMemcmp((const void *)pHot, pBuf2, uSize) == 0);
|
|
}
|
|
}
|
|
|
|
if constexpr (eMethod == EWaitMethod::eEqual)
|
|
{
|
|
switch (uSize)
|
|
{
|
|
case 1:
|
|
return !((AuReadU8(pHot, 0)) == (AuReadU8(pBuf2, 0)));
|
|
case 2:
|
|
return !((AuReadU16(pHot, 0)) == (AuReadU16(pBuf2, 0)));
|
|
case 4:
|
|
return !((AuReadU32(pHot, 0)) == (AuReadU32(pBuf2, 0)));
|
|
case 8:
|
|
return !((AuReadU64(pHot, 0)) == (AuReadU64(pBuf2, 0)));
|
|
default:
|
|
return !(AuMemcmp((const void *)pHot, pBuf2, uSize) == 0);
|
|
}
|
|
}
|
|
|
|
if constexpr (eMethod == EWaitMethod::eGreaterThanCompare)
|
|
{
|
|
switch (uSize)
|
|
{
|
|
case 1:
|
|
return !((AuReadU8(pHot, 0)) > (AuReadU8(pBuf2, 0)));
|
|
case 2:
|
|
return !((AuReadU16(pHot, 0)) > (AuReadU16(pBuf2, 0)));
|
|
case 4:
|
|
return !((AuReadU32(pHot, 0)) > (AuReadU32(pBuf2, 0)));
|
|
case 8:
|
|
return !((AuReadU64(pHot, 0)) > (AuReadU64(pBuf2, 0)));
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if constexpr (eMethod == EWaitMethod::eGreaterThanOrEqualsCompare)
|
|
{
|
|
switch (uSize)
|
|
{
|
|
case 1:
|
|
return !((AuReadU8(pHot, 0)) >= (AuReadU8(pBuf2, 0)));
|
|
case 2:
|
|
return !((AuReadU16(pHot, 0)) >= (AuReadU16(pBuf2, 0)));
|
|
case 4:
|
|
return !((AuReadU32(pHot, 0)) >= (AuReadU32(pBuf2, 0)));
|
|
case 8:
|
|
return !((AuReadU64(pHot, 0)) >= (AuReadU64(pBuf2, 0)));
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if constexpr (eMethod == EWaitMethod::eLessThanCompare)
|
|
{
|
|
switch (uSize)
|
|
{
|
|
case 1:
|
|
return !((AuReadU8(pHot, 0)) < (AuReadU8(pBuf2, 0)));
|
|
case 2:
|
|
return !((AuReadU16(pHot, 0)) < (AuReadU16(pBuf2, 0)));
|
|
case 4:
|
|
return !((AuReadU32(pHot, 0)) < (AuReadU32(pBuf2, 0)));
|
|
case 8:
|
|
return !((AuReadU64(pHot, 0)) < (AuReadU64(pBuf2, 0)));
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if constexpr (eMethod == EWaitMethod::eLessThanOrEqualsCompare)
|
|
{
|
|
switch (uSize)
|
|
{
|
|
case 1:
|
|
return !((AuReadU8(pHot, 0)) <= (AuReadU8(pBuf2, 0)));
|
|
case 2:
|
|
return !((AuReadU16(pHot, 0)) <= (AuReadU16(pBuf2, 0)));
|
|
case 4:
|
|
return !((AuReadU32(pHot, 0)) <= (AuReadU32(pBuf2, 0)));
|
|
case 8:
|
|
return !((AuReadU64(pHot, 0)) <= (AuReadU64(pBuf2, 0)));
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if constexpr (eMethod == EWaitMethod::eAnd)
|
|
{
|
|
switch (uSize)
|
|
{
|
|
case 1:
|
|
return !(AuReadU8(pHot, 0) & AuReadU8(pBuf2, 0));
|
|
case 2:
|
|
return !(AuReadU16(pHot, 0) & AuReadU16(pBuf2, 0));
|
|
case 4:
|
|
return !(AuReadU32(pHot, 0) & AuReadU32(pBuf2, 0));
|
|
case 8:
|
|
return !(AuReadU64(pHot, 0) & AuReadU64(pBuf2, 0));
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if constexpr (eMethod == EWaitMethod::eNotAnd)
|
|
{
|
|
switch (uSize)
|
|
{
|
|
case 1:
|
|
return (AuReadU8(pHot, 0) & AuReadU8(pBuf2, 0));
|
|
case 2:
|
|
return (AuReadU16(pHot, 0) & AuReadU16(pBuf2, 0));
|
|
case 4:
|
|
return (AuReadU32(pHot, 0) & AuReadU32(pBuf2, 0));
|
|
case 8:
|
|
return (AuReadU64(pHot, 0) & AuReadU64(pBuf2, 0));
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
template <EWaitMethod eMethod>
|
|
WaitEntry *ProcessWaitNodeContainer::WaitBufferFrom(const void *pAddress, AuUInt8 uSize, bool bScheduleFirst, const void *pCompareAddress)
|
|
{
|
|
#if defined(HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD)
|
|
auto pReturn = tlsWaitEntry.get();
|
|
#else
|
|
auto pReturn = &tlsWaitEntry;
|
|
#endif
|
|
|
|
pReturn->pAddress = pAddress;
|
|
pReturn->uSize = uSize;
|
|
pReturn->pCompareAddress = pCompareAddress;
|
|
pReturn->eWaitMethod = eMethod;
|
|
pReturn->pSpecial = nullptr;
|
|
|
|
Lock();
|
|
|
|
if (!WaitBuffer::Compare2<eMethod, true>(pAddress, uSize, pCompareAddress))
|
|
{
|
|
pReturn->bAlive = false;
|
|
Unlock();
|
|
return nullptr;
|
|
}
|
|
|
|
if (!pReturn->bAlive)
|
|
{
|
|
pReturn->bAlive = true;
|
|
|
|
if (bScheduleFirst /*First in, First Out*/)
|
|
{
|
|
if (auto pLoadFromMemory = this->waitList.pHead)
|
|
{
|
|
pLoadFromMemory->SetBefore(pAddress, pReturn);
|
|
pReturn->SetNext(pAddress, pLoadFromMemory);
|
|
}
|
|
else
|
|
{
|
|
this->waitList.pTail = pReturn;
|
|
}
|
|
this->waitList.pHead = pReturn;
|
|
}
|
|
else /*Last In, First Out*/
|
|
{
|
|
if (auto pLoadFromMemory = this->waitList.pTail)
|
|
{
|
|
pLoadFromMemory->SetNext(pAddress, pReturn);
|
|
pReturn->SetBefore(pAddress, pLoadFromMemory);
|
|
}
|
|
else
|
|
{
|
|
this->waitList.pHead = pReturn;
|
|
}
|
|
this->waitList.pTail = pReturn;
|
|
}
|
|
}
|
|
|
|
Unlock();
|
|
|
|
return pReturn;
|
|
}
|
|
|
|
// Wait-multiple variant of WaitBufferFrom: registers the calling thread's TLS wait entry for
// pAddress in this bucket and attaches pContainer to it.
//
// Returns nullptr (entry marked dead) when the comparison says there is nothing to wait for;
// otherwise the entry is pushed at the head of the wait list and returned.
// pContext is currently unused here.
//
// NOTE(review): the entry is linked even when it was already alive (see TODO below), and
// pSpecial is only assigned after the links have been written - confirm both are intended.
WaitEntry *ProcessWaitNodeContainer::WaitBufferFrom2(const void *pAddress,
                                                     AuUInt8 uSize,
                                                     const void *pAddressCompare,
                                                     EWaitMethod eWaitMethod,
                                                     MultipleInternalContext *pContext,
                                                     const WaitMulipleContainer *pContainer)
{
#if defined(HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD)
    auto pSelf = tlsWaitEntry.get();
#else
    auto pSelf = &tlsWaitEntry;
#endif

    pSelf->pAddress = pAddress;
    pSelf->uSize = uSize;
    pSelf->pCompareAddress = pAddressCompare;
    pSelf->eWaitMethod = eWaitMethod;

    Lock();

    if (!WaitBuffer::Compare(pAddress, uSize, pAddressCompare, kMax64, eWaitMethod))
    {
        pSelf->bAlive = false;
        Unlock();
        return nullptr;
    }

    // TODO: traverse list and reject duplicates
    // (for now the entry is linked whether or not it was already alive)
    if (!pSelf->bAlive)
    {
        pSelf->bAlive = true;
    }

    if (auto pOldHead = this->waitList.pHead)
    {
        pOldHead->SetBefore(pAddress, pSelf);
        pSelf->SetNext(pAddress, pOldHead);
    }
    else
    {
        this->waitList.pTail = pSelf;
    }

    this->waitList.pHead = pSelf;

    pSelf->pSpecial = pContainer;
    Unlock();

    return pSelf;
}
|
|
|
|
template <typename T>
|
|
bool ProcessWaitNodeContainer::IterateWake(const void *pAddress, T callback)
|
|
{
|
|
bool bRetStatus { true };
|
|
|
|
if (AuAtomicLoad((AuUInt *)&this->waitList.pTail) == 0)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
Lock();
|
|
{
|
|
// FIFO
|
|
auto pCurrentHead = this->waitList.pTail;
|
|
while (pCurrentHead)
|
|
{
|
|
decltype(pCurrentHead) pBefore {};
|
|
|
|
#if !defined(WOA_SEMAPHORE_MODE) && defined(WOA_STRICTER_FIFO)
|
|
AU_LOCK_GUARD(pCurrentHead->mutex);
|
|
#elif !defined(WOA_SEMAPHORE_MODE) && !defined(WOA_STRICTER_FIFO)
|
|
// Condvar wait-list insertion barrier required for binary-semaphore-like conditions.
|
|
// We only need to lock against the { lock() if (should Sleep) { *** waitList++; *** unlock(); yield(); lock() } else { ... } unlocks() } condvar pattern.
|
|
// I often only care about the order of ->Signal() after the CondVar::waitList++ to ensure one state change is paired with one waitList signal.
|
|
// Don't block during the wakeup check, we only care about observing the condvar or semaphore waitlist, no codeguarding mutex required.
|
|
{
|
|
AU_LOCK_GUARD(pCurrentHead->mutex);
|
|
}
|
|
#endif
|
|
|
|
auto [bCont, bRemove] = callback(*pCurrentHead);
|
|
|
|
pBefore = pCurrentHead->GetBefore(pAddress);
|
|
|
|
if (bRemove)
|
|
{
|
|
this->RemoveEntry<true>(pAddress, pCurrentHead);
|
|
}
|
|
|
|
if (!bCont)
|
|
{
|
|
bRetStatus = false;
|
|
break;
|
|
}
|
|
|
|
if (pBefore == pCurrentHead)
|
|
{
|
|
break;
|
|
}
|
|
|
|
pCurrentHead = pBefore;
|
|
}
|
|
}
|
|
Unlock();
|
|
|
|
return bRetStatus;
|
|
}
|
|
|
|
template <bool bAllUnderLock>
|
|
void ProcessWaitNodeContainer::RemoveEntry(const void *pAddress, WaitEntry *pEntry)
|
|
{
|
|
|
|
#if 0
|
|
auto pNext = pEntry->GetNext(pAddress);
|
|
auto pBefore = pEntry->GetBefore(pAddress);
|
|
|
|
if (this->waitList.pHead == pEntry)
|
|
{
|
|
this->waitList.pHead = pNext;
|
|
}
|
|
|
|
if (this->waitList.pTail == pEntry)
|
|
{
|
|
this->waitList.pTail = pBefore;
|
|
}
|
|
|
|
// handle shared heads by tranversing collisions within the callers list instead of TLS.
|
|
// for the best case and original logic, the above still applies.
|
|
#else
|
|
|
|
WaitEntry *pNext, *pBefore;
|
|
if (auto pSpecial = pEntry->pSpecial)
|
|
{
|
|
pBefore = *GetPBeforeFromContainer(pSpecial, pAddress);
|
|
pNext = *GetPNextFromContainer(pSpecial, pAddress);
|
|
|
|
if (this->waitList.pHead == pEntry)
|
|
{
|
|
WaitEntry *pNextNextIterator { pEntry }, *pNext {};
|
|
while (pNextNextIterator &&
|
|
(pNextNextIterator = pNextNextIterator->GetSimilarLastItr(pAddress)))
|
|
{
|
|
pNext = pNextNextIterator;
|
|
}
|
|
this->waitList.pHead = pNext;
|
|
}
|
|
|
|
if (this->waitList.pTail == pEntry)
|
|
{
|
|
WaitEntry *pNextBeforeIterator { pEntry }, *pBefore {};
|
|
while (pNextBeforeIterator &&
|
|
(pNextBeforeIterator = pNextBeforeIterator->GetSimilarFirstItr(pAddress)))
|
|
{
|
|
pBefore = pNextBeforeIterator;
|
|
}
|
|
this->waitList.pTail = pBefore;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
pBefore = pEntry->pBefore;
|
|
pNext = pEntry->pNext;
|
|
|
|
if (this->waitList.pHead == pEntry)
|
|
{
|
|
this->waitList.pHead = pNext;
|
|
}
|
|
|
|
if (this->waitList.pTail == pEntry)
|
|
{
|
|
this->waitList.pTail = pBefore;
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
if (pBefore)
|
|
{
|
|
pBefore->SetNext(pAddress, pNext);
|
|
}
|
|
|
|
if (pNext)
|
|
{
|
|
pNext->SetBefore(pAddress, pBefore);
|
|
}
|
|
|
|
if (bAllUnderLock)
|
|
{
|
|
pEntry->SetNext(pAddress, nullptr);
|
|
pEntry->SetBefore(pAddress, nullptr);
|
|
//pEntry->bAlive = false; - redundant
|
|
}
|
|
}
|
|
|
|
void ProcessWaitNodeContainer::RemoveSelf(const void *pAddress, WaitEntry *pSelf)
|
|
{
|
|
{
|
|
this->Lock();
|
|
this->RemoveEntry<false>(pAddress, pSelf);
|
|
this->Unlock();
|
|
}
|
|
|
|
pSelf->SetBefore(pAddress, nullptr);
|
|
pSelf->SetNext(pAddress, nullptr);
|
|
pSelf->bAlive = false;
|
|
}
|
|
|
|
void ProcessWaitNodeContainer::Lock()
|
|
{
|
|
DoSpinLockOnVar(&this->uAtomic);
|
|
}
|
|
|
|
void ProcessWaitNodeContainer::Unlock()
|
|
{
|
|
AuAtomicClearU8Lock(&this->uAtomic);
|
|
}
|
|
|
|
#define AddressToIndexOp(pAddress) (AuHashCode(pAddress) & (kDefaultWaitPerProcess - 1))
|
|
#define AddressToIndex AddressToIndexOp(pAddress)
|
|
|
|
template <EWaitMethod eMethod>
|
|
WaitEntry *ProcessWaitContainer::WaitBufferFrom(const void *pAddress, AuUInt8 uSize, bool bScheduleFirst, const void *pCompareAddress)
|
|
{
|
|
return this->list[AddressToIndex].WaitBufferFrom<eMethod>(pAddress, uSize, bScheduleFirst, pCompareAddress);
|
|
}
|
|
|
|
// Routes the wait-multiple registration to the bucket selected by hashing pAddress (AddressToIndex).
WaitEntry *ProcessWaitContainer::WaitBufferFrom2(const void *pAddress, AuUInt8 uSize, const void *pAddressCompare, EWaitMethod eWaitMethod, MultipleInternalContext *pContext, const WaitMulipleContainer *pContainer)
{
    auto &bucket = this->list[AddressToIndex];
    return bucket.WaitBufferFrom2(pAddress, uSize, pAddressCompare, eWaitMethod, pContext, pContainer);
}
|
|
|
|
template <typename T>
|
|
bool ProcessWaitContainer::IterateWake(const void *pAddress, T callback)
|
|
{
|
|
return this->list[AddressToIndex].IterateWake(pAddress, callback);
|
|
}
|
|
|
|
void ProcessWaitContainer::RemoveSelf(const void *pAddress, WaitEntry *pSelf)
|
|
{
|
|
return this->list[AddressToIndex].RemoveSelf(pAddress, pSelf);
|
|
}
|
|
|
|
bool IsNativeWaitOnSupported()
|
|
{
|
|
#if defined(AURORA_IS_MODERNNT_DERIVED)
|
|
return pWaitOnAddress &&
|
|
AuSwInfo::IsWindows8Point1OrGreater();
|
|
#elif defined(AURORA_PLATFORM_LINUX)
|
|
return true;
|
|
#else
|
|
return SysNativeWaitOnAddressFutexSupported();
|
|
#endif
|
|
}
|
|
|
|
// Reports whether callers should prefer the native wait-on-address path.
// Always false when built with WOA_ALWAYS_DUMB_OS_TARGET or WOA_STRICTER_FIFO, and false (without
// caching) while the emulated implementation is preferred by configuration. Otherwise the result
// of IsNativeWaitOnSupported() is computed once and cached in a function-local static.
AUKN_SYM bool IsWaitOnRecommended()
{
#if defined(WOA_ALWAYS_DUMB_OS_TARGET) || defined(WOA_STRICTER_FIFO)
    return false;
#else
    static AuOptionalEx<bool> sCachedAnswer {};

    if (sCachedAnswer)
    {
        return sCachedAnswer.value();
    }

    if (Primitives::ThrdCfg::gPreferEmulatedWakeOnAddress)
    {
        return false;
    }

    const bool bSupported = IsNativeWaitOnSupported();
    sCachedAnswer = bSupported;
    return bSupported;
#endif
}
|
|
|
|
/// @deprecated
|
|
AUKN_SYM const AuList<AuUInt8> &GetValidWordSizes()
|
|
{
|
|
static const AuList<AuUInt8> kArray =
|
|
#if defined(AURORA_IS_MODERNNT_DERIVED)
|
|
{ 1, 2, 4, 8 };
|
|
#else
|
|
{ 4 };
|
|
#endif
|
|
return kArray;
|
|
}
|
|
|
|
template <EWaitMethod T>
|
|
bool WaitOnAddressWide(const void *pTargetAddress,
|
|
const void *pCompareAddress,
|
|
AuUInt8 uWordSize,
|
|
AuOptional<AuUInt64> qwNanoseconds,
|
|
AuOptional<AuUInt64> qwNanosecondsAbs,
|
|
bool bOSSupportsWait,
|
|
const void *pCompareAddress2)
|
|
{
|
|
WaitState state;
|
|
|
|
auto pWaitEntry = gProcessWaitables.template WaitBufferFrom<T>(pTargetAddress, uWordSize, true, pCompareAddress2);
|
|
if (!pWaitEntry)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
state.pCompare2 = pCompareAddress2;
|
|
|
|
if (qwNanoseconds)
|
|
{
|
|
state.qwNanosecondsAbs = AuTime::SteadyClockNS() + qwNanoseconds.value();
|
|
}
|
|
else if (qwNanosecondsAbs)
|
|
{
|
|
state.qwNanosecondsAbs = qwNanosecondsAbs.value();
|
|
}
|
|
|
|
#if defined(HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD)
|
|
auto pTempHoldMe = tlsWaitEntry;
|
|
#endif
|
|
|
|
auto bResult = pWaitEntry->template SleepOn<T>(state);
|
|
|
|
#if defined(HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD)
|
|
pTempHoldMe.reset();
|
|
#endif
|
|
|
|
if (AuAtomicLoad(&pWaitEntry->bAlive))
|
|
{
|
|
gProcessWaitables.RemoveSelf(pTargetAddress, pWaitEntry);
|
|
}
|
|
|
|
return bResult;
|
|
}
|
|
|
|
// Splits a wait address into { address, byte offset within its 4-byte word, comparison mask }.
//
// On modern NT derived targets the address is passed through untouched with offset 0 and a full
// 64-bit mask. Elsewhere:
//  - 8-byte words return the 4-byte rounded address, the byte offset and a full mask;
//  - smaller words return the mask of uWordSize bytes shifted to the byte offset, truncated to
//    32 bits.
// NOTE(review): for word sizes other than 8 the tuple carries the original pAddress rather than
// the rounded one - confirm that callers expect this.
AuTuple<const void *, AuUInt8, AuUInt64> DecodeAddress(const void *pAddress,
                                                       AuUInt32 uWordSize)
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
    return AuMakeTuple(pAddress, 0, kMax64);
#else
    const auto uAligned = AuPageRound(AuUInt(pAddress), AuUInt(4));
    const auto uOffset = (AuUInt)pAddress - uAligned;

    if (uWordSize == 8)
    {
        return AuMakeTuple((const void *)uAligned, uOffset, kMax64);
    }

    AuUInt32 uSizeMask = (1ull << (uWordSize * 8)) - 1ull;

    // Only offsets 0..3 are meaningful inside a 4-byte word.
    if (uOffset > 3)
    {
        SysPanic("Invalid Branch");
    }

    const AuUInt32 uShiftBits = AuUInt32(uOffset) * 8u;
    return AuMakeTuple(pAddress, AuUInt8(uOffset), AuUInt32(uSizeMask << uShiftBits));
#endif
}
|
|
|
|
static bool RunOSWaitOnAddressTimed(const void *pTargetAddress,
|
|
const void *pCompareAddress,
|
|
AuUInt8 uWordSize,
|
|
AuUInt64 uAbsTimeSteadyClock,
|
|
AuUInt64 uRelativeNanoseconds,
|
|
AuOptional<AuUInt64> uAbsTimeAltClock /* hint */,
|
|
bool bSpun = false)
|
|
{
|
|
#if defined(AURORA_IS_MODERNNT_DERIVED)
|
|
|
|
if (pRtlWaitOnAddress)
|
|
{
|
|
AuUInt64 uNow {};
|
|
while (uAbsTimeSteadyClock ?
|
|
(uAbsTimeSteadyClock > (uNow = AuTime::SteadyClockNS())) :
|
|
true)
|
|
{
|
|
LARGE_INTEGER word {};
|
|
|
|
if (uAbsTimeAltClock)
|
|
{
|
|
word.QuadPart = AuTime::ConvertTimestampNs(uAbsTimeAltClock.value());
|
|
}
|
|
else if (uAbsTimeSteadyClock)
|
|
{
|
|
if (uAbsTimeSteadyClock <= uNow)
|
|
{
|
|
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress);
|
|
}
|
|
|
|
word.QuadPart = -(AuInt64(uAbsTimeSteadyClock - uNow) / 100ull);
|
|
|
|
if (!word.QuadPart)
|
|
{
|
|
word.QuadPart = 1;
|
|
}
|
|
}
|
|
|
|
if (WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress))
|
|
{
|
|
if (pRtlWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize, &word))
|
|
{
|
|
return true;
|
|
}
|
|
else if (!uAbsTimeSteadyClock)
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
// ~~some paths might miss the uRelativeNanoseconds, like cas loops.~~
|
|
// most paths will now skimp on the relative values
|
|
if (uAbsTimeSteadyClock && !uRelativeNanoseconds)
|
|
{
|
|
AuInt64 iDelta = uAbsTimeSteadyClock;
|
|
iDelta -= AuTime::SteadyClockNS();
|
|
|
|
if (iDelta <= 0)
|
|
{
|
|
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress);
|
|
}
|
|
|
|
uRelativeNanoseconds = iDelta;
|
|
}
|
|
|
|
auto uMaxSwitches = gRuntimeConfig.threadingConfig.uUWPNanosecondEmulationMaxYields;
|
|
auto bUWPNanosecondEmulationCheckFirst = Primitives::ThrdCfg::gUWPNanosecondEmulationCheckFirst;
|
|
|
|
// LockN(<1MS) on a platform without that resolution of yielding... damn
|
|
auto uMS = AuNSToMS<AuUInt32>(uRelativeNanoseconds);
|
|
if (!uMS)
|
|
{
|
|
// first: cpu spin to avoid the kernel all together
|
|
if (!bSpun)
|
|
{
|
|
if (TryWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, uWordSize))
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// second: yield
|
|
unsigned uLimit {};
|
|
do
|
|
{
|
|
if (!WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress))
|
|
{
|
|
break;
|
|
}
|
|
|
|
AuThreading::ContextYield();
|
|
|
|
if (bUWPNanosecondEmulationCheckFirst)
|
|
{
|
|
if (uLimit++ > uMaxSwitches)
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
while (uAbsTimeSteadyClock > AuTime::SteadyClockNS()); // ...until times up
|
|
}
|
|
else // high level lock function was called with ms scale resolution
|
|
{
|
|
// first: wait on the address with an ms scale timeout
|
|
(void)pWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, uWordSize, uMS);
|
|
|
|
// never trust the error value/status provided by wait addresses - instead, do a quick compare
|
|
if (!WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress))
|
|
{
|
|
// best case: we woke up during the ms-res waitonaddress
|
|
return true;
|
|
}
|
|
|
|
// attempt to yield again, potentially context switching a few times to hit any NS remainder
|
|
AuUInt64 uNow {};
|
|
unsigned uLimit {};
|
|
while (uAbsTimeSteadyClock > (uNow = AuTime::SteadyClockNS()))
|
|
{
|
|
uMS = AuNSToMS<AuUInt32>(uAbsTimeSteadyClock - uNow);
|
|
|
|
if (Primitives::DoTryIfAlderLake([&]()
|
|
{
|
|
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress);
|
|
}, pTargetAddress))
|
|
{
|
|
// hit it within the span of 1 << SpinLoopPowerA SMT stalls
|
|
return true;
|
|
}
|
|
|
|
if (!uMS)
|
|
{
|
|
// burn off any remainder cycles by switching contexts (this isnt a very long time usually)
|
|
if (uLimit++ < uMaxSwitches)
|
|
{
|
|
AuThreading::ContextYield();
|
|
}
|
|
else
|
|
{
|
|
// do not burn the cpu to meet the timeout. we'll just undershoot.
|
|
return false;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
(void)pWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, uWordSize, uMS);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress);
|
|
|
|
#else
|
|
|
|
return SysWaitOnAddressTimed(pTargetAddress,
|
|
pCompareAddress,
|
|
uWordSize,
|
|
uAbsTimeSteadyClock,
|
|
uRelativeNanoseconds,
|
|
uAbsTimeAltClock,
|
|
bSpun);
|
|
|
|
#endif
|
|
}
|
|
|
|
static void RunOSWaitOnAddressNoTimedNoErrors(const void *pTargetAddress,
|
|
const void *pCompareAddress,
|
|
WaitState &state)
|
|
{
|
|
while (WaitBuffer::Compare2<EWaitMethod::eNotEqual, kPlatformFutexNoForcedAlignedU32>(pTargetAddress, state.uWordSize, pCompareAddress, state.uDownsizeMask))
|
|
{
|
|
if (!SysWaitOnAddressNoTimed(pTargetAddress, pCompareAddress, state.uWordSize))
|
|
{
|
|
//AuThreading::ContextYield();
|
|
}
|
|
}
|
|
}
|
|
|
|
static bool RunOSWaitOnAddressTimedSteady(const void *pTargetAddress,
|
|
const void *pCompareAddress,
|
|
WaitState &state,
|
|
bool bSpun = false)
|
|
{
|
|
#if 1
|
|
if (!WaitBuffer::Compare2<EWaitMethod::eNotEqual, kPlatformFutexNoForcedAlignedU32>(pTargetAddress, state.uWordSize, pCompareAddress, state.uDownsizeMask))
|
|
{
|
|
return true;
|
|
}
|
|
|
|
(void)RunOSWaitOnAddressTimed(pTargetAddress, pCompareAddress, state.uWordSize, state.qwNanosecondsAbs.value(), { }, { }, bSpun);
|
|
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, kPlatformFutexNoForcedAlignedU32>(pTargetAddress, state.uWordSize, pCompareAddress, state.uDownsizeMask);
|
|
#else
|
|
return RunOSWaitOnAddressTimed(pTargetAddress, pCompareAddress, state.uWordSize, state.qwNanosecondsAbs.value(), { }, { }, bSpun);
|
|
#endif
|
|
}
|
|
|
|
template <EWaitMethod T>
|
|
static void RunOSWaitOnAddressEQNoTimedNoErrors(const void *pTargetAddress,
|
|
const void *pCompareAddress,
|
|
WaitState &state)
|
|
{
|
|
while (true)
|
|
{
|
|
WaitBufferLegacy wb = WaitBufferLegacy::From(pTargetAddress, state.uWordSize);
|
|
|
|
if (!WaitBuffer::Compare2<T, kPlatformFutexNoForcedAlignedU32>(wb.buffer, state.uWordSize, pCompareAddress, state.uDownsizeMask))
|
|
{
|
|
return;
|
|
}
|
|
|
|
(void)SysWaitOnAddressNoTimed(pTargetAddress, wb.buffer, state.uWordSize);
|
|
|
|
if (WaitBuffer::Compare2<T, kPlatformFutexNoForcedAlignedU32>(pTargetAddress, state.uWordSize, pCompareAddress, state.uDownsizeMask))
|
|
{
|
|
SysWakeOneOnAddress(pTargetAddress);
|
|
}
|
|
else
|
|
{
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
template <EWaitMethod T>
|
|
static bool RunOSWaitOnAddressEQTimedSteady(const void *pTargetAddress,
|
|
const void *pCompareAddress,
|
|
WaitState &state,
|
|
bool bSpun = false)
|
|
{
|
|
while (true)
|
|
{
|
|
WaitBufferLegacy wb = WaitBufferLegacy::From(pTargetAddress, state.uWordSize);
|
|
|
|
if (!WaitBuffer::Compare2<T, kPlatformFutexNoForcedAlignedU32>(wb.buffer, state.uWordSize, pCompareAddress, state.uDownsizeMask))
|
|
{
|
|
return true;
|
|
}
|
|
|
|
bool bResult = RunOSWaitOnAddressTimed(pTargetAddress, wb.buffer, state.uWordSize, state.qwNanosecondsAbs.value(), { }, { }, bSpun);
|
|
|
|
if (WaitBuffer::Compare2<T, kPlatformFutexNoForcedAlignedU32>(pTargetAddress, state.uWordSize, pCompareAddress, state.uDownsizeMask))
|
|
{
|
|
SysWakeOneOnAddress(pTargetAddress);
|
|
if (!bResult)
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Windows 8+ thread primitives might use me instead of the public API
|
|
// it does work on Linux and Windows 8+
|
|
// it does not, however, work on emulated platforms
|
|
// this is intentional
|
|
bool InternalLTSWaitOnAddressHighRes(const void *pTargetAddress,
|
|
const void *pCompareAddress,
|
|
AuUInt8 uWordSize,
|
|
AuUInt64 qwNanosecondsAbs)
|
|
{
|
|
auto [pWaitAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, uWordSize);
|
|
auto pCompareAddress2 = AuReinterpretCast<const char *>(pCompareAddress) - uDelta;
|
|
|
|
WaitState state;
|
|
state.uDownsizeMask = uMask;
|
|
state.uWordSize = uMask != kMax64 ? 4 : uWordSize;
|
|
|
|
if (!qwNanosecondsAbs)
|
|
{
|
|
RunOSWaitOnAddressNoTimedNoErrors(pWaitAddress, pCompareAddress2, state);
|
|
return true;
|
|
}
|
|
else
|
|
{
|
|
state.qwNanosecondsAbs = qwNanosecondsAbs;
|
|
return RunOSWaitOnAddressTimedSteady(pWaitAddress, pCompareAddress2, state, true);
|
|
}
|
|
}
|
|
|
|
void InternalLTSWakeAll(const void *pTargetAddress)
|
|
{
|
|
#if defined(WOA_ALWAYS_DUMB_OS_TARGET)
|
|
WakeAllOnAddress(pTargetAddress);
|
|
#else
|
|
auto [pWakeAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, 1);
|
|
SysWakeAllOnAddress(pWakeAddress);
|
|
#endif
|
|
}
|
|
|
|
void InternalLTSWakeOne(const void *pTargetAddress)
|
|
{
|
|
#if defined(WOA_ALWAYS_DUMB_OS_TARGET)
|
|
WakeOnAddress(pTargetAddress);
|
|
#else
|
|
auto [pWakeAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, 1);
|
|
if (uDelta)
|
|
{
|
|
SysWakeAllOnAddress(pWakeAddress);
|
|
}
|
|
else
|
|
{
|
|
SysWakeNOnAddress(pWakeAddress, 1);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
void InternalLTSWakeCount(const void *pTargetAddress, AuUInt32 uCount)
|
|
{
|
|
#if defined(WOA_ALWAYS_DUMB_OS_TARGET)
|
|
WakeNOnAddress(pTargetAddress, uCount);
|
|
#else
|
|
auto [pWakeAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, 1);
|
|
if (uDelta)
|
|
{
|
|
SysWakeAllOnAddress(pWakeAddress);
|
|
}
|
|
else
|
|
{
|
|
SysWakeNOnAddress(pWakeAddress, uCount);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
// Public wait with a relative timeout.
//
//  qwNanoseconds  - relative timeout in nanoseconds; 0 is forwarded as 0.
//  optAlreadySpun - forwarded unchanged to WaitOnAddressSteady.
//
// A non-zero timeout is converted to an absolute steady-clock deadline and the
// wait is delegated to WaitOnAddressSteady.
WOAFASTPUB bool WaitOnAddress(const void *pTargetAddress,
                              const void *pCompareAddress,
                              AuUInt8 uWordSize,
                              AuUInt64 qwNanoseconds,
                              AuOptional<bool> optAlreadySpun)
{
#if !defined(WOA_STRICTER_FIFO)
    // Fast path: if the value already differs, return before querying the
    // steady clock, which can be a real syscall on platforms lacking
    // KUSER QPC, the Linux vDSO, etc.
    if (!WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress, kMax64))
    {
        return true;
    }
#endif

    AuUInt64 qwDeadline {};
    if (qwNanoseconds)
    {
        qwDeadline = qwNanoseconds + AuTime::SteadyClockNS();
    }

    return WaitOnAddressSteady(pTargetAddress,
                               pCompareAddress,
                               uWordSize,
                               qwDeadline,
                               optAlreadySpun);
}
|
|
|
|
// Public wait with a relative timeout and a caller-selected comparison method.
//
//  eMethod        - comparison method used for the wait condition.
//  qwNanoseconds  - relative timeout in nanoseconds; 0 is forwarded as 0.
//  optAlreadySpun - forwarded unchanged to WaitOnAddressSpecialSteady.
//
// A non-zero timeout is converted to an absolute steady-clock deadline and the
// wait is delegated to WaitOnAddressSpecialSteady.
WOAFASTPUB bool WaitOnAddressSpecial(EWaitMethod eMethod,
                                     const void *pTargetAddress,
                                     const void *pCompareAddress,
                                     AuUInt8 uWordSize,
                                     AuUInt64 qwNanoseconds,
                                     AuOptional<bool> optAlreadySpun)
{
#if !defined(WOA_STRICTER_FIFO)
    // Fast path: if the condition already holds, return before querying the
    // steady clock, which can be a real syscall on platforms lacking
    // KUSER QPC, the Linux vDSO, etc.
    if (!WaitBuffer::Compare(pTargetAddress, uWordSize, pCompareAddress, kMax64, eMethod))
    {
        return true;
    }
#endif

    AuUInt64 qwDeadline {};
    if (qwNanoseconds)
    {
        qwDeadline = qwNanoseconds + AuTime::SteadyClockNS();
    }

    return WaitOnAddressSpecialSteady(eMethod,
                                      pTargetAddress,
                                      pCompareAddress,
                                      uWordSize,
                                      qwDeadline,
                                      optAlreadySpun);
}
|
|
|
|
template <EWaitMethod T>
|
|
auline bool TryWaitOnAddressSpecialTmpl(const void *pTargetAddress,
|
|
const void *pCompareAddress,
|
|
AuUInt8 uWordSize)
|
|
{
|
|
#if !defined(WOA_STRICTER_FIFO)
|
|
return Primitives::DoTryIfAlderLake([&]()
|
|
{
|
|
return !WaitBuffer::Compare2<T, true>(pTargetAddress, uWordSize, pCompareAddress);
|
|
}, pTargetAddress);
|
|
#else
|
|
return false;
|
|
#endif
|
|
}
|
|
|
|
// Public non-blocking attempt using the default "not equal" comparison.
// Thin wrapper over TryWaitOnAddressSpecialTmpl<EWaitMethod::eNotEqual>.
WOAFASTPUB bool TryWaitOnAddress(const void *pTargetAddress,
                                 const void *pCompareAddress,
                                 AuUInt8 uWordSize)
{
    return TryWaitOnAddressSpecialTmpl<EWaitMethod::eNotEqual>(pTargetAddress,
                                                                pCompareAddress,
                                                                uWordSize);
}
|
|
|
|
// Public non-blocking attempt with a runtime-selected comparison method.
// Dispatches eMethod to the matching TryWaitOnAddressSpecialTmpl
// instantiation via DO_OF_METHOD_TYPE. Returns false when the dispatch does
// not return, or when WOA_STRICTER_FIFO is defined.
WOAFASTPUB bool TryWaitOnAddressSpecial(EWaitMethod eMethod,
                                        const void *pTargetAddress,
                                        const void *pCompareAddress,
                                        AuUInt8 uWordSize)
{
#if !defined(WOA_STRICTER_FIFO)
    DO_OF_METHOD_TYPE(return, TryWaitOnAddressSpecialTmpl, pTargetAddress, pCompareAddress, uWordSize);
#endif

    // Reached when the dispatch above did not return.
    return false;
}
|
|
|
|
// Non-blocking attempt with an additional caller-supplied acceptance check.
//
//  check - invoked only once the target no longer matches the compare value;
//          its result decides whether the attempt succeeded. An empty
//          function falls back to plain TryWaitOnAddress.
WOAFASTPUB bool TryWaitOnAddressEx(const void *pTargetAddress,
                                   const void *pCompareAddress,
                                   AuUInt8 uWordSize,
                                   const AuFunction<bool(const void *, const void *, AuUInt8)> &check)
{
    if (!check)
    {
        return TryWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize);
    }

    auto isAccepted = [&]() -> bool
    {
        // Short-circuit: the callback is not consulted while the value is unchanged.
        return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress) &&
               check(pTargetAddress, pCompareAddress, uWordSize);
    };

    return Primitives::DoTryIfAlderLake(isAccepted, pTargetAddress);
}
|
|
|
|
template <EWaitMethod T>
|
|
bool TryWaitOnAddressSpecialExTmpl(const void *pTargetAddress,
|
|
const void *pCompareAddress,
|
|
AuUInt8 uWordSize,
|
|
const AuFunction<bool(const void *, const void *, AuUInt8)> &check)
|
|
{
|
|
#if !defined(WOA_STRICTER_FIFO)
|
|
return Primitives::DoTryIfAlderLake([&]()
|
|
{
|
|
if (WaitBuffer::Compare2<T, true>(pTargetAddress, uWordSize, pCompareAddress))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
return check(pTargetAddress, pCompareAddress, uWordSize);
|
|
}, pTargetAddress);
|
|
#else
|
|
return false;
|
|
#endif
|
|
}
|
|
|
|
// Public non-blocking attempt with a runtime-selected comparison method and
// an optional acceptance callback.
//
// An empty callback falls back to TryWaitOnAddressSpecial. Otherwise eMethod
// is dispatched to the matching TryWaitOnAddressSpecialExTmpl instantiation.
// Returns false when the dispatch does not return, or when WOA_STRICTER_FIFO
// is defined.
WOAFASTPUB bool TryWaitOnAddressSpecialEx(EWaitMethod eMethod,
                                          const void *pTargetAddress,
                                          const void *pCompareAddress,
                                          AuUInt8 uWordSize,
                                          const AuFunction<bool(const void *, const void *, AuUInt8)> &check)
{
    if (!check)
    {
        return TryWaitOnAddressSpecial(eMethod, pTargetAddress, pCompareAddress, uWordSize);
    }

#if !defined(WOA_STRICTER_FIFO)
    DO_OF_METHOD_TYPE(return, TryWaitOnAddressSpecialExTmpl, pTargetAddress, pCompareAddress, uWordSize, check);
#endif

    // Reached when the dispatch above did not return.
    return false;
}
|
|
|
|
// Wakes up to uNMaximumThreads waiters of pTargetAddress.
//
// Native path (IsWaitOnRecommended()): an address with a non-zero byte offset
// wakes every waiter; otherwise SysWakeNOnAddress wakes up to the requested
// number.
//
// Emulated path: walks the process-wide wait list, signalling entries for
// this address and stopping once the budget is used up.
WOAFASTPUB void WakeNOnAddress(const void *pTargetAddress,
                               AuUInt8 uNMaximumThreads)
{
    if (IsWaitOnRecommended())
    {
        auto [pDecoded, uByteOffset, uCompareMask] = DecodeAddress(pTargetAddress, 1);

        if (!uByteOffset)
        {
            SysWakeNOnAddress(pDecoded, uNMaximumThreads);
        }
        else
        {
            SysWakeAllOnAddress(pDecoded);
        }

        return;
    }

    // The callback yields the pair { keep iterating, wake this entry }.
    (void)gProcessWaitables.IterateWake(pTargetAddress, [&](WaitEntry &entry) -> AuPair<bool, bool>
    {
        // Budget already spent: stop without touching this entry.
        if (!uNMaximumThreads)
        {
            return AuMakePair(false, false);
        }

        bool bSignalled {};
        if (entry.TrySignalAddress(pTargetAddress))
        {
            bSignalled = true;
            uNMaximumThreads--;
        }

        bool bKeepGoing = uNMaximumThreads != 0;
        return AuMakePair(bKeepGoing, bSignalled);
    });
}
|
|
|
|
// Wakes at most one waiter of pTargetAddress; shorthand for
// WakeNOnAddress(pTargetAddress, 1).
WOAFASTPUB void WakeOnAddress(const void *pTargetAddress)
{
    WakeNOnAddress(pTargetAddress, 1);
}
|
|
|
|
// Wakes every waiter of pTargetAddress.
//
// Native path (IsWaitOnRecommended()): decodes the address and calls
// SysWakeAllOnAddress. Emulated path: walks the whole process-wide wait list
// and signals every entry that accepts this address.
WOAFASTPUB void WakeAllOnAddress(const void *pTargetAddress)
{
    if (IsWaitOnRecommended())
    {
        auto [pDecoded, uByteOffset, uCompareMask] = DecodeAddress(pTargetAddress, 1);
        SysWakeAllOnAddress(pDecoded);
        return;
    }

    // The callback yields the pair { keep iterating, wake this entry };
    // iteration never stops early.
    (void)gProcessWaitables.IterateWake(pTargetAddress, [&](WaitEntry &entry) -> AuPair<bool, bool>
    {
        return AuMakePair(true, entry.TrySignalAddress(pTargetAddress));
    });
}
|
|
|
|
// Public wait against an absolute steady-clock deadline, using the default
// "not equal" comparison.
//
//  qwNanoseconds  - absolute steady-clock deadline in nanoseconds; 0 means no
//                   timeout.
//  optAlreadySpun - spin hint from the caller (see the NOTE below).
//
// Returns true when the value changed (always true for the native untimed
// wait); timed waits return the result of the underlying wait routine.
WOAFASTPUB bool WaitOnAddressSteady(const void *pTargetAddress,
                                    const void *pCompareAddress,
                                    AuUInt8 uWordSize,
                                    AuUInt64 qwNanoseconds,
                                    AuOptional<bool> optAlreadySpun)
{
#if !defined(WOA_STRICTER_FIFO)
    // Fast path: bail out before paying for the emulated path's dynamic TLS
    // fetch, security checks, or other heavy thunks.
    if (!WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress, kMax64))
    {
        return true;
    }
#endif

    // NOTE(review): both paths below spin only when optAlreadySpun is true.
    // Confirm this polarity is intended.

    if (!IsWaitOnRecommended())
    {
        // Emulated path.
        if (Primitives::ThrdCfg::gPreferWaitOnAddressAlwaysSpin &&
            optAlreadySpun.value_or(false))
        {
            if (TryWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize))
            {
                return true;
            }
        }

        return WaitOnAddressWide<EWaitMethod::eNotEqual>(pTargetAddress, pCompareAddress, uWordSize, {}, qwNanoseconds ? qwNanoseconds : AuOptional<AuUInt64>{}, false, pCompareAddress);
    }

    // Native path.
    auto [pWaitAddress, uByteOffset, uCompareMask] = DecodeAddress(pTargetAddress, uWordSize);

    // Move the compare pointer back by the same offset as the wait address.
    auto pShiftedCompare = AuReinterpretCast<const char *>(pCompareAddress) - uByteOffset;

    WaitState state;
    state.uDownsizeMask = uCompareMask;
    state.uWordSize = uCompareMask != kMax64 ? 4 : uWordSize;

    bool bSpun {};
    if (Primitives::ThrdCfg::gPreferWaitOnAddressAlwaysSpinNative &&
        optAlreadySpun.value_or(false))
    {
        if (TryWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize))
        {
            return true;
        }

        bSpun = true;
    }

    if (qwNanoseconds)
    {
        state.qwNanosecondsAbs = qwNanoseconds;
        return RunOSWaitOnAddressTimedSteady(pWaitAddress, pShiftedCompare, state, bSpun);
    }

    RunOSWaitOnAddressNoTimedNoErrors(pWaitAddress, pShiftedCompare, state);
    return true;
}
|
|
|
|
// Public wait against an absolute steady-clock deadline with a
// caller-selected comparison method.
//
//  eMethod        - comparison method used for the wait condition.
//  qwNanoseconds  - absolute steady-clock deadline in nanoseconds; 0 means no
//                   timeout.
//  optAlreadySpun - spin hint from the caller; spinning only happens when it
//                   is true and the matching config flag is set.
//
// Returns true when the condition is met (always true for the native untimed
// wait); false when no dispatch below returned a value.
WOAFASTPUB bool WaitOnAddressSpecialSteady(EWaitMethod eMethod,
                                           const void *pTargetAddress,
                                           const void *pCompareAddress,
                                           AuUInt8 uWordSize,
                                           AuUInt64 qwNanoseconds,
                                           AuOptional<bool> optAlreadySpun)
{
#if !defined(WOA_STRICTER_FIFO)
    // Fast path: bail out before paying for the emulated path's dynamic TLS
    // fetch, security checks, or other heavy thunks.
    if (!WaitBuffer::Compare(pTargetAddress, uWordSize, pCompareAddress, kMax64, eMethod))
    {
        return true;
    }
#endif

    if (!IsWaitOnRecommended())
    {
        // Emulated path.
        if (Primitives::ThrdCfg::gPreferWaitOnAddressAlwaysSpin &&
            optAlreadySpun.value_or(false))
        {
            if (TryWaitOnAddressSpecial(eMethod, pTargetAddress, pCompareAddress, uWordSize))
            {
                return true;
            }
        }

        DO_OF_METHOD_TYPE(return, WaitOnAddressWide, pTargetAddress, pCompareAddress, uWordSize, {}, qwNanoseconds ? qwNanoseconds : AuOptional<AuUInt64> {}, false, pCompareAddress);

        // Reached when the dispatch above did not return.
        return false;
    }

    // Native path.
    auto [pWaitAddress, uByteOffset, uCompareMask] = DecodeAddress(pTargetAddress, uWordSize);

    // Move the compare pointer back by the same offset as the wait address.
    auto pShiftedCompare = AuReinterpretCast<const char *>(pCompareAddress) - uByteOffset;

    WaitState state;
    state.uDownsizeMask = uCompareMask;
    state.uWordSize = uCompareMask != kMax64 ? 4 : uWordSize;
    state.pCompare2 = pCompareAddress;
    state.eWaitMethod = eMethod;

    bool bSpun {};
    if (Primitives::ThrdCfg::gPreferWaitOnAddressAlwaysSpinNative &&
        optAlreadySpun.value_or(false))
    {
        if (TryWaitOnAddressSpecial(eMethod, pTargetAddress, pCompareAddress, uWordSize))
        {
            return true;
        }

        bSpun = true;
    }

    if (!qwNanoseconds)
    {
        DO_OF_METHOD_TYPE(, RunOSWaitOnAddressEQNoTimedNoErrors, pWaitAddress, pShiftedCompare, state);
        return true;
    }

    state.qwNanosecondsAbs = qwNanoseconds;
    DO_OF_METHOD_TYPE(return, RunOSWaitOnAddressEQTimedSteady, pWaitAddress, pShiftedCompare, state, bSpun);

    // Reached when the dispatch above did not return.
    return false;
}
|
|
|
|
// Finds the first entry of pContainer whose target address is exactly
// pAddress and returns the address of its internal context's pNext link.
// Returns nullptr when no entry matches.
static WaitEntry **GetPNextFromContainer(const WaitMulipleContainer *pContainer, const void *pAddress)
{
    auto uEntries = pContainer->waitArray.Count<WaitMultipleEntry>();
    auto pEntries = pContainer->waitArray.Begin<WaitMultipleEntry>();

    for (AU_ITERATE_N(uIndex, uEntries))
    {
        if (pEntries[uIndex].pTargetAddress != pAddress)
        {
            continue;
        }

        auto pContext = AuReinterpretCast<MultipleInternalContext>(pEntries[uIndex].internalContext);
        return &pContext->pNext;
    }

    return nullptr;
}
|
|
|
|
// Finds the first entry of pContainer whose target address is exactly
// pAddress and returns the address of its internal context's pBefore link.
// Returns nullptr when no entry matches.
static WaitEntry **GetPBeforeFromContainer(const WaitMulipleContainer *pContainer, const void *pAddress)
{
    auto uEntries = pContainer->waitArray.Count<WaitMultipleEntry>();
    auto pEntries = pContainer->waitArray.Begin<WaitMultipleEntry>();

    for (AU_ITERATE_N(uIndex, uEntries))
    {
        if (pEntries[uIndex].pTargetAddress != pAddress)
        {
            continue;
        }

        auto pContext = AuReinterpretCast<MultipleInternalContext>(pEntries[uIndex].internalContext);
        return &pContext->pBefore;
    }

    return nullptr;
}
|
|
|
|
// Walks pContainer with AU_ITERATE_FROM_COUNT and returns the address of the
// pNext link of the first visited entry whose target address maps to the same
// AddressToIndexOp value as pAddress. Returns nullptr when none does.
static WaitEntry **GetPLastFromContainer(const WaitMulipleContainer *pContainer, const void *pAddress)
{
    auto uEntries = pContainer->waitArray.Count<WaitMultipleEntry>();
    auto pEntries = pContainer->waitArray.Begin<WaitMultipleEntry>();

    for (AU_ITERATE_FROM_COUNT(uIndex, uEntries))
    {
        if (AddressToIndexOp(pEntries[uIndex].pTargetAddress) != AddressToIndexOp(pAddress))
        {
            continue;
        }

        auto pContext = AuReinterpretCast<MultipleInternalContext>(pEntries[uIndex].internalContext);
        return &pContext->pNext;
    }

    return nullptr;
}
|
|
|
|
// Walks pContainer front to back and returns the address of the pBefore link
// of the first entry whose target address maps to the same AddressToIndexOp
// value as pAddress. Returns nullptr when none does.
static WaitEntry **GetPFirstFromContainer(const WaitMulipleContainer *pContainer, const void *pAddress)
{
    auto uEntries = pContainer->waitArray.Count<WaitMultipleEntry>();
    auto pEntries = pContainer->waitArray.Begin<WaitMultipleEntry>();

    for (AU_ITERATE_N(uIndex, uEntries))
    {
        if (AddressToIndexOp(pEntries[uIndex].pTargetAddress) != AddressToIndexOp(pAddress))
        {
            continue;
        }

        auto pContext = AuReinterpretCast<MultipleInternalContext>(pEntries[uIndex].internalContext);
        return &pContext->pBefore;
    }

    return nullptr;
}
|
|
|
|
static const void *GetPCompareFromContainer(const WaitMulipleContainer *pContainer, const void *pAddress)
|
|
{
|
|
auto uCount = pContainer->waitArray.Count<WaitMultipleEntry>();
|
|
auto pBase = pContainer->waitArray.Begin<WaitMultipleEntry>();
|
|
|
|
for (AU_ITERATE_N(i, uCount))
|
|
{
|
|
auto pCurrent = AuReinterpretCast<MultipleInternalContext>(pBase[i].internalContext);
|
|
|
|
if (pBase[i].pTargetAddress == pAddress)
|
|
{
|
|
return pBase[i].pCompareAddress;
|
|
}
|
|
}
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
// The functions in this file reinterpret WaitMultipleEntry::internalContext as
// a MultipleInternalContext, so that storage must be at least as large.
static_assert(sizeof(MultipleInternalContext) <= sizeof(WaitMultipleEntry::internalContext));
|
|
|
|
static bool WaitForMultipleAddressesOrEx(const WaitMulipleContainer &waitMultipleOnAddress, AuUInt32 uMinTrigger)
|
|
{
|
|
bool bResult {}, bAny {}, bSleepStatus {};
|
|
WaitEntry *pWaitEntryMain {}, *pWaitEntryAux {};
|
|
|
|
SysAssertDbg(!IsWaitOnRecommended(), "WoA not in emulation mode");
|
|
|
|
auto uCount = waitMultipleOnAddress.waitArray.Count<WaitMultipleEntry>();
|
|
auto pBase = waitMultipleOnAddress.waitArray.Begin<WaitMultipleEntry>();
|
|
|
|
#if defined(HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD)
|
|
auto pTempHoldMe = tlsWaitEntry;
|
|
#endif
|
|
|
|
do
|
|
{
|
|
for (AU_ITERATE_N(i, uCount))
|
|
{
|
|
auto ¤t = pBase[i];
|
|
auto pCurrent = AuReinterpretCast<MultipleInternalContext>(pBase[i].internalContext);
|
|
auto &state = pCurrent->state;
|
|
|
|
if (current.bIgnoreCurrentFlag)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
pCurrent->pBefore = nullptr;
|
|
pCurrent->pNext = nullptr;
|
|
pCurrent->uMinTrigger = uMinTrigger;
|
|
|
|
// Prevent inc overflows without interfering with and callers, should these word sizes decrease over time
|
|
if (!uMinTrigger)
|
|
{
|
|
pCurrent->uCounter = 0;
|
|
}
|
|
|
|
pWaitEntryAux = gProcessWaitables.WaitBufferFrom2(current.pTargetAddress, current.uSize, current.pCompareAddress, current.eMethod, pCurrent, &waitMultipleOnAddress);
|
|
if (!pWaitEntryAux)
|
|
{
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
pWaitEntryMain = pWaitEntryAux;
|
|
}
|
|
|
|
state.qwNanosecondsAbs = waitMultipleOnAddress.qwNanoseconds;
|
|
bAny = true;
|
|
}
|
|
|
|
if (!bAny)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
bSleepStatus = pWaitEntryAux && pWaitEntryAux->SleepLossy(waitMultipleOnAddress.qwNanoseconds);
|
|
|
|
for (AU_ITERATE_N(i, uCount))
|
|
{
|
|
auto ¤t = pBase[i];
|
|
auto pCurrent = AuReinterpretCast<MultipleInternalContext>(pBase[i].internalContext);
|
|
auto &state = pCurrent->state;
|
|
|
|
if (current.bIgnoreCurrentFlag)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
if (!WaitBuffer::Compare(current.pTargetAddress, current.uSize, current.pCompareAddress, kMax64, current.eMethod))
|
|
{
|
|
current.uHasStateChangedCounter++;
|
|
bResult = true;
|
|
}
|
|
|
|
gProcessWaitables.RemoveSelf(current.pTargetAddress, pWaitEntryMain);
|
|
}
|
|
}
|
|
while (!bResult && (!waitMultipleOnAddress.qwNanoseconds || bSleepStatus));
|
|
|
|
#if defined(HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD)
|
|
pTempHoldMe.reset();
|
|
#endif
|
|
|
|
return bResult;
|
|
}
|
|
|
|
// Public "wait for any" entry point: forwards to WaitForMultipleAddressesOrEx
// with a minimum trigger count of 0.
AUKN_SYM bool WaitForMultipleAddressesOr(const WaitMulipleContainer &waitMultipleOnAddress)
{
    const AuUInt32 uMinTrigger = 0;
    return WaitForMultipleAddressesOrEx(waitMultipleOnAddress, uMinTrigger);
}
|
|
|
|
// Public "wait for all" entry point, built on repeated
// WaitForMultipleAddressesOrEx rounds.
//
// Every entry's ignore flag and state-changed counter are saved first. After
// each round, entries whose counter moved are marked ignored so later rounds
// only wait on the rest. On exit the ignore flags are restored, and each
// originally non-ignored entry's counter is set to its saved value plus one
// on success, or back to the saved value on failure.
//
// Returns true when every non-ignored entry triggered; false when a timed
// round (qwNanoseconds != 0) reported failure.
AUKN_SYM bool WaitForMultipleAddressesAnd(const WaitMulipleContainer &waitMultipleOnAddress)
{
    auto uCount = waitMultipleOnAddress.waitArray.Count<WaitMultipleEntry>();
    auto pBase = waitMultipleOnAddress.waitArray.Begin<WaitMultipleEntry>();

    // Snapshot caller-visible state so it can be restored at the end.
    for (AU_ITERATE_N(i, uCount))
    {
        auto &current = pBase[i];
        auto pCurrent = AuReinterpretCast<MultipleInternalContext>(current.internalContext);

        pCurrent->bOldIgnore = current.bIgnoreCurrentFlag;
        pCurrent->uOldStateChangedCounter = current.uHasStateChangedCounter;
        pCurrent->uMinTrigger = uCount;
        pCurrent->uCounter = 0;
    }

    bool bFoundNotTriggered {}, bTimeout {}, bStatus {};
    do
    {
        bool bRet = WaitForMultipleAddressesOrEx(waitMultipleOnAddress, uCount);

        bFoundNotTriggered = false;

        for (AU_ITERATE_N(i, uCount))
        {
            auto &current = pBase[i];
            auto pCurrent = AuReinterpretCast<MultipleInternalContext>(current.internalContext);

            if (current.bIgnoreCurrentFlag)
            {
                continue;
            }

            // Any movement of the counter since the snapshot counts as triggered.
            bool bTriggered = current.uHasStateChangedCounter - pCurrent->uOldStateChangedCounter;

            if (bTriggered)
            {
                // Done with this one; exclude it from further rounds.
                current.bIgnoreCurrentFlag = true;
            }
            else
            {
                bFoundNotTriggered = true;
            }
        }

        if (waitMultipleOnAddress.qwNanoseconds && !bRet)
        {
            bTimeout = true;
            break;
        }
    }
    while (bFoundNotTriggered);

    if (bTimeout)
    {
        bStatus = false;
    }
    else
    {
        bStatus = !bFoundNotTriggered;
    }

    // Restore the ignore flags and normalise the counters.
    for (AU_ITERATE_N(i, uCount))
    {
        auto &current = pBase[i];
        auto pCurrent = AuReinterpretCast<MultipleInternalContext>(current.internalContext);

        current.bIgnoreCurrentFlag = pCurrent->bOldIgnore;

        if (current.bIgnoreCurrentFlag)
        {
            continue;
        }

        current.uHasStateChangedCounter = pCurrent->uOldStateChangedCounter + (bStatus ? 1 : 0);
    }

    return bStatus;
}
|
|
|
|
// Emulation-mode "wait for any" across both wait addresses and IO loop
// sources.
//
// Slot 0 of the loop-source vector is the calling thread's wait-entry
// semaphore (created on first use); slots 1..N mirror the caller's IO
// triggers. Each round registers the non-ignored addresses, waits on the
// loop-source vector with whatever time is left, then records which addresses
// and IO sources triggered.
//
// When pTriggeredIndex is provided it is preset to AuUInt32(-1) and receives
// the index of a triggered IO source (the last one seen in a round).
//
// Returns true when an address or IO source triggered, or when there was
// nothing to wait on; false on allocation failure or when the wait ended
// without anything triggering.
AUKN_SYM bool WaitForMultipleAddressesOrWithIO(const WaitMulipleContainerWithIO &waitMultipleOnAddress)
{
    AuLoop::WaitForMultipleLoopSourcesParameters parameters;
    AuList<AuSPtr<AuLoop::ILoopSource>> loopSourceVec;
    AuList<AuLoop::WaitForMultipleLoopSourcesInOutOpt> outVec;
    bool bResult {}, bAny {}, bSleepStatus {};
    AuUInt32 uTicks {};
    WaitEntry *pWaitEntryMain {}, *pWaitEntryAux {};

    SysAssertDbg(!IsWaitOnRecommended(), "WoA not in emulation mode");

    auto uCountOfIO = waitMultipleOnAddress.lsArray.Count<WaitMultipleIOTrigger>();
    auto pBaseOfIO = waitMultipleOnAddress.lsArray.Begin<WaitMultipleIOTrigger>();

    if (waitMultipleOnAddress.pTriggeredIndex)
    {
        *waitMultipleOnAddress.pTriggeredIndex = AuUInt32(-1);
    }

    // One extra slot up front for the thread's own semaphore.
    if (!AuTryResize(loopSourceVec, uCountOfIO + 1))
    {
        SysPushErrorMemory();
        return false;
    }

    if (!AuTryResize(outVec, uCountOfIO + 1))
    {
        SysPushErrorMemory();
        return false;
    }

    for (AU_ITERATE_N(i, uCountOfIO))
    {
        loopSourceVec[i + 1] = pBaseOfIO[i].pLoopSource;
        outVec       [i + 1].bSkip = pBaseOfIO[i].bIgnoreCurrentFlag;
    }

    auto uCount = waitMultipleOnAddress.waitArray.Count<WaitMultipleEntry>();
    auto pBase = waitMultipleOnAddress.waitArray.Begin<WaitMultipleEntry>();

#if defined(HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD)
    // Hold a reference to the thread-local wait entry for the whole call.
    auto pTempHoldMe = tlsWaitEntry;
    auto pHead = pTempHoldMe.get();
#else
    auto pHead = &tlsWaitEntry;
#endif

    if (!pHead->pSemaphore)
    {
        pHead->pSemaphore = AuLoop::NewLSSemaphoreSlow(0u);

        // Without a semaphore there is nothing valid to put in slot 0;
        // report the failure like the vector allocations above.
        if (!pHead->pSemaphore)
        {
            SysPushErrorMemory();
            return false;
        }
    }

    pHead->bSemaphoreActive = true;

    loopSourceVec[0] = pHead->pSemaphore;
    outVec       [0].bSkip = false;

    parameters.bIsVectorShared = true;
    parameters.uFlags = AuLoop::kWaitMultipleFlagAny;

    if (waitMultipleOnAddress.bWaitOnlyOne)
    {
        parameters.uFlags |= AuLoop::kWaitMultipleFlagBreakAfterOne;
    }

    parameters.vecArray = loopSourceVec;
    parameters.vecArray2 = outVec;

    do
    {
        // Registration pass.
        for (AU_ITERATE_N(i, uCount))
        {
            auto &current = pBase[i];
            auto pCurrent = AuReinterpretCast<MultipleInternalContext>(current.internalContext);
            auto &state = pCurrent->state;

            if (current.bIgnoreCurrentFlag)
            {
                continue;
            }

            pCurrent->pBefore = nullptr;
            pCurrent->pNext = nullptr;
            pCurrent->uMinTrigger = 0;
            pCurrent->uCounter = 0;

            pWaitEntryAux = gProcessWaitables.WaitBufferFrom2(current.pTargetAddress, current.uSize, current.pCompareAddress, current.eMethod, pCurrent, &waitMultipleOnAddress);
            if (!pWaitEntryAux)
            {
                // No wait entry was handed out; stop registering and skip the sleep.
                break;
            }
            else
            {
                pWaitEntryMain = pWaitEntryAux;
            }

            state.qwNanosecondsAbs = waitMultipleOnAddress.qwNanoseconds;
            bAny = true;
        }

        if (!bAny && !uCountOfIO)
        {
            // Nothing to wait on. Clear the semaphore flag on this exit too,
            // matching the normal exit at the bottom of the function.
            pHead->bSemaphoreActive = false;
            return true;
        }

        if (!pWaitEntryAux)
        {
            bSleepStatus = false;
        }
        else if (!AuAtomicLoad(&pWaitEntryAux->bAlive))
        {
            bSleepStatus = false;
        }
        else
        {
            auto uNow = AuTime::SteadyClockNS();
            bool bTimeOutEarly {};

            if (!waitMultipleOnAddress.qwNanoseconds)
            {
                // No deadline: wait without a timeout.
                parameters.optTimeoutMS = {};
            }
            else if (uNow >= waitMultipleOnAddress.qwNanoseconds)
            {
                // Deadline already passed: poll once on the very first round,
                // otherwise give up without waiting.
                if (uTicks)
                {
                    bTimeOutEarly = true;
                }
                else
                {
                    parameters.optTimeoutMS = 0;
                }
            }
            else
            {
                parameters.optTimeoutMS = AuNSToMS<AuUInt32>(waitMultipleOnAddress.qwNanoseconds - uNow);
            }

            if (bTimeOutEarly)
            {
                bSleepStatus = false;
            }
            else
            {
                bSleepStatus = AuLoop::WaitMultipleLoopSources2(parameters);
            }
        }

        // Evaluation pass for addresses: record triggers and unregister.
        for (AU_ITERATE_N(i, uCount))
        {
            auto &current = pBase[i];

            if (current.bIgnoreCurrentFlag)
            {
                continue;
            }

            if (!WaitBuffer::Compare(current.pTargetAddress, current.uSize, current.pCompareAddress, kMax64, current.eMethod))
            {
                current.uHasStateChangedCounter++;
                bResult = true;
            }

            gProcessWaitables.RemoveSelf(current.pTargetAddress, pWaitEntryMain);
        }

        // Evaluation pass for IO sources (slot 0 is our own semaphore).
        for (AU_ITERATE_N(i, uCountOfIO))
        {
            auto &refTriggered = outVec[i + 1].uTriggered;

            if (refTriggered)
            {
                pBaseOfIO[i].uHasStateChangedCounter += refTriggered;
                refTriggered = 0;
                bResult = true;

                if (waitMultipleOnAddress.pTriggeredIndex)
                {
                    *waitMultipleOnAddress.pTriggeredIndex = i;
                }
            }
        }

        uTicks++;
    }
    while (!bResult && (!waitMultipleOnAddress.qwNanoseconds || bSleepStatus));

    pHead->bSemaphoreActive = false;

#if defined(HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD)
    pTempHoldMe.reset();
#endif

    return bResult;
}
|
|
} |