AuroraRuntime/Source/Threading/AuWakeOnAddress.cpp
/***
Copyright (C) 2023 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: AuWakeOnAddress.cpp
Date: 2023-3-10
Author: Reece
***/
#if defined(AURORA_COMPILER_MSVC)
#pragma strict_gs_check(off)
#pragma check_stack(off)
#endif
#include <Source/RuntimeInternal.hpp>
#include "AuWakeOnAddress.hpp"
#include "Primitives/SMTYield.hpp"
#include <Time/Time.hpp>
#define HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD
// WOA_ALWAYS_DUMB_OS_TARGET -> iOS, notarized macOS, Win9x, Xbox 360, etc.
namespace Aurora::Threading
{
#if defined(HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD)
static thread_local AuSPtr<WaitEntry> tlsWaitEntry = AuMakeSharedPanic<WaitEntry>();
#else
static thread_local WaitEntry tlsWaitEntry;
#endif
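// Dispatches a runtime eMethod (EWaitMethod) value to the matching template instantiation of the given method.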
#define DO_OF_METHOD_TYPE(preface, DoOfMethodType, ...) \
switch (eMethod) \
{ \
case EWaitMethod::eNotEqual: \
preface DoOfMethodType<EWaitMethod::eNotEqual>(__VA_ARGS__); \
break; \
case EWaitMethod::eEqual: \
preface DoOfMethodType<EWaitMethod::eEqual>(__VA_ARGS__); \
break; \
case EWaitMethod::eGreaterThanCompare: \
preface DoOfMethodType<EWaitMethod::eGreaterThanCompare>(__VA_ARGS__); \
break; \
case EWaitMethod::eGreaterThanOrEqualsCompare: \
preface DoOfMethodType<EWaitMethod::eGreaterThanOrEqualsCompare>(__VA_ARGS__); \
break; \
case EWaitMethod::eLessThanCompare: \
preface DoOfMethodType<EWaitMethod::eLessThanCompare>(__VA_ARGS__); \
break; \
case EWaitMethod::eLessThanOrEqualsCompare: \
preface DoOfMethodType<EWaitMethod::eLessThanOrEqualsCompare>(__VA_ARGS__); \
break; \
}
static ProcessWaitContainer gProcessWaitables;
static const int gShouldSpinOnlyInCPU = 1; // TODO: haven't decided
// UPDATE: defaulting to 1 out of paranoia, just in case we get preempted (rare).
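// Acquires a test-and-set spinlock at *uPointer, spinning and/or yielding according to gShouldSpinOnlyInCPU.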
template<typename T>
static void DoSpinLockOnVar(T *uPointer)
{
if (gShouldSpinOnlyInCPU == 0)
{
while (!Primitives::DoTryIfAlderLake([&]()
{
return AuAtomicTestAndSet(uPointer, 0) == 0;
}, uPointer))
{
}
}
else if (gShouldSpinOnlyInCPU == 1)
{
while (!Primitives::DoTryIfAlderLake([&]()
{
return AuAtomicTestAndSet(uPointer, 0) == 0;
}, uPointer))
{
ContextYield();
}
}
else if (gShouldSpinOnlyInCPU == 2)
{
while (AuAtomicTestAndSet(uPointer, 0))
{
while (*uPointer)
{
ContextYield();
}
}
}
else
{
SysUnreachable();
}
}
void WaitEntry::Release()
{
AuResetMember(this->uSize);
AuResetMember(this->pAddress);
}
WaitEntry::WaitEntry()
{
}
WaitEntry::~WaitEntry()
{
this->Release();
}
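// Parks the calling thread until the wait condition for eMethod no longer holds or the optional absolute
// deadline in state.qwNanosecondsAbs expires. Returns true once the waiter should wake; false only on timeout.
// If this entry was unlinked from the wait list (bAlive is false), it is re-registered via WaitBufferFrom.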
template <EWaitMethod eMethod>
bool WaitEntry::SleepOn(WaitState &state)
{
#if !defined(WOA_SEMAPHORE_MODE)
AU_LOCK_GUARD(this->mutex);
#endif
if (state.qwNanosecondsAbs)
{
if (!WaitBuffer::Compare2<eMethod, true>(this->pAddress, this->uSize, state.compare.buffer, state.uDownsizeMask))
{
return true;
}
auto uNow = AuTime::SteadyClockNS();
auto uEndTime = state.qwNanosecondsAbs.value();
while (uNow < uEndTime)
{
if (!WaitBuffer::Compare2<eMethod, true>(this->pAddress, this->uSize, state.compare.buffer, state.uDownsizeMask))
{
return true;
}
#if defined(AURORA_PLATFORM_WIN32)
Win32DropSchedulerResolution();
#endif
if (!this->bAlive)
{
#if !defined(WOA_SEMAPHORE_MODE)
this->mutex.Unlock();
#endif
(void)gProcessWaitables.WaitBufferFrom(this->pAddress, this->uSize, false, state.pCompare2, eMethod);
#if !defined(WOA_SEMAPHORE_MODE)
this->mutex.Lock();
#endif
}
else
{
#if defined(WOA_SEMAPHORE_MODE)
this->semaphore->LockAbsNS(uEndTime);
#else
auto uTimeRemNS = uEndTime - uNow;
this->variable.WaitForSignalNsEx(&this->mutex, uTimeRemNS, false);
#endif
}
uNow = AuTime::SteadyClockNS();
}
return !WaitBuffer::Compare2<eMethod, true>(this->pAddress, this->uSize, state.compare.buffer, state.uDownsizeMask);
}
else
{
while (WaitBuffer::Compare2<eMethod, true>(this->pAddress, this->uSize, state.compare.buffer, state.uDownsizeMask))
{
if (!this->bAlive)
{
#if !defined(WOA_SEMAPHORE_MODE)
this->mutex.Unlock();
#endif
(void)gProcessWaitables.WaitBufferFrom(this->pAddress, this->uSize, false, state.pCompare2, eMethod);
#if !defined(WOA_SEMAPHORE_MODE)
this->mutex.Lock();
#endif
}
else
{
#if defined(WOA_SEMAPHORE_MODE)
this->semaphore->Lock();
#else
this->variable.WaitForSignalNsEx(&this->mutex, 0, false);
#endif
}
}
return true;
}
return false;
}
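// Signals this entry if it is parked on pAddress and, when a comparison address is registered, only if the
// wake condition is now satisfied. Returns true when a signal was delivered.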
bool WaitEntry::TrySignalAddress(const void *pAddress)
{
if (this->pAddress != pAddress)
{
return false;
}
if (this->pCompareAddress)
{
if (WaitBuffer::Compare(pAddress, this->uSize, this->pCompareAddress, kMax64, this->eWaitMethod))
{
return false;
}
}
#if defined(WOA_SEMAPHORE_MODE)
this->semaphore->Unlock(1);
#else
this->variable.Signal();
#endif
return true;
}
WaitBuffer WaitBuffer::From(const void *pBuf, AuUInt8 uSize)
{
WaitBuffer wait;
AuMemcpy(wait.buffer, pBuf, uSize);
wait.uSize = uSize;
return AuMove(wait);
}
bool WaitBuffer::Compare(const void *pHotAddress, AuUInt8 uSize, WaitState &state)
{
auto eMethod = state.eWaitMethod;
return WaitBuffer::Compare(pHotAddress, uSize, state.compare.buffer, state.uDownsizeMask, eMethod);
}
bool WaitBuffer::Compare(const void *pHotAddress, AuUInt8 uSize, const void *pCompare, AuUInt64 uMask, EWaitMethod eMethod)
{
bool bRet {};
AURORA_COMPILER_VOLATILE_BARRIER();
#if 0
switch (eMethod)
{
case EWaitMethod::eEqual:
case EWaitMethod::eNotEqual:
{
auto &uSrcWord = *AuReinterpretCast<const AuUInt32 *>(pHotAddress);
auto &uCmpWord = *AuReinterpretCast<const AuUInt32 *>(pCompare);
bRet = (uSrcWord & uMask) == (uCmpWord & uMask);
bRet ^= bool(eMethod == EWaitMethod::eEqual);
break;
};
default:
{
DO_OF_METHOD_TYPE(return, Compare2, pHotAddress, uSize, pCompare)
}
}
#else
DO_OF_METHOD_TYPE(return, Compare2, pHotAddress, uSize, pCompare)
#endif
return bRet;
}
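// Returns true while the wait for eMethod should continue (the wake condition has not been met yet).
// The bFast variant skips the uMask downsizing that non-fast callers use for sub-word waits widened to a
// 4-byte futex word.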
template <EWaitMethod eMethod, bool bFast>
bool WaitBuffer::Compare2(const void *pHot, AuUInt8 uSize, const void *pBuf2, AuUInt64 uMask)
{
AURORA_COMPILER_VOLATILE_BARRIER();
if constexpr (!bFast)
{
if constexpr (eMethod == EWaitMethod::eNotEqual)
{
switch (uSize)
{
case 1:
return (AuReadU8(pHot, 0) & uMask) == (AuReadU8(pBuf2, 0) & uMask);
case 2:
return (AuReadU16(pHot, 0) & uMask) == (AuReadU16(pBuf2, 0) & uMask);
case 4:
return (AuReadU32(pHot, 0) & uMask) == (AuReadU32(pBuf2, 0) & uMask);
case 8:
return (AuReadU64(pHot, 0) & uMask) == (AuReadU64(pBuf2, 0) & uMask);
default:
return (AuMemcmp(pHot, pBuf2, uSize) == 0);
}
}
if constexpr (eMethod == EWaitMethod::eEqual)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0) & uMask) == (AuReadU8(pBuf2, 0) & uMask));
case 2:
return !((AuReadU16(pHot, 0) & uMask) == (AuReadU16(pBuf2, 0) & uMask));
case 4:
return !((AuReadU32(pHot, 0) & uMask) == (AuReadU32(pBuf2, 0) & uMask));
case 8:
return !((AuReadU64(pHot, 0) & uMask) == (AuReadU64(pBuf2, 0) & uMask));
default:
return !(AuMemcmp(pHot, pBuf2, uSize) == 0);
}
}
if constexpr (eMethod == EWaitMethod::eGreaterThanCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0) & uMask) > (AuReadU8(pBuf2, 0) & uMask));
case 2:
return !((AuReadU16(pHot, 0) & uMask) > (AuReadU16(pBuf2, 0) & uMask));
case 4:
return !((AuReadU32(pHot, 0) & uMask) > (AuReadU32(pBuf2, 0) & uMask));
case 8:
return !((AuReadU64(pHot, 0) & uMask) > (AuReadU64(pBuf2, 0) & uMask));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eGreaterThanOrEqualsCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0) & uMask) >= (AuReadU8(pBuf2, 0) & uMask));
case 2:
return !((AuReadU16(pHot, 0) & uMask) >= (AuReadU16(pBuf2, 0) & uMask));
case 4:
return !((AuReadU32(pHot, 0) & uMask) >= (AuReadU32(pBuf2, 0) & uMask));
case 8:
return !((AuReadU64(pHot, 0) & uMask) >= (AuReadU64(pBuf2, 0) & uMask));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eLessThanCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0) & uMask) < (AuReadU8(pBuf2, 0) & uMask));
case 2:
return !((AuReadU16(pHot, 0) & uMask) < (AuReadU16(pBuf2, 0) & uMask));
case 4:
return !((AuReadU32(pHot, 0) & uMask) < (AuReadU32(pBuf2, 0) & uMask));
case 8:
return !((AuReadU64(pHot, 0) & uMask) < (AuReadU64(pBuf2, 0) & uMask));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eLessThanOrEqualsCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0) & uMask) <= (AuReadU8(pBuf2, 0) & uMask));
case 2:
return !((AuReadU16(pHot, 0) & uMask) <= (AuReadU16(pBuf2, 0) & uMask));
case 4:
return !((AuReadU32(pHot, 0) & uMask) <= (AuReadU32(pBuf2, 0) & uMask));
case 8:
return !((AuReadU64(pHot, 0) & uMask) <= (AuReadU64(pBuf2, 0) & uMask));
default:
return false;
}
}
}
else
{
if constexpr (eMethod == EWaitMethod::eNotEqual)
{
switch (uSize)
{
case 1:
return (AuReadU8(pHot, 0)) == (AuReadU8(pBuf2, 0));
case 2:
return (AuReadU16(pHot, 0)) == (AuReadU16(pBuf2, 0));
case 4:
return (AuReadU32(pHot, 0)) == (AuReadU32(pBuf2, 0));
case 8:
return (AuReadU64(pHot, 0)) == (AuReadU64(pBuf2, 0));
default:
return (AuMemcmp(pHot, pBuf2, uSize) == 0);
}
}
if constexpr (eMethod == EWaitMethod::eEqual)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0)) == (AuReadU8(pBuf2, 0)));
case 2:
return !((AuReadU16(pHot, 0)) == (AuReadU16(pBuf2, 0)));
case 4:
return !((AuReadU32(pHot, 0)) == (AuReadU32(pBuf2, 0)));
case 8:
return !((AuReadU64(pHot, 0)) == (AuReadU64(pBuf2, 0)));
default:
return !(AuMemcmp(pHot, pBuf2, uSize) == 0);
}
}
if constexpr (eMethod == EWaitMethod::eGreaterThanCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0)) > (AuReadU8(pBuf2, 0)));
case 2:
return !((AuReadU16(pHot, 0)) > (AuReadU16(pBuf2, 0)));
case 4:
return !((AuReadU32(pHot, 0)) > (AuReadU32(pBuf2, 0)));
case 8:
return !((AuReadU64(pHot, 0)) > (AuReadU64(pBuf2, 0)));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eGreaterThanOrEqualsCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0)) >= (AuReadU8(pBuf2, 0)));
case 2:
return !((AuReadU16(pHot, 0)) >= (AuReadU16(pBuf2, 0)));
case 4:
return !((AuReadU32(pHot, 0)) >= (AuReadU32(pBuf2, 0)));
case 8:
return !((AuReadU64(pHot, 0)) >= (AuReadU64(pBuf2, 0)));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eLessThanCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0)) < (AuReadU8(pBuf2, 0)));
case 2:
return !((AuReadU16(pHot, 0)) < (AuReadU16(pBuf2, 0)));
case 4:
return !((AuReadU32(pHot, 0)) < (AuReadU32(pBuf2, 0)));
case 8:
return !((AuReadU64(pHot, 0)) < (AuReadU64(pBuf2, 0)));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eLessThanOrEqualsCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0)) <= (AuReadU8(pBuf2, 0)));
case 2:
return !((AuReadU16(pHot, 0)) <= (AuReadU16(pBuf2, 0)));
case 4:
return !((AuReadU32(pHot, 0)) <= (AuReadU32(pBuf2, 0)));
case 8:
return !((AuReadU64(pHot, 0)) <= (AuReadU64(pBuf2, 0)));
default:
return false;
}
}
}
return false;
}
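// Registers the calling thread's thread-local WaitEntry on this bucket's doubly-linked wait list:
// at the head when bScheduleFirst is set (FIFO wake order), otherwise at the tail (LIFO wake order).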
WaitEntry *ProcessWaitNodeContainer::WaitBufferFrom(const void *pAddress, AuUInt8 uSize, bool bScheduleFirst, const void *pCompareAddress, EWaitMethod eWaitMethod)
{
#if defined(HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD)
auto pReturn = tlsWaitEntry.get();
#else
auto pReturn = &tlsWaitEntry;
#endif
pReturn->pAddress = pAddress;
pReturn->uSize = uSize;
pReturn->pCompareAddress = pCompareAddress;
pReturn->eWaitMethod = eWaitMethod;
if (bScheduleFirst /*First in, First Out*/)
{
Lock();
if (!pReturn->bAlive)
{
pReturn->bAlive = true;
if (auto pLoadFromMemory = this->waitList.pHead)
{
pLoadFromMemory->pBefore = pReturn;
pReturn->pNext = pLoadFromMemory;
}
else
{
this->waitList.pTail = pReturn;
}
this->waitList.pHead = pReturn;
}
Unlock();
}
else /*Last In, First Out*/
{
Lock();
if (!pReturn->bAlive)
{
pReturn->bAlive = true;
if (auto pLoadFromMemory = this->waitList.pTail)
{
pLoadFromMemory->pNext = pReturn;
pReturn->pBefore = pLoadFromMemory;
}
else
{
this->waitList.pHead = pReturn;
}
this->waitList.pTail = pReturn;
}
Unlock();
}
return pReturn;
}
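// Walks the wait list from tail to head, invoking callback(entry) -> {bContinue, bRemove} on each waiter;
// unlinks the entry when bRemove is set and stops early when bContinue is false.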
template <typename T>
bool ProcessWaitNodeContainer::IterateWake(T callback)
{
bool bRetStatus { true };
if (AuAtomicLoad((AuUInt *)&this->waitList.pTail) == 0)
{
return true;
}
Lock();
{
// FIFO
auto pCurrentHead = this->waitList.pTail;
while (pCurrentHead)
{
decltype(pCurrentHead) pBefore {};
#if !defined(WOA_SEMAPHORE_MODE)
// Insertion barrier
{
AU_LOCK_GUARD(pCurrentHead->mutex);
}
#endif
auto [bCont, bRemove] = callback(*pCurrentHead);
pBefore = pCurrentHead->pBefore;
if (bRemove)
{
this->RemoveEntry(pCurrentHead, true);
}
if (!bCont)
{
bRetStatus = false;
break;
}
if (pBefore == pCurrentHead)
{
break;
}
pCurrentHead = pBefore;
}
}
Unlock();
return bRetStatus;
}
void ProcessWaitNodeContainer::RemoveEntry(WaitEntry *pEntry,
bool bAllUnderLock)
{
if (this->waitList.pHead == pEntry)
{
this->waitList.pHead = pEntry->pNext;
}
if (this->waitList.pTail == pEntry)
{
this->waitList.pTail = pEntry->pBefore;
}
if (pEntry->pBefore)
{
pEntry->pBefore->pNext = pEntry->pNext;
}
if (pEntry->pNext)
{
pEntry->pNext->pBefore = pEntry->pBefore;
}
if (bAllUnderLock)
{
pEntry->pBefore = nullptr;
pEntry->pNext = nullptr;
pEntry->bAlive = false;
}
}
void ProcessWaitNodeContainer::RemoveSelf(WaitEntry *pSelf)
{
{
this->Lock();
this->RemoveEntry(pSelf, false);
this->Unlock();
}
pSelf->pBefore = nullptr;
pSelf->pNext = nullptr;
pSelf->bAlive = false;
}
void ProcessWaitNodeContainer::Lock()
{
DoSpinLockOnVar(&this->uAtomic);
}
void ProcessWaitNodeContainer::Unlock()
{
this->uAtomic = 0;
}
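// Hashes the wait address into one of the fixed wait buckets (the bucket count is assumed to be a power of two).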
#define AddressToIndex AuHashCode(pAddress) & (AuArraySize(this->list) - 1)
WaitEntry *ProcessWaitContainer::WaitBufferFrom(const void *pAddress, AuUInt8 uSize, bool bScheduleFirst, const void *pCompareAddress, EWaitMethod eWaitMethod)
{
return this->list[AddressToIndex].WaitBufferFrom(pAddress, uSize, bScheduleFirst, pCompareAddress, eWaitMethod);
}
template <typename T>
bool ProcessWaitContainer::IterateWake(const void *pAddress, T callback)
{
return this->list[AddressToIndex].IterateWake(callback);
}
void ProcessWaitContainer::RemoveSelf(const void *pAddress, WaitEntry *pSelf)
{
return this->list[AddressToIndex].RemoveSelf(pSelf);
}
bool IsNativeWaitOnSupported()
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
return pWaitOnAddress &&
AuSwInfo::IsWindows8Point1OrGreater();
#elif defined(AURORA_PLATFORM_LINUX)
return true;
#else
return SysNativeWaitOnAddressFutexSupported();
#endif
}
AUKN_SYM bool IsWaitOnRecommended()
{
#if defined(WOA_ALWAYS_DUMB_OS_TARGET)
return false;
#endif
static AuOptionalEx<bool> gIsWaitOnRecommendedCache {};
if (gIsWaitOnRecommendedCache)
{
return gIsWaitOnRecommendedCache.value();
}
if (Primitives::ThrdCfg::gPreferEmulatedWakeOnAddress)
{
return false;
}
bool bState = IsNativeWaitOnSupported();
gIsWaitOnRecommendedCache = bState;
return bState;
}
/// @deprecated
AUKN_SYM const AuList<AuUInt8> &GetValidWordSizes()
{
static const AuList<AuUInt8> kArray =
#if defined(AURORA_IS_MODERNNT_DERIVED)
{ 1, 2, 4, 8 };
#else
{ 4 };
#endif
return kArray;
}
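// Emulated slow path: parks the calling thread's WaitEntry in gProcessWaitables and sleeps on its condition
// variable (or semaphore under WOA_SEMAPHORE_MODE) until the wait condition clears or the deadline elapses.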
template <EWaitMethod T>
bool WaitOnAddressWide(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuOptional<AuUInt64> qwNanoseconds,
AuOptional<AuUInt64> qwNanosecondsAbs,
bool bOSSupportsWait,
const void *pCompareAddress2)
{
WaitState state;
SysAssertDbg(uWordSize <= 32);
auto pWaitEntry = gProcessWaitables.WaitBufferFrom(pTargetAddress, uWordSize, true, pCompareAddress2, T);
// Unlocked update to a safer comparison address; hardens against bad code
{
state.compare = WaitBuffer::From(pCompareAddress, uWordSize);
// Redirect pCompareAddress2 to our own snapshot to harden against bad volatile comparison pointers
pWaitEntry->pCompareAddress = state.pCompare2 =
pCompareAddress2 ? state.compare.buffer : nullptr;
}
if (qwNanoseconds)
{
state.qwNanosecondsAbs = AuTime::SteadyClockNS() + qwNanoseconds.value();
}
else if (qwNanosecondsAbs)
{
state.qwNanosecondsAbs = qwNanosecondsAbs.value();
}
#if defined(HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD)
auto pTempHoldMe = tlsWaitEntry;
#endif
auto bResult = pWaitEntry->SleepOn<T>(state);
#if defined(HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD)
pTempHoldMe.reset();
#endif
if (!bResult)
{
gProcessWaitables.RemoveSelf(pTargetAddress, pWaitEntry);
}
return bResult;
}
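// Computes the wait address's offset from its 4-byte boundary (uDelta) and a mask selecting the original
// word's byte lanes; e.g. a 1-byte word at (address & 3) == 2 yields uDelta = 2 and mask 0x00FF0000, and
// callers widen such waits to the containing 4-byte futex word. NT-derived targets support 1/2/4/8-byte
// waits natively, so the address passes through unchanged with mask kMax64.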
AuTuple<const void *, AuUInt8, AuUInt64> DecodeAddress(const void *pAddress,
AuUInt32 uWordSize)
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
return AuMakeTuple(pAddress, 0, kMax64);
#endif
auto pRounded = AuPageRound(AuUInt(pAddress), AuUInt(4));
auto uDelta = (AuUInt)pAddress - pRounded;
if (uWordSize == 8)
{
return AuMakeTuple((const void *)pRounded, uDelta, kMax64);
}
AuUInt32 uSizeMask = (1ull << (uWordSize * 8)) - 1ull;
switch (uDelta)
{
case 0:
return AuMakeTuple(pAddress, 0, 0xFFFFFFFF & (uSizeMask << 0));
case 1:
return AuMakeTuple(pAddress, 1, 0xFFFFFF00 & (uSizeMask << 8));
case 2:
return AuMakeTuple(pAddress, 2, 0xFFFF0000 & (uSizeMask << 16));
case 3:
return AuMakeTuple(pAddress, 3, 0xFF000000 & (uSizeMask << 24));
default:
SysPanic("Invalid Branch");
}
}
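// Timed native wait: prefers RtlWaitOnAddress with an NT-format timeout when available, otherwise falls back
// to the millisecond-resolution WaitOnAddress plus spinning/yielding to approximate the nanosecond deadline.
// Non-NT targets defer to SysWaitOnAddressTimed.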
static bool RunOSWaitOnAddressTimed(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 uAbsTimeSteadyClock,
AuUInt64 uRelativeNanoseconds,
AuOptional<AuUInt64> uAbsTimeAltClock /* hint */,
bool bSpun = false)
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
if (pRtlWaitOnAddress)
{
AuUInt64 uNow {};
while (uAbsTimeSteadyClock ?
(uAbsTimeSteadyClock > (uNow = AuTime::SteadyClockNS())) :
true)
{
LARGE_INTEGER word {};
if (uAbsTimeAltClock)
{
word.QuadPart = AuTime::ConvertTimestampNs(uAbsTimeAltClock.value());
}
else if (uAbsTimeSteadyClock)
{
if (uAbsTimeSteadyClock <= uNow)
{
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress);
}
word.QuadPart = -(AuInt64(uAbsTimeSteadyClock - uNow) / 100ull);
if (!word.QuadPart)
{
word.QuadPart = 1;
}
}
if (WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress))
{
if (pRtlWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize, &word))
{
return true;
}
else if (!uAbsTimeSteadyClock)
{
return false;
}
}
else
{
return true;
}
}
return false;
}
else
{
// Some paths (e.g., CAS loops) may omit uRelativeNanoseconds;
// most paths now skimp on the relative value.
if (uAbsTimeSteadyClock && !uRelativeNanoseconds)
{
AuInt64 iDelta = uAbsTimeSteadyClock;
iDelta -= AuTime::SteadyClockNS();
if (iDelta <= 0)
{
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress);
}
uRelativeNanoseconds = iDelta;
}
auto uMaxSwitches = gRuntimeConfig.threadingConfig.uUWPNanosecondEmulationMaxYields;
auto bUWPNanosecondEmulationCheckFirst = Primitives::ThrdCfg::gUWPNanosecondEmulationCheckFirst;
// LockN(<1ms) on a platform without that resolution of yielding... unfortunate
auto uMS = AuNSToMS<AuUInt32>(uRelativeNanoseconds);
if (!uMS)
{
// first: CPU spin to avoid the kernel altogether
if (!bSpun)
{
if (TryWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, uWordSize))
{
return true;
}
}
// second: yield
unsigned uLimit {};
do
{
if (!WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress))
{
break;
}
AuThreading::ContextYield();
if (bUWPNanosecondEmulationCheckFirst)
{
if (uLimit++ > uMaxSwitches)
{
break;
}
}
}
while (uAbsTimeSteadyClock > AuTime::SteadyClockNS()); // ...until time is up
}
else // high level lock function was called with ms scale resolution
{
// first: wait on the address with an ms scale timeout
(void)pWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, uWordSize, uMS);
// take a copy
auto expect = WaitBuffer::From(pCompareAddress, uWordSize);
// never trust the error value/status provided by wait addresses - instead, do a quick compare
if (!WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress))
{
// best case: we woke up during the ms-res waitonaddress
return true;
}
// attempt to yield again, potentially context switching a few times to hit any NS remainder
AuUInt64 uNow {};
unsigned uLimit {};
while (uAbsTimeSteadyClock > (uNow = AuTime::SteadyClockNS()))
{
uMS = AuNSToMS<AuUInt32>(uAbsTimeSteadyClock - uNow);
if (Primitives::DoTryIfAlderLake([&]()
{
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress);
}, pTargetAddress))
{
// hit it within the span of 1 << SpinLoopPowerA SMT stalls
return true;
}
if (!uMS)
{
// burn off any remainder cycles by switching contexts (this isn't a very long time usually)
if (uLimit++ < uMaxSwitches)
{
AuThreading::ContextYield();
}
else
{
// do not burn the cpu to meet the timeout. we'll just undershoot.
return false;
}
}
else
{
(void)pWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, uWordSize, uMS);
}
}
}
}
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress);
#else
return SysWaitOnAddressTimed(pTargetAddress,
pCompareAddress,
uWordSize,
uAbsTimeSteadyClock,
uRelativeNanoseconds,
uAbsTimeAltClock,
bSpun);
#endif
}
static void RunOSWaitOnAddressNoTimedNoErrors(const void *pTargetAddress,
const void *pCompareAddress,
WaitState &state)
{
while (WaitBuffer::Compare2<EWaitMethod::eNotEqual, kPlatformFutexNoForcedAlignedU32>(pTargetAddress, state.uWordSize, pCompareAddress, state.uDownsizeMask))
{
if (!SysWaitOnAddressNoTimed(pTargetAddress, pCompareAddress, state.uWordSize))
{
//AuThreading::ContextYield();
}
}
}
static bool RunOSWaitOnAddressTimedSteady(const void *pTargetAddress,
const void *pCompareAddress,
WaitState &state,
bool bSpun = false)
{
#if 1
if (!WaitBuffer::Compare2<EWaitMethod::eNotEqual, kPlatformFutexNoForcedAlignedU32>(pTargetAddress, state.uWordSize, pCompareAddress, state.uDownsizeMask))
{
return true;
}
(void)RunOSWaitOnAddressTimed(pTargetAddress, pCompareAddress, state.uWordSize, state.qwNanosecondsAbs.value(), { }, { }, bSpun);
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, kPlatformFutexNoForcedAlignedU32>(pTargetAddress, state.uWordSize, pCompareAddress, state.uDownsizeMask);
#else
return RunOSWaitOnAddressTimed(pTargetAddress, pCompareAddress, state.uWordSize, state.qwNanosecondsAbs.value(), { }, { }, bSpun);
#endif
}
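// The native futex can only wait on 'value != snapshot', so for the other comparison methods we wait on a
// fresh snapshot, re-check the real predicate afterwards, and pass the wake along with SysWakeOneOnAddress
// if this thread consumed a wake it did not need.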
template <EWaitMethod T>
static void RunOSWaitOnAddressEQNoTimedNoErrors(const void *pTargetAddress,
const void *pCompareAddress,
WaitState &state)
{
while (true)
{
WaitBuffer wb = WaitBuffer::From(pTargetAddress, state.uWordSize);
if (!WaitBuffer::Compare2<T, kPlatformFutexNoForcedAlignedU32>(wb.buffer, state.uWordSize, pCompareAddress, state.uDownsizeMask))
{
return;
}
(void)SysWaitOnAddressNoTimed(pTargetAddress, wb.buffer, state.uWordSize);
if (WaitBuffer::Compare2<T, kPlatformFutexNoForcedAlignedU32>(pTargetAddress, state.uWordSize, pCompareAddress, state.uDownsizeMask))
{
SysWakeOneOnAddress(pTargetAddress);
}
else
{
return;
}
}
}
template <EWaitMethod T>
static bool RunOSWaitOnAddressEQTimedSteady(const void *pTargetAddress,
const void *pCompareAddress,
WaitState &state,
bool bSpun = false)
{
while (true)
{
WaitBuffer wb = WaitBuffer::From(pTargetAddress, state.uWordSize);
if (!WaitBuffer::Compare2<T, kPlatformFutexNoForcedAlignedU32>(wb.buffer, state.uWordSize, pCompareAddress, state.uDownsizeMask))
{
return true;
}
bool bResult = RunOSWaitOnAddressTimed(pTargetAddress, wb.buffer, state.uWordSize, state.qwNanosecondsAbs.value(), { }, { }, bSpun);
if (WaitBuffer::Compare2<T, kPlatformFutexNoForcedAlignedU32>(pTargetAddress, state.uWordSize, pCompareAddress, state.uDownsizeMask))
{
SysWakeOneOnAddress(pTargetAddress);
if (!bResult)
{
return false;
}
}
else
{
return true;
}
}
}
// Windows 8+ thread primitives may use this instead of the public API.
// It works on Linux and Windows 8+.
// It does not, however, work on emulated platforms.
// This is intentional.
bool InternalLTSWaitOnAddressHighRes(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanosecondsAbs)
{
auto [pWaitAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, uWordSize);
auto pCompareAddress2 = AuReinterpretCast<const char *>(pCompareAddress) - uDelta;
WaitState state;
state.uDownsizeMask = uMask;
state.compare = uMask != kMax64 ?
WaitBuffer::From(pCompareAddress2, 4) :
WaitBuffer::From(pCompareAddress2, uWordSize);
state.uWordSize = uMask != kMax64 ? 4 : uWordSize;
if (!qwNanosecondsAbs)
{
RunOSWaitOnAddressNoTimedNoErrors(pWaitAddress, pCompareAddress2, state);
return true;
}
else
{
state.qwNanosecondsAbs = qwNanosecondsAbs;
return RunOSWaitOnAddressTimedSteady(pWaitAddress, pCompareAddress2, state, true);
}
}
void InternalLTSWakeAll(const void *pTargetAddress)
{
#if defined(WOA_ALWAYS_DUMB_OS_TARGET)
WakeAllOnAddress(pTargetAddress);
#else
auto [pWakeAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, 1);
SysWakeAllOnAddress(pWakeAddress);
#endif
}
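// If the target is offset within its 4-byte word (uDelta != 0), fall back to waking every waiter on that
// word rather than risk skipping the intended one.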
void InternalLTSWakeOne(const void *pTargetAddress)
{
#if defined(WOA_ALWAYS_DUMB_OS_TARGET)
WakeOnAddress(pTargetAddress);
#else
auto [pWakeAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, 1);
if (uDelta)
{
SysWakeAllOnAddress(pWakeAddress);
}
else
{
SysWakeNOnAddress(pWakeAddress, 1);
}
#endif
}
void InternalLTSWakeCount(const void *pTargetAddress, AuUInt32 uCount)
{
#if defined(WOA_ALWAYS_DUMB_OS_TARGET)
WakeNOnAddress(pTargetAddress, uCount);
#else
auto [pWakeAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, 1);
if (uDelta)
{
SysWakeAllOnAddress(pWakeAddress);
}
else
{
SysWakeNOnAddress(pWakeAddress, uCount);
}
#endif
}
WOAFASTPUB bool WaitOnAddress(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds,
AuOptional<bool> optAlreadySpun)
{
// Avoid the SteadyClockNS syscall when the HAL lacks a fast timestamp path (missing KUSER QPC, Linux vDSO, etc.)
if (!WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress, kMax64))
{
return true;
}
return WaitOnAddressSteady(pTargetAddress,
pCompareAddress,
uWordSize,
qwNanoseconds ? qwNanoseconds + AuTime::SteadyClockNS() : 0,
optAlreadySpun);
}
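// Illustrative usage sketch, not part of this file's API surface: uFlag, uExpect, and the 5 ms timeout are
// hypothetical, and AuAtomicStore is assumed to be the runtime's atomic store helper.
//
//     AuUInt32 uFlag {};                  // shared word, initially 0
//     AuUInt32 uExpect { 0 };             // the waiter sleeps while uFlag == uExpect
//
//     // waiter: returns true once uFlag != 0, or false if ~5 ms elapse first
//     bool bWoke = WaitOnAddress(&uFlag, &uExpect, sizeof(uFlag), 5'000'000 /* ns */, {});
//
//     // waker: publish the new value, then wake every thread parked on &uFlag
//     AuAtomicStore(&uFlag, 1u);
//     WakeAllOnAddress(&uFlag);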
WOAFASTPUB bool WaitOnAddressSpecial(EWaitMethod eMethod,
const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds,
AuOptional<bool> optAlreadySpun)
{
// Avoid the SteadyClockNS syscall when the HAL lacks a fast timestamp path (missing KUSER QPC, Linux vDSO, etc.)
if (!WaitBuffer::Compare(pTargetAddress, uWordSize, pCompareAddress, kMax64, eMethod))
{
return true;
}
return WaitOnAddressSpecialSteady(eMethod,
pTargetAddress,
pCompareAddress,
uWordSize,
qwNanoseconds ? qwNanoseconds + AuTime::SteadyClockNS() : 0,
optAlreadySpun);
}
template <EWaitMethod T>
auline bool TryWaitOnAddressSpecialTmpl(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize)
{
return Primitives::DoTryIfAlderLake([&]()
{
return !WaitBuffer::Compare2<T, true>(pTargetAddress, uWordSize, pCompareAddress);
}, pTargetAddress);
}
WOAFASTPUB bool TryWaitOnAddress(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize)
{
return TryWaitOnAddressSpecialTmpl<EWaitMethod::eNotEqual>(pTargetAddress, pCompareAddress, uWordSize);
}
WOAFASTPUB bool TryWaitOnAddressSpecial(EWaitMethod eMethod,
const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize)
{
DO_OF_METHOD_TYPE(return, TryWaitOnAddressSpecialTmpl, pTargetAddress, pCompareAddress, uWordSize);
return false;
}
WOAFASTPUB bool TryWaitOnAddressEx(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
const AuFunction<bool(const void *, const void *, AuUInt8)> &check)
{
if (!check)
{
return TryWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize);
}
return Primitives::DoTryIfAlderLake([&]()
{
if (WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress))
{
return false;
}
return check(pTargetAddress, pCompareAddress, uWordSize);
}, pTargetAddress);
}
template <EWaitMethod T>
bool TryWaitOnAddressSpecialExTmpl(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
const AuFunction<bool(const void *, const void *, AuUInt8)> &check)
{
return Primitives::DoTryIfAlderLake([&]()
{
if (WaitBuffer::Compare2<T, true>(pTargetAddress, uWordSize, pCompareAddress))
{
return false;
}
return check(pTargetAddress, pCompareAddress, uWordSize);
}, pTargetAddress);
}
WOAFASTPUB bool TryWaitOnAddressSpecialEx(EWaitMethod eMethod,
const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
const AuFunction<bool(const void *, const void *, AuUInt8)> &check)
{
if (!check)
{
return TryWaitOnAddressSpecial(eMethod, pTargetAddress, pCompareAddress, uWordSize);
}
DO_OF_METHOD_TYPE(return, TryWaitOnAddressSpecialExTmpl, pTargetAddress, pCompareAddress, uWordSize, check);
return false;
}
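// Wakes up to uNMaximumThreads waiters: via the native futex when IsWaitOnRecommended(), otherwise by walking
// the emulated wait list and signalling entries parked on pTargetAddress.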
WOAFASTPUB void WakeNOnAddress(const void *pTargetAddress,
AuUInt8 uNMaximumThreads)
{
if (IsWaitOnRecommended())
{
auto [pWakeAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, 1);
if (uDelta)
{
SysWakeAllOnAddress(pWakeAddress);
}
else
{
SysWakeNOnAddress(pWakeAddress, uNMaximumThreads);
}
}
else
{
(void)gProcessWaitables.IterateWake(pTargetAddress, [&](WaitEntry &entry) -> AuPair<bool, bool>
{
if (!uNMaximumThreads)
{
return AuMakePair(false, false);
}
bool bWake {};
if (entry.TrySignalAddress(pTargetAddress))
{
bWake = true;
uNMaximumThreads--;
}
bool bCont = uNMaximumThreads != 0;
return AuMakePair(bCont, bWake);
});
}
}
WOAFASTPUB void WakeOnAddress(const void *pTargetAddress)
{
WakeNOnAddress(pTargetAddress, 1);
}
WOAFASTPUB void WakeAllOnAddress(const void *pTargetAddress)
{
if (IsWaitOnRecommended())
{
auto [pWakeAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, 1);
SysWakeAllOnAddress(pWakeAddress);
}
else
{
(void)gProcessWaitables.IterateWake(pTargetAddress, [&](WaitEntry &entry) -> AuPair<bool, bool>
{
return AuMakePair(true, entry.TrySignalAddress(pTargetAddress));
});
}
}
WOAFASTPUB bool WaitOnAddressSteady(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds,
AuOptional<bool> optAlreadySpun)
{
// Avoid the emulated path's dynamic TLS fetch (when there is no TLS section),
// various security checks,
// and other such bloated thunks
if (!WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress, kMax64))
{
return true;
}
bool bWaitOnAddress = IsWaitOnRecommended();
if (bWaitOnAddress)
{
auto [pWaitAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, uWordSize);
auto pCompareAddress2 = AuReinterpretCast<const char *>(pCompareAddress) - uDelta;
WaitState state;
state.uDownsizeMask = uMask;
state.compare = uMask != kMax64 ?
WaitBuffer::From(pCompareAddress2, 4) :
WaitBuffer::From(pCompareAddress2, uWordSize);
state.uWordSize = uMask != kMax64 ? 4 : uWordSize;
bool bSpun {};
if (Primitives::ThrdCfg::gPreferWaitOnAddressAlwaysSpinNative &&
optAlreadySpun.value_or(false))
{
if (TryWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize))
{
return true;
}
bSpun = true;
}
if (!qwNanoseconds)
{
RunOSWaitOnAddressNoTimedNoErrors(pWaitAddress, pCompareAddress2, state);
return true;
}
else
{
state.qwNanosecondsAbs = qwNanoseconds;
return RunOSWaitOnAddressTimedSteady(pWaitAddress, pCompareAddress2, state, bSpun);
}
}
else
{
if (Primitives::ThrdCfg::gPreferWaitOnAddressAlwaysSpin &&
optAlreadySpun.value_or(false))
{
if (TryWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize))
{
return true;
}
}
return WaitOnAddressWide<EWaitMethod::eNotEqual>(pTargetAddress, pCompareAddress, uWordSize, {}, qwNanoseconds ? qwNanoseconds : AuOptional<AuUInt64>{}, false, nullptr);
}
return false;
}
WOAFASTPUB bool WaitOnAddressSpecialSteady(EWaitMethod eMethod,
const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds,
AuOptional<bool> optAlreadySpun)
{
// Avoid the emulated path's dynamic TLS fetch (when there is no TLS section),
// various security checks,
// and other such bloated thunks
if (!WaitBuffer::Compare(pTargetAddress, uWordSize, pCompareAddress, kMax64, eMethod))
{
return true;
}
bool bWaitOnAddress = IsWaitOnRecommended();
if (bWaitOnAddress)
{
auto [pWaitAddress, uDelta, uMask] = DecodeAddress(pTargetAddress, uWordSize);
auto pCompareAddress2 = AuReinterpretCast<const char *>(pCompareAddress) - uDelta;
WaitState state;
state.uDownsizeMask = uMask;
state.compare = uMask != kMax64 ?
WaitBuffer::From(pCompareAddress2, 4) :
WaitBuffer::From(pCompareAddress2, uWordSize);
state.uWordSize = uMask != kMax64 ? 4 : uWordSize;
state.pCompare2 = pCompareAddress;
state.eWaitMethod = eMethod;
bool bSpun {};
if (Primitives::ThrdCfg::gPreferWaitOnAddressAlwaysSpinNative &&
optAlreadySpun.value_or(false))
{
if (TryWaitOnAddressSpecial(eMethod, pTargetAddress, pCompareAddress, uWordSize))
{
return true;
}
bSpun = true;
}
if (!qwNanoseconds)
{
DO_OF_METHOD_TYPE(, RunOSWaitOnAddressEQNoTimedNoErrors, pWaitAddress, pCompareAddress2, state);
return true;
}
else
{
state.qwNanosecondsAbs = qwNanoseconds;
DO_OF_METHOD_TYPE(return, RunOSWaitOnAddressEQTimedSteady, pWaitAddress, pCompareAddress2, state, bSpun);
}
}
else
{
if (Primitives::ThrdCfg::gPreferWaitOnAddressAlwaysSpin &&
optAlreadySpun.value_or(false))
{
if (TryWaitOnAddressSpecial(eMethod, pTargetAddress, pCompareAddress, uWordSize))
{
return true;
}
}
DO_OF_METHOD_TYPE(return, WaitOnAddressWide, pTargetAddress, pCompareAddress, uWordSize, {}, qwNanoseconds ? qwNanoseconds : AuOptional<AuUInt64> {}, false, pCompareAddress);
}
return false;
}
}