[+] AuThreading::EWaitMethod

[+] AuThreading::TryWaitOnAddressSpecial
[+] AuThreading::TryWaitOnAddressSpecialEx
[+] AuThreading::WaitOnAddressSpecial
[+] AuThreading::WaitOnAddressSpecialSteady
Reece Wilson 2024-03-12 22:50:22 +00:00
parent 2ba5ae6fa5
commit d14ba6cfd4
8 changed files with 625 additions and 275 deletions
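To illustrate the entry points introduced by this commit, here is a minimal hedged sketch: one thread sleeps until a 32-bit counter reaches a target via the new EWaitMethod-parameterised wait, another bumps the counter and wakes waiters. The include path and the counter/target names are illustrative assumptions; the wait/wake calls and atomics are the symbols shown in the diff below.

// Illustrative sketch only: the header path is an assumption; the API calls are the
// symbols added by this commit (declared in the WakeOnAddress header further down).
#include <Aurora/Threading/WakeOnAddress.hpp> // assumed include path

static AuUInt32 gCounter {};

void WaitForFour()
{
    const AuUInt32 kTarget { 4 };
    // Blocks until gCounter >= kTarget; 0 nanoseconds means wait indefinitely.
    AuThreading::WaitOnAddressSpecial(AuThreading::EWaitMethod::eGreaterThanOrEqualsCompare,
                                      &gCounter, &kTarget, sizeof(gCounter), 0);
}

void Bump()
{
    AuAtomicAdd(&gCounter, 1u);
    AuThreading::WakeAllOnAddress(&gCounter);
}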

View File

@ -161,17 +161,17 @@ namespace Aurora::Threading::Waitables
{
const AuUInt32 kRef { uValue };
while (!TryWaitOnAddressUntilEqual((const void *)&this->uAtomicState, &kRef, sizeof(kRef)))
while (!TryWaitOnAddressSpecial(EWaitMethod::eEqual, (const void *)&this->uAtomicState, &kRef, sizeof(kRef)))
{
bool bStatus {};
AuAtomicAdd(&this->uAtomicSleeping, 1u);
bStatus = WaitOnAddressUntilEqualSteady((const void *)&this->uAtomicState, &kRef, sizeof(kRef), qwTimeoutAbs.ValueOr(0), true);
bStatus = WaitOnAddressSpecialSteady(EWaitMethod::eEqual, (const void *)&this->uAtomicState, &kRef, sizeof(kRef), qwTimeoutAbs.ValueOr(0), true);
AuAtomicSub(&this->uAtomicSleeping, 1u);
if (!bStatus)
{
return TryWaitOnAddressUntilEqual((const void *)&this->uAtomicState, &kRef, sizeof(kRef));
return TryWaitOnAddressSpecial(EWaitMethod::eEqual, (const void *)&this->uAtomicState, &kRef, sizeof(kRef));
}
}
@ -185,10 +185,10 @@ namespace Aurora::Threading::Waitables
AuUInt32 uState {};
bool bStatus { true };
while (!TryWaitOnAddressUntilEqual((const void *)&this->uAtomicState, &kRef, sizeof(kRef)))
while (!TryWaitOnAddressSpecial(EWaitMethod::eEqual, (const void *)&this->uAtomicState, &kRef, sizeof(kRef)))
{
AuAtomicAdd(&this->uAtomicSleeping, 1u);
bStatus = WaitOnAddressUntilEqualSteady((const void *)&this->uAtomicState, &kRef, sizeof(kRef), qwTimeoutAbs.ValueOr(0), true);
bStatus = WaitOnAddressSpecialSteady(EWaitMethod::eEqual, (const void *)&this->uAtomicState, &kRef, sizeof(kRef), qwTimeoutAbs.ValueOr(0), true);
AuAtomicSub(&this->uAtomicSleeping, 1u);
if (!bStatus)
@ -213,16 +213,37 @@ namespace Aurora::Threading::Waitables
return false;
}
inline bool LockUntilAtleastAbsNS(AuUInt32 uValue,
AuOptional<AuUInt64> qwTimeoutAbs)
{
AuUInt32 uState {};
bool bStatus { true };
while (!TryWaitOnAddressSpecial(EWaitMethod::eGreaterThanOrEqualsCompare, (const void *)&this->uAtomicState, &uValue, sizeof(uValue)))
{
AuAtomicAdd(&this->uAtomicSleeping, 1u);
bStatus = WaitOnAddressSpecialSteady(EWaitMethod::eGreaterThanOrEqualsCompare, (const void *)&this->uAtomicState, &uValue, sizeof(uValue), qwTimeoutAbs.ValueOr(0), true);
AuAtomicSub(&this->uAtomicSleeping, 1u);
if (!bStatus)
{
return false;
}
}
return true;
}
inline bool AcquireUntilAtleastAbsNS(AuUInt32 uValue,
AuOptional<AuUInt64> qwTimeoutAbs)
{
AuUInt32 uState {};
bool bStatus { true };
while ((uState = AuAtomicLoad(&this->uAtomicState) < uValue))
while (!TryWaitOnAddressSpecial(EWaitMethod::eGreaterThanOrEqualsCompare, (const void *)&this->uAtomicState, &uValue, sizeof(uValue)))
{
AuAtomicAdd(&this->uAtomicSleeping, 1u);
bStatus = WaitOnAddressSteady((const void *)&this->uAtomicState, &uState, sizeof(uState), qwTimeoutAbs.ValueOr(0), true);
bStatus = WaitOnAddressSpecialSteady(EWaitMethod::eGreaterThanOrEqualsCompare, (const void *)&this->uAtomicState, &uValue, sizeof(uValue), qwTimeoutAbs.ValueOr(0), true);
AuAtomicSub(&this->uAtomicSleeping, 1u);
if (!bStatus)
@ -236,14 +257,13 @@ namespace Aurora::Threading::Waitables
return false;
}
do
while ((uState = AuAtomicLoad(&this->uAtomicState)) >= uValue)
{
if (AuAtomicCompareExchange(&this->uAtomicState, uState - uValue, uState) == uState)
{
return true;
}
}
while ((uState = AuAtomicLoad(&this->uAtomicState) >= uValue));
return false;
}
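The hunks above boil down to one pattern: sleep until the counter is at least uValue (now expressed directly through eGreaterThanOrEqualsCompare rather than looping on an eNotEqual-style wake), then consume uValue units with a compare-exchange retry loop. A condensed sketch of that pattern, with a free-standing counter standing in for uAtomicState and the uAtomicSleeping bookkeeping omitted for brevity (names are illustrative):

// Condensed, illustrative version of the acquire loop above; uCounter stands in
// for the waitable's uAtomicState and qwTimeoutAbsNs is an absolute steady-clock deadline.
bool AcquireAtLeast(AuUInt32 &uCounter, AuUInt32 uValue, AuUInt64 qwTimeoutAbsNs)
{
    // Sleep until uCounter >= uValue, or fail once the deadline passes (0 = indefinite).
    while (!AuThreading::TryWaitOnAddressSpecial(AuThreading::EWaitMethod::eGreaterThanOrEqualsCompare,
                                                 &uCounter, &uValue, sizeof(uValue)))
    {
        if (!AuThreading::WaitOnAddressSpecialSteady(AuThreading::EWaitMethod::eGreaterThanOrEqualsCompare,
                                                     &uCounter, &uValue, sizeof(uValue),
                                                     qwTimeoutAbsNs, true))
        {
            return false; // timed out before the counter reached uValue
        }
    }

    // Consume uValue units; retry the CAS if another thread raced the decrement.
    AuUInt32 uState {};
    while ((uState = AuAtomicLoad(&uCounter)) >= uValue)
    {
        if (AuAtomicCompareExchange(&uCounter, uState - uValue, uState) == uState)
        {
            return true;
        }
    }
    return false;
}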

View File

@ -12,12 +12,13 @@
2: uWordSize must be less than or equal to 8 bytes
3: only the least significant 32bits are guaranteed to be used as wake signals
in either mode:
1: WaitOnAddress[...] can wake at any-time if a fast path permits.
(we only care about strict guarantees during the deep slow-path yield operation.
after the first pass, after a cache miss, after a fast path succeeds,
1: WaitOnAddress[...] can wake at any time if a fast-path permits.
(we only care about strict order guarantees during the deep slow-path yield operation.
after the first test, after an exchange miss, after a fast path succeeds,
it's anybody's guess who will *return* first. on the other hand, a set of 5 threads
already in the kernel *should* wake in the expected order. otherwise, WaitOnAddress[...]
just assumes pTargetAddress != pCompareAddress is an orderless return condition.)
already in the kernel *should* wake in the expected order. otherwise, the WaitOnAddress[...]
functions merely assume the comparison operation is an orderless return condition.)
1 cont: This extends to correcting spurious wakeups. If the condition is lost, we will not return.
* By default: UNIXes and targets below/inc Windows 7 will be in userland emulation mode for performance reasons.
* Linux and other targets can directly interface with their futex interface under a smaller wrapper;
@ -29,16 +30,22 @@
* are written with OS specific optimizations in mind, and therefore consider emulation bloat.
* bPreferEmulatedWakeOnAddress disables the emulation layer, if there's a reasonable native
* interface available.
* Defer to ThreadingConfig::bPreferEmulatedWakeOnAddress = !AuBuild::kIsNtDerived
Note: UntilEqual (new experimental) variants yield until a specified pCompareAddress value.
The base variants treat pCompareAddress as the previous CAS return value.
* Defer to ThreadingConfig::bPreferEmulatedWakeOnAddress = !AuBuild::kIsNtDerived
WARNING: Windows 10+ WILL NOT HAVE EFFICIENT IMPLEMENTATIONS OF THE SPECIAL VARIANTS
Windows XP through 7 will, by necessity; Linux will, by default; Windows 10, on the other hand, is biased towards a thinner eNotEqual wrapper.
WARNING: ThreadingConfig::bPreferEmulatedWakeOnAddress == FALSE WILL IMPACT THE SPECIAL VARIANTS' PERFORMANCE
***/
#pragma once
namespace Aurora::Threading
{
AUE_DEFINE(EWaitMethod, (
eNotEqual, eEqual, eLessThanCompare, eGreaterThanCompare, eLessThanOrEqualsCompare, eGreaterThanOrEqualsCompare
))
AUKN_SYM void WakeAllOnAddress(const void *pTargetAddress);
AUKN_SYM void WakeOnAddress(const void *pTargetAddress);
@ -55,9 +62,10 @@ namespace Aurora::Threading
const void *pCompareAddress,
AuUInt8 uWordSize);
AUKN_SYM bool TryWaitOnAddressUntilEqual(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize);
AUKN_SYM bool TryWaitOnAddressSpecial(EWaitMethod eMethod,
const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize);
// On systems with processors of shared execution pipelines, these try-series of operations will spin (eg: mm_pause) for a configurable
// amount of time, so long as the process-wide state isn't overly contested. This means you can use these arbitrarily without
@ -70,10 +78,11 @@ namespace Aurora::Threading
AuUInt8 uWordSize,
const AuFunction<bool(const void *, const void *, AuUInt8)> &check);
AUKN_SYM bool TryWaitOnAddressUntilEqualEx(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
const AuFunction<bool(const void *, const void *, AuUInt8)> &check);
AUKN_SYM bool TryWaitOnAddressSpecialEx(EWaitMethod eMethod,
const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
const AuFunction<bool(const void *, const void *, AuUInt8)> &check);
// Relative timeout variant of nanosecond resolution WoA. 0 = indefinite
AUKN_SYM bool WaitOnAddress(const void *pTargetAddress,
@ -83,11 +92,12 @@ namespace Aurora::Threading
AuOptional<bool> optAlreadySpun = {} /*hint: do not spin before switching. subject to global config.*/);
// Relative timeout variant of nanosecond resolution WoA. 0 = indefinite
AUKN_SYM bool WaitOnAddressUntilEqual(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds,
AuOptional<bool> optAlreadySpun = {} /*hint: do not spin before switching. subject to global config.*/);
AUKN_SYM bool WaitOnAddressSpecial(EWaitMethod eMethod,
const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds,
AuOptional<bool> optAlreadySpun = {} /*hint: do not spin before switching. subject to global config.*/);
// Absolute timeout variant of nanosecond resolution WoA. Nanoseconds are in steady clock time. 0 = indefinite
AUKN_SYM bool WaitOnAddressSteady(const void *pTargetAddress,
@ -97,9 +107,10 @@ namespace Aurora::Threading
AuOptional<bool> optAlreadySpun = {} /*hint: do not spin before switching. subject to global config.*/);
// Absolute timeout variant of nanosecond resolution WoA. Nanoseconds are in steady clock time. 0 = indefinite
AUKN_SYM bool WaitOnAddressUntilEqualSteady(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds,
AuOptional<bool> optAlreadySpun = {} /*hint: do not spin before switching. subject to global config.*/);
AUKN_SYM bool WaitOnAddressSpecialSteady(EWaitMethod eMethod,
const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds,
AuOptional<bool> optAlreadySpun = {} /*hint: do not spin before switching. subject to global config.*/);
}
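Reading the declarations above together: each EWaitMethod names the relation between *pTargetAddress and *pCompareAddress at which a wait may return (eNotEqual reproduces the classic WaitOnAddress contract, while eEqual and the ordered comparisons wait until that relation holds), and the ...Ex try-variants take an additional predicate consulted alongside the primary comparison before the spin gives up. A hedged sketch of the Ex form; the flag and status variables are illustrative, only the API calls are from the header above, and the lambda-to-AuFunction conversion is assumed to behave like std::function:

// Spin briefly (subject to the global spin configuration) until uStatus == kReady
// and a secondary flag is also observed; illustrative use of the Ex variant.
static AuUInt32 uStatus {};
static const AuUInt32 kReady { 1 };
static bool gSecondaryReady {};

bool TrySpinForReady()
{
    return AuThreading::TryWaitOnAddressSpecialEx(
        AuThreading::EWaitMethod::eEqual,
        &uStatus, &kReady, sizeof(uStatus),
        [](const void *pTarget, const void *pCompare, AuUInt8 uWordSize) -> bool
        {
            // Secondary predicate combined with the eEqual comparison; the try call
            // only reports success when this also returns true.
            return gSecondaryReady;
        });
}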

View File

@ -410,4 +410,9 @@ namespace Aurora
{
futex_wake((AuUInt32 *)pAddress, INT_MAX);
}
void SysWakeOneOnAddress(const void *pAddress)
{
futex_wake((AuUInt32 *)pAddress, 1);
}
}
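For context, SysWakeOneOnAddress is the single-waiter counterpart of the existing SysWakeAllOnAddress: the same futex_wake wrapper, but with a wake count of one instead of INT_MAX. A minimal illustration of what such a wrapper reduces to on Linux (this is a sketch of the underlying syscall, not the project's actual futex_wake):

// Illustrative only: a bare-bones futex wake using the raw syscall interface.
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <cstdint>

static long FutexWakeN(std::uint32_t *pAddress, int nWaiters)
{
    // FUTEX_WAKE_PRIVATE wakes up to nWaiters threads blocked on pAddress.
    return syscall(SYS_futex, pAddress, FUTEX_WAKE_PRIVATE, nWaiters, nullptr, nullptr, 0);
}

// SysWakeAllOnAddress corresponds to FutexWakeN(p, INT_MAX); SysWakeOneOnAddress to FutexWakeN(p, 1).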

View File

@ -274,6 +274,7 @@ namespace Aurora
ADD_GET_PROC(Kernel32, QueryPerformanceFrequency)
ADD_GET_PROC(Kernel32, RemoveDllDirectory)
ADD_GET_PROC(Kernel32, AddDllDirectory)
ADD_GET_PROC(Kernel32, SetProcessInformation)
ADD_GET_PROC_BI2(Kernel32, PSAPILegacy, K32GetProcessMemoryInfo, GetProcessMemoryInfo)
@ -513,6 +514,44 @@ namespace Aurora
}
}
if (pSetProcessInformation &&
AuSwInfo::IsWindows10OrGreater())
{
static AuInitOnceSmall gInitOnce;
// Imagine paying any amount of money for a computer above Ivan's ewaste shitbox standards, just for Microsoft to tell you your non-portable platform isn't EcOnOmIcaLLY scheduling tasks EffiCiENT enough.
// Unless otherwise stated, piss off.
// Why would any platform make us opt into being able to use our own hardware to its full potential? This is stupid.
// Microshit can't even write a memory management subsystem that provides free pages during phases of low resource consumption; and now, we have to trust them to give us high priority scheduling?
// (low resource consumption -> read: RAM [ (UNUSED HUGE FILE) (UNUSED HUGE FILE) (UNUSED HUGE FILE) (UNUSED HUGE FILE) (UNUSED HUGE FILE) (HUGE FILE) (FREE SPACE) (FREE SPACE) (YOU) ],
// SWAP: [ (UNUSED HUGE FILE) (UNUSED HUGE FILE) (UNUSED HUGE FILE) (UNUSED HUGE FILE) (UNUSED HUGE FILE) (HUGE FILE) (HEAP BACKUP LOL) (SOME EXTRA SWAP SPACE YOU WONT BE ABLE TO USE) ] )
// I'll let you know when we devolve into writing worthless chrome_helper.exe (42) processes allegedly doing some sandboxing of perceived value. Until then, let's assume our processes aren't *literal* retards staring into the abyss all day.
//
// Disable via: gRuntimeConfig.threadingConfig.bEnableAggressiveScheduling (def: true)
if (AuThreading::InitOnceLocker::TryLock(&gInitOnce))
{
PROCESS_POWER_THROTTLING_STATE powerThrottling {};
powerThrottling.Version = PROCESS_POWER_THROTTLING_CURRENT_VERSION;
powerThrottling.ControlMask = PROCESS_POWER_THROTTLING_EXECUTION_SPEED;
powerThrottling.StateMask = 0;
pSetProcessInformation(GetCurrentProcess(),
ProcessPowerThrottling,
&powerThrottling,
sizeof(powerThrottling));
powerThrottling.ControlMask = PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION;
powerThrottling.StateMask = 0;
pSetProcessInformation(GetCurrentProcess(),
ProcessPowerThrottling,
&powerThrottling,
sizeof(powerThrottling));
AuThreading::InitOnceLocker::Finish(&gInitOnce);
} /* else no-wait. intentionally nop*/
}
if (pZwSetTimerResolution)
{
auto uRet = pZwSetTimerResolution(1, true, &ullActualResolution);
@ -529,8 +568,6 @@ namespace Aurora
}
}
}
// ...SetProcessInformation?
}
void Win32Terminate()
@ -689,20 +726,6 @@ namespace Aurora
return bool(pWaitOnAddress) || bool(pRtlWaitOnAddress);
}
bool SysWaitOnAddressNoTimed(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize)
{
if (pRtlWaitOnAddress)
{
return pRtlWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, uWordSize, nullptr);
}
else
{
return pWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, uWordSize, INFINITE);
}
}
bool SysWaitOnAddressTimed(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
@ -715,44 +738,6 @@ namespace Aurora
SysUnreachable();
}
void SysWakeNOnAddress(const void *pAddress,
AuUInt32 dwCount)
{
if (pRtlWakeAddressSingle)
{
if (dwCount < 6)
{
for (AuUInt i = 0; i < dwCount; i++)
{
pRtlWakeAddressSingle((void *)pAddress);
}
}
else
{
pRtlWakeByAddressAll((void *)pAddress);
}
}
else
{
for (AuUInt i = 0; i < dwCount; i++)
{
pWakeByAddressSingle((void *)pAddress);
}
}
}
void SysWakeAllOnAddress(const void *pAddress)
{
if (pRtlWakeByAddressAll)
{
pRtlWakeByAddressAll((void *)pAddress);
}
else
{
pWakeByAddressAll((void *)pAddress);
}
}
AuUInt64 SysGetFileLength(AuUInt uOSHandle)
{
LARGE_INTEGER length;

View File

@ -28,6 +28,7 @@ enum _SE_OBJECT_TYPE;
enum _MINIDUMP_TYPE;
enum _OBJECT_WAIT_TYPE;
enum _SE_OBJECT_TYPE;
enum _PROCESS_INFORMATION_CLASS;
//#if defined(AURORA_COMPILER_MSVC)
struct _IP_ADAPTER_ADDRESSES_LH;
@ -302,6 +303,13 @@ namespace Aurora
LPWSTR lpBuffer
);
inline BOOL(__stdcall *pSetProcessInformation)(
HANDLE hProcess,
_PROCESS_INFORMATION_CLASS ProcessInformationClass,
LPVOID ProcessInformation,
DWORD ProcessInformationSize
);
inline BOOL(__stdcall *pPrefetchVirtualMemory)(
HANDLE hProcess,
ULONG_PTR NumberOfEntries,
@ -1219,4 +1227,68 @@ namespace Aurora
HMODULE hModule,
LPCSTR lpProcName
);
static auline bool SysWaitOnAddressNoTimed(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize)
{
if (pRtlWaitOnAddress)
{
return pRtlWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, uWordSize, nullptr);
}
else
{
return pWaitOnAddress((void *)pTargetAddress, (void *)pCompareAddress, uWordSize, INFINITE);
}
}
static auline void SysWakeNOnAddress(const void *pAddress,
AuUInt32 dwCount)
{
if (pRtlWakeAddressSingle)
{
if (dwCount < 6)
{
for (AuUInt i = 0; i < dwCount; i++)
{
pRtlWakeAddressSingle((void *)pAddress);
}
}
else
{
pRtlWakeByAddressAll((void *)pAddress);
}
}
else
{
for (AuUInt i = 0; i < dwCount; i++)
{
pWakeByAddressSingle((void *)pAddress);
}
}
}
static auline void SysWakeAllOnAddress(const void *pAddress)
{
if (pRtlWakeByAddressAll)
{
pRtlWakeByAddressAll((void *)pAddress);
}
else
{
pWakeByAddressAll((void *)pAddress);
}
}
static auline void SysWakeOneOnAddress(const void *pAddress)
{
if (pRtlWakeAddressSingle)
{
pRtlWakeAddressSingle((void *)pAddress);
}
else
{
pWakeByAddressSingle((void *)pAddress);
}
}
}

View File

@ -42,6 +42,8 @@ namespace Aurora
void SysWakeNOnAddress(const void *pAddress,
AuUInt32 dwCount);
void SysWakeOneOnAddress(const void *pAddress);
void SysWakeAllOnAddress(const void *pAddress);
AuUInt64 SysGetFileLength(AuUInt uOSHandle);

View File

@ -5,12 +5,17 @@
Date: 2023-3-10
Author: Reece
***/
#if defined(AURORA_COMPILER_MSVC)
#pragma strict_gs_check(off)
#pragma check_stack(off)
#endif
#include <Source/RuntimeInternal.hpp>
#include "AuWakeOnAddress.hpp"
#include "Primitives/SMTYield.hpp"
#include <Time/Time.hpp>
#define HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD
// WOA_ALWAYS_DUMB_OS_TARGET -> iOS, notarized MacOS, Win9x, Xbox 360, etc
@ -22,6 +27,29 @@ namespace Aurora::Threading
static thread_local WaitEntry tlsWaitEntry;
#endif
#define DO_OF_METHOD_TYPE(preface, DoOfMethodType, ...) \
switch (eMethod) \
{ \
case EWaitMethod::eNotEqual: \
preface DoOfMethodType<EWaitMethod::eNotEqual>(__VA_ARGS__); \
break; \
case EWaitMethod::eEqual: \
preface DoOfMethodType<EWaitMethod::eEqual>(__VA_ARGS__); \
break; \
case EWaitMethod::eGreaterThanCompare: \
preface DoOfMethodType<EWaitMethod::eGreaterThanCompare>(__VA_ARGS__); \
break; \
case EWaitMethod::eGreaterThanOrEqualsCompare: \
preface DoOfMethodType<EWaitMethod::eGreaterThanOrEqualsCompare>(__VA_ARGS__); \
break; \
case EWaitMethod::eLessThanCompare: \
preface DoOfMethodType<EWaitMethod::eLessThanCompare>(__VA_ARGS__); \
break; \
case EWaitMethod::eLessThanOrEqualsCompare: \
preface DoOfMethodType<EWaitMethod::eLessThanOrEqualsCompare>(__VA_ARGS__); \
break; \
}
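DO_OF_METHOD_TYPE is the bridge between the runtime EWaitMethod argument of the public entry points and the compile-time template parameter used internally: every call site expands to a six-way switch that instantiates the named template once per method. A reduced, standalone sketch of the same enum-to-template dispatch idiom (the demo enum and function are hypothetical stand-ins, not the runtime's types):

// Standalone illustration of the dispatch idiom; EDemoMethod/Probe are hypothetical stand-ins.
enum class EDemoMethod { eNotEqual, eEqual };

template <EDemoMethod eMethod>
bool Probe(int iHot, int iCompare)
{
    // Mirrors the "return true while the caller should keep waiting" convention.
    if constexpr (eMethod == EDemoMethod::eEqual)
    {
        return iHot != iCompare;   // eEqual: keep waiting until the values match
    }
    else
    {
        return iHot == iCompare;   // eNotEqual: keep waiting while they still match
    }
}

bool ProbeDynamic(EDemoMethod eMethod, int iHot, int iCompare)
{
    switch (eMethod)               // runtime value in...
    {
    case EDemoMethod::eNotEqual:
        return Probe<EDemoMethod::eNotEqual>(iHot, iCompare); // ...compile-time instantiation out
    case EDemoMethod::eEqual:
        return Probe<EDemoMethod::eEqual>(iHot, iCompare);
    }
    return false;
}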
static ProcessWaitContainer gProcessWaitables;
static int gShouldSpinOnlyInCPU = 1; // TODO: haven't decided
@ -89,6 +117,7 @@ namespace Aurora::Threading
#endif
}
template <EWaitMethod eMethod>
bool WaitEntry::SleepOn(WaitState &state)
{
#if !defined(WOA_SEMAPHORE_MODE)
@ -97,7 +126,7 @@ namespace Aurora::Threading
if (state.qwNanosecondsAbs)
{
if (!WaitBuffer::Compare(this->pAddress, this->uSize, state))
if (!WaitBuffer::Compare2<eMethod, true>(this->pAddress, this->uSize, state.compare.buffer, state.uDownsizeMask))
{
return true;
}
@ -107,7 +136,7 @@ namespace Aurora::Threading
while (uNow < uEndTime)
{
if (!WaitBuffer::Compare(this->pAddress, this->uSize, state))
if (!WaitBuffer::Compare2<eMethod, true>(this->pAddress, this->uSize, state.compare.buffer, state.uDownsizeMask))
{
return true;
}
@ -121,7 +150,7 @@ namespace Aurora::Threading
#if !defined(WOA_SEMAPHORE_MODE)
this->mutex.Unlock();
#endif
(void)gProcessWaitables.WaitBufferFrom(this->pAddress, this->uSize, false, state.pCompare2);
(void)gProcessWaitables.WaitBufferFrom(this->pAddress, this->uSize, false, state.pCompare2, eMethod);
#if !defined(WOA_SEMAPHORE_MODE)
this->mutex.Lock();
#endif
@ -139,18 +168,18 @@ namespace Aurora::Threading
uNow = AuTime::SteadyClockNS();
}
return !WaitBuffer::Compare(this->pAddress, this->uSize, state);
return !WaitBuffer::Compare2<eMethod, true>(this->pAddress, this->uSize, state.compare.buffer, state.uDownsizeMask);
}
else
{
while (WaitBuffer::Compare(this->pAddress, this->uSize, state))
while (WaitBuffer::Compare2<eMethod, true>(this->pAddress, this->uSize, state.compare.buffer, state.uDownsizeMask))
{
if (!this->bAlive)
{
#if !defined(WOA_SEMAPHORE_MODE)
this->mutex.Unlock();
#endif
(void)gProcessWaitables.WaitBufferFrom(this->pAddress, this->uSize, false, state.pCompare2);
(void)gProcessWaitables.WaitBufferFrom(this->pAddress, this->uSize, false, state.pCompare2, eMethod);
#if !defined(WOA_SEMAPHORE_MODE)
this->mutex.Lock();
#endif
@ -180,7 +209,7 @@ namespace Aurora::Threading
if (this->pCompareAddress)
{
if (!WaitBuffer::Compare(pAddress, this->uSize, this->pCompareAddress))
if (WaitBuffer::Compare(pAddress, this->uSize, this->pCompareAddress, kMax64, this->eWaitMethod))
{
return false;
}
@ -202,76 +231,264 @@ namespace Aurora::Threading
return AuMove(wait);
}
bool WaitBuffer::Compare(const void *pBuf, AuUInt8 uSize, WaitState &state)
bool WaitBuffer::Compare(const void *pHotAddress, AuUInt8 uSize, WaitState &state)
{
auto eMethod = state.eWaitMethod;
return WaitBuffer::Compare(pHotAddress, uSize, state.compare.buffer, state.uDownsizeMask, eMethod);
}
bool WaitBuffer::Compare(const void *pHotAddress, AuUInt8 uSize, const void *pCompare, AuUInt64 uMask, EWaitMethod eMethod)
{
bool bRet {};
FlushWaitBufferPAddressCache();
if (!state.uDownsizeMask)
#if 0
switch (eMethod)
{
bRet = AuMemcmp(pBuf, state.compare.buffer, AuMin(uSize, state.compare.uSize)) == 0;
}
else
case EWaitMethod::eEqual:
case EWaitMethod::eNotEqual:
{
auto uMask = state.uDownsizeMask.value();
auto &uSrcWord = *AuReinterpretCast<const AuUInt32 *>(pBuf);
auto &uCmpWord = *AuReinterpretCast<const AuUInt32 *>(state.compare.buffer);
auto &uSrcWord = *AuReinterpretCast<const AuUInt32 *>(pHotAddress);
auto &uCmpWord = *AuReinterpretCast<const AuUInt32 *>(pCompare);
bRet = (uSrcWord & uMask) == (uCmpWord & uMask);
}
bRet ^= bool(state.pCompare2);
return bRet;
}
bool WaitBuffer::Compare(const void *pBuf, AuUInt8 uSize, const void *pBuf2)
{
FlushWaitBufferPAddressCache();
switch (uSize)
{
case 1:
return AuReadU8(pBuf, 0) == AuReadU8(pBuf2, 0);
case 2:
return AuReadU16(pBuf, 0) == AuReadU16(pBuf2, 0);
case 4:
return AuReadU32(pBuf, 0) == AuReadU32(pBuf2, 0);
case 8:
return AuReadU64(pBuf, 0) == AuReadU64(pBuf2, 0);
bRet ^= bool(eMethod == EWaitMethod::eEqual);
break;
};
default:
return AuMemcmp(pBuf, pBuf2, uSize) == 0;
}
}
bool WaitBuffer::Compare(const void *pBuf)
{
return WaitBuffer::Compare(this->buffer, this->uSize, pBuf);
}
bool WaitBuffer::Compare(WaitState &state)
{
bool bRet {};
if (!state.uDownsizeMask)
{
bRet = WaitBuffer::Compare(this->buffer, AuMin(this->uSize, state.compare.uSize), state.compare.buffer);
DO_OF_METHOD_TYPE(return, Compare2, pHotAddress, uSize, pCompare)
}
else
{
auto uMask = state.uDownsizeMask.value();
auto &uSrcWord = *AuReinterpretCast<const AuUInt32 *>(this->buffer);
auto &uCmpWord = *AuReinterpretCast<const AuUInt32 *>(state.compare.buffer);
bRet = (uSrcWord & uMask) == (uCmpWord & uMask);
}
#else
DO_OF_METHOD_TYPE(return, Compare2, pHotAddress, uSize, pCompare)
#endif
bRet ^= bool(state.pCompare2);
return bRet;
}
WaitEntry *ProcessWaitNodeContainer::WaitBufferFrom(const void *pAddress, AuUInt8 uSize, bool bScheduleFirst, const void *pCompareAddress)
template <EWaitMethod eMethod, bool bFast>
bool WaitBuffer::Compare2(const void *pHot, AuUInt8 uSize, const void *pBuf2, AuUInt64 uMask)
{
FlushWaitBufferPAddressCache();
if constexpr (!bFast)
{
if constexpr (eMethod == EWaitMethod::eNotEqual)
{
switch (uSize)
{
case 1:
return (AuReadU8(pHot, 0) & uMask) == (AuReadU8(pBuf2, 0) & uMask);
case 2:
return (AuReadU16(pHot, 0) & uMask) == (AuReadU16(pBuf2, 0) & uMask);
case 4:
return (AuReadU32(pHot, 0) & uMask) == (AuReadU32(pBuf2, 0) & uMask);
case 8:
return (AuReadU64(pHot, 0) & uMask) == (AuReadU64(pBuf2, 0) & uMask);
default:
return (AuMemcmp(pHot, pBuf2, uSize) == 0);
}
}
if constexpr (eMethod == EWaitMethod::eEqual)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0) & uMask) == (AuReadU8(pBuf2, 0) & uMask));
case 2:
return !((AuReadU16(pHot, 0) & uMask) == (AuReadU16(pBuf2, 0) & uMask));
case 4:
return !((AuReadU32(pHot, 0) & uMask) == (AuReadU32(pBuf2, 0) & uMask));
case 8:
return !((AuReadU64(pHot, 0) & uMask) == (AuReadU64(pBuf2, 0) & uMask));
default:
return !(AuMemcmp(pHot, pBuf2, uSize) == 0);
}
}
if constexpr (eMethod == EWaitMethod::eGreaterThanCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0) & uMask) > (AuReadU8(pBuf2, 0) & uMask));
case 2:
return !((AuReadU16(pHot, 0) & uMask) > (AuReadU16(pBuf2, 0) & uMask));
case 4:
return !((AuReadU32(pHot, 0) & uMask) > (AuReadU32(pBuf2, 0) & uMask));
case 8:
return !((AuReadU64(pHot, 0) & uMask) > (AuReadU64(pBuf2, 0) & uMask));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eGreaterThanOrEqualsCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0) & uMask) >= (AuReadU8(pBuf2, 0) & uMask));
case 2:
return !((AuReadU16(pHot, 0) & uMask) >= (AuReadU16(pBuf2, 0) & uMask));
case 4:
return !((AuReadU32(pHot, 0) & uMask) >= (AuReadU32(pBuf2, 0) & uMask));
case 8:
return !((AuReadU64(pHot, 0) & uMask) >= (AuReadU64(pBuf2, 0) & uMask));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eLessThanCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0) & uMask) < (AuReadU8(pBuf2, 0) & uMask));
case 2:
return !((AuReadU16(pHot, 0) & uMask) < (AuReadU16(pBuf2, 0) & uMask));
case 4:
return !((AuReadU32(pHot, 0) & uMask) < (AuReadU32(pBuf2, 0) & uMask));
case 8:
return !((AuReadU64(pHot, 0) & uMask) < (AuReadU64(pBuf2, 0) & uMask));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eLessThanOrEqualsCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0) & uMask) <= (AuReadU8(pBuf2, 0) & uMask));
case 2:
return !((AuReadU16(pHot, 0) & uMask) <= (AuReadU16(pBuf2, 0) & uMask));
case 4:
return !((AuReadU32(pHot, 0) & uMask) <= (AuReadU32(pBuf2, 0) & uMask));
case 8:
return !((AuReadU64(pHot, 0) & uMask) <= (AuReadU64(pBuf2, 0) & uMask));
default:
return false;
}
}
}
else
{
if constexpr (eMethod == EWaitMethod::eNotEqual)
{
switch (uSize)
{
case 1:
return (AuReadU8(pHot, 0)) == (AuReadU8(pBuf2, 0));
case 2:
return (AuReadU16(pHot, 0)) == (AuReadU16(pBuf2, 0));
case 4:
return (AuReadU32(pHot, 0)) == (AuReadU32(pBuf2, 0));
case 8:
return (AuReadU64(pHot, 0)) == (AuReadU64(pBuf2, 0));
default:
return (AuMemcmp(pHot, pBuf2, uSize) == 0);
}
}
if constexpr (eMethod == EWaitMethod::eEqual)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0)) == (AuReadU8(pBuf2, 0)));
case 2:
return !((AuReadU16(pHot, 0)) == (AuReadU16(pBuf2, 0)));
case 4:
return !((AuReadU32(pHot, 0)) == (AuReadU32(pBuf2, 0)));
case 8:
return !((AuReadU64(pHot, 0)) == (AuReadU64(pBuf2, 0)));
default:
return !(AuMemcmp(pHot, pBuf2, uSize) == 0);
}
}
if constexpr (eMethod == EWaitMethod::eGreaterThanCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0)) > (AuReadU8(pBuf2, 0)));
case 2:
return !((AuReadU16(pHot, 0)) > (AuReadU16(pBuf2, 0)));
case 4:
return !((AuReadU32(pHot, 0)) > (AuReadU32(pBuf2, 0)));
case 8:
return !((AuReadU64(pHot, 0)) > (AuReadU64(pBuf2, 0)));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eGreaterThanOrEqualsCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0)) >= (AuReadU8(pBuf2, 0)));
case 2:
return !((AuReadU16(pHot, 0)) >= (AuReadU16(pBuf2, 0)));
case 4:
return !((AuReadU32(pHot, 0)) >= (AuReadU32(pBuf2, 0)));
case 8:
return !((AuReadU64(pHot, 0)) >= (AuReadU64(pBuf2, 0)));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eLessThanCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0)) < (AuReadU8(pBuf2, 0)));
case 2:
return !((AuReadU16(pHot, 0)) < (AuReadU16(pBuf2, 0)));
case 4:
return !((AuReadU32(pHot, 0)) < (AuReadU32(pBuf2, 0)));
case 8:
return !((AuReadU64(pHot, 0)) < (AuReadU64(pBuf2, 0)));
default:
return false;
}
}
if constexpr (eMethod == EWaitMethod::eLessThanOrEqualsCompare)
{
switch (uSize)
{
case 1:
return !((AuReadU8(pHot, 0)) <= (AuReadU8(pBuf2, 0)));
case 2:
return !((AuReadU16(pHot, 0)) <= (AuReadU16(pBuf2, 0)));
case 4:
return !((AuReadU32(pHot, 0)) <= (AuReadU32(pBuf2, 0)));
case 8:
return !((AuReadU64(pHot, 0)) <= (AuReadU64(pBuf2, 0)));
default:
return false;
}
}
}
return false;
}
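The block above hand-unrolls every (method, width, masked/unmasked) combination, presumably to keep the hot path to direct word loads. Functionally it reduces to a masked, width-dispatched comparison that returns true while the caller should keep waiting and false once the wake condition holds ("returns false when valid", per the header). A compact, illustrative equivalent for the eEqual case only, not the runtime's code:

// Compact sketch of the masked comparison for EWaitMethod::eEqual; illustrative only.
#include <cstdint>
#include <cstring>

template <typename T>
static bool StillWaitingEqual(const void *pHot, const void *pCompare, std::uint64_t uMask)
{
    T uHot {}, uCmp {};
    std::memcpy(&uHot, pHot, sizeof(T));
    std::memcpy(&uCmp, pCompare, sizeof(T));
    // eEqual completes once the masked words match, so "keep waiting" is the negation.
    return !((uHot & uMask) == (uCmp & uMask));
}

static bool StillWaitingEqual(const void *pHot, const void *pCompare, std::uint8_t uSize, std::uint64_t uMask)
{
    switch (uSize)
    {
    case 1:  return StillWaitingEqual<std::uint8_t >(pHot, pCompare, uMask);
    case 2:  return StillWaitingEqual<std::uint16_t>(pHot, pCompare, uMask);
    case 4:  return StillWaitingEqual<std::uint32_t>(pHot, pCompare, uMask);
    case 8:  return StillWaitingEqual<std::uint64_t>(pHot, pCompare, uMask);
    default: return std::memcmp(pHot, pCompare, uSize) != 0;
    }
}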
WaitEntry *ProcessWaitNodeContainer::WaitBufferFrom(const void *pAddress, AuUInt8 uSize, bool bScheduleFirst, const void *pCompareAddress, EWaitMethod eWaitMethod)
{
#if defined(HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD)
auto pReturn = tlsWaitEntry.get();
@ -282,6 +499,7 @@ namespace Aurora::Threading
pReturn->pAddress = pAddress;
pReturn->uSize = uSize;
pReturn->pCompareAddress = pCompareAddress;
pReturn->eWaitMethod = eWaitMethod;
if (bScheduleFirst /*First in, First Out*/)
{
@ -434,9 +652,9 @@ namespace Aurora::Threading
#define AddressToIndex AuHashCode(pAddress) & (AuArraySize(this->list) - 1)
WaitEntry *ProcessWaitContainer::WaitBufferFrom(const void *pAddress, AuUInt8 uSize, bool bScheduleFirst, const void *pCompareAddress)
WaitEntry *ProcessWaitContainer::WaitBufferFrom(const void *pAddress, AuUInt8 uSize, bool bScheduleFirst, const void *pCompareAddress, EWaitMethod eWaitMethod)
{
return this->list[AddressToIndex].WaitBufferFrom(pAddress, uSize, bScheduleFirst, pCompareAddress);
return this->list[AddressToIndex].WaitBufferFrom(pAddress, uSize, bScheduleFirst, pCompareAddress, eWaitMethod);
}
template <typename T>
@ -462,7 +680,7 @@ namespace Aurora::Threading
#endif
}
AUKN_SYM bool IsWaitOnRecommended()
WOAFASTPUB bool IsWaitOnRecommended()
{
#if defined(WOA_ALWAYS_DUMB_OS_TARGET)
return false;
@ -496,20 +714,20 @@ namespace Aurora::Threading
return kArray;
}
template <EWaitMethod T>
bool WaitOnAddressWide(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuOptional<AuUInt64> qwNanoseconds,
AuOptional<AuUInt64> qwNanosecondsAbs,
bool bOSSupportsWait,
const void *pCompareAddress2
)
const void *pCompareAddress2)
{
WaitState state;
SysAssertDbg(uWordSize <= 32);
auto pWaitEntry = gProcessWaitables.WaitBufferFrom(pTargetAddress, uWordSize, true, pCompareAddress2);
auto pWaitEntry = gProcessWaitables.WaitBufferFrom(pTargetAddress, uWordSize, true, pCompareAddress2, T);
// Unlocked update to a safer comparison address; hardens against bad code
{
@ -532,7 +750,7 @@ namespace Aurora::Threading
auto pTempHoldMe = tlsWaitEntry;
#endif
auto bResult = pWaitEntry->SleepOn(state);
auto bResult = pWaitEntry->SleepOn<T>(state);
#if defined(HACK_NO_INVALID_ACCESS_LEAK_SHARED_REF_ON_DESTROYED_THREAD)
pTempHoldMe.reset();
@ -546,11 +764,11 @@ namespace Aurora::Threading
return bResult;
}
AuTuple<const void *, AuUInt8, AuOptionalEx<AuUInt32>> DecodeAddress(const void *pAddress,
AuUInt32 uWordSize)
AuTuple<const void *, AuUInt8, AuUInt64> DecodeAddress(const void *pAddress,
AuUInt32 uWordSize)
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
return AuMakeTuple(pAddress, 0, AuOptionalEx<AuUInt32> {});
return AuMakeTuple(pAddress, 0, kMax64);
#endif
auto pRounded = AuPageRound(AuUInt(pAddress), AuUInt(4));
@ -558,7 +776,7 @@ namespace Aurora::Threading
if (uWordSize == 8)
{
return AuMakeTuple((const void *)pRounded, uDelta, 0xFFFFFFFF);
return AuMakeTuple((const void *)pRounded, uDelta, kMax64);
}
AuUInt32 uSizeMask = (1ull << (uWordSize * 8)) - 1ull;
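DecodeAddress handles targets that are narrower than (or not aligned to) the native wait granularity: the address is rounded down to a 4-byte boundary, the byte offset into that word is remembered, and a size mask covering only the operand's bits is produced (kMax64 meaning "no downsizing needed"). A worked example under the assumption that AuPageRound rounds down to the given alignment; the concrete address is hypothetical:

// Hypothetical 1-byte wait target at address 0x1003 (values for illustration only).
AuUInt   pRounded  = AuUInt(0x1003) & ~AuUInt(3);              // rounded wait word     == 0x1000
AuUInt   uDelta    = AuUInt(0x1003) - pRounded;                // byte offset into word == 3
AuUInt32 uSizeMask = (1ull << (1 /*uWordSize*/ * 8)) - 1ull;   // operand coverage      == 0xFF
// The emulated wait then operates on the containing 32-bit word, and the comparison is
// narrowed through the resulting downsize mask (see the state.uDownsizeMask handling elsewhere in this file).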
@ -590,8 +808,6 @@ namespace Aurora::Threading
if (pRtlWaitOnAddress)
{
auto expect = WaitBuffer::From(pCompareAddress, uWordSize);
AuUInt64 uNow {};
while (uAbsTimeSteadyClock ?
(uAbsTimeSteadyClock > (uNow = AuTime::SteadyClockNS())) :
@ -607,7 +823,7 @@ namespace Aurora::Threading
{
if (uAbsTimeSteadyClock <= uNow)
{
return !expect.Compare(pTargetAddress);
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress);
}
word.QuadPart = -(AuInt64(uAbsTimeSteadyClock - uNow) / 100ull);
@ -618,10 +834,9 @@ namespace Aurora::Threading
}
}
if (expect.Compare(pTargetAddress))
if (WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress))
{
pRtlWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize, &word);
if (!expect.Compare(pTargetAddress))
if (pRtlWaitOnAddress(pTargetAddress, pCompareAddress, uWordSize, &word))
{
return true;
}
@ -649,7 +864,7 @@ namespace Aurora::Threading
if (iDelta <= 0)
{
return !WaitBuffer::Compare(pCompareAddress, uWordSize, pTargetAddress);
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress);
}
uRelativeNanoseconds = iDelta;
@ -662,9 +877,6 @@ namespace Aurora::Threading
auto uMS = AuNSToMS<AuUInt32>(uRelativeNanoseconds);
if (!uMS)
{
// take a copy
auto expect = WaitBuffer::From(pCompareAddress, uWordSize);
// first: cpu spin to avoid the kernel altogether
if (!bSpun)
{
@ -678,7 +890,7 @@ namespace Aurora::Threading
unsigned uLimit {};
do
{
if (!expect.Compare(pTargetAddress))
if (!WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress))
{
break;
}
@ -704,7 +916,7 @@ namespace Aurora::Threading
auto expect = WaitBuffer::From(pCompareAddress, uWordSize);
// never trust the error value/status provided by wait addresses - instead, do a quick compare
if (!expect.Compare(pTargetAddress))
if (!WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress))
{
// best case: we woke up during the ms-res waitonaddress
return true;
@ -719,7 +931,7 @@ namespace Aurora::Threading
if (Primitives::DoTryIf([&]()
{
return !expect.Compare(pTargetAddress);
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress);
}))
{
// hit it within the span of 1 << SpinLoopPowerA SMT stalls
@ -747,7 +959,7 @@ namespace Aurora::Threading
}
}
return !WaitBuffer::Compare(pCompareAddress, uWordSize, pTargetAddress);
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress);
#else
@ -766,11 +978,11 @@ namespace Aurora::Threading
const void *pCompareAddress,
WaitState &state)
{
while (WaitBuffer::Compare(pTargetAddress, state.uWordSize, state))
while (WaitBuffer::Compare2<EWaitMethod::eNotEqual, AuBuild::kIsNTDerived>(pTargetAddress, state.uWordSize, pCompareAddress, state.uDownsizeMask))
{
if (!SysWaitOnAddressNoTimed(pTargetAddress, pCompareAddress, state.uWordSize))
{
AuThreading::ContextYield();
//AuThreading::ContextYield();
}
}
}
@ -780,15 +992,20 @@ namespace Aurora::Threading
WaitState &state,
bool bSpun = false)
{
if (!WaitBuffer::Compare(pTargetAddress, state.uWordSize, state))
#if 1
if (!WaitBuffer::Compare2<EWaitMethod::eNotEqual, AuBuild::kIsNTDerived>(pTargetAddress, state.uWordSize, pCompareAddress, state.uDownsizeMask))
{
return true;
}
(void)RunOSWaitOnAddressTimed(pTargetAddress, pCompareAddress, state.uWordSize, state.qwNanosecondsAbs.value(), { }, { }, bSpun);
return !WaitBuffer::Compare(pTargetAddress, state.uWordSize, state);
return !WaitBuffer::Compare2<EWaitMethod::eNotEqual, AuBuild::kIsNTDerived>(pTargetAddress, state.uWordSize, pCompareAddress, state.uDownsizeMask);
#else
return RunOSWaitOnAddressTimed(pTargetAddress, pCompareAddress, state.uWordSize, state.qwNanosecondsAbs.value(), { }, { }, bSpun);
#endif
}
template <EWaitMethod T>
static void RunOSWaitOnAddressEQNoTimedNoErrors(const void *pTargetAddress,
const void *pCompareAddress,
WaitState &state)
@ -797,16 +1014,16 @@ namespace Aurora::Threading
{
WaitBuffer wb = WaitBuffer::From(pTargetAddress, state.uWordSize);
if (!wb.Compare(state))
if (!WaitBuffer::Compare2<T>(wb.buffer, state.uWordSize, pCompareAddress, state.uDownsizeMask))
{
return;
}
(void)SysWaitOnAddressNoTimed(pTargetAddress, wb.buffer, state.uWordSize);
if (WaitBuffer::Compare(pTargetAddress, state.uWordSize, state))
if (WaitBuffer::Compare2<T>(pTargetAddress, state.uWordSize, pCompareAddress, state.uDownsizeMask))
{
SysWakeNOnAddress(pTargetAddress, 1);
SysWakeOneOnAddress(pTargetAddress);
}
else
{
@ -815,6 +1032,7 @@ namespace Aurora::Threading
}
}
template <EWaitMethod T>
static bool RunOSWaitOnAddressEQTimedSteady(const void *pTargetAddress,
const void *pCompareAddress,
WaitState &state,
@ -824,16 +1042,16 @@ namespace Aurora::Threading
{
WaitBuffer wb = WaitBuffer::From(pTargetAddress, state.uWordSize);
if (!wb.Compare(state))
if (!WaitBuffer::Compare2<T, AuBuild::kIsNtDerived>(wb.buffer, state.uWordSize, pCompareAddress, state.uDownsizeMask))
{
return true;
}
bool bResult = RunOSWaitOnAddressTimed(pTargetAddress, wb.buffer, state.uWordSize, state.qwNanosecondsAbs.value(), { }, { }, bSpun);
if (WaitBuffer::Compare(pTargetAddress, state.uWordSize, state))
if (WaitBuffer::Compare2<T, AuBuild::kIsNtDerived>(pTargetAddress, state.uWordSize, pCompareAddress, state.uDownsizeMask))
{
SysWakeNOnAddress(pTargetAddress, 1);
SysWakeOneOnAddress(pTargetAddress);
if (!bResult)
{
return false;
@ -860,10 +1078,10 @@ namespace Aurora::Threading
WaitState state;
state.uDownsizeMask = uMask;
state.compare = uMask ?
state.compare = uMask != kMax64 ?
WaitBuffer::From(pCompareAddress2, 4) :
WaitBuffer::From(pCompareAddress2, uWordSize);
state.uWordSize = uMask ? 4 : uWordSize;
state.uWordSize = uMask != kMax64 ? 4 : uWordSize;
if (!qwNanosecondsAbs)
{
@ -921,14 +1139,14 @@ namespace Aurora::Threading
#endif
}
AUKN_SYM bool WaitOnAddress(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds,
AuOptional<bool> optAlreadySpun)
WOAFASTPUB bool WaitOnAddress(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds,
AuOptional<bool> optAlreadySpun)
{
// Avoid SteadyTime syscall in the event of HAL retardation (missing KUSER QPC, Linux vDSO, etc)
if (!WaitBuffer::Compare(pTargetAddress, uWordSize, pCompareAddress))
if (!WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress, kMax64))
{
return true;
}
@ -940,49 +1158,59 @@ namespace Aurora::Threading
optAlreadySpun);
}
AUKN_SYM bool WaitOnAddressUntilEqual(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds,
AuOptional<bool> optAlreadySpun)
WOAFASTPUB bool WaitOnAddressSpecial(EWaitMethod eMethod,
const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds,
AuOptional<bool> optAlreadySpun)
{
// Avoid SteadyTime syscall in the event of HAL retardation (missing KUSER QPC, Linux vDSO, etc)
if (WaitBuffer::Compare(pTargetAddress, uWordSize, pCompareAddress))
if (!WaitBuffer::Compare(pTargetAddress, uWordSize, pCompareAddress, kMax64, eMethod))
{
return true;
}
return WaitOnAddressUntilEqualSteady(pTargetAddress,
pCompareAddress,
uWordSize,
qwNanoseconds ? qwNanoseconds + AuTime::SteadyClockNS() : 0,
optAlreadySpun);
return WaitOnAddressSpecialSteady(eMethod,
pTargetAddress,
pCompareAddress,
uWordSize,
qwNanoseconds ? qwNanoseconds + AuTime::SteadyClockNS() : 0,
optAlreadySpun);
}
AUKN_SYM bool TryWaitOnAddress(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize)
template <EWaitMethod T>
auline bool TryWaitOnAddressSpecialTmpl(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize)
{
return Primitives::DoTryIf([&]()
{
return !WaitBuffer::Compare(pCompareAddress, uWordSize, pTargetAddress);
return !WaitBuffer::Compare2<T, true>(pTargetAddress, uWordSize, pCompareAddress);
});
}
AUKN_SYM bool TryWaitOnAddressUntilEqual(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize)
{
return Primitives::DoTryIf([&]()
{
return WaitBuffer::Compare(pCompareAddress, uWordSize, pTargetAddress);
});
}
AUKN_SYM bool TryWaitOnAddressEx(const void *pTargetAddress,
WOAFASTPUB bool TryWaitOnAddress(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
const AuFunction<bool(const void *, const void *, AuUInt8)> &check)
AuUInt8 uWordSize)
{
return TryWaitOnAddressSpecialTmpl<EWaitMethod::eNotEqual>(pTargetAddress, pCompareAddress, uWordSize);
}
WOAFASTPUB bool TryWaitOnAddressSpecial(EWaitMethod eMethod,
const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize)
{
DO_OF_METHOD_TYPE(return, TryWaitOnAddressSpecialTmpl, pTargetAddress, pCompareAddress, uWordSize);
return false;
}
WOAFASTPUB bool TryWaitOnAddressEx(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
const AuFunction<bool(const void *, const void *, AuUInt8)> &check)
{
if (!check)
{
@ -991,7 +1219,7 @@ namespace Aurora::Threading
return Primitives::DoTryIf([&]()
{
if (WaitBuffer::Compare(pCompareAddress, uWordSize, pTargetAddress))
if (WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress))
{
return false;
}
@ -1000,29 +1228,40 @@ namespace Aurora::Threading
});
}
AUKN_SYM bool TryWaitOnAddressUntilEqualEx(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
const AuFunction<bool(const void *, const void *, AuUInt8)> &check)
template <EWaitMethod T>
bool TryWaitOnAddressSpecialExTmpl(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
const AuFunction<bool(const void *, const void *, AuUInt8)> &check)
{
return Primitives::DoTryIf([&]()
{
if (WaitBuffer::Compare2<T, true>(pTargetAddress, uWordSize, pCompareAddress))
{
return false;
}
return check(pTargetAddress, pCompareAddress, uWordSize);
});
}
WOAFASTPUB bool TryWaitOnAddressSpecialEx(EWaitMethod eMethod,
const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
const AuFunction<bool(const void *, const void *, AuUInt8)> &check)
{
if (!check)
{
return TryWaitOnAddressUntilEqual(pTargetAddress, pCompareAddress, uWordSize);
return TryWaitOnAddressSpecial(eMethod, pTargetAddress, pCompareAddress, uWordSize);
}
return Primitives::DoTryIf([&]()
{
if (!WaitBuffer::Compare(pCompareAddress, uWordSize, pTargetAddress))
{
return false;
}
return check(pTargetAddress, pCompareAddress, uWordSize);
});
DO_OF_METHOD_TYPE(return, TryWaitOnAddressSpecialExTmpl, pTargetAddress, pCompareAddress, uWordSize, check);
return false;
}
AUKN_SYM void WakeNOnAddress(const void *pTargetAddress,
AuUInt8 uNMaximumThreads)
WOAFASTPUB void WakeNOnAddress(const void *pTargetAddress,
AuUInt8 uNMaximumThreads)
{
if (IsWaitOnRecommended())
{
@ -1058,12 +1297,12 @@ namespace Aurora::Threading
}
}
AUKN_SYM void WakeOnAddress(const void *pTargetAddress)
WOAFASTPUB void WakeOnAddress(const void *pTargetAddress)
{
WakeNOnAddress(pTargetAddress, 1);
}
AUKN_SYM void WakeAllOnAddress(const void *pTargetAddress)
WOAFASTPUB void WakeAllOnAddress(const void *pTargetAddress)
{
if (IsWaitOnRecommended())
{
@ -1079,16 +1318,16 @@ namespace Aurora::Threading
}
}
AUKN_SYM bool WaitOnAddressSteady(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds,
AuOptional<bool> optAlreadySpun)
{
WOAFASTPUB bool WaitOnAddressSteady(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds,
AuOptional<bool> optAlreadySpun)
{
// Avoid emulated path dynamic TLS fetch without TLS section
// or various security checks
// or other such bloated thunks
if (!WaitBuffer::Compare(pCompareAddress, uWordSize, pTargetAddress))
if (!WaitBuffer::Compare2<EWaitMethod::eNotEqual, true>(pTargetAddress, uWordSize, pCompareAddress, kMax64))
{
return true;
}
@ -1101,10 +1340,10 @@ namespace Aurora::Threading
WaitState state;
state.uDownsizeMask = uMask;
state.compare = uMask ?
state.compare = uMask != kMax64 ?
WaitBuffer::From(pCompareAddress2, 4) :
WaitBuffer::From(pCompareAddress2, uWordSize);
state.uWordSize = uMask ? 4 : uWordSize;
state.uWordSize = uMask != kMax64 ? 4 : uWordSize;
bool bSpun {};
if (Primitives::ThrdCfg::gPreferWaitOnAddressAlwaysSpinNative &&
@ -1114,9 +1353,10 @@ namespace Aurora::Threading
{
return true;
}
bSpun = true;
}
if (!qwNanoseconds)
{
@ -1140,22 +1380,23 @@ namespace Aurora::Threading
}
}
return WaitOnAddressWide(pTargetAddress, pCompareAddress, uWordSize, {}, qwNanoseconds ? qwNanoseconds : AuOptional<AuUInt64>{}, false, nullptr);
return WaitOnAddressWide<EWaitMethod::eNotEqual>(pTargetAddress, pCompareAddress, uWordSize, {}, qwNanoseconds ? qwNanoseconds : AuOptional<AuUInt64>{}, false, nullptr);
}
return false;
}
AUKN_SYM bool WaitOnAddressUntilEqualSteady(const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds,
AuOptional<bool> optAlreadySpun)
WOAFASTPUB bool WaitOnAddressSpecialSteady(EWaitMethod eMethod,
const void *pTargetAddress,
const void *pCompareAddress,
AuUInt8 uWordSize,
AuUInt64 qwNanoseconds,
AuOptional<bool> optAlreadySpun)
{
// Avoid emulated path dynamic TLS fetch without TLS section
// or various security checks
// or other such bloated thunks
if (WaitBuffer::Compare(pCompareAddress, uWordSize, pTargetAddress))
if (!WaitBuffer::Compare(pTargetAddress, uWordSize, pCompareAddress, kMax64, eMethod))
{
return true;
}
@ -1168,17 +1409,18 @@ namespace Aurora::Threading
WaitState state;
state.uDownsizeMask = uMask;
state.compare = uMask ?
state.compare = uMask != kMax64 ?
WaitBuffer::From(pCompareAddress2, 4) :
WaitBuffer::From(pCompareAddress2, uWordSize);
state.uWordSize = uMask ? 4 : uWordSize;
state.uWordSize = uMask != kMax64 ? 4 : uWordSize;
state.pCompare2 = pCompareAddress;
state.eWaitMethod = eMethod;
bool bSpun {};
if (Primitives::ThrdCfg::gPreferWaitOnAddressAlwaysSpinNative &&
optAlreadySpun.value_or(false))
{
if (TryWaitOnAddressUntilEqual(pTargetAddress, pCompareAddress, uWordSize))
if (TryWaitOnAddressSpecial(eMethod, pTargetAddress, pCompareAddress, uWordSize))
{
return true;
}
@ -1188,13 +1430,13 @@ namespace Aurora::Threading
if (!qwNanoseconds)
{
RunOSWaitOnAddressEQNoTimedNoErrors(pWaitAddress, pCompareAddress2, state);
DO_OF_METHOD_TYPE(, RunOSWaitOnAddressEQNoTimedNoErrors, pWaitAddress, pCompareAddress2, state);
return true;
}
else
{
state.qwNanosecondsAbs = qwNanoseconds;
return RunOSWaitOnAddressEQTimedSteady(pWaitAddress, pCompareAddress2, state, bSpun);
DO_OF_METHOD_TYPE(return, RunOSWaitOnAddressEQTimedSteady, pWaitAddress, pCompareAddress2, state, bSpun);
}
}
else
@ -1202,13 +1444,12 @@ namespace Aurora::Threading
if (Primitives::ThrdCfg::gPreferWaitOnAddressAlwaysSpin &&
optAlreadySpun.value_or(false))
{
if (TryWaitOnAddressUntilEqual(pTargetAddress, pCompareAddress, uWordSize))
if (TryWaitOnAddressSpecial(eMethod, pTargetAddress, pCompareAddress, uWordSize))
{
return true;
}
}
return WaitOnAddressWide(pTargetAddress, pCompareAddress, uWordSize, {}, qwNanoseconds ? qwNanoseconds : AuOptional<AuUInt64>{}, false, pCompareAddress);
DO_OF_METHOD_TYPE(return, WaitOnAddressWide, pTargetAddress, pCompareAddress, uWordSize, {}, qwNanoseconds ? qwNanoseconds : AuOptional<AuUInt64> {}, false, pCompareAddress);
}
return false;

View File

@ -13,9 +13,18 @@
#include "Primitives/AuConditionVariable.Generic.hpp"
#include "Primitives/AuSemaphore.Generic.hpp"
#if defined(AURORA_COMPILER_MSVC)
#define WOAFAST __declspec(safebuffers) auline
#define WOAFASTPUB AUKN_SYM __declspec(safebuffers) auline
#else
#define WOAFAST auline
#define WOAFASTPUB AUKN_SYM
#endif
namespace Aurora::Threading
{
static const auto kDefaultWaitPerProcess = 128;
static const auto kMax64 = 0xFFFFFFFFFFFFFFFFull;
struct WaitState;
@ -24,12 +33,14 @@ namespace Aurora::Threading
char buffer[32];
AuUInt8 uSize;
static WaitBuffer From(const void *pBuf, AuUInt8 uSize);
static bool Compare(const void *pBuf, AuUInt8 uSize, WaitState &state);
static bool Compare(const void *pBuf, AuUInt8 uSize, const void *pBuf2);
WOAFAST static WaitBuffer From(const void *pBuf, AuUInt8 uSize);
bool Compare(const void *pBuf);
bool Compare(WaitState &state);
WOAFAST static bool Compare(const void *pHotAddress, AuUInt8 uSize, WaitState &state);
WOAFAST static bool Compare(const void *pHotAddress, AuUInt8 uSize, const void *pCompare, AuUInt64 uMask, EWaitMethod eMethod);
// returns false when valid
template <EWaitMethod eMethod, bool bFast = false>
WOAFAST static bool Compare2(const void *pHotAddress, AuUInt8 uSize, const void *pReference, AuUInt64 uMask = 0xFFFFFFFFFFFFFFFF);
};
struct WaitState
@ -37,9 +48,10 @@ namespace Aurora::Threading
WaitBuffer compare;
//AuOptionalEx<AuUInt64> qwNanoseconds;
AuOptionalEx<AuUInt64> qwNanosecondsAbs;
AuOptionalEx<AuUInt32> uDownsizeMask;
AuUInt64 uDownsizeMask { 0xFFFFFFFFFFFFFFFF };
AuUInt32 uWordSize {};
const void *pCompare2 {};
EWaitMethod eWaitMethod { EWaitMethod::eNotEqual };
};
struct WaitEntry
@ -80,12 +92,14 @@ namespace Aurora::Threading
const void *pAddress {};
AuUInt8 uSize {};
const void *pCompareAddress {};
EWaitMethod eWaitMethod { EWaitMethod::eNotEqual };
// bookkeeping (parent container)
volatile bool bAlive {}; // wait entry validity. must be rechecked for each spurious or expected wake, if the comparison doesn't break the yield loop.
// if false, and we're still yielding under pCompare == pAddress, we must reschedule with inverse order (so as to steal the next signal, as opposed to waiting last)
void Release();
template <EWaitMethod eMethod>
bool SleepOn(WaitState &state);
bool TrySignalAddress(const void *pAddress);
};
@ -101,7 +115,7 @@ namespace Aurora::Threading
AuUInt32 uAtomic {};
ProcessListWait waitList;
WaitEntry *WaitBufferFrom(const void *pAddress, AuUInt8 uSize, bool bScheduleFirst, const void *pAddressCompare);
WaitEntry *WaitBufferFrom(const void *pAddress, AuUInt8 uSize, bool bScheduleFirst, const void *pAddressCompare, EWaitMethod eWaitMethod);
template <typename T>
bool IterateWake(T callback);
@ -118,7 +132,7 @@ namespace Aurora::Threading
{
ProcessWaitNodeContainer list[kDefaultWaitPerProcess];
WaitEntry *WaitBufferFrom(const void *pAddress, AuUInt8 uSize, bool bScheduleFirst, const void *pAddressCompare);
WaitEntry *WaitBufferFrom(const void *pAddress, AuUInt8 uSize, bool bScheduleFirst, const void *pAddressCompare, EWaitMethod eWaitMethod);
template <typename T>
bool IterateWake(const void *pAddress, T callback);