Jamie Reece Wilson
035d822ec1
In all other cases, the memory is either thread-local/write-local or followed up by an indirect acquire/release of the processor's pipeline and L1 cache by virtue of the container's dumb spinlock ::Lock / ::Unlock (...release, ...barrier). Clang no longer supports /volatile:ms, so we can't rely on that, and assuming MSVC-like or x86 semantics isn't good enough. volatile is a deliberate choice here: it is only used to restrain over-optimization of de facto weakly ordered accesses between explicit lockless semaphore yields.
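As a minimal sketch of the access pattern described above (illustrative names only, not code from this commit): the waiter re-reads a shared flag through a volatile-qualified member between yields so the compiler cannot hoist or cache the load across iterations, while any cross-thread ordering is still supplied by the surrounding container lock's acquire/release.

    // Hypothetical sketch, not part of AuWakeOnAddress.hpp.
    #include <cstdint>

    struct HypotheticalEntry
    {
        volatile std::uint8_t bAlive { 1 }; // re-checked between yields
    };

    // Spin/yield until the entry is torn down. The volatile load is re-issued on
    // every iteration; it restrains over-optimization only and does not by itself
    // order the access against other threads.
    inline void YieldUntilDead(HypotheticalEntry &entry, void (*pfnYield)())
    {
        while (entry.bAlive)
        {
            pfnYield(); // e.g. a lockless semaphore yield or OS pause
        }
    }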
146 lines
5.0 KiB
C++
/***
    Copyright (C) 2023 J Reece Wilson (a/k/a "Reece"). All rights reserved.

    File: AuWakeOnAddress.hpp
    Date: 2023-3-10
    Author: Reece
***/
#pragma once

#include "Primitives/AuWoASemaphore.hpp"

#include "Primitives/AuConditionMutex.Generic.hpp"
#include "Primitives/AuConditionVariable.Generic.hpp"
#include "Primitives/AuSemaphore.Generic.hpp"

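// NB: __declspec(safebuffers) omits MSVC's /GS buffer security checks for these
// hot-path helpers; auline and AUKN_SYM are presumed to be the library's
// force-inline and symbol export/import macros respectively (assumption).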
#if defined(AURORA_COMPILER_MSVC)
    #define WOAFAST __declspec(safebuffers) auline
    #define WOAFASTPUB AUKN_SYM __declspec(safebuffers) auline
#else
    #define WOAFAST auline
    #define WOAFASTPUB AUKN_SYM
#endif

namespace Aurora::Threading
{
    static const auto kDefaultWaitPerProcess = 128;
    static const auto kMax64 = 0xFFFFFFFFFFFFFFFFull;
    static const auto kPlatformFutexNoForcedAlignedU32 = AuBuild::kIsNTDerived;

    struct WaitState;

    struct WaitBuffer
    {
        char buffer[32];
        AuUInt8 uSize;

        WOAFAST static WaitBuffer From(const void *pBuf, AuUInt8 uSize);

        WOAFAST static bool Compare(const void *pHotAddress, AuUInt8 uSize, WaitState &state);
        WOAFAST static bool Compare(const void *pHotAddress, AuUInt8 uSize, const void *pCompare, AuUInt64 uMask, EWaitMethod eMethod);

        // returns false when valid
        template <EWaitMethod eMethod, bool bFast = false>
        WOAFAST static bool Compare2(const void *pHotAddress, AuUInt8 uSize, const void *pReference, AuUInt64 uMask = 0xFFFFFFFFFFFFFFFF);

        template <EWaitMethod eMethod, bool bFast = false>
        WOAFAST static bool Compare2(const volatile void *pHotAddress, AuUInt8 uSize, const void *pReference, AuUInt64 uMask = 0xFFFFFFFFFFFFFFFF);
    };

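    // For illustration only (hypothetical, not part of this header): for a 4-byte
    // word, the masked comparison Compare2 is presumed to perform could reduce to
    // something along the lines of:
    //
    //     auto uHot = *reinterpret_cast<const volatile AuUInt32 *>(pHotAddress);
    //     auto uRef = *reinterpret_cast<const AuUInt32 *>(pReference);
    //     bool bEqual = ((uHot ^ uRef) & static_cast<AuUInt32>(uMask)) == 0;
    //
    // with the volatile load keeping the hot word read inside the caller's yield loop.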
    struct WaitState
    {
        WaitBuffer compare;
        //AuOptionalEx<AuUInt64> qwNanoseconds;
        AuOptionalEx<AuUInt64> qwNanosecondsAbs;
        AuUInt64 uDownsizeMask { 0xFFFFFFFFFFFFFFFF };
        AuUInt32 uWordSize {};
        const void *pCompare2 {};
        EWaitMethod eWaitMethod { EWaitMethod::eNotEqual };
    };

    struct WaitEntry
    {
        WaitEntry();
        ~WaitEntry();

        WaitEntry * volatile pNext {};
        WaitEntry * volatile pBefore {};

        // synch
        #if defined(WOA_SEMAPHORE_MODE)

        #if !defined(WOA_SEMAPHORE_SEMAPHORE)
        Primitives::Semaphore semaphore;
        #else
        // Recommended for XNU targets:
        WOA_SEMAPHORE_SEMAPHORE semaphore;
        #endif

        #else
        // Recommended (we can better filter spurious wakes for the cost of a barrier on signal):
        // !!! we also prefer to hold the container's mutex while we signal each thread individually !!!
        // !!! for the sake of optimizing for Windows XP - 7, it's far nicer to run the entire signal and wait operations under a container lock than it is to buffer shared pointers or externally managed memory out of the lock scope !!!
        // !!! also note: container spinlocks =/= WaitEntry::mutex !!!

        #if !defined(WOA_CONDVAR_MUTEX)
        Primitives::ConditionMutexInternal mutex;       // mutex ctor must come before var
        Primitives::ConditionVariableInternal variable; // ...and something all 2007+ micro and monolithic kernels should have; an event or semaphore primitive on which we can form a crude condvar
        #else
        WOA_CONDVAR_MUTEX mutex;
        WOA_CONDVAR_VARIABLE variable;
        #endif

        #endif

        // state
        const void *pAddress {};
        AuUInt8 uSize {};
        const void *pCompareAddress {};
        EWaitMethod eWaitMethod { EWaitMethod::eNotEqual };

        // bookkeeping (parent container)
        volatile AuUInt8 bAlive {}; // wait entry validity; must be rechecked on each spurious or expected wake if the comparison doesn't break the yield loop.
                                    // if false while we're still yielding under pCompare == pAddress, we must reschedule with inverse order (so as to steal the next signal, as opposed to waiting last)
        void Release();

        template <EWaitMethod eMethod>
        bool SleepOn(WaitState &state);

        bool TrySignalAddress(const void *pAddress);
    };

    struct ProcessListWait
    {
        WaitEntry *pHead {};
        WaitEntry *pTail {};
    };

    struct ProcessWaitNodeContainer
    {
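        // NB (assumption, not from the original source): uAtomic presumably backs this
        // container's "dumb spinlock" referenced in the commit message; see Lock() / Unlock() below.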
        AuUInt32 uAtomic {};
        ProcessListWait waitList;

        WaitEntry *WaitBufferFrom(const void *pAddress, AuUInt8 uSize, bool bScheduleFirst, const void *pAddressCompare, EWaitMethod eWaitMethod);

        template <typename T>
        bool IterateWake(T callback);

        void RemoveSelf(WaitEntry *pSelf);
        void RemoveEntry(WaitEntry *pSelf, bool bAllUnderLock);

        void Lock();

        void Unlock();
    };

    struct ProcessWaitContainer
    {
        ProcessWaitNodeContainer list[kDefaultWaitPerProcess];

        WaitEntry *WaitBufferFrom(const void *pAddress, AuUInt8 uSize, bool bScheduleFirst, const void *pAddressCompare, EWaitMethod eWaitMethod);

        template <typename T>
        bool IterateWake(const void *pAddress, T callback);

        void RemoveSelf(const void *pAddress, WaitEntry *pSelf);
    };
}
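Not part of the header above, but a rough usage sketch of the wake-on-address pattern these containers back (HypoWaitOnAddress / HypoWakeOnAddress are stand-ins for whatever public entry points sit on top; names and signatures are assumptions): the waiter snapshots the word, asks the subsystem to sleep while the word still equals the snapshot, and re-checks after every wake to filter spurious wakeups.

    // Hypothetical sketch; not the library's API.
    #include <atomic>
    #include <cstdint>

    // Stand-ins for the real wait/wake entry points built on the structures above.
    void HypoWaitOnAddress(const volatile void *pAddress, const void *pCompare, std::uint8_t uSize);
    void HypoWakeOnAddress(const void *pAddress);

    std::atomic<std::uint32_t> gState { 0 };

    void Consumer()
    {
        std::uint32_t uSnapshot = 0;
        // eNotEqual-style wait: sleep while *pAddress still equals the snapshot,
        // returning once the value has changed; re-check on every wake.
        while (gState.load(std::memory_order_acquire) == uSnapshot)
        {
            HypoWaitOnAddress(&gState, &uSnapshot, sizeof(uSnapshot));
        }
    }

    void Producer()
    {
        gState.store(1, std::memory_order_release);
        HypoWakeOnAddress(&gState); // wake any thread parked on &gState
    }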