[+] By-raw pointer WOA lists
(also they are now fairer) [+] Steps towards future proofing NT (not the future proofing itself)
This commit is contained in:
parent
50413f36e5
commit
1a8acbdde5
@ -97,11 +97,13 @@ namespace Aurora::Threading
|
||||
|
||||
void WaitEntry::Release()
|
||||
{
|
||||
#if 0
|
||||
if (this->bOverflow)
|
||||
{
|
||||
gProcessWaitables.Remove(this);
|
||||
this->bOverflow = false;
|
||||
}
|
||||
#endif
|
||||
|
||||
AuResetMember(this->uSize);
|
||||
AuResetMember(this->pAddress);
|
||||
@ -273,6 +275,7 @@ namespace Aurora::Threading
|
||||
|
||||
WaitEntry *ProcessWaitContainer::WaitBufferFrom(void *pAddress, AuUInt8 uSize)
|
||||
{
|
||||
#if defined(WOA_ENABLE_OLD_SHORT_LIST)
|
||||
for (AU_ITERATE_N(i, kDefaultWaitPerProcess))
|
||||
{
|
||||
if (this->entries[i].TryAcquire(pAddress, uSize))
|
||||
@ -280,13 +283,27 @@ namespace Aurora::Threading
|
||||
return &this->entries[i];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
auto pReturn = &tlsWaitEntry;
|
||||
pReturn->bReleaseOnWake = true;
|
||||
|
||||
pReturn->pAddress = pAddress;
|
||||
pReturn->uSize = uSize;
|
||||
pReturn->uAtomic = 0;
|
||||
|
||||
{
|
||||
Lock();
|
||||
this->overflow.push_back(pReturn);
|
||||
pReturn->bOverflow = true;
|
||||
if (auto pLoadFromMemory = this->waitList.pHead)
|
||||
{
|
||||
pReturn->pNext = pLoadFromMemory;
|
||||
pLoadFromMemory->pBefore = pReturn;
|
||||
}
|
||||
else
|
||||
{
|
||||
this->waitList.pTail = pReturn;
|
||||
}
|
||||
this->waitList.pHead = pReturn;
|
||||
Unlock();
|
||||
}
|
||||
|
||||
@ -296,6 +313,7 @@ namespace Aurora::Threading
|
||||
template <typename T>
|
||||
bool ProcessWaitContainer::IterateAll(T callback)
|
||||
{
|
||||
#if defined(WOA_ENABLE_OLD_SHORT_LIST)
|
||||
for (AU_ITERATE_N(i, kDefaultWaitPerProcess))
|
||||
{
|
||||
auto &entry = this->entries[i];
|
||||
@ -319,17 +337,25 @@ namespace Aurora::Threading
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
Lock();
|
||||
for (auto &overflow : this->overflow)
|
||||
{
|
||||
AU_LOCK_GUARD(overflow->mutex);
|
||||
if (!callback(*overflow.get()))
|
||||
Lock();
|
||||
auto pCurrentHead = this->waitList.pHead;
|
||||
while (pCurrentHead)
|
||||
{
|
||||
return false;
|
||||
AU_LOCK_GUARD(pCurrentHead->mutex);
|
||||
|
||||
if (!callback(pCurrentHead))
|
||||
{
|
||||
Unlock();
|
||||
return false;
|
||||
}
|
||||
|
||||
pCurrentHead = pCurrentHead->pNext;
|
||||
}
|
||||
Unlock();
|
||||
}
|
||||
Unlock();
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -339,6 +365,7 @@ namespace Aurora::Threading
|
||||
{
|
||||
bool bRetStatus { true };
|
||||
|
||||
#if defined(WOA_ENABLE_OLD_SHORT_LIST)
|
||||
for (AU_ITERATE_N(i, kDefaultWaitPerProcess))
|
||||
{
|
||||
auto &entry = this->entries[i];
|
||||
@ -360,21 +387,118 @@ namespace Aurora::Threading
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
Lock();
|
||||
|
||||
for (auto &overflow : this->overflow)
|
||||
{
|
||||
AU_LOCK_GUARD(overflow->mutex);
|
||||
if (!callback(*overflow))
|
||||
Lock();
|
||||
auto pCurrentHead = this->waitList.pHead;
|
||||
while (pCurrentHead)
|
||||
{
|
||||
bRetStatus = false;
|
||||
break;
|
||||
AU_LOCK_GUARD(pCurrentHead->mutex);
|
||||
|
||||
if (!callback(*pCurrentHead))
|
||||
{
|
||||
bRetStatus = false;
|
||||
break;
|
||||
}
|
||||
|
||||
pCurrentHead = pCurrentHead->pNext;
|
||||
}
|
||||
Unlock();
|
||||
}
|
||||
|
||||
|
||||
#if defined(WOA_ENABLE_OLD_SHORT_LIST)
|
||||
for (AU_ITERATE_N(i, kDefaultWaitPerProcess))
|
||||
{
|
||||
auto &entry = this->entries[i];
|
||||
{
|
||||
entry.uAtomic = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return bRetStatus;
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
bool ProcessWaitContainer::IterateWake(T callback)
|
||||
{
|
||||
bool bRetStatus { true };
|
||||
|
||||
Lock();
|
||||
{
|
||||
// FIFO
|
||||
auto pCurrentHead = this->waitList.pTail;
|
||||
decltype(pCurrentHead) pLast {};
|
||||
while (pCurrentHead)
|
||||
{
|
||||
AU_LOCK_GUARD(pCurrentHead->mutex);
|
||||
|
||||
auto [bCont, bRemove] = callback(*pCurrentHead);
|
||||
|
||||
if (bRemove)
|
||||
{
|
||||
if (pLast)
|
||||
{
|
||||
pLast->pNext = pCurrentHead->pNext;
|
||||
}
|
||||
else if (this->waitList.pHead == pCurrentHead)
|
||||
{
|
||||
this->waitList.pHead = pCurrentHead->pNext;
|
||||
}
|
||||
|
||||
if (pCurrentHead->pNext)
|
||||
{
|
||||
pCurrentHead->pNext->pBefore = pCurrentHead->pBefore;
|
||||
}
|
||||
|
||||
if (this->waitList.pTail == pCurrentHead)
|
||||
{
|
||||
this->waitList.pTail = pLast;
|
||||
}
|
||||
}
|
||||
|
||||
if (!bCont)
|
||||
{
|
||||
bRetStatus = false;
|
||||
break;
|
||||
}
|
||||
|
||||
pLast = pCurrentHead;
|
||||
pCurrentHead = pCurrentHead->pBefore;
|
||||
}
|
||||
}
|
||||
Unlock();
|
||||
|
||||
// meh - just so i can experiment with changes
|
||||
#if defined(WOA_ENABLE_OLD_SHORT_LIST)
|
||||
for (AU_ITERATE_N(i, kDefaultWaitPerProcess))
|
||||
{
|
||||
auto &entry = this->entries[i];
|
||||
{
|
||||
DoSpinLockOnVar(&entry.uAtomic);
|
||||
|
||||
if (entry.pAddress)
|
||||
{
|
||||
AU_LOCK_GUARD(entry.mutex);
|
||||
|
||||
auto [bCont, bRemove] = callback(*entry);
|
||||
|
||||
if (!bCont)
|
||||
{
|
||||
for (AU_ITERATE_N(z, i + 1))
|
||||
{
|
||||
this->entries[z].uAtomic = 0;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (AU_ITERATE_N(i, kDefaultWaitPerProcess))
|
||||
{
|
||||
auto &entry = this->entries[i];
|
||||
@ -382,6 +506,7 @@ namespace Aurora::Threading
|
||||
entry.uAtomic = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return bRetStatus;
|
||||
}
|
||||
@ -399,17 +524,37 @@ namespace Aurora::Threading
|
||||
void ProcessWaitContainer::Remove(WaitEntry *pParent)
|
||||
{
|
||||
Lock();
|
||||
for (auto itr = this->overflow.begin();
|
||||
itr != this->overflow.end();
|
||||
)
|
||||
{
|
||||
if ((*itr) == pParent)
|
||||
auto pCurrent = this->waitList.pHead;
|
||||
decltype(pCurrent) pLast {};
|
||||
while (pCurrent)
|
||||
{
|
||||
itr = this->overflow.erase(itr);
|
||||
}
|
||||
else
|
||||
{
|
||||
itr++;
|
||||
if (pCurrent == pParent)
|
||||
{
|
||||
if (pLast)
|
||||
{
|
||||
pLast->pNext = pCurrent->pNext;
|
||||
}
|
||||
else if (this->waitList.pHead == pCurrent)
|
||||
{
|
||||
this->waitList.pHead = pCurrent->pNext;
|
||||
}
|
||||
|
||||
if (pCurrent->pNext)
|
||||
{
|
||||
pCurrent->pNext->pBefore = pCurrent->pBefore;
|
||||
}
|
||||
|
||||
if (this->waitList.pTail == pParent)
|
||||
{
|
||||
this->waitList.pTail = pLast;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
pLast = pCurrent;
|
||||
pCurrent = pCurrent->pNext;
|
||||
}
|
||||
}
|
||||
Unlock();
|
||||
@ -451,7 +596,9 @@ namespace Aurora::Threading
|
||||
state.compare = WaitBuffer::From(pCompareAddress, uWordSize);
|
||||
state.qwNanoseconds = qwNanoseconds ? AuOptionalEx<AuUInt64> { qwNanoseconds.value() } : AuOptionalEx<AuUInt64> {}; // from default/zeroable optional, to boolean suffix
|
||||
auto bResult = pWaitEntry->SleepOn(state);
|
||||
#if defined(WOA_USE_DEFERRED_REL)
|
||||
pWaitEntry->Release();
|
||||
#endif
|
||||
return bResult;
|
||||
}
|
||||
|
||||
@ -747,19 +894,34 @@ namespace Aurora::Threading
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined(WOA_USE_DEFERRED_REL)
|
||||
(void)gProcessWaitables.IterateForceNoCreateDuringOp([&](WaitEntry &entry) -> bool
|
||||
#else
|
||||
(void)gProcessWaitables.IterateWake([&](WaitEntry &entry) -> AuPair<bool, bool>
|
||||
#endif
|
||||
{
|
||||
if (!uNMaximumThreads)
|
||||
{
|
||||
#if defined(WOA_USE_DEFERRED_REL)
|
||||
return false;
|
||||
#else
|
||||
return AuMakePair(false, false);
|
||||
#endif
|
||||
}
|
||||
|
||||
bool bWake {};
|
||||
if (entry.TryWakeNoLockNoReallyNoLock(pTargetAddress))
|
||||
{
|
||||
bWake = true;
|
||||
uNMaximumThreads--;
|
||||
}
|
||||
|
||||
return uNMaximumThreads != 0;
|
||||
bool bCont = uNMaximumThreads != 0;
|
||||
#if defined(WOA_USE_DEFERRED_REL)
|
||||
return bCont;
|
||||
#else
|
||||
return AuMakePair(bCont, bWake);
|
||||
#endif
|
||||
});
|
||||
}
|
||||
}
|
||||
@ -777,11 +939,31 @@ namespace Aurora::Threading
|
||||
}
|
||||
else
|
||||
{
|
||||
(void)gProcessWaitables.IterateForceNoCreateDuringOp([=](WaitEntry &entry) -> bool
|
||||
#if defined(WOA_USE_DEFERRED_REL)
|
||||
(void)gProcessWaitables.IterateForceNoCreateDuringOp([&](WaitEntry &entry) -> bool
|
||||
#else
|
||||
(void)gProcessWaitables.IterateWake([&](WaitEntry &entry) -> AuPair<bool, bool>
|
||||
#endif
|
||||
{
|
||||
#if defined(WOA_USE_DEFERRED_REL)
|
||||
entry.TryWakeNoLockNoReallyNoLock(pTargetAddress);
|
||||
return true;
|
||||
#else
|
||||
return AuMakePair(true, entry.TryWakeNoLockNoReallyNoLock(pTargetAddress));
|
||||
#endif
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Future (Reece): AuThread aware (safe force-terminate)
|
||||
// There are three ways we can go about this:
|
||||
// Shared pointers
|
||||
// Shared pointers such that we dont need to remove the raw pointer optimization
|
||||
// Callback on thread death
|
||||
//
|
||||
// 1st would increase overhead for a case i dont want to condone
|
||||
// 2nd would work but would probably require a callback on death
|
||||
// 3rd would work.
|
||||
//
|
||||
// to be addressed later
|
||||
}
|
@ -39,6 +39,9 @@ namespace Aurora::Threading
|
||||
WaitEntry();
|
||||
~WaitEntry();
|
||||
|
||||
WaitEntry *pNext {};
|
||||
WaitEntry *pBefore {};
|
||||
|
||||
// synch
|
||||
AuUInt32 uAtomic {}; // fastpath
|
||||
Primitives::ConditionMutexImpl mutex; // mutex ctor must come before var
|
||||
@ -50,6 +53,7 @@ namespace Aurora::Threading
|
||||
|
||||
// bookkeeping (parent container)
|
||||
bool bOverflow {};
|
||||
bool bReleaseOnWake {};
|
||||
|
||||
bool TryAcquire(const void *pAddress, AuUInt8 uSize);
|
||||
void Release();
|
||||
@ -60,11 +64,19 @@ namespace Aurora::Threading
|
||||
bool TryWakeNoLockNoReallyNoLock(const void *pAddress);
|
||||
};
|
||||
|
||||
// Head/tail pointers of a raw-pointer list of WaitEntry nodes; nodes are chained
// through WaitEntry::pNext and WaitEntry::pBefore.
// ProcessWaitContainer::WaitBufferFrom links new entries in at pHead, and
// ProcessWaitContainer::IterateWake starts at pTail and walks via pBefore.
// NOTE(review): entries appear to be referenced, not owned, by this list — confirm lifetime rules.
struct ProcessListWait
|
||||
{
|
||||
WaitEntry *pHead {}; // most recently inserted entry; null when the list is empty
|
||||
WaitEntry *pTail {}; // entry at the opposite end from pHead; set when inserting into an empty list
|
||||
};
|
||||
|
||||
struct ProcessWaitContainer
|
||||
{
|
||||
AuUInt32 uAtomic {};
|
||||
#if defined(WOA_ENABLE_OLD_SHORT_LIST)
|
||||
WaitEntry entries[kDefaultWaitPerProcess];
|
||||
AuList<WaitEntry *> overflow;
|
||||
#endif
|
||||
ProcessListWait waitList;
|
||||
|
||||
WaitEntry *WaitBufferFrom(void *pAddress, AuUInt8 uSize);
|
||||
|
||||
@ -74,6 +86,9 @@ namespace Aurora::Threading
|
||||
template <typename T>
|
||||
bool IterateForceNoCreateDuringOp(T callback);
|
||||
|
||||
template <typename T>
|
||||
bool IterateWake(T callback);
|
||||
|
||||
void Lock();
|
||||
|
||||
void Unlock();
|
||||
|
@ -73,8 +73,8 @@ namespace Aurora::Threading::Primitives
|
||||
while (true)
|
||||
{
|
||||
auto uNow = this->wlist;
|
||||
auto waiting = uNow >> 2u;
|
||||
auto uNext = ((waiting + 1) << 2u) | (!bool(waiting)) | (uNow & 1);
|
||||
auto waiting = uNow >> kShiftCountByBits;
|
||||
auto uNext = ((waiting + 1) << kShiftCountByBits) | (!bool(waiting)) | (uNow & 1);
|
||||
|
||||
if (AuAtomicCompareExchange(&this->wlist, uNext, uNow) == uNow)
|
||||
{
|
||||
@ -104,7 +104,7 @@ namespace Aurora::Threading::Primitives
|
||||
#endif
|
||||
{
|
||||
auto uNow = this->wlist;
|
||||
auto uOld = (uNow >> 2u);
|
||||
auto uOld = (uNow >> kShiftCountByBits);
|
||||
|
||||
if (uOld == 0)
|
||||
{
|
||||
@ -125,7 +125,7 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
// go for an atomic decrement while racing against ::Signal and ::Broadcast
|
||||
auto waiting = uOld - 1u;
|
||||
auto uNext = waiting << 2u;
|
||||
auto uNext = waiting << kShiftCountByBits;
|
||||
|
||||
if (AuAtomicCompareExchange(&this->wlist, uNext, uNow) == uNow)
|
||||
{
|
||||
@ -158,8 +158,8 @@ namespace Aurora::Threading::Primitives
|
||||
while (true)
|
||||
{
|
||||
auto uNow = this->wlist;
|
||||
auto waiting = uNow >> 2u;
|
||||
auto uNext = ((waiting + 1) << 2u) | (!bool(waiting)) | (uNow & 1);
|
||||
auto waiting = uNow >> kShiftCountByBits;
|
||||
auto uNext = ((waiting + 1) << kShiftCountByBits) | (!bool(waiting)) | (uNow & 1);
|
||||
|
||||
if (AuAtomicCompareExchange(&this->wlist, uNext, uNow) == uNow)
|
||||
{
|
||||
@ -239,9 +239,12 @@ namespace Aurora::Threading::Primitives
|
||||
return false;
|
||||
}
|
||||
|
||||
if (uSignalNext == 0)
|
||||
if constexpr (kBoolRequiredLateSet)
|
||||
{
|
||||
InterlockedOr((volatile LONG*)&this->wlist, 1);
|
||||
if (uSignalNext == 0)
|
||||
{
|
||||
InterlockedOr((volatile LONG *)&this->wlist, 1);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -254,7 +257,7 @@ namespace Aurora::Threading::Primitives
|
||||
#if !defined(AURORA_FORCE_SRW_LOCKS)
|
||||
auto original = this->wlist;
|
||||
auto expected = original;
|
||||
expected = expected >> 2;
|
||||
expected = expected >> kShiftCountByBits;
|
||||
|
||||
if (expected)
|
||||
{
|
||||
@ -262,14 +265,14 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
while (expected)
|
||||
{
|
||||
if (AuAtomicCompareExchange(&this->wlist, ((expected - 1) << 2) /*intentional clear*/, original) == original)
|
||||
if (AuAtomicCompareExchange(&this->wlist, ((expected - 1) << kShiftCountByBits) /*intentional clear*/, original) == original)
|
||||
{
|
||||
pNtReleaseKeyedEvent(gKeyedEventHandle, &this->wlist, FALSE, nullptr);
|
||||
return;
|
||||
}
|
||||
|
||||
original = this->wlist;
|
||||
expected = original >> 2;
|
||||
expected = original >> kShiftCountByBits;
|
||||
}
|
||||
}
|
||||
#else
|
||||
@ -282,7 +285,7 @@ namespace Aurora::Threading::Primitives
|
||||
#if !defined(AURORA_FORCE_SRW_LOCKS)
|
||||
auto original = this->wlist;
|
||||
auto expected = original;
|
||||
expected = expected >> 2;
|
||||
expected = expected >> kShiftCountByBits;
|
||||
|
||||
auto uBroadcastIterations = expected;
|
||||
|
||||
@ -293,7 +296,7 @@ namespace Aurora::Threading::Primitives
|
||||
while (expected && uBroadcastIterations)
|
||||
{
|
||||
bool bBreak {};
|
||||
if (AuAtomicCompareExchange(&this->wlist, ((expected - 1) << 2) /*intentional clear*/, original) == original)
|
||||
if (AuAtomicCompareExchange(&this->wlist, ((expected - 1) << kShiftCountByBits) /*intentional clear*/, original) == original)
|
||||
{
|
||||
pNtReleaseKeyedEvent(gKeyedEventHandle, &this->wlist, FALSE, nullptr);
|
||||
|
||||
@ -302,7 +305,7 @@ namespace Aurora::Threading::Primitives
|
||||
}
|
||||
|
||||
original = this->wlist;
|
||||
expected = original >> 2;
|
||||
expected = original >> kShiftCountByBits;
|
||||
|
||||
if (bBreak)
|
||||
{
|
||||
|
@ -34,5 +34,37 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
std::shared_ptr<Win32ConditionMutex> mutex_;
|
||||
};
|
||||
|
||||
static const auto kBoolRequiredLateSet = true;
|
||||
|
||||
// Future (Reece): I got future plans
|
||||
static const auto kShiftCountByBits = 8u;
|
||||
// ...otherwise
|
||||
// assume undefined behaviour past:
|
||||
// * bit zero is used for atomic bit test and yield loops
|
||||
// ( keyed events are an optimization mechanism for Windows XPs spinloop i had accidentally recreated in xenus. )
|
||||
// ( originally, nt yielding sucked, with the most barebones spinlock being a dumb, hypervisor-unaware, smt-aware spinner. )
|
||||
// ( keyed events would then go in these spinners to serve as an early futex as early back as the year 2000 (?). )
|
||||
// ( that does, in fact, mean the free-toddlers crying about how 'windows stole muh kernels totally originally idea' is entirely wrong at each sub-point. )
|
||||
// ( though, keyed didn't see much use until Windows Vistas synch primitives were built on top of them. )
|
||||
// ( infamously missing 100ths scale nanosecond timeouts and having an inability to lock with a timeout. )
|
||||
// ( raymond chen once claimed they didnt make it to xp because they werent fast enough )
|
||||
// ( raymond chen once claimed a "con" of keyedevents were that they were linear )
|
||||
// ( problem is, as far as i can tell, they didnt really change. whats worse, WakeOnAddress (windows 8+)
|
||||
// ( ...inherits the issue of not having relative/abs nanosecond scale timeouts AND the issue of the primitives sucking. )
|
||||
// ( WakeOnAddress is nothing more than keyed events 2.0 - but with userland list keeping. )
|
||||
// ( scratch the concept of how i implement WakeOnAddress with lists, how older nts waited with lists under lock, )
|
||||
// ( they use hashmaps with "le meme lockless" interactions which are surely less expensive than reusing tls allocations ?! )
|
||||
// ( whether or not its even faster is still up for debate. its just easier to use. )
|
||||
// * bit one might be used under some niche versions of windows
|
||||
// (hearsay paranoia)
|
||||
// i actually have zero reason to believe windows ever implemented lock-awareness into the kernel
|
||||
// i think it might be fine to skip the whole bit zero thing, but still, im going to say keep the min=2
|
||||
// worst case scenario, we end up using these bits.
|
||||
// ....
|
||||
// =8 is future proof
|
||||
// =2 is recommended
|
||||
// =0 would require a bit of a rewrite. i think this is how other people use keyed events nowadays
|
||||
|
||||
}
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user