[-] Remove 2 year old 0.0 WaitFor back-off implementation

This commit is contained in:
Reece Wilson 2023-09-12 18:21:36 +01:00
parent f08f4a476a
commit 8e54071d60
5 changed files with 141 additions and 380 deletions

View File

@ -11,26 +11,47 @@ namespace Aurora::Threading
{ {
using PollCallback_cb = AuFunction<bool()>; using PollCallback_cb = AuFunction<bool()>;
AUKN_SYM bool YieldPollNs(bool permitMultipleContextSwitches, AuUInt64 timeoutNs, PollCallback_cb cb); AUKN_SYM bool YieldPollNs(bool bPermitMultipleContextSwitches, AuUInt64 qwAbsTimeoutNs, PollCallback_cb cb);
AUKN_SYM bool YieldPoll(bool permitMultipleContextSwitches, AuUInt64 timeoutMs, PollCallback_cb cb);
/*! AUKN_SYM bool WaitForAbsNS(IWaitable *pWaitable, AuUInt64 qwAbsTimeout = 0);
Waits for a list of IWaitable objects. <br>
See: Mutex, CriticalSection, Semaphore, Event, Thread, Async, and others
*/
AUKN_SYM bool WaitFor(IWaitable *waitable, AuUInt64 timeout = 0);
static bool WaitFor(std::atomic<bool> &value, AuUInt64 timeout = 0) static const auto kWaitForFlagTimeoutIsNanoseconds = 1ul;
static const auto kWaitForFlagTimeoutIsAbsolute = 1ul << 1;
static const auto kWaitForFlagTimeoutIsOr = 1ul << 2;
/**
* Waits for a list of IWaitable objects to complete.
* See: Mutex, CriticalSection, Semaphore, Event, Thread, Async, and others
* On timeout, returns false
* On error, waitables are restored to their state at the point of the WaitFor call
*/
AUKN_SYM bool WaitFor(const AuList<IWaitable *> &waitables, AuUInt32 uFlags, AuUInt64 uTimeout = 0);
static inline bool WaitForShared(const AuList<AuSPtr<IWaitable>> &pWaitables, AuUInt32 uFlags, AuUInt64 uTimeout)
{ {
Waitables::BooleanWaitable waitable(value); AU_DEBUG_MEMCRUNCH;
return WaitFor(&waitable, timeout); AuList<IWaitable *> waitables;
waitables.reserve(pWaitables.size());
for (const auto &pIWaitable : pWaitables)
{
waitables.push_back(pIWaitable.get());
}
return WaitFor(waitables, uFlags, uTimeout);
} }
/*! /// legacy api (~3 years old, relative to 2023)
Waits on a list of IWaitable objects. <br> /// @deprecated
See: Mutex, CriticalSection, Semaphore, Event, Thread, Async, and others <br> static inline bool WaitFor(const AuList<IWaitable *> &waitables, AuUInt64 uTimeout)
On timeout, returns false <br> {
On error, waitables are restored to their state at the point of WaitFors call return WaitFor(waitables, 0, uTimeout);
*/ }
AUKN_SYM bool WaitFor(const AuList<IWaitable *> &waitables, AuUInt64 timeout = 0);
/// legacy api (~3 years old, relative to 2023)
/// Waits on a single waitable using a relative timeout expressed in milliseconds,
/// by converting it to an absolute steady-clock deadline for WaitForAbsNS.
///
/// A timeout of zero is forwarded as a deadline of zero instead of "now + 0".
/// Adding the current time to zero would produce a deadline that has already
/// been reached, which is not what callers of the legacy API passed zero for.
/// NOTE(review): this assumes WaitForAbsNS treats a zero deadline as "no timeout",
/// as its `qwAbsTimeout = 0` default and YieldPoll's zero pass-through suggest - confirm.
///
/// @param pWaitable   the object to wait on / lock
/// @param uTimeoutMS  relative timeout in milliseconds; zero is passed through as zero
/// @return whatever WaitForAbsNS returns for the computed deadline
/// @deprecated
static inline bool WaitFor(IWaitable *pWaitable, AuUInt64 uTimeoutMS)
{
    // Only build an absolute deadline when a relative timeout was actually requested.
    const AuUInt64 qwAbsTimeoutNS = uTimeoutMS
        ? AuMSToNS<AuUInt64>(uTimeoutMS) + Time::SteadyClockNS()
        : 0;

    return WaitForAbsNS(pWaitable, qwAbsTimeoutNS);
}
} }

View File

@ -170,7 +170,7 @@ namespace Aurora::Processes
static bool Wait2500OrUntilClose(HANDLE handle) static bool Wait2500OrUntilClose(HANDLE handle)
{ {
return Threading::YieldPoll(true, 2500, [=]() return Threading::YieldPollNs(true, AuTime::SteadyClockNS() + AuMSToNS<AuUInt64>(2500), [=]()
{ {
return !HasWin32ProcessExited(handle); return !HasWin32ProcessExited(handle);
}); });

View File

@ -7,377 +7,153 @@
***/ ***/
#include <Source/RuntimeInternal.hpp> #include <Source/RuntimeInternal.hpp>
#include "AuWaitFor.hpp" #include "AuWaitFor.hpp"
#include "Primitives/SMTYield.hpp"
#if defined(AURORA_IS_LINUX_DERIVED) #if defined(AURORA_IS_POSIX_DERIVED)
#include <sched.h> #include <sched.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <unistd.h>
#include <time.h>
#endif #endif
// Read the local header file for this file.
// The original idea was sane.
// The implementation, not so much...
// TODO: REWRITE!
namespace Aurora::Threading namespace Aurora::Threading
{ {
static void YieldToSharedCore(long spin) static void YieldToSharedCore(long uSpin)
{ {
int loops = (1 << spin); #if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
while (loops > 0)
auto loops = __rdtsc() + (1ull << uSpin);
while (loops > __rdtsc())
{ {
#if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)) _mm_pause(); _mm_pause(); _mm_pause(); _mm_pause();
_mm_pause(); _mm_pause(); _mm_pause(); _mm_pause(); _mm_pause();
#endif _mm_pause(); _mm_pause(); _mm_pause(); _mm_pause();
loops -= 1; _mm_pause(); _mm_pause(); _mm_pause(); _mm_pause();
} }
#else
auto uRemainingTicks = (1ull << uSpin);
while (uRemainingTicks > 0)
{
Primitives::SMPPause();
uRemainingTicks -= 1;
}
#endif
} }
void YieldToOtherThread() AUKN_SYM void ContextYield()
{ {
#if defined(AURORA_IS_MODERNNT_DERIVED) #if defined(AURORA_IS_MODERNNT_DERIVED)
SwitchToThread(); ::SwitchToThread();
#elif defined(AURORA_IS_LINUX_DERIVED) #elif defined(AURORA_IS_POSIX_DERIVED)
sched_yield(); ::sched_yield();
#else #else
YieldToSharedCore(12); YieldToSharedCore(12);
#endif #endif
} }
template<AuMach Flags> // forcefully optiMize by templating a constant argument AUKN_SYM bool YieldPollNs(bool bPermitMultipleContextSwitches, AuUInt64 qwAbsTimeoutNs, PollCallback_cb cb)
static inline void _FastSnooze(long &count, AuUInt64 &startTime, AuUInt64 maxStallNS, int &alpha, int &bravo, bool &forceSpin) //, bool yieldFaster , long maxStallMS = 20)
{ {
// TODO: rewrite me while (!Primitives::DoTryIf(cb))
AuUInt64 now = Time::SteadyClockNS();
// Begin least likely checks, we're getting on now
// Ironically we need to burn off some CPU cycles
AuUInt64 deltaNS = now - startTime;
#define SHOULD_SWITCH_ASAP(yieldDelayThresholdNs, roundTripNs)\
(static_cast<int>(Flags) & kYieldFlagsContextSwitchASAP)
// Validate we have at least one whole average of a context switch of overhead remaining
#define HAS_ENOUGH_TIME_FOR_TIMED_SLEEP(yieldDelayThresholdNs, roundTripNs)\
(maxStallNS >= (roundTripNs + deltaNS))
// The point of rewriting kernel-free userland thread components is to delegate everything to userland
// One key reason is single app performance. Should we not know how long to yield for, giving an undefined
// ...amount of time to other applications might be a bad thing. fuck. why cant we have rtos functionality :(
#define HAS_ENOUGH_TIME_FOR_INFINITE_SLEEP(yieldDelayThresholdNs, roundTripNs)\
((static_cast<int>(Flags)& kYieldFlagsContextSwitchForever) && (!maxStallNS))
// Perform a good faith guess at assuming we have enough overhead for a hard context switch
#define HAS_ENOUGH_TIME_OVERHEAD(yieldDelayThresholdNs, roundTripNs)\
(HAS_ENOUGH_TIME_FOR_INFINITE_SLEEP(yieldDelayThresholdNs, roundTripNs) || HAS_ENOUGH_TIME_FOR_TIMED_SLEEP(yieldDelayThresholdNs, roundTripNs))
// Validate enough time (lets say 1/3rd of the approximated time of a preemptive switch or sleep(0)) has passed
#define HAS_ENOUGH_TIME_PASSED(yieldDelayThresholdNs, roundTripNs)\
(deltaNS > yieldDelayThresholdNs)
#define SHOULD_CTXSWAP(yieldDelayThresholdNs, roundTripNs)\
if (SHOULD_SWITCH_ASAP(yieldDelayThresholdNs, roundTripNs) || (HAS_ENOUGH_TIME_PASSED(yieldDelayThresholdNs, roundTripNs) && HAS_ENOUGH_TIME_OVERHEAD(yieldDelayThresholdNs, roundTripNs)))
#if defined(AURORA_IS_LINUX_DERIVED)
SHOULD_CTXSWAP(kLinuxYieldTimeThresNano, kPredictedLinuxKernelTimeRTNano)
{ {
// we are not very nice :D if (Time::SteadyClockNS() >= qwAbsTimeoutNs)
setpriority(PRIO_PROCESS, 0, bravo);
static timespec fuck = { 0, kLinuxYieldTimeNano };
nanosleep(&fuck, &fuck);
setpriority(PRIO_PROCESS, 0, alpha);
forceSpin = true;
return;
}
#endif
#if defined(AURORA_PLATFORM_WIN32)
SHOULD_CTXSWAP(kPredictedNTOSSwitchTimeYDNS, kPredictedNTOSSwitchTimeRTNS)
{
// TODO:
::Sleep(1);
return;
}
#endif
// Always at least try to burn some cycles off in a spinlock-esque time waster
YieldToOtherThread();
}
template<AuMach Flags> // forcefully optiMize by templating a constant argument
static void FastSnooze(long &count, AuUInt64 &startTime, AuUInt64 maxStallMS) //, bool yieldFaster , long maxStallMS = 20)
{
#if defined(AURORA_IS_LINUX_DERIVED)
int alpha = getpriority(PRIO_PROCESS, 0);
int bravo = AuMin(15, AuMax(19, alpha + 5));
#else
int alpha, bravo = 0;
#endif
bool spin = false;
_FastSnooze<Flags>(count, startTime, maxStallMS, alpha, bravo, spin);
}
template
void FastSnooze<0>(long &count, AuUInt64 &startTime, AuUInt64 maxStallMS);
template
void FastSnooze<kYieldFlagsContextSwitchASAP>(long &count, AuUInt64 &startTime, AuUInt64 maxStallMS);
template
void FastSnooze<kYieldFlagsContextSwitchForever>(long &count, AuUInt64 &startTime, AuUInt64 maxStallMS);
template<bool permitMultipleContextSwitches>
static inline bool YieldPollTmpl(AuUInt64 &time, AuUInt64 timeoutMs, PollCallback_cb cb)
{
#if defined(AURORA_IS_LINUX_DERIVED)
int alpha = getpriority(PRIO_PROCESS, 0);
int bravo = AuMin(15, AuMax(19, alpha + 5));
#else
int alpha, bravo = 0;
#endif
bool spin = false;
long count = 0;
unsigned long long a = Time::SteadyClockNS();
do
{
if (permitMultipleContextSwitches)
{
_FastSnooze<kYieldFlagsContextSwitchForever>(count, time, timeoutMs, alpha, bravo, spin);
}
else
{
_FastSnooze<0>(count, time, timeoutMs, alpha, bravo, spin);
}
if (cb())
{
return true;
}
a = Time::SteadyClockNS();
} while ((!timeoutMs) || (timeoutMs > a));
return cb();
}
AUKN_SYM bool YieldPollNs(bool permitMultipleContextSwitches, AuUInt64 timeoutNs, PollCallback_cb cb)
{
AuUInt64 time = Time::SteadyClockNS();
if (cb())
{
return true;
}
if (timeoutNs)
{
// only relevant when there's no timeout, fastsnooze will do its own magic given the templates parameters
permitMultipleContextSwitches = false;
}
// do not trust the compiler to branch here with a mere Func<variable>(...)
// it's far more likely the branch will be handled in our yield loop
if (permitMultipleContextSwitches)
{
return YieldPollTmpl<true>(time, timeoutNs, cb);
}
else
{
return YieldPollTmpl<false>(time, timeoutNs, cb);
}
return false;
}
AUKN_SYM bool YieldPoll(bool permitMultipleContextSwitches, AuUInt64 timeoutMs, PollCallback_cb cb)
{
AuUInt64 time = Time::SteadyClockNS();
AuUInt64 timeoutNs = timeoutMs ? (time + (timeoutMs * 1000000)) : 0;
if (cb())
{
return true;
}
if (timeoutMs)
{
// only relevant when there's no timeout, fastsnooze will do its own magic given the templates parameters
permitMultipleContextSwitches = false;
}
// do not trust the compiler to branch here with a mere Func<variable>(...)
// it's far more likely the branch will be handled in our yield loop
if (permitMultipleContextSwitches)
{
return YieldPollTmpl<true>(time, timeoutNs, cb);
}
else
{
return YieldPollTmpl<false>(time, timeoutNs, cb);
}
return false;
}
static bool WaitLogicHandledByImplementor(bool &status, IWaitable *waitable, AuUInt64 timeout)
{
if (!waitable->HasLockImplementation())
{
return false;
}
status = waitable->LockMS(timeout);
return true;
}
static bool WaitLogicHandledByNTOS(bool &status, IWaitable *waitable, AuUInt64 timeout)
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
AuMach handle = 0;
if (!waitable->HasOSHandle(handle))
{
return false;
}
auto win32 = reinterpret_cast<HANDLE>(handle);
auto ret = WaitForSingleObject(win32, timeout ? timeout : INFINITE);
SysAssert(ret != WAIT_FAILED, "Internal Win32 Error {}", GetLastError());
if (ret == WAIT_TIMEOUT)
{
status = false;
}
else
{
status = true;
}
return true;
#else
return false;
#endif
}
AUKN_SYM bool WaitFor(IWaitable *waitable, AuUInt64 timeout)
{
bool status;
if (WaitLogicHandledByNTOS(status, waitable, timeout))
{
return status;
}
if (WaitLogicHandledByImplementor(status, waitable, timeout))
{
return status;
}
return YieldPoll(true, timeout, [=]()
{
return waitable->TryLock();
});
}
static bool CanWin32HandleAll(const AuList<IWaitable *> &waitables)
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
for (auto &waitable : waitables)
{
AuMach handle = 0;
if (!waitable->HasOSHandle(handle))
{ {
return false; return false;
} }
if (bPermitMultipleContextSwitches)
{
ContextYield();
}
} }
return true; return true;
#else
return false;
#endif
} }
static bool Win32HandleMultiple(const AuList<IWaitable *> &waitables, AuUInt64 timeoutMs) AUKN_SYM bool YieldPoll(bool permitMultipleContextSwitches, AuUInt64 qwTimeoutMs, PollCallback_cb cb)
{ {
#if defined(AURORA_IS_MODERNNT_DERIVED) return YieldPollNs(permitMultipleContextSwitches,
AuList<HANDLE> winWaitables; qwTimeoutMs ? Time::SteadyClockNS() + AuMSToNS<AuUInt64>(qwTimeoutMs) : 0,
cb);
}
winWaitables.resize(waitables.size()); AUKN_SYM bool WaitForAbsNS(IWaitable *pWaitable, AuUInt64 qwAbsTimeout)
{
std::transform(waitables.begin(), waitables.end(), winWaitables.begin(), [](IWaitable *waitable) -> HANDLE if (pWaitable->HasLockImplementation())
{ {
AuMach handle = 0; return pWaitable->LockAbsNS(qwAbsTimeout);
auto status = waitable->HasOSHandle(handle); }
SysAssert(status, "OS Handle was NULL");
return reinterpret_cast<HANDLE>(handle); return YieldPollNs(true, qwAbsTimeout, [=]()
{
return pWaitable->TryLock();
}); });
}
AUKN_SYM bool WaitFor(const AuList<IWaitable *> &waitables, AuUInt32 uFlags, AuUInt64 uTimeout)
{
AU_DEBUG_MEMCRUNCH;
AuUInt64 qwTimeoutAbs {};
AuList<bool> releasedObjects(waitables.size());
auto status = WaitForMultipleObjectsEx(winWaitables.size(), winWaitables.data(), TRUE, timeoutMs ? timeoutMs : INFINITE, true); if (uFlags & kWaitForFlagTimeoutIsNanoseconds)
SysAssert(status != WAIT_FAILED, "Internal Win32 Error {}", GetLastError());
if (status == WAIT_TIMEOUT)
{ {
return false; qwTimeoutAbs = uTimeout;
} }
else else
{ {
return true; qwTimeoutAbs = AuMSToNS<AuUInt64>(uTimeout);
}
#else
return false;
#endif
}
AUKN_SYM bool WaitFor(const AuList<IWaitable *> &waitables, AuUInt64 timeout)
{
if (CanWin32HandleAll(waitables))
{
return Win32HandleMultiple(waitables, timeout);
} }
// im worried about the complexity of using a vector here if (!(uFlags & kWaitForFlagTimeoutIsAbsolute))
// we would have to hit o(n) and memcpy in the best case scenario on each object release
// unordered maps are glorified hash tables
// maps are glorified binary trees
// maps should be fast enough
AuHashMap<int, bool> releasedObjects;
releasedObjects.reserve(waitables.size());
// pseudo reserve
for (AuMach i = 0; i < waitables.size(); i++)
{ {
releasedObjects[i] = false; qwTimeoutAbs += AuTime::SteadyClockNS();
} }
// yield for all auto bIsAnd = !(uFlags & kWaitForFlagTimeoutIsOr);
auto status = YieldPoll(true, timeout, [&]()
auto bStatus = YieldPollNs(true, qwTimeoutAbs, [&]()
{ {
for (AuMach i = 0; i < waitables.size(); i++) bool bStatus { !waitables.size() };
for (AU_ITERATE_N(i, waitables.size()))
{ {
if (!releasedObjects[i]) if (releasedObjects[i])
{ {
if (waitables[i]->TryLock()) continue;
{ }
releasedObjects[i] = true;
} bool bLocked {};
else
if (bIsAnd)
{
bLocked = WaitForAbsNS(waitables[i], qwTimeoutAbs);
}
else
{
bLocked = waitables[i]->TryLock();
}
if (bLocked)
{
releasedObjects[i] = true;
bStatus = true;
}
else
{
if (bIsAnd)
{ {
return false; return false;
} }
} }
} }
return true;
return bStatus;
}); });
// from the perspective of locks, should the be a timeout event, we need to go back and unlock them on timeout if (!bStatus)
if (!status)
{ {
for (AuMach i = 0; i < waitables.size(); i++) for (AU_ITERATE_N(i, waitables.size()))
{ {
if (releasedObjects[i]) if (releasedObjects[i])
{ {
@ -386,11 +162,6 @@ namespace Aurora::Threading
} }
} }
return status; return bStatus;
}
AUKN_SYM void ContextYield()
{
YieldToOtherThread();
} }
} }

View File

@ -9,40 +9,9 @@
namespace Aurora::Threading namespace Aurora::Threading
{ {
// the original idea: /// @deprecated
// It's not insane to expect slow linux kernels to run at 250 jiffies a second, so, 4ms static void YieldToOtherThread()
// It's also not insane to expect a complete context swap/rescheduled yield on windows to last 15ms {
// -> if sleep time greater than 15ms, yield to nt Sleep Aurora::Threading::ContextYield();
// -> if sleep time greater than 4ms, yield to linux kernel }
// -> if sleep time greater than 2ms (?), yield to SwitchToThread
// -> SPIIIIIN
static const AuUInt64 kPredictedLinuxKernelJiffies = 250; // some kernel builds go up to 1000
static const AuUInt64 kPredictedLinuxKernelTimeMilli = (1000 / kPredictedLinuxKernelJiffies);
static const AuUInt64 kPredictedLinuxKernelTimeMicro = kPredictedLinuxKernelTimeMilli * 1000;
static const AuUInt64 kPredictedLinuxKernelTimeNano = kPredictedLinuxKernelTimeMilli * 1000000;
static const AuUInt64 kLinuxYieldTimeNano = 1e+6 / 150; // completely arbitrary
static const AuUInt64 kLinuxYieldTimeThresNano = 1e+6 / 25; // completely arbitrary
static const AuUInt64 kPredictedLinuxKernelTimeRTNano = (kLinuxYieldTimeNano + kPredictedLinuxKernelTimeNano) * 3;
//static const AuUInt64 kPredictedNTOSSwitchTimeMS = 10;
//static const AuUInt64 kPredictedNTOSSwitchTimeYDMS = kPredictedNTOSSwitchTimeMS / 4;
//static const AuUInt64 kPredictedNTOSSwitchTimeRTMS = kPredictedNTOSSwitchTimeMS + kPredictedNTOSSwitchTimeMS;
static const AuUInt64 kPredictedNTOSSwitchTimeRTNS = 1000000;// kPredictedNTOSSwitchTimeRTMS* 1000000;
//static const AuUInt64 kPredictedNTOSSwitchTimeNS = 3* 1000000;
static const AuUInt64 kPredictedNTOSSwitchTimeYDNS = 1000000 / 4;// kPredictedNTOSSwitchTimeNS / 4;
static const AuMach kYieldFlagsNone = 0;
static const AuMach kYieldFlagsRemoved = 1;
static const AuMach kYieldFlagsContextSwitchASAP = 2;
static const AuMach kYieldFlagsContextSwitchForever = 4;
static const AuMach kYieldFlagsRegular = kYieldFlagsContextSwitchASAP | kYieldFlagsContextSwitchForever;
template<AuMach yieldFaster>
void FastSnooze(long &count, AuUInt64 &startTime, AuUInt64 maxStallMS);
bool YieldPoll(bool permitMultipleContextSwitches, AuUInt64 timeoutMs, Threading::PollCallback_cb cb);
void YieldToOtherThread();
} }

View File

@ -346,7 +346,7 @@ namespace Aurora::Threading::Threads
TeminateOSContext(true); TeminateOSContext(true);
while (true) while (true)
{ {
YieldToOtherThread(); ContextYield();
} }
} }
} }