From 8e54071d60dd1a3bb85e2ca52747ddd8ad3755e9 Mon Sep 17 00:00:00 2001 From: Jamie Reece Wilson Date: Tue, 12 Sep 2023 18:21:36 +0100 Subject: [PATCH] [-] Remove 2 year old 0.0 WaitFor back-off implementation --- Include/Aurora/Threading/WaitFor.hpp | 55 +++- Source/Processes/AuProcess.Win32.cpp | 2 +- Source/Threading/AuWaitFor.cpp | 421 ++++++------------------ Source/Threading/AuWaitFor.hpp | 41 +-- Source/Threading/Threads/AuOSThread.cpp | 2 +- 5 files changed, 141 insertions(+), 380 deletions(-) diff --git a/Include/Aurora/Threading/WaitFor.hpp b/Include/Aurora/Threading/WaitFor.hpp index 5b3d68a6..74c97320 100644 --- a/Include/Aurora/Threading/WaitFor.hpp +++ b/Include/Aurora/Threading/WaitFor.hpp @@ -11,26 +11,47 @@ namespace Aurora::Threading { using PollCallback_cb = AuFunction; - AUKN_SYM bool YieldPollNs(bool permitMultipleContextSwitches, AuUInt64 timeoutNs, PollCallback_cb cb); - AUKN_SYM bool YieldPoll(bool permitMultipleContextSwitches, AuUInt64 timeoutMs, PollCallback_cb cb); + AUKN_SYM bool YieldPollNs(bool bPermitMultipleContextSwitches, AuUInt64 qwAbsTimeoutNs, PollCallback_cb cb); - /*! - Waits for a list of IWaitable objects.
- See: Mutex, CriticalSection, Semaphore, Event, Thread, Async, and others - */ - AUKN_SYM bool WaitFor(IWaitable *waitable, AuUInt64 timeout = 0); + AUKN_SYM bool WaitForAbsNS(IWaitable *pWaitable, AuUInt64 qwAbsTimeout = 0); - static bool WaitFor(std::atomic &value, AuUInt64 timeout = 0) + static const auto kWaitForFlagTimeoutIsNanoseconds = 1ul; + static const auto kWaitForFlagTimeoutIsAbsolute = 1ul << 1; + static const auto kWaitForFlagTimeoutIsOr = 1ul << 2; + + /** + * Waits for a list of IWaitable objects to complete. + * See: Mutex, CriticalSection, Semaphore, Event, Thread, Async, and others + * On timeout, returns false + * On error, waitables are restored to their state at the point of WaitFors + */ + AUKN_SYM bool WaitFor(const AuList &waitables, AuUInt32 uFlags, AuUInt64 uTimeout = 0); + + static inline bool WaitForShared(const AuList> &pWaitables, AuUInt32 uFlags, AuUInt64 uTimeout) { - Waitables::BooleanWaitable waitable(value); - return WaitFor(&waitable, timeout); + AU_DEBUG_MEMCRUNCH; + AuList waitables; + + waitables.reserve(pWaitables.size()); + for (const auto &pIWaitable : pWaitables) + { + waitables.push_back(pIWaitable.get()); + } + + return WaitFor(waitables, uFlags, uTimeout); } - /*! - Waits on a list of IWaitable objects.
- See: Mutex, CriticalSection, Semaphore, Event, Thread, Async, and others
- On timeout, returns false
- On error, waitables are restored to their state at the point of WaitFors call - */ - AUKN_SYM bool WaitFor(const AuList &waitables, AuUInt64 timeout = 0); + /// legacy api (~3 years old, relative to 2023) + /// @deprecated + static inline bool WaitFor(const AuList &waitables, AuUInt64 uTimeout) + { + return WaitFor(waitables, 0, uTimeout); + } + + /// legacy api (~3 years old, relative to 2023) + /// @deprecated + static inline bool WaitFor(IWaitable *pWaitable, AuUInt64 uTimeoutMS) + { + return WaitForAbsNS(pWaitable, AuMSToNS(uTimeoutMS) + Time::SteadyClockNS()); + } } \ No newline at end of file diff --git a/Source/Processes/AuProcess.Win32.cpp b/Source/Processes/AuProcess.Win32.cpp index 8086dde2..f50695dd 100644 --- a/Source/Processes/AuProcess.Win32.cpp +++ b/Source/Processes/AuProcess.Win32.cpp @@ -170,7 +170,7 @@ namespace Aurora::Processes static bool Wait2500OrUntilClose(HANDLE handle) { - return Threading::YieldPoll(true, 2500, [=]() + return Threading::YieldPollNs(true, AuTime::SteadyClockNS() + AuMSToNS(2500), [=]() { return !HasWin32ProcessExited(handle); }); diff --git a/Source/Threading/AuWaitFor.cpp b/Source/Threading/AuWaitFor.cpp index 2ed66b03..9792551f 100644 --- a/Source/Threading/AuWaitFor.cpp +++ b/Source/Threading/AuWaitFor.cpp @@ -7,377 +7,153 @@ ***/ #include #include "AuWaitFor.hpp" +#include "Primitives/SMTYield.hpp" -#if defined(AURORA_IS_LINUX_DERIVED) +#if defined(AURORA_IS_POSIX_DERIVED) #include - #include - #include - #include - #include #endif -// Read the local header file for this file. -// The original idea was sane. -// The implemention, not so much... - -// TODO: REWRITE! - namespace Aurora::Threading { - static void YieldToSharedCore(long spin) + static void YieldToSharedCore(long uSpin) { - int loops = (1 << spin); - while (loops > 0) + #if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)) + + auto loops = __rdtsc() + (1ull << uSpin); + while (loops > __rdtsc()) { - #if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)) - _mm_pause(); - #endif - loops -= 1; + _mm_pause(); _mm_pause(); _mm_pause(); _mm_pause(); + _mm_pause(); _mm_pause(); _mm_pause(); _mm_pause(); + _mm_pause(); _mm_pause(); _mm_pause(); _mm_pause(); + _mm_pause(); _mm_pause(); _mm_pause(); _mm_pause(); } + + #else + + auto uRemainingTicks = (1ull << uSpin); + while (uRemainingTicks > 0) + { + Primitives::SMPPause(); + uRemainingTicks -= 1; + } + + #endif } - void YieldToOtherThread() + AUKN_SYM void ContextYield() { #if defined(AURORA_IS_MODERNNT_DERIVED) - SwitchToThread(); - #elif defined(AURORA_IS_LINUX_DERIVED) - sched_yield(); + ::SwitchToThread(); + #elif defined(AURORA_IS_POSIX_DERIVED) + ::sched_yield(); #else YieldToSharedCore(12); #endif } - template // forcefully optiMize by templating a constant argument - static inline void _FastSnooze(long &count, AuUInt64 &startTime, AuUInt64 maxStallNS, int &alpha, int &bravo, bool &forceSpin) //, bool yieldFaster , long maxStallMS = 20) + AUKN_SYM bool YieldPollNs(bool bPermitMultipleContextSwitches, AuUInt64 qwAbsTimeoutNs, PollCallback_cb cb) { - // TODO: rewrite me - AuUInt64 now = Time::SteadyClockNS(); - - // Begin least likely checks, we're getting on now - // Ironically we need to burn off some CPU cycles - AuUInt64 deltaNS = now - startTime; - - - #define SHOULD_SWITCH_ASAP(yieldDelayThresholdNs, roundTripNs)\ - (static_cast(Flags) & kYieldFlagsContextSwitchASAP) - - // Validate we have at least one whole average of a context switch of overhead remaining - #define HAS_ENOUGH_TIME_FOR_TIMED_SLEEP(yieldDelayThresholdNs, roundTripNs)\ - (maxStallNS >= (roundTripNs + deltaNS)) - - // The point of rewriting kernel-free userland thread components is to delegate everything to userland - // One key reason is single app performance. We should we not know how long to yield for, giving an undefined - // ...amount of time to other applications might be a bad thing. fuck. why cant we have rtos functionality :( - #define HAS_ENOUGH_TIME_FOR_INFINITE_SLEEP(yieldDelayThresholdNs, roundTripNs)\ - ((static_cast(Flags)& kYieldFlagsContextSwitchForever) && (!maxStallNS)) - - // Perform a good faith guess at assuming we have enough overhead for a hard context switch - #define HAS_ENOUGH_TIME_OVERHEAD(yieldDelayThresholdNs, roundTripNs)\ - (HAS_ENOUGH_TIME_FOR_INFINITE_SLEEP(yieldDelayThresholdNs, roundTripNs) || HAS_ENOUGH_TIME_FOR_TIMED_SLEEP(yieldDelayThresholdNs, roundTripNs)) - - // Validate enough time (lets say 1/3rd of the approximated time of a preemptive switch or sleep(0)) has passed - #define HAS_ENOUGH_TIME_PASSED(yieldDelayThresholdNs, roundTripNs)\ - (deltaNS > yieldDelayThresholdNs) - - #define SHOULD_CTXSWAP(yieldDelayThresholdNs, roundTripNs)\ - if (SHOULD_SWITCH_ASAP(yieldDelayThresholdNs, roundTripNs) || (HAS_ENOUGH_TIME_PASSED(yieldDelayThresholdNs, roundTripNs) && HAS_ENOUGH_TIME_OVERHEAD(yieldDelayThresholdNs, roundTripNs))) - - #if defined(AURORA_IS_LINUX_DERIVED) - SHOULD_CTXSWAP(kLinuxYieldTimeThresNano, kPredictedLinuxKernelTimeRTNano) + while (!Primitives::DoTryIf(cb)) { - // we are not very nice :D - setpriority(PRIO_PROCESS, 0, bravo); - static timespec fuck = { 0, kLinuxYieldTimeNano }; - nanosleep(&fuck, &fuck); - setpriority(PRIO_PROCESS, 0, alpha); - forceSpin = true; - return; - } - #endif - - #if defined(AURORA_PLATFORM_WIN32) - SHOULD_CTXSWAP(kPredictedNTOSSwitchTimeYDNS, kPredictedNTOSSwitchTimeRTNS) - { - // TODO: - ::Sleep(1); - return; - } - #endif - - - // Always at least try to burn some cycles off in a spinlock-esc time waster - YieldToOtherThread(); - } - - template // forcefully optiMize by templating a constant argument - static void FastSnooze(long &count, AuUInt64 &startTime, AuUInt64 maxStallMS) //, bool yieldFaster , long maxStallMS = 20) - { - #if defined(AURORA_IS_LINUX_DERIVED) - int alpha = getpriority(PRIO_PROCESS, 0); - int bravo = AuMin(15, AuMax(19, alpha + 5)); - #else - int alpha, bravo = 0; - #endif - bool spin = false; - _FastSnooze(count, startTime, maxStallMS, alpha, bravo, spin); - } - - template - void FastSnooze<0>(long &count, AuUInt64 &startTime, AuUInt64 maxStallMS); - template - void FastSnooze(long &count, AuUInt64 &startTime, AuUInt64 maxStallMS); - template - void FastSnooze(long &count, AuUInt64 &startTime, AuUInt64 maxStallMS); - - template - static inline bool YieldPollTmpl(AuUInt64 &time, AuUInt64 timeoutMs, PollCallback_cb cb) - { - #if defined(AURORA_IS_LINUX_DERIVED) - int alpha = getpriority(PRIO_PROCESS, 0); - int bravo = AuMin(15, AuMax(19, alpha + 5)); - #else - int alpha, bravo = 0; - #endif - bool spin = false; - - long count = 0; - - unsigned long long a = Time::SteadyClockNS(); - do - { - if (permitMultipleContextSwitches) - { - _FastSnooze(count, time, timeoutMs, alpha, bravo, spin); - } - else - { - _FastSnooze<0>(count, time, timeoutMs, alpha, bravo, spin); - } - - if (cb()) - { - return true; - } - a = Time::SteadyClockNS(); - - } while ((!timeoutMs) || (timeoutMs > a)); - - return cb(); - } - - AUKN_SYM bool YieldPollNs(bool permitMultipleContextSwitches, AuUInt64 timeoutNs, PollCallback_cb cb) - { - AuUInt64 time = Time::SteadyClockNS(); - - if (cb()) - { - return true; - } - - if (timeoutNs) - { - // only relevant when there's no timeout, fastsnooze will do its own magic given the templates parameters - permitMultipleContextSwitches = false; - } - - // do not trust the compiler do branch here with a mere Func(...) - // it's far more likely the branch will be handled in our yield loop - if (permitMultipleContextSwitches) - { - return YieldPollTmpl(time, timeoutNs, cb); - } - else - { - return YieldPollTmpl(time, timeoutNs, cb); - } - - return false; - } - - AUKN_SYM bool YieldPoll(bool permitMultipleContextSwitches, AuUInt64 timeoutMs, PollCallback_cb cb) - { - AuUInt64 time = Time::SteadyClockNS(); - AuUInt64 timeoutNs = timeoutMs ? (time + (timeoutMs * 1000000)) : 0; - - if (cb()) - { - return true; - } - - if (timeoutMs) - { - // only relevant when there's no timeout, fastsnooze will do its own magic given the templates parameters - permitMultipleContextSwitches = false; - } - - // do not trust the compiler do branch here with a mere Func(...) - // it's far more likely the branch will be handled in our yield loop - if (permitMultipleContextSwitches) - { - return YieldPollTmpl(time, timeoutNs, cb); - } - else - { - return YieldPollTmpl(time, timeoutNs, cb); - } - - return false; - } - - static bool WaitLogicHandledByImplementor(bool &status, IWaitable *waitable, AuUInt64 timeout) - { - if (!waitable->HasLockImplementation()) - { - return false; - } - - status = waitable->LockMS(timeout); - return true; - } - - static bool WaitLogicHandledByNTOS(bool &status, IWaitable *waitable, AuUInt64 timeout) - { - #if defined(AURORA_IS_MODERNNT_DERIVED) - AuMach handle = 0; - if (!waitable->HasOSHandle(handle)) - { - return false; - } - - auto win32 = reinterpret_cast(handle); - auto ret = WaitForSingleObject(win32, timeout ? timeout : INFINITE); - - SysAssert(ret != WAIT_FAILED, "Internal Win32 Error {}", GetLastError()); - - if (ret == WAIT_TIMEOUT) - { - status = false; - } - else - { - status = true; - } - - return true; - #else - return false; - #endif - } - - AUKN_SYM bool WaitFor(IWaitable *waitable, AuUInt64 timeout) - { - bool status; - - if (WaitLogicHandledByNTOS(status, waitable, timeout)) - { - return status; - } - - if (WaitLogicHandledByImplementor(status, waitable, timeout)) - { - return status; - } - - return YieldPoll(true, timeout, [=]() - { - return waitable->TryLock(); - }); - } - - static bool CanWin32HandleAll(const AuList &waitables) - { - #if defined(AURORA_IS_MODERNNT_DERIVED) - for (auto &waitable : waitables) - { - AuMach handle = 0; - if (!waitable->HasOSHandle(handle)) + if (Time::SteadyClockNS() >= qwAbsTimeoutNs) { return false; } + + if (bPermitMultipleContextSwitches) + { + ContextYield(); + } } + return true; - #else - return false; - #endif } - static bool Win32HandleMultiple(const AuList &waitables, AuUInt64 timeoutMs) + AUKN_SYM bool YieldPoll(bool permitMultipleContextSwitches, AuUInt64 qwTimeoutMs, PollCallback_cb cb) { - #if defined(AURORA_IS_MODERNNT_DERIVED) - AuList winWaitables; + return YieldPollNs(permitMultipleContextSwitches, + qwTimeoutMs ? Time::SteadyClockNS() + AuMSToNS(qwTimeoutMs) : 0, + cb); + } - winWaitables.resize(waitables.size()); - - std::transform(waitables.begin(), waitables.end(), winWaitables.begin(), [](IWaitable *waitable) -> HANDLE + AUKN_SYM bool WaitForAbsNS(IWaitable *pWaitable, AuUInt64 qwAbsTimeout) + { + if (pWaitable->HasLockImplementation()) { - AuMach handle = 0; - auto status = waitable->HasOSHandle(handle); - SysAssert(status, "OS Handle was NULL"); - return reinterpret_cast(handle); + return pWaitable->LockAbsNS(qwAbsTimeout); + } + + return YieldPollNs(true, qwAbsTimeout, [=]() + { + return pWaitable->TryLock(); }); + } + + AUKN_SYM bool WaitFor(const AuList &waitables, AuUInt32 uFlags, AuUInt64 uTimeout) + { + AU_DEBUG_MEMCRUNCH; + + AuUInt64 qwTimeoutAbs {}; + AuList releasedObjects(waitables.size()); - auto status = WaitForMultipleObjectsEx(winWaitables.size(), winWaitables.data(), TRUE, timeoutMs ? timeoutMs : INFINITE, true); - SysAssert(status != WAIT_FAILED, "Internal Win32 Error {}", GetLastError()); - - if (status == WAIT_TIMEOUT) + if (uFlags & kWaitForFlagTimeoutIsNanoseconds) { - return false; + qwTimeoutAbs = uTimeout; } else { - return true; - } - #else - return false; - #endif - } - - AUKN_SYM bool WaitFor(const AuList &waitables, AuUInt64 timeout) - { - if (CanWin32HandleAll(waitables)) - { - return Win32HandleMultiple(waitables, timeout); + qwTimeoutAbs = AuMSToNS(uTimeout); } - // im worried about the complexity of using a vector here - // we would have to hit o(n) and memcpy in the best case scenario on each object release - // unordered maps are glorified hash tables - // maps are glorified binary trees - // maps should be fast enough - AuHashMap releasedObjects; - - releasedObjects.reserve(waitables.size()); - - // pseudo reserve - for (AuMach i = 0; i < waitables.size(); i++) + if (!(uFlags & kWaitForFlagTimeoutIsAbsolute)) { - releasedObjects[i] = false; + qwTimeoutAbs += AuTime::SteadyClockNS(); } - // yield for all - auto status = YieldPoll(true, timeout, [&]() + auto bIsAnd = !(uFlags & kWaitForFlagTimeoutIsOr); + + auto bStatus = YieldPollNs(true, qwTimeoutAbs, [&]() { - for (AuMach i = 0; i < waitables.size(); i++) + bool bStatus { !waitables.size() }; + + for (AU_ITERATE_N(i, waitables.size())) { - if (!releasedObjects[i]) + if (releasedObjects[i]) { - if (waitables[i]->TryLock()) - { - releasedObjects[i] = true; - } - else + continue; + } + + bool bLocked {}; + + if (bIsAnd) + { + bLocked = WaitForAbsNS(waitables[i], qwTimeoutAbs); + } + else + { + bLocked = waitables[i]->TryLock(); + } + + if (bLocked) + { + releasedObjects[i] = true; + bStatus = true; + } + else + { + if (bIsAnd) { return false; } } } - return true; + + return bStatus; }); - // from the perspective of locks, should the be a timeout event, we need to go back and unlock them on timeout - if (!status) + if (!bStatus) { - for (AuMach i = 0; i < waitables.size(); i++) + for (AU_ITERATE_N(i, waitables.size())) { if (releasedObjects[i]) { @@ -386,11 +162,6 @@ namespace Aurora::Threading } } - return status; - } - - AUKN_SYM void ContextYield() - { - YieldToOtherThread(); + return bStatus; } } \ No newline at end of file diff --git a/Source/Threading/AuWaitFor.hpp b/Source/Threading/AuWaitFor.hpp index 8aed5927..71431c3d 100644 --- a/Source/Threading/AuWaitFor.hpp +++ b/Source/Threading/AuWaitFor.hpp @@ -9,40 +9,9 @@ namespace Aurora::Threading { - // the original idea: - // It's not insane to expect slow linux kernels to run at 250 jiffies a second, so, 4ms - // It's also not insane to expect a complete context swap/rescheduled yield on windows to last 15ms - // -> if sleep time greater than 15ms, yield to nt Sleep - // -> if sleep time greater than 4ms, yield to linux kernel - // -> if sleep time greater than 2ms (?), yield to SwitchToThread - // -> SPIIIIIN - - static const AuUInt64 kPredictedLinuxKernelJiffies = 250; // some kernel builds go up to 1000 - static const AuUInt64 kPredictedLinuxKernelTimeMilli = (1000 / kPredictedLinuxKernelJiffies); - static const AuUInt64 kPredictedLinuxKernelTimeMicro = kPredictedLinuxKernelTimeMilli * 1000; - static const AuUInt64 kPredictedLinuxKernelTimeNano = kPredictedLinuxKernelTimeMilli * 1000000; - static const AuUInt64 kLinuxYieldTimeNano = 1e+6 / 150; // completely arbitrary - static const AuUInt64 kLinuxYieldTimeThresNano = 1e+6 / 25; // completely arbitrary - static const AuUInt64 kPredictedLinuxKernelTimeRTNano = (kLinuxYieldTimeNano + kPredictedLinuxKernelTimeNano) * 3; - - //static const AuUInt64 kPredictedNTOSSwitchTimeMS = 10; - //static const AuUInt64 kPredictedNTOSSwitchTimeYDMS = kPredictedNTOSSwitchTimeMS / 4; - //static const AuUInt64 kPredictedNTOSSwitchTimeRTMS = kPredictedNTOSSwitchTimeMS + kPredictedNTOSSwitchTimeMS; - static const AuUInt64 kPredictedNTOSSwitchTimeRTNS = 1000000;// kPredictedNTOSSwitchTimeRTMS* 1000000; - //static const AuUInt64 kPredictedNTOSSwitchTimeNS = 3* 1000000; - static const AuUInt64 kPredictedNTOSSwitchTimeYDNS = 1000000 / 4;// kPredictedNTOSSwitchTimeNS / 4; - - static const AuMach kYieldFlagsNone = 0; - static const AuMach kYieldFlagsRemoved = 1; - static const AuMach kYieldFlagsContextSwitchASAP = 2; - static const AuMach kYieldFlagsContextSwitchForever = 4; - static const AuMach kYieldFlagsRegular = kYieldFlagsContextSwitchASAP | kYieldFlagsContextSwitchForever; - - template - void FastSnooze(long &count, AuUInt64 &startTime, AuUInt64 maxStallMS); - - - bool YieldPoll(bool permitMultipleContextSwitches, AuUInt64 timeoutMs, Threading::PollCallback_cb cb); - - void YieldToOtherThread(); + /// @deprecated + static void YieldToOtherThread() + { + Aurora::Threading::ContextYield(); + } } \ No newline at end of file diff --git a/Source/Threading/Threads/AuOSThread.cpp b/Source/Threading/Threads/AuOSThread.cpp index b9fce36b..7dccd176 100644 --- a/Source/Threading/Threads/AuOSThread.cpp +++ b/Source/Threading/Threads/AuOSThread.cpp @@ -346,7 +346,7 @@ namespace Aurora::Threading::Threads TeminateOSContext(true); while (true) { - YieldToOtherThread(); + ContextYield(); } } }