[-] Remove 2 year old 0.0 WaitFor back-off implementation

2023-09-12 18:21:36 +01:00 · 2023-09-12 18:21:36 +01:00 · 8e54071d60
commit 8e54071d60
parent f08f4a476a
5 changed files with 141 additions and 380 deletions
--- a/Include/Aurora/Threading/WaitFor.hpp
+++ b/Include/Aurora/Threading/WaitFor.hpp
@@ -11,26 +11,47 @@ namespace Aurora::Threading
 {
    using PollCallback_cb = AuFunction<bool()>;
    
-    AUKN_SYM bool YieldPollNs(bool permitMultipleContextSwitches, AuUInt64 timeoutNs, PollCallback_cb cb);
-    AUKN_SYM bool YieldPoll(bool permitMultipleContextSwitches, AuUInt64 timeoutMs, PollCallback_cb cb);
+    AUKN_SYM bool YieldPollNs(bool bPermitMultipleContextSwitches, AuUInt64 qwAbsTimeoutNs, PollCallback_cb cb);

-    /*!
-        Waits for a list of IWaitable objects. <br>
-        See: Mutex, CriticalSection, Semaphore, Event, Thread, Async, and others
+    AUKN_SYM bool WaitForAbsNS(IWaitable *pWaitable, AuUInt64 qwAbsTimeout = 0);
+
+    static const auto kWaitForFlagTimeoutIsNanoseconds = 1ul;
+    static const auto kWaitForFlagTimeoutIsAbsolute    = 1ul << 1;
+    static const auto kWaitForFlagTimeoutIsOr          = 1ul << 2;
+
+    /**
+     *  Waits for a list of IWaitable objects to complete.
+     *  See: Mutex, CriticalSection, Semaphore, Event, Thread, Async, and others
+     *  On timeout, returns false
+     *  On error, waitables are restored to their state at the point of the WaitFor call
     */
-    AUKN_SYM bool WaitFor(IWaitable *waitable, AuUInt64 timeout = 0);
+    AUKN_SYM bool WaitFor(const AuList<IWaitable *> &waitables, AuUInt32 uFlags, AuUInt64 uTimeout = 0);

-    static bool WaitFor(std::atomic<bool> &value, AuUInt64 timeout = 0)
+    static inline bool WaitForShared(const AuList<AuSPtr<IWaitable>> &pWaitables, AuUInt32 uFlags, AuUInt64 uTimeout)
    {
-        Waitables::BooleanWaitable waitable(value);
-        return WaitFor(&waitable, timeout);
+        AU_DEBUG_MEMCRUNCH;
+        AuList<IWaitable *> waitables;
+        
+        waitables.reserve(pWaitables.size());
+        for (const auto &pIWaitable : pWaitables)
+        {
+            waitables.push_back(pIWaitable.get());
        }

-    /*!
-        Waits on a list of IWaitable objects.                                    <br>
-        See: Mutex, CriticalSection, Semaphore, Event, Thread, Async, and others <br>
-        On timeout, returns false                                                <br>
-        On error, waitables are restored to their state at the point of WaitFors call
-    */
-    AUKN_SYM bool WaitFor(const AuList<IWaitable *> &waitables, AuUInt64 timeout = 0);   
+        return WaitFor(waitables, uFlags, uTimeout);
+    }
+
+    /// legacy api (~3 years old, relative to 2023)
+    /// @deprecated
+    static inline bool WaitFor(const AuList<IWaitable *> &waitables, AuUInt64 uTimeout)
+    {
+        return WaitFor(waitables, 0, uTimeout);
+    }
+
+    /// legacy api (~3 years old, relative to 2023)
+    /// @deprecated
+    static inline bool WaitFor(IWaitable *pWaitable, AuUInt64 uTimeoutMS)
+    {
+        return WaitForAbsNS(pWaitable, AuMSToNS<AuUInt64>(uTimeoutMS) + Time::SteadyClockNS());
+    }
 }
--- a/Source/Processes/AuProcess.Win32.cpp
+++ b/Source/Processes/AuProcess.Win32.cpp
@@ -170,7 +170,7 @@ namespace Aurora::Processes
    
    static bool Wait2500OrUntilClose(HANDLE handle)
    {
-        return Threading::YieldPoll(true, 2500, [=]()
+        return Threading::YieldPollNs(true, AuTime::SteadyClockNS() + AuMSToNS<AuUInt64>(2500), [=]()
        {
            return !HasWin32ProcessExited(handle);
        });
--- a/Source/Threading/AuWaitFor.cpp
+++ b/Source/Threading/AuWaitFor.cpp
@@ -7,377 +7,153 @@
 ***/
 #include <Source/RuntimeInternal.hpp>
 #include "AuWaitFor.hpp"
+#include "Primitives/SMTYield.hpp"

-#if defined(AURORA_IS_LINUX_DERIVED)
+#if defined(AURORA_IS_POSIX_DERIVED)
    #include <sched.h>
-    #include <sys/resource.h>
-    #include <sys/time.h>
-    #include <unistd.h>
-    #include <time.h>
 #endif 

-// Read the local header file for this file.
-// The original idea was sane.
-// The implemention, not so much...
-
-// TODO: REWRITE!
-
 namespace Aurora::Threading
 {
-    static void YieldToSharedCore(long spin)
-    {
-        int loops = (1 << spin);
-        while (loops > 0)
+    static void YieldToSharedCore(long uSpin)
    {
    #if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
-            _mm_pause();
-        #endif
-            loops -= 1;
-        }
+
+        auto loops = __rdtsc() + (1ull << uSpin);
+        while (loops > __rdtsc())
+        {
+            _mm_pause(); _mm_pause(); _mm_pause(); _mm_pause();
+            _mm_pause(); _mm_pause(); _mm_pause(); _mm_pause();
+            _mm_pause(); _mm_pause(); _mm_pause(); _mm_pause();
+            _mm_pause(); _mm_pause(); _mm_pause(); _mm_pause();
        }

-    void YieldToOtherThread()
+    #else
+
+        auto uRemainingTicks = (1ull << uSpin);
+        while (uRemainingTicks > 0)
+        {
+            Primitives::SMPPause();
+            uRemainingTicks -= 1;
+        }
+
+    #endif
+    }
+
+    AUKN_SYM void ContextYield()
    {
    #if defined(AURORA_IS_MODERNNT_DERIVED)
-        SwitchToThread();
-    #elif defined(AURORA_IS_LINUX_DERIVED)
-        sched_yield();
+        ::SwitchToThread();
+    #elif defined(AURORA_IS_POSIX_DERIVED)
+        ::sched_yield();
    #else
        YieldToSharedCore(12);
    #endif
    }

-    template<AuMach Flags> // forcefully optiMize by templating a constant argument 
-    static inline void _FastSnooze(long &count, AuUInt64 &startTime, AuUInt64 maxStallNS, int &alpha, int &bravo, bool &forceSpin) //, bool yieldFaster , long maxStallMS = 20)
+    AUKN_SYM bool YieldPollNs(bool bPermitMultipleContextSwitches, AuUInt64 qwAbsTimeoutNs, PollCallback_cb cb)
    {
-        // TODO: rewrite me
-        AuUInt64 now = Time::SteadyClockNS();
-
-        // Begin least likely checks, we're getting on now
-        // Ironically we need to burn off some CPU cycles 
-        AuUInt64 deltaNS = now - startTime;
-
-    
-    #define SHOULD_SWITCH_ASAP(yieldDelayThresholdNs, roundTripNs)\
-        (static_cast<int>(Flags) & kYieldFlagsContextSwitchASAP)
-
-    // Validate we have at least one whole average of a context switch of overhead remaining
-    #define HAS_ENOUGH_TIME_FOR_TIMED_SLEEP(yieldDelayThresholdNs, roundTripNs)\
-        (maxStallNS >= (roundTripNs + deltaNS))
-
-    // The point of rewriting kernel-free userland thread components is to delegate everything to userland
-    // One key reason is single app performance. We should we not know how long to yield for, giving an undefined 
-    // ...amount of time to other applications might be a bad thing. fuck. why cant we have rtos functionality :( 
-    #define HAS_ENOUGH_TIME_FOR_INFINITE_SLEEP(yieldDelayThresholdNs, roundTripNs)\
-        ((static_cast<int>(Flags)& kYieldFlagsContextSwitchForever) && (!maxStallNS))
-
-    // Perform a good faith guess at assuming we have enough overhead for a hard context switch 
-    #define HAS_ENOUGH_TIME_OVERHEAD(yieldDelayThresholdNs, roundTripNs)\
-        (HAS_ENOUGH_TIME_FOR_INFINITE_SLEEP(yieldDelayThresholdNs, roundTripNs) || HAS_ENOUGH_TIME_FOR_TIMED_SLEEP(yieldDelayThresholdNs, roundTripNs))
-
-    // Validate enough time (lets say 1/3rd of the approximated time of a preemptive switch or sleep(0)) has passed
-    #define HAS_ENOUGH_TIME_PASSED(yieldDelayThresholdNs, roundTripNs)\
-        (deltaNS > yieldDelayThresholdNs)
-
-    #define SHOULD_CTXSWAP(yieldDelayThresholdNs, roundTripNs)\
-         if  (SHOULD_SWITCH_ASAP(yieldDelayThresholdNs, roundTripNs) || (HAS_ENOUGH_TIME_PASSED(yieldDelayThresholdNs, roundTripNs) &&  HAS_ENOUGH_TIME_OVERHEAD(yieldDelayThresholdNs, roundTripNs)))
-
-    #if defined(AURORA_IS_LINUX_DERIVED)
-        SHOULD_CTXSWAP(kLinuxYieldTimeThresNano, kPredictedLinuxKernelTimeRTNano)
+        while (!Primitives::DoTryIf(cb))
        {
-            // we are not very nice :D 
-            setpriority(PRIO_PROCESS, 0, bravo);
-            static timespec fuck = { 0, kLinuxYieldTimeNano };
-            nanosleep(&fuck, &fuck);
-            setpriority(PRIO_PROCESS, 0, alpha);
-            forceSpin = true;
-            return;
-        }
-    #endif 
-
-    #if defined(AURORA_PLATFORM_WIN32)
-        SHOULD_CTXSWAP(kPredictedNTOSSwitchTimeYDNS, kPredictedNTOSSwitchTimeRTNS)
-        {
-            // TODO: 
-            ::Sleep(1);
-            return;
-        }
-    #endif 
-
-
-        // Always at least try to burn some cycles off in a spinlock-esc time waster  
-        YieldToOtherThread();
-    }
-
-    template<AuMach Flags> // forcefully optiMize by templating a constant argument 
-    static void FastSnooze(long &count, AuUInt64 &startTime, AuUInt64 maxStallMS) //, bool yieldFaster , long maxStallMS = 20)
-    {
-    #if defined(AURORA_IS_LINUX_DERIVED)
-        int alpha = getpriority(PRIO_PROCESS, 0);
-        int bravo = AuMin(15, AuMax(19, alpha + 5));
-    #else
-        int alpha, bravo = 0;
-    #endif
-        bool spin = false;
-        _FastSnooze<Flags>(count, startTime, maxStallMS, alpha, bravo, spin);
-    }
-
-    template
-    void FastSnooze<0>(long &count, AuUInt64 &startTime, AuUInt64 maxStallMS);
-    template
-    void FastSnooze<kYieldFlagsContextSwitchASAP>(long &count, AuUInt64 &startTime, AuUInt64 maxStallMS);
-    template
-    void FastSnooze<kYieldFlagsContextSwitchForever>(long &count, AuUInt64 &startTime, AuUInt64 maxStallMS);
-
-    template<bool permitMultipleContextSwitches>
-    static inline bool YieldPollTmpl(AuUInt64 &time, AuUInt64 timeoutMs, PollCallback_cb cb)
-    {
-    #if defined(AURORA_IS_LINUX_DERIVED)
-        int alpha = getpriority(PRIO_PROCESS, 0);
-        int bravo = AuMin(15, AuMax(19, alpha + 5));
-    #else
-        int alpha, bravo = 0;
-    #endif
-        bool spin = false;
-
-        long count = 0;
-
-        unsigned long long a = Time::SteadyClockNS();
-        do
-        {
-            if (permitMultipleContextSwitches)
-            {
-                _FastSnooze<kYieldFlagsContextSwitchForever>(count, time, timeoutMs, alpha, bravo, spin);
-            }
-            else
-            {
-                _FastSnooze<0>(count, time, timeoutMs, alpha, bravo, spin);
-            }
-
-            if (cb())
-            {
-                return true;
-            }
-            a = Time::SteadyClockNS();
-
-        } while ((!timeoutMs) || (timeoutMs > a));
-
-        return cb();
-    }
-
-    AUKN_SYM bool YieldPollNs(bool permitMultipleContextSwitches, AuUInt64 timeoutNs, PollCallback_cb cb)
-    {
-        AuUInt64 time = Time::SteadyClockNS();
-
-        if (cb())
-        {
-            return true;
-        }
-
-        if (timeoutNs)
-        {
-            // only relevant when there's no timeout, fastsnooze will do its own magic given the templates parameters
-            permitMultipleContextSwitches = false;
-        }
-
-        // do not trust the compiler do branch here with a mere Func<variable>(...)
-        // it's far more likely the branch will be handled in our yield loop
-        if (permitMultipleContextSwitches)
-        {
-            return YieldPollTmpl<true>(time, timeoutNs, cb);
-        }
-        else
-        {
-            return YieldPollTmpl<false>(time, timeoutNs, cb);
-        }
-
-        return false;
-    }
-
-    AUKN_SYM bool YieldPoll(bool permitMultipleContextSwitches, AuUInt64 timeoutMs, PollCallback_cb cb)
-    {
-        AuUInt64 time = Time::SteadyClockNS();
-        AuUInt64 timeoutNs = timeoutMs ? (time + (timeoutMs * 1000000)) : 0;
-
-        if (cb())
-        {
-            return true;
-        }
-
-        if (timeoutMs)
-        {
-            // only relevant when there's no timeout, fastsnooze will do its own magic given the templates parameters
-            permitMultipleContextSwitches = false;
-        }
-
-        // do not trust the compiler do branch here with a mere Func<variable>(...)
-        // it's far more likely the branch will be handled in our yield loop
-        if (permitMultipleContextSwitches)
-        {
-            return YieldPollTmpl<true>(time, timeoutNs, cb);
-        }
-        else
-        {
-            return YieldPollTmpl<false>(time, timeoutNs, cb);
-        }
-
-        return false;
-    }
-
-    static bool WaitLogicHandledByImplementor(bool &status, IWaitable *waitable, AuUInt64 timeout)
-    {
-        if (!waitable->HasLockImplementation())
+            if (Time::SteadyClockNS() >= qwAbsTimeoutNs)
            {
                return false;
            }

-        status = waitable->LockMS(timeout);
-        return true;
+            if (bPermitMultipleContextSwitches)
+            {
+                ContextYield();
            }
-
-    static bool WaitLogicHandledByNTOS(bool &status, IWaitable *waitable, AuUInt64 timeout)
-    {
-    #if defined(AURORA_IS_MODERNNT_DERIVED)
-        AuMach handle = 0;
-        if (!waitable->HasOSHandle(handle))
-        {
-            return false;
-        }
-
-        auto win32 = reinterpret_cast<HANDLE>(handle);
-        auto ret = WaitForSingleObject(win32, timeout ? timeout : INFINITE);
-
-        SysAssert(ret != WAIT_FAILED, "Internal Win32 Error {}", GetLastError());
-
-        if (ret == WAIT_TIMEOUT)
-        {
-            status = false;
-        }
-        else
-        {
-            status = true;
        }

        return true;
-    #else
-        return false;
-    #endif 
    }

-    AUKN_SYM bool WaitFor(IWaitable *waitable, AuUInt64 timeout)
+    AUKN_SYM bool YieldPoll(bool permitMultipleContextSwitches, AuUInt64 qwTimeoutMs, PollCallback_cb cb)
    {
-        bool status;
-
-        if (WaitLogicHandledByNTOS(status, waitable, timeout))
-        {
-            return status;
+        return YieldPollNs(permitMultipleContextSwitches,
+                           qwTimeoutMs ? Time::SteadyClockNS() + AuMSToNS<AuUInt64>(qwTimeoutMs) : 0,
+                           cb);
    }

-        if (WaitLogicHandledByImplementor(status, waitable, timeout))
+    AUKN_SYM bool WaitForAbsNS(IWaitable *pWaitable, AuUInt64 qwAbsTimeout)
    {
-            return status;
+        if (pWaitable->HasLockImplementation())
+        {
+            return pWaitable->LockAbsNS(qwAbsTimeout);
        }

-        return YieldPoll(true, timeout, [=]()
+        return YieldPollNs(true, qwAbsTimeout, [=]()
        {
-            return waitable->TryLock();
+            return pWaitable->TryLock();
        });
    }
   
-    static bool CanWin32HandleAll(const AuList<IWaitable *> &waitables)
+    AUKN_SYM bool WaitFor(const AuList<IWaitable *> &waitables, AuUInt32 uFlags, AuUInt64 uTimeout)
    {
-    #if defined(AURORA_IS_MODERNNT_DERIVED)
-        for (auto &waitable : waitables)
-        {
-            AuMach handle = 0;
-            if (!waitable->HasOSHandle(handle))
-            {
-                return false;
-            }
-        }
-        return true;
-    #else
-        return false;
-    #endif
-    }
+        AU_DEBUG_MEMCRUNCH;
        
-    static bool Win32HandleMultiple(const AuList<IWaitable *> &waitables, AuUInt64 timeoutMs)
+        AuUInt64 qwTimeoutAbs {};
+        AuList<bool> releasedObjects(waitables.size());
+
+        if (uFlags & kWaitForFlagTimeoutIsNanoseconds)
        {
-    #if defined(AURORA_IS_MODERNNT_DERIVED)
-        AuList<HANDLE> winWaitables;
-
-        winWaitables.resize(waitables.size());
-
-        std::transform(waitables.begin(), waitables.end(), winWaitables.begin(), [](IWaitable *waitable) -> HANDLE
-        {
-            AuMach handle = 0;
-            auto status = waitable->HasOSHandle(handle);
-            SysAssert(status, "OS Handle was NULL");
-            return reinterpret_cast<HANDLE>(handle);
-        });
-
-        auto status = WaitForMultipleObjectsEx(winWaitables.size(), winWaitables.data(), TRUE, timeoutMs ? timeoutMs : INFINITE, true);
-        SysAssert(status != WAIT_FAILED, "Internal Win32 Error {}", GetLastError());
-
-        if (status == WAIT_TIMEOUT)
-        {
-            return false;
+            qwTimeoutAbs = uTimeout;
        }
        else
        {
-            return true;
-        }
-    #else
-        return false;
-    #endif
+            qwTimeoutAbs = AuMSToNS<AuUInt64>(uTimeout);
        }

-    AUKN_SYM bool WaitFor(const AuList<IWaitable *> &waitables, AuUInt64 timeout)
+        if (!(uFlags & kWaitForFlagTimeoutIsAbsolute))
        {
-        if (CanWin32HandleAll(waitables))
-        {
-            return Win32HandleMultiple(waitables, timeout);
+            qwTimeoutAbs += AuTime::SteadyClockNS();
        }

-        // im worried about the complexity of using a vector here
-        // we would have to hit o(n) and memcpy in the best case scenario on each object release
-        // unordered maps are glorified hash tables
-        // maps are glorified binary trees
-        // maps should be fast enough
-        AuHashMap<int, bool> releasedObjects;
+        auto bIsAnd = !(uFlags & kWaitForFlagTimeoutIsOr);

-        releasedObjects.reserve(waitables.size());
-
-        // pseudo reserve 
-        for (AuMach i = 0; i < waitables.size(); i++)
+        auto bStatus = YieldPollNs(true, qwTimeoutAbs, [&]()
        {
-            releasedObjects[i] = false;
+            bool bStatus { !waitables.size() };
+
+            for (AU_ITERATE_N(i, waitables.size()))
+            {
+                if (releasedObjects[i])
+                {
+                    continue;
                }

-        // yield for all 
-        auto status = YieldPoll(true, timeout, [&]()
+                bool bLocked {};
+
+                if (bIsAnd)
                {
-            for (AuMach i = 0; i < waitables.size(); i++)
+                    bLocked = WaitForAbsNS(waitables[i], qwTimeoutAbs);
+                }
+                else
                {
-                if (!releasedObjects[i])
-                {
-                    if (waitables[i]->TryLock())
+                    bLocked = waitables[i]->TryLock();
+                }
+
+                if (bLocked)
                {
                    releasedObjects[i] = true;
+                    bStatus = true;
                }
                else
+                {
+                    if (bIsAnd)
                    {
                        return false;
                    }
                }
            }
-            return true;
+
+            return bStatus;
        });

-        // from the perspective of locks, should the be a timeout event, we need to go back and unlock them on timeout 
-        if (!status)
+        if (!bStatus)
        {
-            for (AuMach i = 0; i < waitables.size(); i++)
+            for (AU_ITERATE_N(i, waitables.size()))
            {
                if (releasedObjects[i])
                {
@@ -386,11 +162,6 @@ namespace Aurora::Threading
            }
        }

-        return status;
-    }
-
-    AUKN_SYM void ContextYield()
-    {
-        YieldToOtherThread();
+        return bStatus;
    }
 }
--- a/Source/Threading/AuWaitFor.hpp
+++ b/Source/Threading/AuWaitFor.hpp
@@ -9,40 +9,9 @@

 namespace Aurora::Threading
 {
-    // the original idea:
-    // It's not insane to expect slow linux kernels to run at 250 jiffies a second, so, 4ms
-    // It's also not insane to expect a complete context swap/rescheduled yield on windows to last 15ms  
-    // -> if sleep time greater than 15ms, yield to nt Sleep 
-    // -> if sleep time greater than 4ms, yield to linux kernel 
-    // -> if sleep time greater than 2ms (?), yield to SwitchToThread 
-    // -> SPIIIIIN
-
-    static const AuUInt64 kPredictedLinuxKernelJiffies = 250; // some kernel builds go up to 1000
-    static const AuUInt64 kPredictedLinuxKernelTimeMilli = (1000 / kPredictedLinuxKernelJiffies);
-    static const AuUInt64 kPredictedLinuxKernelTimeMicro = kPredictedLinuxKernelTimeMilli * 1000;
-    static const AuUInt64 kPredictedLinuxKernelTimeNano = kPredictedLinuxKernelTimeMilli * 1000000;
-    static const AuUInt64 kLinuxYieldTimeNano = 1e+6 / 150; // completely arbitrary
-    static const AuUInt64 kLinuxYieldTimeThresNano = 1e+6 / 25; // completely arbitrary
-    static const AuUInt64 kPredictedLinuxKernelTimeRTNano = (kLinuxYieldTimeNano + kPredictedLinuxKernelTimeNano) * 3;
-
-    //static const AuUInt64 kPredictedNTOSSwitchTimeMS        = 10;
-    //static const AuUInt64 kPredictedNTOSSwitchTimeYDMS      = kPredictedNTOSSwitchTimeMS  / 4;
-    //static const AuUInt64 kPredictedNTOSSwitchTimeRTMS      = kPredictedNTOSSwitchTimeMS + kPredictedNTOSSwitchTimeMS;
-    static const AuUInt64 kPredictedNTOSSwitchTimeRTNS = 1000000;// kPredictedNTOSSwitchTimeRTMS* 1000000;
-    //static const AuUInt64 kPredictedNTOSSwitchTimeNS        = 3* 1000000;
-    static const AuUInt64 kPredictedNTOSSwitchTimeYDNS = 1000000 / 4;// kPredictedNTOSSwitchTimeNS  / 4;
-
-    static const AuMach kYieldFlagsNone = 0;
-    static const AuMach kYieldFlagsRemoved = 1;
-    static const AuMach kYieldFlagsContextSwitchASAP = 2;
-    static const AuMach kYieldFlagsContextSwitchForever = 4;
-    static const AuMach kYieldFlagsRegular = kYieldFlagsContextSwitchASAP | kYieldFlagsContextSwitchForever;
-
-    template<AuMach yieldFaster>
-    void FastSnooze(long &count, AuUInt64 &startTime, AuUInt64 maxStallMS);
-
-
-    bool YieldPoll(bool permitMultipleContextSwitches, AuUInt64 timeoutMs, Threading::PollCallback_cb cb);
-
-    void YieldToOtherThread();
+    /// @deprecated
+    static void YieldToOtherThread()
+    {
+        Aurora::Threading::ContextYield();
+    }
 }
--- a/Source/Threading/Threads/AuOSThread.cpp
+++ b/Source/Threading/Threads/AuOSThread.cpp
@@ -346,7 +346,7 @@ namespace Aurora::Threading::Threads
                TeminateOSContext(true);
                while (true)
                {
-                    YieldToOtherThread();
+                    ContextYield();
                }
            }
        }