Reece
d755a9d651
[*] Refactor ambiguous IWaitable::Lock(timeoutMs) to LockMS to prevent final using collisions
396 lines
12 KiB
C++
396 lines
12 KiB
C++
/***
|
|
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
|
|
|
|
File: AuWaitFor.cpp
|
|
Date: 2021-6-12
|
|
Author: Reece
|
|
***/
|
|
#include <Source/RuntimeInternal.hpp>
|
|
#include "AuWaitFor.hpp"
|
|
|
|
#if defined(AURORA_IS_LINUX_DERIVED)
|
|
#include <sched.h>
|
|
#include <sys/resource.h>
|
|
#include <sys/time.h>
|
|
#include <unistd.h>
|
|
#include <time.h>
|
|
#endif
|
|
|
|
// Read the local header file for this file.
|
|
// The original idea was sane.
|
|
// The implementation, not so much...
|
|
|
|
// TODO: REWRITE!
|
|
|
|
namespace Aurora::Threading
|
|
{
|
|
// Busy-waits on the current core for 2^spin loop iterations.
// On x86/x64 builds every iteration issues _mm_pause(); on any other
// architecture the loop body is empty and merely counts down.
//
// spin: exponent of the iteration count (the loop runs 1 << spin times).
static void YieldToSharedCore(long spin)
{
    const int iterations = (1 << spin);

    for (int remaining = iterations; remaining > 0; --remaining)
    {
#if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
        _mm_pause();
#endif
    }
}
|
|
|
|
void YieldToOtherThread()
|
|
{
|
|
#if defined(AURORA_IS_MODERNNT_DERIVED)
|
|
SwitchToThread();
|
|
#elif defined(AURORA_IS_LINUX_DERIVED)
|
|
sched_yield();
|
|
#else
|
|
YieldToSharedCore(12);
|
|
#endif
|
|
}
|
|
|
|
template<AuMach Flags> // forcefully optiMize by templating a constant argument
|
|
static inline void _FastSnooze(long &count, AuUInt64 &startTime, AuUInt64 maxStallNS, int &alpha, int &bravo, bool &forceSpin) //, bool yieldFaster , long maxStallMS = 20)
|
|
{
|
|
// TODO: rewrite me
|
|
AuUInt64 now = Time::SteadyClockNS();
|
|
|
|
// Begin least likely checks, we're getting on now
|
|
// Ironically we need to burn off some CPU cycles
|
|
AuUInt64 deltaNS = now - startTime;
|
|
|
|
|
|
#define SHOULD_SWITCH_ASAP(yieldDelayThresholdNs, roundTripNs)\
|
|
(static_cast<int>(Flags) & kYieldFlagsContextSwitchASAP)
|
|
|
|
// Validate we have at least one whole average of a context switch of overhead remaining
|
|
#define HAS_ENOUGH_TIME_FOR_TIMED_SLEEP(yieldDelayThresholdNs, roundTripNs)\
|
|
(maxStallNS >= (roundTripNs + deltaNS))
|
|
|
|
// The point of rewriting kernel-free userland thread components is to delegate everything to userland
|
|
// One key reason is single app performance. We should we not know how long to yield for, giving an undefined
|
|
// ...amount of time to other applications might be a bad thing. fuck. why cant we have rtos functionality :(
|
|
#define HAS_ENOUGH_TIME_FOR_INFINITE_SLEEP(yieldDelayThresholdNs, roundTripNs)\
|
|
((static_cast<int>(Flags)& kYieldFlagsContextSwitchForever) && (!maxStallNS))
|
|
|
|
// Perform a good faith guess at assuming we have enough overhead for a hard context switch
|
|
#define HAS_ENOUGH_TIME_OVERHEAD(yieldDelayThresholdNs, roundTripNs)\
|
|
(HAS_ENOUGH_TIME_FOR_INFINITE_SLEEP(yieldDelayThresholdNs, roundTripNs) || HAS_ENOUGH_TIME_FOR_TIMED_SLEEP(yieldDelayThresholdNs, roundTripNs))
|
|
|
|
// Validate enough time (lets say 1/3rd of the approximated time of a preemptive switch or sleep(0)) has passed
|
|
#define HAS_ENOUGH_TIME_PASSED(yieldDelayThresholdNs, roundTripNs)\
|
|
(deltaNS > yieldDelayThresholdNs)
|
|
|
|
#define SHOULD_CTXSWAP(yieldDelayThresholdNs, roundTripNs)\
|
|
if (SHOULD_SWITCH_ASAP(yieldDelayThresholdNs, roundTripNs) || (HAS_ENOUGH_TIME_PASSED(yieldDelayThresholdNs, roundTripNs) && HAS_ENOUGH_TIME_OVERHEAD(yieldDelayThresholdNs, roundTripNs)))
|
|
|
|
#if defined(AURORA_IS_LINUX_DERIVED)
|
|
SHOULD_CTXSWAP(kLinuxYieldTimeThresNano, kPredictedLinuxKernelTimeRTNano)
|
|
{
|
|
// we are not very nice :D
|
|
setpriority(PRIO_PROCESS, 0, bravo);
|
|
static timespec fuck = { 0, kLinuxYieldTimeNano };
|
|
nanosleep(&fuck, &fuck);
|
|
setpriority(PRIO_PROCESS, 0, alpha);
|
|
forceSpin = true;
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
#if defined(AURORA_PLATFORM_WIN32)
|
|
SHOULD_CTXSWAP(kPredictedNTOSSwitchTimeYDNS, kPredictedNTOSSwitchTimeRTNS)
|
|
{
|
|
// TODO:
|
|
::Sleep(1);
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
|
|
// Always at least try to burn some cycles off in a spinlock-esc time waster
|
|
YieldToOtherThread();
|
|
}
|
|
|
|
template<AuMach Flags> // forcefully optiMize by templating a constant argument
|
|
static void FastSnooze(long &count, AuUInt64 &startTime, AuUInt64 maxStallMS) //, bool yieldFaster , long maxStallMS = 20)
|
|
{
|
|
#if defined(AURORA_IS_LINUX_DERIVED)
|
|
int alpha = getpriority(PRIO_PROCESS, 0);
|
|
int bravo = AuMin(15, AuMax(19, alpha + 5));
|
|
#else
|
|
int alpha, bravo = 0;
|
|
#endif
|
|
bool spin = false;
|
|
_FastSnooze<Flags>(count, startTime, maxStallMS, alpha, bravo, spin);
|
|
}
|
|
|
|
template
|
|
void FastSnooze<0>(long &count, AuUInt64 &startTime, AuUInt64 maxStallMS);
|
|
template
|
|
void FastSnooze<kYieldFlagsContextSwitchASAP>(long &count, AuUInt64 &startTime, AuUInt64 maxStallMS);
|
|
template
|
|
void FastSnooze<kYieldFlagsContextSwitchForever>(long &count, AuUInt64 &startTime, AuUInt64 maxStallMS);
|
|
|
|
template<bool permitMultipleContextSwitches>
|
|
static inline bool YieldPollTmpl(AuUInt64 &time, AuUInt64 timeoutMs, PollCallback_cb cb)
|
|
{
|
|
#if defined(AURORA_IS_LINUX_DERIVED)
|
|
int alpha = getpriority(PRIO_PROCESS, 0);
|
|
int bravo = AuMin(15, AuMax(19, alpha + 5));
|
|
#else
|
|
int alpha, bravo = 0;
|
|
#endif
|
|
bool spin = false;
|
|
|
|
long count = 0;
|
|
|
|
unsigned long long a = Time::SteadyClockNS();
|
|
do
|
|
{
|
|
if (permitMultipleContextSwitches)
|
|
{
|
|
_FastSnooze<kYieldFlagsContextSwitchForever>(count, time, timeoutMs, alpha, bravo, spin);
|
|
}
|
|
else
|
|
{
|
|
_FastSnooze<0>(count, time, timeoutMs, alpha, bravo, spin);
|
|
}
|
|
|
|
if (cb())
|
|
{
|
|
return true;
|
|
}
|
|
a = Time::SteadyClockNS();
|
|
|
|
} while ((!timeoutMs) || (timeoutMs > a));
|
|
|
|
return cb();
|
|
}
|
|
|
|
// Polls `cb` until it returns true or the timeout is reached.
//
// permitMultipleContextSwitches: honoured only when timeoutNs is zero; with a
//            timeout the single-switch polling loop is always used.
// timeoutNs: zero means "no timeout"; otherwise forwarded unchanged to
//            YieldPollTmpl.
//            NOTE(review): YieldPollTmpl compares this value directly against
//            Time::SteadyClockNS(), so it behaves as an absolute deadline
//            here, whereas YieldPoll converts its timeout into
//            "now + timeout" first - confirm which contract is intended.
// cb:        poll callback; returning true ends the wait.
//
// Returns true once cb() succeeds, otherwise the result of the final poll.
AUKN_SYM bool YieldPollNs(bool permitMultipleContextSwitches, AuUInt64 timeoutNs, PollCallback_cb cb)
{
    AuUInt64 time = Time::SteadyClockNS();

    // Fast path: the condition may already hold.
    if (cb())
    {
        return true;
    }

    // Repeated context switches are only relevant when there is no timeout;
    // with one, the snooze logic does its own thing based on the template flags.
    const bool allowRepeatedSwitches = permitMultipleContextSwitches && !timeoutNs;

    // Branch once here so each loop is instantiated with a constant argument
    // instead of re-testing the flag inside the yield loop.
    return allowRepeatedSwitches ? YieldPollTmpl<true>(time, timeoutNs, cb)
                                 : YieldPollTmpl<false>(time, timeoutNs, cb);
}
|
|
|
|
// Polls `cb` until it returns true or `timeoutMs` milliseconds have elapsed.
//
// permitMultipleContextSwitches: honoured only when timeoutMs is zero; with a
//            timeout the single-switch polling loop is always used.
// timeoutMs: relative timeout in milliseconds; zero means "no timeout".
// cb:        poll callback; returning true ends the wait.
//
// Returns true once cb() succeeds, otherwise the result of the final poll
// performed after the deadline.
AUKN_SYM bool YieldPoll(bool permitMultipleContextSwitches, AuUInt64 timeoutMs, PollCallback_cb cb)
{
    constexpr AuUInt64 kNsPerMs = 1000000;
    constexpr AuUInt64 kLatestDeadline = ~static_cast<AuUInt64>(0);

    AuUInt64 time = Time::SteadyClockNS();

    // Absolute steady-clock deadline in nanoseconds (zero = none).
    // Saturate instead of wrapping: a very large timeout must not overflow
    // into a deadline that lies in the past.
    AuUInt64 timeoutNs = 0;
    if (timeoutMs)
    {
        const bool overflows = timeoutMs > ((kLatestDeadline - time) / kNsPerMs);
        timeoutNs = overflows ? kLatestDeadline : (time + (timeoutMs * kNsPerMs));
    }

    // Fast path: the condition may already hold.
    if (cb())
    {
        return true;
    }

    if (timeoutMs)
    {
        // only relevant when there's no timeout, fastsnooze will do its own magic given the templates parameters
        permitMultipleContextSwitches = false;
    }

    // do not trust the compiler to branch here with a mere Func<variable>(...)
    // it's far more likely the branch would end up inside our yield loop
    if (permitMultipleContextSwitches)
    {
        return YieldPollTmpl<true>(time, timeoutNs, cb);
    }

    return YieldPollTmpl<false>(time, timeoutNs, cb);
}
|
|
|
|
static bool WaitLogicHandledByImplementor(bool &status, IWaitable *waitable, AuUInt64 timeout)
|
|
{
|
|
if (!waitable->HasLockImplementation())
|
|
{
|
|
return false;
|
|
}
|
|
|
|
status = waitable->LockMS(timeout);
|
|
return true;
|
|
}
|
|
|
|
// Waits on the waitable through its native OS handle (NT-derived targets only).
//
// status:   receives false when the wait timed out, true for every other
//           (non-failing) wait result; untouched when this returns false.
// waitable: object to wait on.
// timeout:  timeout in milliseconds; zero means "wait forever".
//
// Returns true when the waitable exposed an OS handle and the wait was
// performed here. Returns false when there is no handle, and always on
// targets that are not NT-derived.
static bool WaitLogicHandledByNTOS(bool &status, IWaitable *waitable, AuUInt64 timeout)
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
    AuMach handle = 0;
    if (!waitable->HasOSHandle(handle))
    {
        return false;
    }

    auto win32 = reinterpret_cast<HANDLE>(handle);

    // WaitForSingleObject takes a 32-bit millisecond count and reserves
    // INFINITE (0xFFFFFFFF). Passing the 64-bit timeout straight through would
    // silently truncate it, so long waits are split into bounded slices.
    const AuUInt64 kMaxSliceMs = static_cast<AuUInt64>(INFINITE) - 1;

    AuUInt64 remainingMs = timeout;
    DWORD ret = WAIT_TIMEOUT;

    do
    {
        DWORD sliceMs = INFINITE;

        if (timeout)
        {
            sliceMs = static_cast<DWORD>((remainingMs > kMaxSliceMs) ? kMaxSliceMs : remainingMs);
            remainingMs -= sliceMs;
        }

        ret = WaitForSingleObject(win32, sliceMs);

        SysAssert(ret != WAIT_FAILED, "Internal Win32 Error {}", GetLastError());
    }
    while ((ret == WAIT_TIMEOUT) && remainingMs);

    status = (ret != WAIT_TIMEOUT);
    return true;
#else
    return false;
#endif
}
|
|
|
|
// Waits for a single waitable, trying the available strategies in order:
//   1. a native OS-handle wait,
//   2. the waitable's own blocking lock,
//   3. a yield/poll loop around TryLock().
//
// waitable: object to wait on.
// timeout:  forwarded unchanged to whichever strategy handles the wait.
//
// Returns the result reported by the strategy that handled the wait.
AUKN_SYM bool WaitFor(IWaitable *waitable, AuUInt64 timeout)
{
    bool outcome = false;

    // Short-circuiting keeps the order: the implementor path is only tried
    // when the OS-handle path declined.
    const bool handled = WaitLogicHandledByNTOS(outcome, waitable, timeout) ||
                         WaitLogicHandledByImplementor(outcome, waitable, timeout);

    if (handled)
    {
        return outcome;
    }

    return YieldPoll(true, timeout, [waitable]()
    {
        return waitable->TryLock();
    });
}
|
|
|
|
// Reports whether the whole list can be waited on with one native
// multi-object wait (NT-derived targets only).
//
// That requires every waitable to expose an OS handle, and the list size to
// be one the API accepts: WaitForMultipleObjectsEx rejects a count of zero and
// anything above MAXIMUM_WAIT_OBJECTS, so those lists are reported as not
// handleable rather than being sent into a call that is bound to fail.
//
// Always returns false on targets that are not NT-derived.
static bool CanWin32HandleAll(const AuList<IWaitable *> &waitables)
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
    if ((waitables.size() == 0) || (waitables.size() > MAXIMUM_WAIT_OBJECTS))
    {
        return false;
    }

    for (IWaitable *waitable : waitables)
    {
        AuMach handle = 0;
        if (!waitable->HasOSHandle(handle))
        {
            return false;
        }
    }

    return true;
#else
    return false;
#endif
}
|
|
|
|
// Waits until ALL waitables are signalled using one native, alertable
// multi-object wait (NT-derived targets only).
//
// waitables: objects to wait on; each one must expose an OS handle (asserted).
// timeoutMs: timeout in milliseconds; zero means "wait forever".
//
// Returns false when the timeout elapsed, true otherwise. An empty list has
// nothing to wait for and returns true. Always returns false on targets that
// are not NT-derived.
static bool Win32HandleMultiple(const AuList<IWaitable *> &waitables, AuUInt64 timeoutMs)
{
#if defined(AURORA_IS_MODERNNT_DERIVED)
    // The API rejects a handle count of zero.
    if (waitables.size() == 0)
    {
        return true;
    }

    AuList<HANDLE> winWaitables;
    winWaitables.resize(waitables.size());

    std::transform(waitables.begin(), waitables.end(), winWaitables.begin(), [](IWaitable *waitable) -> HANDLE
    {
        AuMach handle = 0;
        auto status = waitable->HasOSHandle(handle);
        SysAssert(status, "OS Handle was NULL");
        return reinterpret_cast<HANDLE>(handle);
    });

    // The API takes a 32-bit millisecond count and reserves INFINITE, so a
    // 64-bit timeout is waited in bounded slices instead of being truncated.
    const AuUInt64 kMaxSliceMs = static_cast<AuUInt64>(INFINITE) - 1;
    const AuUInt64 startNs = Time::SteadyClockNS();

    for (;;)
    {
        DWORD sliceMs = INFINITE;
        bool finalSlice = false;

        if (timeoutMs)
        {
            const AuUInt64 elapsedMs = (Time::SteadyClockNS() - startNs) / 1000000;
            if (elapsedMs >= timeoutMs)
            {
                return false;
            }

            const AuUInt64 remainingMs = timeoutMs - elapsedMs;
            finalSlice = (remainingMs <= kMaxSliceMs);
            sliceMs = static_cast<DWORD>(finalSlice ? remainingMs : kMaxSliceMs);
        }

        const DWORD status = WaitForMultipleObjectsEx(static_cast<DWORD>(winWaitables.size()), winWaitables.data(), TRUE, sliceMs, TRUE);
        SysAssert(status != WAIT_FAILED, "Internal Win32 Error {}", GetLastError());

        if (status == WAIT_IO_COMPLETION)
        {
            // The alertable wait was interrupted to run an APC; the objects
            // are NOT signalled yet, so keep waiting for the remaining time.
            continue;
        }

        if (status != WAIT_TIMEOUT)
        {
            return true;
        }

        if (finalSlice)
        {
            // The slice covered everything that was left of the timeout.
            return false;
        }
    }
#else
    return false;
#endif
}
|
|
|
|
// Waits until every waitable in the list has been acquired.
//
// When the native multi-object wait can service the whole list it is used.
// Otherwise the waitables are acquired in list order by polling TryLock():
// each poll continues from the first object that is not held yet and stops at
// the first one that cannot be taken.
//
// waitables: objects to acquire.
// timeout:   forwarded unchanged to the native wait or to YieldPoll.
//
// Returns true when everything was acquired. On failure every object that
// was acquired by the polling path is unlocked again before returning false.
AUKN_SYM bool WaitFor(const AuList<IWaitable *> &waitables, AuUInt64 timeout)
{
    if (CanWin32HandleAll(waitables))
    {
        return Win32HandleMultiple(waitables, timeout);
    }

    // One flag per waitable, indexed like `waitables`. A flat list is enough:
    // the indices are dense, so no hashing (or index narrowing) is needed.
    AuList<bool> acquired;
    acquired.resize(waitables.size()); // value-initialised to false

    // yield for all
    auto status = YieldPoll(true, timeout, [&]()
    {
        for (AuMach i = 0; i < waitables.size(); i++)
        {
            if (acquired[i])
            {
                continue;
            }

            if (!waitables[i]->TryLock())
            {
                return false;
            }

            acquired[i] = true;
        }

        return true;
    });

    // From the perspective of locks: should there be a timeout, everything
    // that was taken so far has to be released again.
    if (!status)
    {
        for (AuMach i = 0; i < waitables.size(); i++)
        {
            if (acquired[i])
            {
                waitables[i]->Unlock();
            }
        }
    }

    return status;
}
|
|
|
|
// Exported wrapper around YieldToOtherThread(): hands the processor to
// another thread using the platform's yield primitive.
AUKN_SYM void ContextYield()
{
    return YieldToOtherThread();
}
|
|
} |