[*] Massive perf boost by removing the atomic AND (AuAtomicAnd) used to clear the lock byte on x86/x64

[*] Rename the ambiguous IWaitable::Lock(timeoutMs) overload to LockMS to prevent collisions between `final` overrides and `using` declarations
This commit is contained in:
Reece Wilson 2023-04-03 08:21:44 +01:00
parent 39072499dd
commit d755a9d651
30 changed files with 125 additions and 67 deletions

View File

@ -21,7 +21,7 @@ namespace Aurora::Threading
virtual bool HasOSHandle(AuMach &mach) = 0;
virtual bool HasLockImplementation() = 0;
virtual void Lock() = 0;
virtual bool Lock(AuUInt64 timeout /*=0*/) = 0;
virtual bool LockMS(AuUInt64 timeout /*=0*/) = 0;
virtual bool LockNS(AuUInt64 timeout /*=0*/) = 0;
virtual bool TryLock() = 0;
virtual void Unlock() = 0;

View File

@ -24,7 +24,7 @@ namespace Aurora::Threading::Primitives
inline void Wait(AuUInt32 ms)
{
Lock(ms);
LockMS(ms);
}
virtual void Reset() = 0;

View File

@ -9,5 +9,5 @@
namespace Aurora::Threading::Primitives
{
AUKN_SHARED_SOO(Mutex, IWaitable, kPrimitiveSizeMutex);
AUKN_SHARED_SOO(Mutex, IHyperWaitable, kPrimitiveSizeMutex);
}

View File

@ -7,6 +7,11 @@
***/
#pragma once
#if defined(AURORA_COMPILER_MSVC)
#pragma warning(push)
#pragma warning(disable: 4141)
#endif
namespace Aurora::Threading::Primitives
{
static const auto kDefaultPrimitiveSize = 128;
@ -29,6 +34,9 @@ namespace Aurora::Threading::Primitives
static const auto kPrimitiveSizeCond = kPrimitiveSizeNTCond;
static const auto kPrimitiveSizeCondMutex = kPrimitiveSizeNTCondMutex;
// Enable the hyper mutex: it's time to overtake the STL, even in Windows 11 micro-benchmarks
#define AURT_ENABLE_HYPER_MUTEX
//#elif defined(AURORA_IS_LINUX_DERIVED)
//
@ -43,4 +51,32 @@ namespace Aurora::Threading::Primitives
static const auto kPrimitiveSizeCondMutex = kDefaultPrimitiveSize;
#endif
/// Waitable base that owns the lock word and provides a non-overridable,
/// inlinable Lock() fast path. Derived primitives supply the contended path
/// through SlowLock().
struct AUKN_SYM HyperWaitable : IWaitable
{
    /// Tries to take the lock with a single atomic test-and-set on state_
    /// (argument 0u, presumably the bit index). A result of 0 is treated as a
    /// successful acquisition; any other result defers to SlowLock().
    auline inline void Lock() final override
    {
        const bool bAcquired = AuAtomicTestAndSet(&this->state_, 0u) == 0;

        if (!bAcquired)
        {
            SlowLock();
        }
    }

    /// Called by Lock() only after the fast-path test-and-set has failed.
    virtual void SlowLock() = 0;

protected:
    // Lock word shared with derived primitives; zero-initialised.
    volatile AuUInt32 state_ {};
};
// IHyperWaitable names the base interface selected at compile time:
// HyperWaitable when AURT_ENABLE_HYPER_MUTEX is defined, otherwise IWaitable.
#if defined(AURT_ENABLE_HYPER_MUTEX)
using IHyperWaitable = HyperWaitable;
#else
using IHyperWaitable = IWaitable;
#endif
}
#if defined(AURORA_COMPILER_MSVC)
#pragma warning(pop)
#endif

View File

@ -7,27 +7,36 @@
***/
#pragma once
#if defined(AURORA_COMPILER_MSVC)
#pragma warning(push)
#pragma warning(disable: 4275)
#endif
namespace Aurora::Threading::Primitives
{
struct AUKN_SYM SpinLock : IWaitable
struct AUKN_SYM SpinLock : HyperWaitable
{
using HyperWaitable::Lock;
SpinLock();
bool HasOSHandle(AuMach &mach) override;
bool HasLockImplementation() override;
bool Lock(AuUInt64 timeout) override;
bool LockMS(AuUInt64 timeout) override;
bool LockNS(AuUInt64 timeout) override;
bool TryLock() override;
void Lock() override;
void SlowLock() override;
void Unlock() override;
private:
AuAtomicInt value_;
};
AUKN_SHARED_API(SpinLock, SpinLock);
using SpinLockSOO = SpinLock;
using SpinLockSOO_t = SpinLock;
}
#if defined(AURORA_COMPILER_MSVC)
#pragma warning(pop)
#endif

View File

@ -41,7 +41,7 @@ namespace Aurora::Threading::Waitables
SysPanic("BooleanWaitable is not lockable");
}
bool Lock(AuUInt64 timeout) override
bool LockMS(AuUInt64 timeout) override
{
SysPanic("BooleanWaitable is not lockable");
return false;

View File

@ -41,7 +41,7 @@ namespace Aurora::Threading::Waitables
SysPanic("CBWaitable is not lockable");
}
bool Lock(AuUInt64 timeout) override
bool LockMS(AuUInt64 timeout) override
{
SysPanic("CBWaitable is not lockable");
return false;

View File

@ -85,7 +85,7 @@ namespace Aurora::Grug
if (gArrows)
{
// grug sleep for 100ms or until poked
if (gArrows->Lock(kGrugSleepMs))
if (gArrows->LockMS(kGrugSleepMs))
{
DequeueOneArrow();
}

View File

@ -15,8 +15,8 @@ namespace Aurora::Logging
using namespace Console;
static AuList<AuTuple<Logger *, AuUInt8, ConsoleMessage>> gLogTasks;
static AuThreadPrimitives::SpinLock gGlobalSpin;
static AuThreadPrimitives::SpinLock gTaskSpin;
static AuThreadPrimitives::MutexSOO gGlobalSpin;
static AuThreadPrimitives::MutexSOO gTaskSpin;
static AuList<Logger *> gFlushableLoggers;
Logger::Logger(const AuList<AuSPtr<IBasicSink>> &sinks) : sinks(sinks)

View File

@ -231,7 +231,7 @@ namespace Aurora::Threading
return false;
}
status = waitable->Lock(timeout);
status = waitable->LockMS(timeout);
return true;
}

View File

@ -76,7 +76,13 @@ namespace Aurora::Threading::Primitives
#else
auto &uValueRef = this->lock_.uWaitCount;
#if defined(AURORA_ARCH_X86) || defined(AURORA_ARCH_X64)
// Intel 64 and IA-32 Architectures Software Developer's Manual, Volume 3A,
// Section 9.1.1, "Guaranteed Atomic Operations": reading or writing a byte is always atomic
*(AuUInt8 *)&uValueRef = 0;
#else
AuAtomicAnd(&uValueRef, ~0xFFu);
#endif
while (true)
{

View File

@ -49,11 +49,11 @@ namespace Aurora::Threading::Primitives
void CriticalSection::Lock()
{
auto status = Lock(0);
auto status = LockMS(0);
SysAssert(status, "Spurious critical section wakeup");
}
bool CriticalSection::Lock(AuUInt64 timeout)
bool CriticalSection::LockMS(AuUInt64 timeout)
{
auto cur = GetThreadCookie();
@ -63,7 +63,7 @@ namespace Aurora::Threading::Primitives
return true;
}
if (!this->mutex_.Lock(timeout))
if (!this->mutex_.LockMS(timeout))
{
return false;
}

View File

@ -21,13 +21,13 @@ namespace Aurora::Threading::Primitives
bool HasLockImplementation() override;
bool TryLock() override;
void Lock() override;
bool Lock(AuUInt64 timeout) override;
bool LockMS(AuUInt64 timeout) override;
bool LockNS(AuUInt64 timeout) override;
void Unlock() override;
private:
Mutex mutex_;
ThreadCookie_t owner_;
std::atomic<int> count_;
AuUInt32 count_;
};
}

View File

@ -25,7 +25,7 @@ namespace Aurora::Threading::Primitives
return true;
}
bool EventImpl::Lock(AuUInt64 uTimeout /*=0*/)
bool EventImpl::LockMS(AuUInt64 uTimeout /*=0*/)
{
return LockNS(AuMSToNS<AuUInt64>(uTimeout));
}
@ -116,7 +116,7 @@ namespace Aurora::Threading::Primitives
void EventImpl::Lock()
{
auto ok = Lock(0);
auto ok = LockNS(0);
SysAssert(ok);
}

View File

@ -18,7 +18,7 @@ namespace Aurora::Threading::Primitives
~EventImpl();
bool Init();
bool Lock(AuUInt64 timeout /*=0*/) override;
bool LockMS(AuUInt64 timeout /*=0*/) override;
bool LockNS(AuUInt64 timeout /*=0*/) override;
bool TryLock() override;
void Reset() override;

View File

@ -75,7 +75,7 @@ namespace Aurora::Threading::Primitives
});
}
bool Mutex::Lock(AuUInt64 uTimeout)
bool Mutex::LockMS(AuUInt64 uTimeout)
{
return LockNS(AuMSToNS<AuUInt64>(uTimeout));
}
@ -147,7 +147,7 @@ namespace Aurora::Threading::Primitives
void Mutex::Lock()
{
auto status = Lock(0);
auto status = LockMS(0);
SysAssert(status, "Couldn't lock mutex");
}

View File

@ -18,7 +18,7 @@ namespace Aurora::Threading::Primitives
bool TryLock() override;
bool HasLockImplementation() override;
void Lock() override;
bool Lock(AuUInt64 timeout) override;
bool LockMS(AuUInt64 timeout) override;
bool LockNS(AuUInt64 timeout) override;
void Unlock() override;

View File

@ -55,14 +55,19 @@ namespace Aurora::Threading::Primitives
return true;
}
void Mutex::Lock()
void Mutex::SlowLock()
{
auto status = Lock(0);
auto status = LockNS(0);
SysAssert(status, "Couldn't lock Mutex object");
}
bool Mutex::Lock(AuUInt64 uTimeout)
bool Mutex::LockMS(AuUInt64 uTimeout)
{
if (AuAtomicTestAndSet(&this->state_, 0) == 0)
{
return true;
}
return LockNS(AuMSToNS<AuUInt64>(uTimeout));
}
@ -239,8 +244,13 @@ namespace Aurora::Threading::Primitives
auto &uValueRef = this->state_;
// clear lock and wake-pending bit, leaving waking up bit alone
#if defined(AURORA_ARCH_X86) || defined(AURORA_ARCH_X64)
// Intel 64 and IA-32 Architectures Software Developer's Manual, Volume 3A,
// Section 9.1.1, "Guaranteed Atomic Operations": reading or writing a byte is always atomic
*(AuUInt8 *)&uValueRef = 0;
#else
AuAtomicAnd(&uValueRef, ~0xFFu);
#endif
while (true)
{
@ -279,12 +289,12 @@ namespace Aurora::Threading::Primitives
}
}
AUKN_SYM IWaitable *MutexNew()
AUKN_SYM IHyperWaitable *MutexNew()
{
return _new Mutex();
}
AUKN_SYM void MutexRelease(IWaitable *pMutex)
AUKN_SYM void MutexRelease(IHyperWaitable *pMutex)
{
AuSafeDelete<Mutex *>(pMutex);
}

View File

@ -9,7 +9,7 @@
namespace Aurora::Threading::Primitives
{
struct Mutex : IWaitable
struct Mutex : IHyperWaitable
{
Mutex();
~Mutex();
@ -17,8 +17,8 @@ namespace Aurora::Threading::Primitives
bool HasOSHandle(AuMach &mach) override;
bool TryLock() override;
bool HasLockImplementation() override;
void Lock() override;
bool Lock(AuUInt64 timeout) override;
void SlowLock() override;
bool LockMS(AuUInt64 timeout) override;
bool LockNS(AuUInt64 timeout) override;
void Unlock() override;
@ -27,6 +27,5 @@ namespace Aurora::Threading::Primitives
SRWLOCK atomicHolder_;
CONDITION_VARIABLE wakeup_;
#endif
volatile AuUInt32 state_{};
};
}

View File

@ -46,7 +46,7 @@ namespace Aurora::Threading::Primitives
return (old == 0 && AuAtomicCompareExchange(&this->value_, 1, old) == old);
}
bool Mutex::Lock(AuUInt64 uTimeout)
bool Mutex::LockMS(AuUInt64 uTimeout)
{
return LockNS(AuMSToNS<AuUInt64>(uTimeout));
}
@ -114,16 +114,14 @@ namespace Aurora::Threading::Primitives
void Mutex::Lock()
{
auto status = Lock(0);
auto status = LockNS(0);
SysAssert(status, "Couldn't lock mutex");
}
void Mutex::Unlock()
{
{
AU_LOCK_GUARD(this->mutex_);
this->value_ = 0;
}
auto ret = ::pthread_cond_signal(&this->pthreadCv_);
SysAssert(ret == 0, "Couldn't wake any mutex waiter");
}

View File

@ -20,7 +20,7 @@ namespace Aurora::Threading::Primitives
bool TryLock() override;
bool HasLockImplementation() override;
void Lock() override;
bool Lock(AuUInt64 timeout) override;
bool LockMS(AuUInt64 timeout) override;
bool LockNS(AuUInt64 timeout) override;
void Unlock() override;

View File

@ -24,7 +24,7 @@ namespace Aurora::Threading::Primitives
}
template<bool bIsReadView, typename T>
bool RWLockAccessView<bIsReadView, T>::Lock(AuUInt64 timeout)
bool RWLockAccessView<bIsReadView, T>::LockMS(AuUInt64 timeout)
{
if constexpr (bIsReadView)
{

View File

@ -24,7 +24,7 @@ namespace Aurora::Threading::Primitives
}
bool Lock(AuUInt64 timeout) override;
bool LockMS(AuUInt64 timeout) override;
bool LockNS(AuUInt64 timeout) override;
bool TryLock() override;
@ -41,7 +41,7 @@ namespace Aurora::Threading::Primitives
void Lock() override
{
SysAssert(Lock(0));
SysAssert(LockNS(0));
}
void Unlock() override;

View File

@ -74,7 +74,7 @@ namespace Aurora::Threading::Primitives
});
}
bool Semaphore::Lock(AuUInt64 uTimeout)
bool Semaphore::LockMS(AuUInt64 uTimeout)
{
return LockNS(AuMSToNS<AuUInt64>(uTimeout));
}
@ -150,7 +150,7 @@ namespace Aurora::Threading::Primitives
void Semaphore::Lock()
{
auto status = Lock(0);
auto status = LockNS(0);
SysAssert(status, "Couldn't lock semaphore");
}

View File

@ -17,7 +17,7 @@ namespace Aurora::Threading::Primitives
bool HasOSHandle(AuMach &mach) override;
bool HasLockImplementation() override;
bool TryLock() override;
bool Lock(AuUInt64 timeout) override;
bool LockMS(AuUInt64 timeout) override;
bool LockNS(AuUInt64 timeout) override;
void Lock() override;
void Unlock(long count) override;

View File

@ -43,7 +43,7 @@ namespace Aurora::Threading::Primitives
});
}
bool Semaphore::Lock(AuUInt64 uTimeout)
bool Semaphore::LockMS(AuUInt64 uTimeout)
{
return LockNS(AuMSToNS<AuUInt64>(uTimeout));
}
@ -130,7 +130,7 @@ namespace Aurora::Threading::Primitives
void Semaphore::Lock()
{
auto status = Lock(0);
auto status = LockNS(0);
SysAssert(status, "Couldn't lock semaphore");
}

View File

@ -20,7 +20,7 @@ namespace Aurora::Threading::Primitives
bool HasOSHandle(AuMach &mach) override;
bool HasLockImplementation() override;
bool TryLock() override;
bool Lock(AuUInt64 timeout) override;
bool LockMS(AuUInt64 timeout) override;
bool LockNS(AuUInt64 timeout) override;
void Lock() override;
void Unlock(long count) override;

View File

@ -46,7 +46,7 @@ namespace Aurora::Threading::Primitives
return (old != 0 && AuAtomicCompareExchange(&this->value_, old - 1, old) == old);
}
bool Semaphore::Lock(AuUInt64 uTimeout)
bool Semaphore::LockMS(AuUInt64 uTimeout)
{
return LockNS(AuMSToNS<AuUInt64>(uTimeout));
}
@ -117,7 +117,7 @@ namespace Aurora::Threading::Primitives
void Semaphore::Lock()
{
auto status = Lock(0);
auto status = LockNS(0);
SysAssert(status, "Couldn't lock semaphore");
}

View File

@ -19,7 +19,7 @@ namespace Aurora::Threading::Primitives
bool HasOSHandle(AuMach &mach) override;
bool HasLockImplementation() override;
bool TryLock() override;
bool Lock(AuUInt64 timeout) override;
bool LockMS(AuUInt64 timeout) override;
bool LockNS(AuUInt64 timeout) override;
void Lock() override;
void Unlock(long count) override;

View File

@ -25,7 +25,7 @@ namespace Aurora::Threading::Primitives
SpinLock::SpinLock()
{
value_ = 0;
state_ = 0;
}
bool SpinLock::HasOSHandle(AuMach &mach)
@ -35,7 +35,7 @@ namespace Aurora::Threading::Primitives
bool SpinLock::TryLock()
{
return AuAtomicTestAndSet(&this->value_, 0) == 0;
return AuAtomicTestAndSet(&this->state_, 0) == 0;
}
bool SpinLock::HasLockImplementation()
@ -43,9 +43,9 @@ namespace Aurora::Threading::Primitives
return true;
}
void SpinLock::Lock()
void SpinLock::SlowLock()
{
auto status = Lock(0);
auto status = LockNS(0);
SysAssert(status, "Couldn't lock Mutex object");
}
@ -53,10 +53,10 @@ namespace Aurora::Threading::Primitives
{
if (timeout == 0)
{
while (AuAtomicTestAndSet(&this->value_, 0))
while (AuAtomicTestAndSet(&this->state_, 0))
{
long count = 0;
while (this->value_)
while (this->state_)
{
YieldCpu(count);
}
@ -67,10 +67,10 @@ namespace Aurora::Threading::Primitives
AuUInt64 startTime = AuTime::HighResClockNS();
AuUInt64 endTime = startTime + timeout;
while (AuAtomicTestAndSet(&this->value_, 0))
while (AuAtomicTestAndSet(&this->state_, 0))
{
long count = 0;
while (value_)
while (this->state_)
{
if (endTime <= AuTime::HighResClockNS())
{
@ -84,13 +84,13 @@ namespace Aurora::Threading::Primitives
return true;
}
bool SpinLock::Lock(AuUInt64 timeout)
bool SpinLock::LockMS(AuUInt64 timeout)
{
return LockNS(AuMSToNS<AuUInt64>(timeout));
}
void SpinLock::Unlock()
{
this->value_ = 0;
this->state_ = 0;
}
}