/*** Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved. File: AuSemaphore.NT.cpp Date: 2021-6-12 Author: Reece ***/ #include #include "AuSemaphore.Generic.hpp" #include "AuSemaphore.NT.hpp" #include "SMTYield.hpp" #include "../AuWakeInternal.hpp" #if !defined(_AURUNTIME_GENERIC_SEMAPHORE) namespace Aurora::Threading::Primitives { SemaphoreImpl::SemaphoreImpl(AuUInt16 uIntialValue) { this->dwState_ = uIntialValue; } SemaphoreImpl::~SemaphoreImpl() { } bool SemaphoreImpl::HasOSHandle(AuMach &mach) { return false; } bool SemaphoreImpl::HasLockImplementation() { return true; } bool SemaphoreImpl::TryLockNoSpin() { AuUInt32 uOld {}; while ((uOld = this->dwState_)) { if (AuAtomicCompareExchange(&this->dwState_, uOld - 1, uOld) == uOld) { return true; } } return false; } bool SemaphoreImpl::TryLockHeavy() { return DoTryIf([=]() { return this->TryLockNoSpin(); }); } bool SemaphoreImpl::TryLock() { if (ThrdCfg::gPreferNtSemaphoreSpinTryLock) { return this->TryLockHeavy(); } else { return this->TryLockNoSpin(); } } AuUInt32 *SemaphoreImpl::GetSleepCounter() { return (AuUInt32 *)&this->var; } bool SemaphoreImpl::LockMS(AuUInt64 uTimeout) { return this->LockNS(AuMSToNS(uTimeout)); } bool SemaphoreImpl::LockNS(AuUInt64 uTimeout) { AuUInt64 uStart {}; AuUInt64 uEnd {}; if (this->TryLockNoSpin()) { return true; } if (uTimeout) { uStart = AuTime::SteadyClockNS(); uEnd = uStart + uTimeout; } if (this->TryLockHeavy()) { return true; } if (gUseNativeWaitSemapahore) { auto pCounter = this->GetSleepCounter(); while (!this->TryLockNoSpin()) { static const AuUInt32 kExpect { 0 }; AuAtomicAdd(pCounter, 1u); bool bStatus = InternalLTSWaitOnAddressHighRes(&this->dwState_, &kExpect, sizeof(kExpect), uEnd); AuAtomicSub(pCounter, 1u); if (!bStatus) { return false; } } return true; } else { this->mutex.Lock(); while (!this->TryLockNoSpin()) { if (uTimeout != 0) { uStart = Time::SteadyClockNS(); if (uStart >= uEnd) { this->mutex.Unlock(); return false; } var.WaitForSignalNsEx(&this->mutex, uEnd - uStart); } else { var.WaitForSignalNsEx(&this->mutex, 0); } } this->mutex.Unlock(); return true; } } bool SemaphoreImpl::LockAbsNS(AuUInt64 qwTimeoutAbs) { if (this->TryLockHeavy()) { return true; } if (gUseNativeWaitSemapahore) { AuUInt32 uYieldCounter {}; auto pCounter = this->GetSleepCounter(); while (!this->TryLockNoSpin()) { static const AuUInt32 kExpect { 0 }; AuAtomicAdd(pCounter, 1u); bool bStatus = InternalLTSWaitOnAddressHighRes(&this->dwState_, &kExpect, sizeof(kExpect), qwTimeoutAbs); AuAtomicSub(pCounter, 1u); if (!bStatus) { return false; } } return true; } else { this->mutex.Lock(); while (!this->TryLockNoSpin()) { if (qwTimeoutAbs != 0) { auto uStart = Time::SteadyClockNS(); if (uStart >= qwTimeoutAbs) { this->mutex.Unlock(); return false; } var.WaitForSignalNsEx(&this->mutex, qwTimeoutAbs - uStart); } else { var.WaitForSignalNsEx(&this->mutex, 0); } } this->mutex.Unlock(); return true; } } void SemaphoreImpl::Lock() { auto status = LockNS(0); SysAssert(status, "Couldn't lock semaphore"); } void SemaphoreImpl::Unlock(AuUInt16 uCount) { if (gUseNativeWaitSemapahore) { AuAtomicAdd(&this->dwState_, uCount); if (auto dwSleeping = AuAtomicLoad(this->GetSleepCounter())) { if (uCount == 1) { InternalLTSWakeOne((void *)&this->dwState_); } else { InternalLTSWakeCount((void *)&this->dwState_, AuMin(uCount, dwSleeping)); } } } else { // realistically, we cant use the sleep counter optimization trick under windows 7 // we would have to expand our already oversized by 8, 24-byte x86_64 semaphore for a trivial perf boost // we cant efficiently access the conditions state or atomic guarantees... this->mutex.Lock(); // do not [re]move this lock fence AuAtomicAdd(&this->dwState_, uCount); // this could be moved anywhere above the unlock, including above the lock. this->mutex.Unlock(); if (uCount == 1) { this->var.Signal(); } else { if (uCount >= 3) // ...this is the only optimization we can hope to achieve { // we can always save a few cycles by doing an atomic broadcast on a contended semaphore // waking up the wrong amount of threads probably doesn't matter at this point, on these target platforms this->var.Broadcast(); } else // ...otherwise, do the handshake just a few times { // doing the condvar handshake for the exact amount of threads you need, once the cond is contended, can pay off for (AU_ITERATE_N(i, uCount)) { (void)i; this->var.Signal(); } } } } } void SemaphoreImpl::Unlock() { return Unlock(1); } AUKN_SYM ISemaphore *SemaphoreNew(AuUInt16 uIntialValue) { return _new SemaphoreImpl(uIntialValue); } AUKN_SYM void SemaphoreRelease(ISemaphore *pSemaphore) { AuSafeDelete(pSemaphore); } AUROXTL_INTERFACE_SOO_SRC_EX(AURORA_SYMBOL_EXPORT, Semaphore, SemaphoreImpl) } #endif