AuroraRuntime/Source/Threading/Primitives/AuSemaphore.NT.cpp

277 lines
7.3 KiB
C++

/***
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: AuSemaphore.NT.cpp
Date: 2021-6-12
Author: Reece
***/
#include <Source/RuntimeInternal.hpp>
#include "AuSemaphore.Generic.hpp"
#include "AuSemaphore.NT.hpp"
#include "SMTYield.hpp"
#include "../AuWakeInternal.hpp"
#if !defined(_AURUNTIME_GENERIC_SEMAPHORE)
namespace Aurora::Threading::Primitives
{
// Constructs the semaphore with its permit counter preset to uIntialValue.
SemaphoreImpl::SemaphoreImpl(AuUInt16 uIntialValue)
{
    // The 16-bit initial count is widened into the state word that the
    // rest of this file manipulates with 32-bit atomics.
    dwState_ = static_cast<AuUInt32>(uIntialValue);
}
// Nothing to tear down explicitly; members clean up after themselves.
SemaphoreImpl::~SemaphoreImpl() = default;
// Reports whether an OS-level handle is available for this semaphore.
// This implementation never provides one: `mach` is left untouched and the
// result is always false.
bool SemaphoreImpl::HasOSHandle(AuMach &mach)
{
    (void)mach; // intentionally unused
    return false;
}
// Reports whether this object implements the lock operations itself.
// Always returns true for this implementation.
bool SemaphoreImpl::HasLockImplementation()
{
return true;
}
// Makes exactly one attempt to take a permit.
// Returns false when the counter was observed at zero, or when the single
// compare-exchange did not go through (the counter changed after it was read).
bool SemaphoreImpl::TryLockNoSpin()
{
    auto uObserved = this->dwState_;

    if (uObserved == 0)
    {
        // No permits available; do not touch the counter.
        return false;
    }

    // Decrement only if nobody modified the counter since it was sampled.
    return AuAtomicCompareExchange(&this->dwState_, uObserved - 1, uObserved) == uObserved;
}
bool SemaphoreImpl::TryLockHeavy()
{
return DoTryIf([=]()
{
return TryLockNoSpin();
});
}
// Non-blocking acquire. The runtime configuration flag
// threadingConfig.bPreferNtSemaphoreSpinTryLock selects between the
// DoTryIf-based attempt (TryLockHeavy) and a single attempt (TryLockNoSpin).
bool SemaphoreImpl::TryLock()
{
    if (!gRuntimeConfig.threadingConfig.bPreferNtSemaphoreSpinTryLock)
    {
        return TryLockNoSpin();
    }

    return TryLockHeavy();
}
// Returns the storage of `var` reinterpreted as a 32-bit counter.
// Within this file the counter is only touched on the native-wait path
// (gUseNativeWaitSemapahore), where it tracks threads that are currently
// inside the address wait so Unlock() can skip the wake call when it is zero.
AuUInt32 *SemaphoreImpl::GetSleepCounter()
{
    return reinterpret_cast<AuUInt32 *>(&this->var);
}
// Acquire with a relative timeout given in milliseconds.
// Converts the timeout to nanoseconds and forwards to LockNS(); the return
// value is LockNS()'s result.
bool SemaphoreImpl::LockMS(AuUInt64 uTimeout)
{
    const auto uTimeoutNS = AuMSToNS<AuUInt64>(uTimeout);
    return LockNS(uTimeoutNS);
}
// Acquires one permit, waiting up to a relative timeout.
//
// uTimeout: relative timeout in nanoseconds. 0 means "no deadline"; Lock()
//           relies on this by calling LockNS(0).
// Returns:  true once a permit has been taken, false if the wait gave up
//           (deadline reached, or the native wait reported failure).
bool SemaphoreImpl::LockNS(AuUInt64 uTimeout)
{
    // Fast path: one uncontended attempt before reading the clock.
    if (this->TryLockNoSpin())
    {
        return true;
    }

    // The deadline is fixed before the heavier attempt below, so time spent
    // there counts against the caller's timeout.
    const AuUInt64 uStart = AuTime::SteadyClockNS();
    AuUInt64 uEnd = uTimeout ? uStart + uTimeout : 0;

    if (this->TryLockHeavy())
    {
        return true;
    }

    if (gUseNativeWaitSemapahore)
    {
        // Comparison value handed to the address wait: only sleep while the
        // counter reads zero.
        static const AuUInt32 kExpect { 0 };
        auto pCounter = GetSleepCounter();

        for (;;)
        {
            auto old = this->dwState_;
            if ((old != 0) &&
                (AuAtomicCompareExchange(&this->dwState_, old - 1, old) == old))
            {
                return true;
            }

            // Advertise this thread as a sleeper for the duration of the wait
            // so Unlock() knows a wake call is required.
            AuAtomicAdd(pCounter, 1u);
            // NOTE(review): presumably blocks while dwState_ == kExpect until
            // the absolute time uEnd (0 = no deadline) — confirm in AuWakeInternal.hpp.
            bool bStatus = InternalLTSWaitOnAddressHighRes(&this->dwState_, (void *)&kExpect, sizeof(kExpect), uEnd);
            AuAtomicSub(pCounter, 1u);

            if (!bStatus)
            {
                return false;
            }
        }
    }
    else
    {
        this->mutex.Lock();

        while (!TryLockNoSpin())
        {
            // TryLockNoSpin() also fails when its single CAS loses a race
            // against a lock-free acquirer. If permits remain, retry instead of
            // sleeping: Unlock() may already have issued its signal(s), so
            // nothing would wake this thread for the permit that is still there.
            if (this->dwState_ != 0)
            {
                continue;
            }

            if (uTimeout != 0)
            {
                const AuUInt64 uNow = AuTime::SteadyClockNS();
                if (uNow >= uEnd)
                {
                    this->mutex.Unlock();
                    return false;
                }

                // Sleep for whatever remains of the caller's budget.
                var.WaitForSignalNsEx(&this->mutex, uEnd - uNow);
            }
            else
            {
                // No deadline requested.
                var.WaitForSignalNsEx(&this->mutex, 0);
            }
        }

        this->mutex.Unlock();
        return true;
    }
}
// Acquires one permit, waiting until an absolute deadline.
//
// qwTimeoutAbs: absolute deadline in nanoseconds, compared against
//               Time::SteadyClockNS(). 0 means "no deadline".
// Returns:      true once a permit has been taken, false if the wait gave up
//               (deadline reached, or the native wait reported failure).
bool SemaphoreImpl::LockAbsNS(AuUInt64 qwTimeoutAbs)
{
    if (this->TryLockHeavy())
    {
        return true;
    }

    if (gUseNativeWaitSemapahore)
    {
        // Comparison value handed to the address wait: only sleep while the
        // counter reads zero.
        static const AuUInt32 kExpect { 0 };
        auto pCounter = GetSleepCounter();

        for (;;)
        {
            auto old = this->dwState_;
            if ((old != 0) &&
                (AuAtomicCompareExchange(&this->dwState_, old - 1, old) == old))
            {
                return true;
            }

            // Advertise this thread as a sleeper for the duration of the wait
            // so Unlock() knows a wake call is required.
            AuAtomicAdd(pCounter, 1u);
            // NOTE(review): presumably blocks while dwState_ == kExpect until
            // qwTimeoutAbs (0 = no deadline) — confirm in AuWakeInternal.hpp.
            bool bStatus = InternalLTSWaitOnAddressHighRes(&this->dwState_, (void *)&kExpect, sizeof(kExpect), qwTimeoutAbs);
            AuAtomicSub(pCounter, 1u);

            if (!bStatus)
            {
                return false;
            }
        }
    }
    else
    {
        this->mutex.Lock();

        while (!TryLockNoSpin())
        {
            // TryLockNoSpin() also fails when its single CAS loses a race
            // against a lock-free acquirer. If permits remain, retry instead of
            // sleeping: Unlock() may already have issued its signal(s), so
            // nothing would wake this thread for the permit that is still there.
            if (this->dwState_ != 0)
            {
                continue;
            }

            if (qwTimeoutAbs != 0)
            {
                auto uNow = Time::SteadyClockNS();
                if (uNow >= qwTimeoutAbs)
                {
                    this->mutex.Unlock();
                    return false;
                }

                // The condvar wait takes a relative duration.
                var.WaitForSignalNsEx(&this->mutex, qwTimeoutAbs - uNow);
            }
            else
            {
                // No deadline requested.
                var.WaitForSignalNsEx(&this->mutex, 0);
            }
        }

        this->mutex.Unlock();
        return true;
    }
}
void SemaphoreImpl::Lock()
{
auto status = LockNS(0);
SysAssert(status, "Couldn't lock semaphore");
}
// Releases `count` permits and wakes waiters.
//
// Native-wait path: bump the counter, then issue a wake-by-address only if the
// sleep counter shows somebody is waiting (single wake for one permit, wake-all
// otherwise).
// Fallback path: bump the counter under the mutex, then signal the condition
// variable once, `count` times, or broadcast, depending on `count`.
void SemaphoreImpl::Unlock(AuUInt16 count)
{
    if (!gUseNativeWaitSemapahore)
    {
        // Realistically, the sleep-counter optimization cannot be used under
        // Windows 7: it would mean growing the already oversized (by 8)
        // 24-byte x86_64 semaphore for a trivial perf boost, and the condition
        // variable's state / atomic guarantees cannot be accessed efficiently.

        this->mutex.Lock(); // do not move or remove this lock fence
        // The add itself may sit anywhere before the Unlock() below,
        // including before the Lock() above.
        AuAtomicAdd<AuUInt32>(&this->dwState_, count);
        this->mutex.Unlock();

        if (count == 1)
        {
            this->var.Signal();
        }
        else if (count >= 3)
        {
            // The only optimization available here: a single broadcast on a
            // contended semaphore saves a few cycles. Waking the wrong number
            // of threads probably does not matter on these target platforms.
            this->var.Broadcast();
        }
        else
        {
            // Otherwise do the condvar handshake for exactly the number of
            // threads needed; once the condition is contended this can pay off.
            for (AU_ITERATE_N(i, count))
            {
                (void)i;
                this->var.Signal();
            }
        }

        return;
    }

    AuAtomicAdd<AuUInt32>(&this->dwState_, count);

    if (!AuAtomicLoad(GetSleepCounter()))
    {
        // Nobody is parked in the address wait; skip the wake call.
        return;
    }

    if (count == 1)
    {
        pWakeByAddressSingle(&this->dwState_);
    }
    else
    {
        pWakeByAddressAll(&this->dwState_);
    }
}
void SemaphoreImpl::Unlock()
{
return Unlock(1);
}
// Allocates a SemaphoreImpl with the given initial permit count and returns it
// through the ISemaphore interface. Pair with SemaphoreRelease().
AUKN_SYM ISemaphore *SemaphoreNew(AuUInt16 uIntialValue)
{
    ISemaphore *pSemaphore = _new SemaphoreImpl(uIntialValue);
    return pSemaphore;
}
// Destroys a semaphore previously handed out as an ISemaphore pointer.
// The pointer is passed to AuSafeDelete, which is instantiated for the
// concrete SemaphoreImpl type.
AUKN_SYM void SemaphoreRelease(ISemaphore *pSemaphore)
{
AuSafeDelete<SemaphoreImpl *>(pSemaphore);
}
// NOTE(review): presumably emits the exported glue that binds the public
// Semaphore interface to SemaphoreImpl — confirm against the macro definition.
AUROXTL_INTERFACE_SOO_SRC_EX(AURORA_SYMBOL_EXPORT, Semaphore, SemaphoreImpl)
}
#endif