AuroraRuntime/Source/Threading/Primitives/AuSemaphore.NT.cpp

/***
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: AuSemaphore.NT.cpp
Date: 2021-6-12
Author: Reece
***/
#include <Source/RuntimeInternal.hpp>
#include "AuSemaphore.Generic.hpp"
#include "AuSemaphore.NT.hpp"
#include "SMTYield.hpp"
#include "../AuWakeInternal.hpp"
#if !defined(_AURUNTIME_GENERIC_SEMAPHORE)
namespace Aurora::Threading::Primitives
{
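    // NT semaphore implementation. When gUseNativeWaitSemapahore is set
    // (kernels with WaitOnAddress-style waits, i.e. Windows 8 and later),
    // waiters park directly on the atomic state word; otherwise (e.g.
    // Windows 7) a mutex + condition-variable pair emulates the same
    // semantics.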
    SemaphoreImpl::SemaphoreImpl(AuUInt16 uInitialValue)
    {
        this->dwState_ = uInitialValue;
    }

    SemaphoreImpl::~SemaphoreImpl()
    {
    }
    bool SemaphoreImpl::HasOSHandle(AuMach &mach)
    {
        return false;
    }

    bool SemaphoreImpl::HasLockImplementation()
    {
        return true;
    }
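    // Lock-free acquire attempt: CAS-decrement the permit count, retrying only
    // while the count is observed non-zero; fails as soon as it reads zero.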
    bool SemaphoreImpl::TryLockNoSpin()
    {
        AuUInt32 uOld {};
        while ((uOld = this->dwState_))
        {
            if (AuAtomicCompareExchange(&this->dwState_, uOld - 1, uOld) == uOld)
            {
                return true;
            }
        }
        return false;
    }
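    // Spinning acquire attempt: DoTryIf (SMTYield.hpp) retries the no-spin
    // path inside the runtime's spin/yield loop before giving up.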
    bool SemaphoreImpl::TryLockHeavy()
    {
        return DoTryIf([=]()
        {
            return this->TryLockNoSpin();
        });
    }
    bool SemaphoreImpl::TryLock()
    {
        if (ThrdCfg::gPreferNtSemaphoreSpinTryLock)
        {
            return this->TryLockHeavy();
        }
        else
        {
            return this->TryLockNoSpin();
        }
    }
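    // The native-wait path needs a count of sleeping threads so that Unlock
    // can skip the wake call when nobody is parked. Rather than growing the
    // object, the condition variable's storage is reinterpreted as that
    // counter; the condvar itself is only ever touched on the fallback path,
    // so the two uses never coexist.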
    AuUInt32 *SemaphoreImpl::GetSleepCounter()
    {
        return (AuUInt32 *)&this->var;
    }
    bool SemaphoreImpl::LockMS(AuUInt64 uTimeout)
    {
        return this->LockNS(AuMSToNS<AuUInt64>(uTimeout));
    }
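    // Timed acquire, relative nanoseconds (0 = wait forever). Fast paths
    // first: one no-spin attempt, then a spinning attempt; only then park,
    // either on the state word (native path) or on the condvar (fallback).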
    bool SemaphoreImpl::LockNS(AuUInt64 uTimeout)
    {
        AuUInt64 uStart {};
        AuUInt64 uEnd {};

        if (this->TryLockNoSpin())
        {
            return true;
        }

        if (uTimeout)
        {
            uStart = AuTime::SteadyClockNS();
            uEnd = uStart + uTimeout;
        }

        if (this->TryLockHeavy())
        {
            return true;
        }

        if (gUseNativeWaitSemapahore)
        {
            auto pCounter = this->GetSleepCounter();
            while (!this->TryLockNoSpin())
            {
                static const AuUInt32 kExpect { 0 };
                AuAtomicAdd(pCounter, 1u);
                bool bStatus = InternalLTSWaitOnAddressHighRes(&this->dwState_, &kExpect, sizeof(kExpect), uEnd);
                AuAtomicSub(pCounter, 1u);
                if (!bStatus)
                {
                    return false;
                }
            }
            return true;
        }
        else
        {
            this->mutex.Lock();
            while (!this->TryLockNoSpin())
            {
                if (uTimeout != 0)
                {
                    uStart = AuTime::SteadyClockNS();
                    if (uStart >= uEnd)
                    {
                        this->mutex.Unlock();
                        return false;
                    }
                    var.WaitForSignalNsEx(&this->mutex, uEnd - uStart);
                }
                else
                {
                    var.WaitForSignalNsEx(&this->mutex, 0);
                }
            }
            this->mutex.Unlock();
            return true;
        }
    }
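    // As LockNS, but against an absolute steady-clock deadline (0 = wait
    // forever), the same convention InternalLTSWaitOnAddressHighRes expects.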
    bool SemaphoreImpl::LockAbsNS(AuUInt64 qwTimeoutAbs)
    {
        if (this->TryLockHeavy())
        {
            return true;
        }

        if (gUseNativeWaitSemapahore)
        {
            auto pCounter = this->GetSleepCounter();
            while (!this->TryLockNoSpin())
            {
                static const AuUInt32 kExpect { 0 };
                AuAtomicAdd(pCounter, 1u);
                bool bStatus = InternalLTSWaitOnAddressHighRes(&this->dwState_, &kExpect, sizeof(kExpect), qwTimeoutAbs);
                AuAtomicSub(pCounter, 1u);
                if (!bStatus)
                {
                    return false;
                }
            }
            return true;
        }
        else
        {
            this->mutex.Lock();
            while (!this->TryLockNoSpin())
            {
                if (qwTimeoutAbs != 0)
                {
                    auto uStart = AuTime::SteadyClockNS();
                    if (uStart >= qwTimeoutAbs)
                    {
                        this->mutex.Unlock();
                        return false;
                    }
                    var.WaitForSignalNsEx(&this->mutex, qwTimeoutAbs - uStart);
                }
                else
                {
                    var.WaitForSignalNsEx(&this->mutex, 0);
                }
            }
            this->mutex.Unlock();
            return true;
        }
    }
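    // Unbounded acquire: a relative timeout of zero means block until a
    // permit becomes available.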
    void SemaphoreImpl::Lock()
    {
        auto status = this->LockNS(0);
        SysAssert(status, "Couldn't lock semaphore");
    }
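    // Release uCount permits. On the native path, the wake syscall is elided
    // when the sleep counter reads zero, and at most min(uCount, sleepers)
    // threads are woken; on the fallback path, the mutex acquire/release pair
    // orders the count increment against waiters entering WaitForSignalNsEx.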
    void SemaphoreImpl::Unlock(AuUInt16 uCount)
    {
        if (gUseNativeWaitSemapahore)
        {
            AuAtomicAdd<AuUInt32>(&this->dwState_, uCount);

            if (auto dwSleeping = AuAtomicLoad(this->GetSleepCounter()))
            {
                if (uCount == 1)
                {
                    InternalLTSWakeOne((void *)&this->dwState_);
                }
                else
                {
                    InternalLTSWakeCount((void *)&this->dwState_, AuMin<AuUInt32>(uCount, dwSleeping));
                }
            }
        }
        else
        {
            // Realistically, we can't use the sleep-counter optimization trick under Windows 7:
            // we would have to expand our already-oversized 24-byte x86_64 semaphore (8 bytes over budget) for a trivial perf boost,
            // and we can't efficiently access the condition variable's state or its atomic guarantees...
            this->mutex.Lock(); // do not [re]move this lock fence
            AuAtomicAdd<AuUInt32>(&this->dwState_, uCount); // this could be moved anywhere above the unlock, including above the lock
            this->mutex.Unlock();

            if (uCount == 1)
            {
                this->var.Signal();
            }
            else
            {
                if (uCount >= 3) // ...this is the only optimization we can hope to achieve
                {
                    // We can always save a few cycles by doing an atomic broadcast on a contended semaphore;
                    // waking the wrong number of threads probably doesn't matter at this point, on these target platforms.
                    this->var.Broadcast();
                }
                else // ...otherwise, do the handshake just a few times
                {
                    // Doing the condvar handshake for the exact number of threads needed, once the cond is contended, can pay off.
                    for (AU_ITERATE_N(i, uCount))
                    {
                        (void)i;
                        this->var.Signal();
                    }
                }
            }
        }
    }
    void SemaphoreImpl::Unlock()
    {
        this->Unlock(1);
    }
    AUKN_SYM ISemaphore *SemaphoreNew(AuUInt16 uInitialValue)
    {
        return _new SemaphoreImpl(uInitialValue);
    }

    AUKN_SYM void SemaphoreRelease(ISemaphore *pSemaphore)
    {
        AuSafeDelete<SemaphoreImpl *>(pSemaphore);
    }
    AUROXTL_INTERFACE_SOO_SRC_EX(AURORA_SYMBOL_EXPORT, Semaphore, SemaphoreImpl)
}
#endif
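
// A minimal usage sketch (illustrative only; it assumes the public ISemaphore
// interface mirrors the Lock/LockMS/Unlock methods implemented above and that
// the SemaphoreNew/SemaphoreRelease factory pair is linked in):
//
//     using namespace Aurora::Threading::Primitives;
//
//     ISemaphore *pSem = SemaphoreNew(0);   // start with zero permits
//
//     // producer:
//     pSem->Unlock(1);                      // publish one permit; wakes a sleeper if any
//
//     // consumer:
//     if (pSem->LockMS(5000))               // bounded wait of up to five seconds
//     {
//         // permit acquired
//     }
//
//     SemaphoreRelease(pSem);               // destroy through the matching factory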