[*] Improve NT semaphore: use a different internal API now. Might help uncontested servers with work queues using semaphores.

This commit is contained in:
Reece Wilson 2023-09-12 13:28:46 +01:00
parent ffd61d4d54
commit 403c186f0a

View File

@ -102,11 +102,12 @@ namespace Aurora::Threading::Primitives
if (gUseNativeWaitSemapahore) if (gUseNativeWaitSemapahore)
{ {
auto pCounter = this->GetSleepCounter();
while (!this->TryLockNoSpin()) while (!this->TryLockNoSpin())
{ {
static const AuUInt32 kExpect { 0 }; static const AuUInt32 kExpect { 0 };
auto pCounter = this->GetSleepCounter();
AuAtomicAdd(pCounter, 1u); AuAtomicAdd(pCounter, 1u);
bool bStatus = InternalLTSWaitOnAddressHighRes(&this->dwState_, &kExpect, sizeof(kExpect), uEnd); bool bStatus = InternalLTSWaitOnAddressHighRes(&this->dwState_, &kExpect, sizeof(kExpect), uEnd);
AuAtomicSub(pCounter, 1u); AuAtomicSub(pCounter, 1u);
@ -158,12 +159,12 @@ namespace Aurora::Threading::Primitives
if (gUseNativeWaitSemapahore) if (gUseNativeWaitSemapahore)
{ {
AuUInt32 uYieldCounter {}; AuUInt32 uYieldCounter {};
auto pCounter = this->GetSleepCounter();
while (!this->TryLockNoSpin()) while (!this->TryLockNoSpin())
{ {
static const AuUInt32 kExpect { 0 }; static const AuUInt32 kExpect { 0 };
auto pCounter = this->GetSleepCounter();
AuAtomicAdd(pCounter, 1u); AuAtomicAdd(pCounter, 1u);
bool bStatus = InternalLTSWaitOnAddressHighRes(&this->dwState_, &kExpect, sizeof(kExpect), qwTimeoutAbs); bool bStatus = InternalLTSWaitOnAddressHighRes(&this->dwState_, &kExpect, sizeof(kExpect), qwTimeoutAbs);
AuAtomicSub(pCounter, 1u); AuAtomicSub(pCounter, 1u);
@ -211,21 +212,21 @@ namespace Aurora::Threading::Primitives
SysAssert(status, "Couldn't lock semaphore"); SysAssert(status, "Couldn't lock semaphore");
} }
void SemaphoreImpl::Unlock(AuUInt16 count) void SemaphoreImpl::Unlock(AuUInt16 uCount)
{ {
if (gUseNativeWaitSemapahore) if (gUseNativeWaitSemapahore)
{ {
AuAtomicAdd<AuUInt32>(&this->dwState_, count); AuAtomicAdd<AuUInt32>(&this->dwState_, uCount);
if (AuAtomicLoad(this->GetSleepCounter())) if (auto dwSleeping = AuAtomicLoad(this->GetSleepCounter()))
{ {
if (count == 1) if (uCount == 1)
{ {
InternalLTSWakeOne((void *)&this->dwState_); InternalLTSWakeOne((void *)&this->dwState_);
} }
else else
{ {
InternalLTSWakeAll((void *)&this->dwState_); InternalLTSWakeCount((void *)&this->dwState_, AuMin<AuUInt32>(uCount, dwSleeping));
} }
} }
} }
@ -236,16 +237,16 @@ namespace Aurora::Threading::Primitives
// we cant efficiently access the conditions state or atomic guarantees... // we cant efficiently access the conditions state or atomic guarantees...
this->mutex.Lock(); // do not [re]move this lock fence this->mutex.Lock(); // do not [re]move this lock fence
AuAtomicAdd<AuUInt32>(&this->dwState_, count); // this could be moved anywhere above the unlock, including above the lock. AuAtomicAdd<AuUInt32>(&this->dwState_, uCount); // this could be moved anywhere above the unlock, including above the lock.
this->mutex.Unlock(); this->mutex.Unlock();
if (count == 1) if (uCount == 1)
{ {
this->var.Signal(); this->var.Signal();
} }
else else
{ {
if (count >= 3) // ...this is the only optimization we can hope to achieve if (uCount >= 3) // ...this is the only optimization we can hope to achieve
{ {
// we can always save a few cycles by doing an atomic broadcast on a contended semaphore // we can always save a few cycles by doing an atomic broadcast on a contended semaphore
// waking up the wrong amount of threads probably doesn't matter at this point, on these target platforms // waking up the wrong amount of threads probably doesn't matter at this point, on these target platforms
@ -254,7 +255,7 @@ namespace Aurora::Threading::Primitives
else // ...otherwise, do the handshake just a few times else // ...otherwise, do the handshake just a few times
{ {
// doing the condvar handshake for the exact amount of threads you need, once the cond is contended, can pay off // doing the condvar handshake for the exact amount of threads you need, once the cond is contended, can pay off
for (AU_ITERATE_N(i, count)) for (AU_ITERATE_N(i, uCount))
{ {
(void)i; (void)i;
this->var.Signal(); this->var.Signal();