From 403c186f0afc2a7690a9c00f4e61240556f81d18 Mon Sep 17 00:00:00 2001 From: Jamie Reece Wilson Date: Tue, 12 Sep 2023 13:28:46 +0100 Subject: [PATCH] [*] Improve NT semaphore: use a different internal api now. Might help uncontested servers with work queues using semaphores --- .../Threading/Primitives/AuSemaphore.NT.cpp | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/Source/Threading/Primitives/AuSemaphore.NT.cpp b/Source/Threading/Primitives/AuSemaphore.NT.cpp index 1634664a..c5123740 100644 --- a/Source/Threading/Primitives/AuSemaphore.NT.cpp +++ b/Source/Threading/Primitives/AuSemaphore.NT.cpp @@ -102,11 +102,12 @@ namespace Aurora::Threading::Primitives if (gUseNativeWaitSemapahore) { + auto pCounter = this->GetSleepCounter(); + while (!this->TryLockNoSpin()) { static const AuUInt32 kExpect { 0 }; - auto pCounter = this->GetSleepCounter(); AuAtomicAdd(pCounter, 1u); bool bStatus = InternalLTSWaitOnAddressHighRes(&this->dwState_, &kExpect, sizeof(kExpect), uEnd); AuAtomicSub(pCounter, 1u); @@ -158,12 +159,12 @@ namespace Aurora::Threading::Primitives if (gUseNativeWaitSemapahore) { AuUInt32 uYieldCounter {}; + auto pCounter = this->GetSleepCounter(); while (!this->TryLockNoSpin()) { static const AuUInt32 kExpect { 0 }; - auto pCounter = this->GetSleepCounter(); AuAtomicAdd(pCounter, 1u); bool bStatus = InternalLTSWaitOnAddressHighRes(&this->dwState_, &kExpect, sizeof(kExpect), qwTimeoutAbs); AuAtomicSub(pCounter, 1u); @@ -211,21 +212,21 @@ namespace Aurora::Threading::Primitives SysAssert(status, "Couldn't lock semaphore"); } - void SemaphoreImpl::Unlock(AuUInt16 count) + void SemaphoreImpl::Unlock(AuUInt16 uCount) { if (gUseNativeWaitSemapahore) { - AuAtomicAdd(&this->dwState_, count); + AuAtomicAdd(&this->dwState_, uCount); - if (AuAtomicLoad(this->GetSleepCounter())) + if (auto dwSleeping = AuAtomicLoad(this->GetSleepCounter())) { - if (count == 1) + if (uCount == 1) { InternalLTSWakeOne((void *)&this->dwState_); } else { - InternalLTSWakeAll((void *)&this->dwState_); + InternalLTSWakeCount((void *)&this->dwState_, AuMin(uCount, dwSleeping)); } } } @@ -236,16 +237,16 @@ namespace Aurora::Threading::Primitives // we cant efficiently access the conditions state or atomic guarantees... this->mutex.Lock(); // do not [re]move this lock fence - AuAtomicAdd(&this->dwState_, count); // this could be moved anywhere above the unlock, including above the lock. + AuAtomicAdd(&this->dwState_, uCount); // this could be moved anywhere above the unlock, including above the lock. this->mutex.Unlock(); - if (count == 1) + if (uCount == 1) { this->var.Signal(); } else { - if (count >= 3) // ...this is the only optimization we can hope to achieve + if (uCount >= 3) // ...this is the only optimization we can hope to achieve { // we can always save a few cycles by doing an atomic broadcast on a contended semaphore // waking up the wrong amount of threads probably doesn't matter at this point, on these target platforms @@ -254,7 +255,7 @@ namespace Aurora::Threading::Primitives else // ...otherwise, do the handshake just a few times { // doing the condvar handshake for the exact amount of threads you need, once the cond is contended, can pay off - for (AU_ITERATE_N(i, count)) + for (AU_ITERATE_N(i, uCount)) { (void)i; this->var.Signal();