From 8c84ecf8928004b747bc275cc54bfb4f6dafc668 Mon Sep 17 00:00:00 2001 From: Jamie Reece Wilson Date: Sun, 9 Jul 2023 23:33:12 +0100 Subject: [PATCH] [*] Win8+: Experimental primitive improvements by taking notes from Win7 cycle pinching [*] +regression in condvar --- .../Primitives/AuConditionMutex.NT.cpp | 45 ++++++++++++------ .../Primitives/AuConditionMutex.NT.hpp | 4 +- .../Primitives/AuConditionVariable.NT.cpp | 2 + Source/Threading/Primitives/AuMutex.NT.cpp | 46 ++++++++++++++----- 4 files changed, 71 insertions(+), 26 deletions(-) diff --git a/Source/Threading/Primitives/AuConditionMutex.NT.cpp b/Source/Threading/Primitives/AuConditionMutex.NT.cpp index c66bc274..3e4335b1 100644 --- a/Source/Threading/Primitives/AuConditionMutex.NT.cpp +++ b/Source/Threading/Primitives/AuConditionMutex.NT.cpp @@ -67,15 +67,14 @@ namespace Aurora::Threading::Primitives { if (gUseNativeWaitCondvar) { - auto uCurrentValue = uValue + kFutexBitWait; // TODO: 1 bit? + auto uCurrentValue = uValue + kFutexBitWait; InternalLTSWaitOnAddressHighRes((void *)&uValueRef, &uCurrentValue, sizeof(uCurrentValue), 0); } else { pNtWaitForKeyedEvent(gKeyedEventHandle, (void *)&uValueRef, 0, NULL); + AuAtomicSub(&uValueRef, kFutexBitWake); } - - AuAtomicSub(&uValueRef, kFutexBitWake); } } #endif @@ -86,11 +85,38 @@ namespace Aurora::Threading::Primitives #if defined(AURORA_FORCE_SRW_LOCKS) ::ReleaseSRWLockExclusive(&this->lock_); #else - auto &uValueRef = this->lock_.uWaitCount; - // Mirrors: ./AuMutex.NT.cpp // keep this codeblock in parity // defer to the comments in that source file + + if (gUseNativeWaitCondvar) + { + auto &uValueRef = this->lock_.uWaitCount; + + *(AuUInt8 *)&uValueRef = 0; + + while (true) + { + auto uValue = uValueRef; + + if (uValue < kFutexBitWait) + { + return; + } + + if (AuAtomicCompareExchange(&uValueRef, uValue - kFutexBitWait, uValue) == uValue) + { + pWakeByAddressSingle((void *)&this->lock_.uWaitCount); + return; + } + + SMPPause(); + } + return; + } + + auto &uValueRef = this->lock_.uWaitCount; + #if defined(AURORA_ARCH_X86) || defined(AURORA_ARCH_X64) *(AuUInt8 *)&uValueRef = 0; #else @@ -127,14 +153,7 @@ namespace Aurora::Threading::Primitives if (AuAtomicCompareExchange(&uValueRef, uValue - kFutexBitWait + kFutexBitWake, uValue) == uValue) { - if (gUseNativeWaitCondvar) - { - pWakeByAddressSingle((void *)&uValueRef); - } - else - { - pNtReleaseKeyedEvent(gKeyedEventHandle, (void *)&uValueRef, 0, NULL); - } + pNtReleaseKeyedEvent(gKeyedEventHandle, (void *)&uValueRef, 0, NULL); return; } diff --git a/Source/Threading/Primitives/AuConditionMutex.NT.hpp b/Source/Threading/Primitives/AuConditionMutex.NT.hpp index fe81579f..da37790b 100644 --- a/Source/Threading/Primitives/AuConditionMutex.NT.hpp +++ b/Source/Threading/Primitives/AuConditionMutex.NT.hpp @@ -18,8 +18,8 @@ namespace Aurora::Threading::Primitives // sub = and // assuming the bits are unset/set respectively - static auto const kFutexBitWake = 256u; // 1^8 - static auto const kFutexBitWait = 512u; // 1^9, next byte over + static auto const kFutexBitWake = 256u; // 2^8 + static auto const kFutexBitWait = 512u; // 2^9, next byte over inline HANDLE gKeyedEventHandle { INVALID_HANDLE_VALUE }; diff --git a/Source/Threading/Primitives/AuConditionVariable.NT.cpp b/Source/Threading/Primitives/AuConditionVariable.NT.cpp index ba04393a..c64064a7 100644 --- a/Source/Threading/Primitives/AuConditionVariable.NT.cpp +++ b/Source/Threading/Primitives/AuConditionVariable.NT.cpp @@ -151,6 +151,8 @@ namespace Aurora::Threading::Primitives } else { + pMutex->Unlock(); + // Obligatory Windows XP+ resched bRet = pNtWaitForKeyedEvent(gKeyedEventHandle, &this->wlist, 0, nullptr) != NTSTATUS_TIMEOUT; } diff --git a/Source/Threading/Primitives/AuMutex.NT.cpp b/Source/Threading/Primitives/AuMutex.NT.cpp index a3a7872c..4bc29d90 100644 --- a/Source/Threading/Primitives/AuMutex.NT.cpp +++ b/Source/Threading/Primitives/AuMutex.NT.cpp @@ -82,21 +82,24 @@ namespace Aurora::Threading::Primitives return true; } - AuUInt64 uStartTime = Time::SteadyClockNS(); - AuUInt64 uEndTime = uTimeout ? uStartTime + uTimeout : 0; + AuUInt64 uEndTime = uTimeout ? Time::SteadyClockNS() + uTimeout : 0; int iYieldCounter {}; if (gUseNativeWaitMutex) { - auto state = this->state_; - while (::_interlockedbittestandset((volatile LONG *)&this->state_, 0) != 0) + while (!TryLock()) { - if (!InternalLTSWaitOnAddressHighRes((void *)&this->state_, &state, sizeof(this->state_), uEndTime)) - { - return false; - } + auto &uValueRef = this->state_; + auto uValue = uValueRef | 1; + auto uNextValue = uValue + kFutexBitWait; - state = this->state_; + if (AuAtomicCompareExchange(&uValueRef, uNextValue, uValue) == uValue) + { + if (!InternalLTSWaitOnAddressHighRes((void *)&uValueRef, &uNextValue, sizeof(uNextValue), uEndTime)) + { + return false; + } + } } return true; @@ -218,8 +221,29 @@ namespace Aurora::Threading::Primitives { if (gUseNativeWaitMutex) { - this->state_ = 0; - pWakeByAddressSingle((void *)&this->state_); + auto &uValueRef = this->state_; + + *(AuUInt8 *)&uValueRef = 0; + + while (true) + { + auto uValue = uValueRef; + + if (uValue < kFutexBitWait) + { + return; + } + + if (AuAtomicCompareExchange(&uValueRef, uValue - kFutexBitWait, uValue) == uValue) + { + pWakeByAddressSingle((void *)&this->state_); + return; + } + + SMPPause(); + } + + return; } else {