[*] Re-optimize the primitives for Windows 8+ on top of a Windows XP+ core

This commit is contained in:
Reece Wilson 2023-06-15 20:52:28 +01:00
parent 28201db2d7
commit d389f9dda3
4 changed files with 150 additions and 135 deletions

View File

@ -9,6 +9,7 @@
#include "AuConditionMutex.Generic.hpp"
#include "SMTYield.hpp"
#include "AuProcAddresses.NT.hpp"
#include "../AuWakeInternal.hpp"
#if !defined(_AURUNTIME_GENERICCM)
@ -17,14 +18,18 @@ namespace Aurora::Threading::Primitives
Win32ConditionMutex::Win32ConditionMutex()
{
#if !defined(AURORA_FORCE_SRW_LOCKS)
if (!pWaitOnAddress && !pNtCreateKeyedEvent)
{
InitProcAddresses();
}
if (gKeyedEventHandle == INVALID_HANDLE_VALUE)
{
if (!pNtCreateKeyedEvent)
if (!gUseNativeWaitCondvar)
{
InitNTAddresses();
SysAssert(pNtCreateKeyedEvent);
pNtCreateKeyedEvent(&gKeyedEventHandle, -1, NULL, 0);
}
pNtCreateKeyedEvent(&gKeyedEventHandle, -1, NULL, 0);
}
#else
::InitializeSRWLock(&this->lock_);
@ -60,7 +65,15 @@ namespace Aurora::Threading::Primitives
if (AuAtomicCompareExchange(&uValueRef, uValue + kFutexBitWait, uValue) == uValue)
{
pNtWaitForKeyedEvent(gKeyedEventHandle, (void *)&uValueRef, 0, NULL);
if (gUseNativeWaitCondvar)
{
auto uCurrentValue = uValue + kFutexBitWait; // TODO: 1 bit?
InternalLTSWaitOnAddressHighRes((void *)&uValueRef, &uCurrentValue, sizeof(uCurrentValue), 0);
}
else
{
pNtWaitForKeyedEvent(gKeyedEventHandle, (void *)&uValueRef, 0, NULL);
}
AuAtomicSub(&uValueRef, kFutexBitWake);
}
@ -114,7 +127,14 @@ namespace Aurora::Threading::Primitives
if (AuAtomicCompareExchange(&uValueRef, uValue - kFutexBitWait + kFutexBitWake, uValue) == uValue)
{
pNtReleaseKeyedEvent(gKeyedEventHandle, (void *)&uValueRef, 0, NULL);
if (gUseNativeWaitCondvar)
{
pWakeByAddressSingle((void *)&uValueRef);
}
else
{
pNtReleaseKeyedEvent(gKeyedEventHandle, (void *)&uValueRef, 0, NULL);
}
return;
}

View File

@ -9,6 +9,7 @@
#include "AuConditionVariable.Generic.hpp"
#include <Time/Time.hpp>
#include "SMTYield.hpp"
#include "../AuWakeInternal.hpp"
#if !defined(_AURUNTIME_GENERICCV)
@ -56,9 +57,9 @@ namespace Aurora::Threading::Primitives
if (qwTimeout)
{
#if defined(AU_TRUST_NT_KERNEL_SCHED_TIMEOUT)
auto uEndTimeSteady = AuTime::SteadyClockNS() + qwTimeout;
#endif
//#if defined(AU_TRUST_NT_KERNEL_SCHED_TIMEOUT)
auto uEndTimeSteady = gUseNativeWaitCondvar ? AuTime::SteadyClockNS() + qwTimeout : 0; // we could nuke this again, if i really wanted to
// #endif
auto uEndTimeWall = AuTime::CurrentClockNS() + qwTimeout;
auto uTargetTimeNt = AuTime::ConvertTimestampNs(uEndTimeWall);
bool bIOU {};
@ -84,7 +85,22 @@ namespace Aurora::Threading::Primitives
this->mutex_->Unlock();
bRet = pNtWaitForKeyedEvent(gKeyedEventHandle, &this->wlist, 0, &word) != NTSTATUS_TIMEOUT;
if (gUseNativeWaitCondvar)
{
// Reverted: 5b495f7fd9495aa55395666e166ac499955215dc
if (!bIOU)
{
bool b = true;
bIOU = CheckOut(b);
}
AuUInt8 uBlockBit { 1 };
bRet = InternalLTSWaitOnAddressHighRes(&this->wlist, &uBlockBit, 1, uEndTimeSteady);
}
else
{
bRet = pNtWaitForKeyedEvent(gKeyedEventHandle, &this->wlist, 0, &word) != NTSTATUS_TIMEOUT;
}
this->mutex_->Lock();
}
@ -93,7 +109,18 @@ namespace Aurora::Threading::Primitives
this->mutex_->Unlock();
LARGE_INTEGER word;
word.QuadPart = 0;
bRet = pNtWaitForKeyedEvent(gKeyedEventHandle, &this->wlist, 0, nullptr) != NTSTATUS_TIMEOUT;
if (gUseNativeWaitCondvar)
{
AuUInt8 uBlockBit { 1 };
pWakeByAddressAll(&this->wlist); // this is kinda sad
bRet = InternalLTSWaitOnAddressHighRes(&this->wlist, &uBlockBit, 1, uEndTimeSteady); // why is this even being called?
}
else
{
bRet = pNtWaitForKeyedEvent(gKeyedEventHandle, &this->wlist, 0, nullptr) != NTSTATUS_TIMEOUT;
}
this->mutex_->Lock();
}
@ -169,7 +196,15 @@ namespace Aurora::Threading::Primitives
this->mutex_->Unlock();
pNtWaitForKeyedEvent(gKeyedEventHandle, &this->wlist, 0, nullptr);
if (gUseNativeWaitCondvar)
{
AuUInt8 uBlockBit { 1 };
bRet = InternalLTSWaitOnAddressHighRes(&this->wlist, &uBlockBit, 1, 0);
}
else
{
pNtWaitForKeyedEvent(gKeyedEventHandle, &this->wlist, 0, nullptr);
}
this->mutex_->Lock();
@ -267,7 +302,15 @@ namespace Aurora::Threading::Primitives
{
if (AuAtomicCompareExchange(&this->wlist, ((expected - 1) << kShiftCountByBits) /*intentional clear*/, original) == original)
{
pNtReleaseKeyedEvent(gKeyedEventHandle, &this->wlist, FALSE, nullptr);
if (gUseNativeWaitCondvar)
{
pWakeByAddressSingle(&this->wlist);
}
else
{
pNtReleaseKeyedEvent(gKeyedEventHandle, &this->wlist, FALSE, nullptr);
}
return;
}
@ -283,33 +326,64 @@ namespace Aurora::Threading::Primitives
void ConditionVariableImpl::Broadcast()
{
#if !defined(AURORA_FORCE_SRW_LOCKS)
auto original = this->wlist;
auto expected = original;
expected = expected >> kShiftCountByBits;
auto uBroadcastIterations = expected;
while (expected && uBroadcastIterations)
if (gUseNativeWaitCondvar)
{
AuAtomicAdd(&this->signalCount, 1u);
auto original = this->wlist;
auto expected = original;
expected = expected >> kShiftCountByBits;
if (!expected)
{
return;
}
AuAtomicAdd(&this->signalCount, expected);
while (true)
{
if (AuAtomicCompareExchange(&this->wlist, 0u, original) == original)
{
pWakeByAddressAll(&this->signalCount);
return;
}
else
{
original = this->wlist;
expected = original >> kShiftCountByBits;
}
}
}
else
{
auto original = this->wlist;
auto expected = original;
expected = expected >> kShiftCountByBits;
auto uBroadcastIterations = expected;
while (expected && uBroadcastIterations)
{
bool bBreak {};
if (AuAtomicCompareExchange(&this->wlist, ((expected - 1) << kShiftCountByBits) /*intentional clear*/, original) == original)
AuAtomicAdd(&this->signalCount, 1u);
while (expected && uBroadcastIterations)
{
pNtReleaseKeyedEvent(gKeyedEventHandle, &this->wlist, FALSE, nullptr);
bool bBreak {};
if (AuAtomicCompareExchange(&this->wlist, ((expected - 1) << kShiftCountByBits) /*intentional clear*/, original) == original)
{
pNtReleaseKeyedEvent(gKeyedEventHandle, &this->wlist, FALSE, nullptr);
uBroadcastIterations--;
bBreak = true;
}
uBroadcastIterations--;
bBreak = true;
}
original = this->wlist;
expected = original >> kShiftCountByBits;
original = this->wlist;
expected = original >> kShiftCountByBits;
if (bBreak)
{
break;
if (bBreak)
{
break;
}
}
}
}

View File

@ -8,6 +8,7 @@
#include <Source/RuntimeInternal.hpp>
#include "AuMutex.Generic.hpp"
#include "SMTYield.hpp"
#include "../AuWakeInternal.hpp"
#if !defined(_AURUNTIME_GENERICMUTEX)
#include "AuMutex.NT.hpp"
@ -29,6 +30,7 @@ namespace Aurora::Threading::Primitives
::InitializeConditionVariable(&this->wakeup_);
#endif
}
this->state_ = 0;
}
@ -81,57 +83,17 @@ namespace Aurora::Threading::Primitives
}
AuUInt64 uStartTime = Time::SteadyClockNS();
AuUInt64 uEndTime = uStartTime + uTimeout;
AuUInt64 uEndTime = uTimeout ? uStartTime + uTimeout : 0;
int iYieldCounter {};
if (pWaitOnAddress &&
!gRuntimeConfig.threadingConfig.bPreferNt51XpMutexesOver81)
if (gUseNativeWaitMutex)
{
auto state = this->state_;
while (::_interlockedbittestandset((volatile LONG *)&this->state_, 0) != 0)
{
AuUInt32 uTimeoutMS = INFINITE;
if (uTimeout != 0)
if (!InternalLTSWaitOnAddressHighRes((void *)&this->state_, &state, sizeof(this->state_), uEndTime))
{
uStartTime = Time::SteadyClockNS();
if (uStartTime >= uEndTime)
{
return false;
}
uTimeoutMS = AuNSToMS<AuInt64>(uEndTime - uStartTime);
}
if (!uTimeoutMS)
{
auto uIndex = iYieldCounter++;
if (uIndex == 0 || uIndex == 1)
{
for (AU_ITERATE_N(i, 8)) SMPPause();
if (::_interlockedbittestandset((volatile LONG*)&this->state_, 0))
{
return true;
}
if (uIndex == 0)
{
continue;
}
if (uIndex == 1)
{
AuThreading::ContextYield();
continue;
}
}
return _interlockedbittestandset((volatile LONG*)&this->state_, 0);
}
else
{
(void)pWaitOnAddress(&this->state_, &state, sizeof(this->state_), uTimeoutMS);
return false;
}
state = this->state_;
@ -254,8 +216,12 @@ namespace Aurora::Threading::Primitives
void MutexImpl::Unlock()
{
if (!pWaitOnAddress ||
gRuntimeConfig.threadingConfig.bPreferNt51XpMutexesOver81)
if (gUseNativeWaitMutex)
{
this->state_ = 0;
pWakeByAddressSingle((void *)&this->state_);
}
else
{
#if defined(AURORA_FORCE_SRW_LOCKS)
::AcquireSRWLockExclusive(&this->atomicHolder_);
@ -267,7 +233,7 @@ namespace Aurora::Threading::Primitives
auto &uValueRef = this->state_;
#if defined(AURORA_ARCH_X86) || defined(AURORA_ARCH_X64)
// Intel 64 and IA - 32 Architectures Software Developer's Manual, Volume 3A: Section: 9.1.1
// Intel 64 and IA - 32 Architectures Software Developer's Manual, Volume 3A: Section: 8.2.3.1
*(AuUInt8 *)&uValueRef = 0;
// From this point onwards, our thread could be subject to StoreLoad re-ordering
@ -285,7 +251,7 @@ namespace Aurora::Threading::Primitives
// | [logic] | interlocked atomic set kFutexBitWait
// | *byte = 0; | yield
// | auto uValue =[acquire]= uValueRef
// ...would result in the second thread missing the third threads atomic set kFutexBitWait (cst (?) on the account of 8.2.3.1, paragraph 3)
// ...would result in the second thread missing the third threads atomic set kFutexBitWait (cst (?) on the account of 8.2.3.1, 8.2.3.8, etc)
// Also note: mfence is far too expensive and the _ReadWriteBarrier() intrinsics do absolutely nothing
#else
@ -335,11 +301,6 @@ namespace Aurora::Threading::Primitives
#endif
}
else
{
this->state_ = 0;
pWakeByAddressSingle((void *)&this->state_);
}
}
AUKN_SYM IHyperWaitable *MutexNew()

View File

@ -9,14 +9,21 @@
#include "AuSemaphore.Generic.hpp"
#include "AuSemaphore.NT.hpp"
#include "SMTYield.hpp"
#include "../AuWakeInternal.hpp"
#if !defined(_AURUNTIME_GENERIC_SEMAPHORE)
namespace Aurora::Threading::Primitives
{
SemaphoreImpl::SemaphoreImpl(long iIntialValue) :
    var(AuUnsafeRaiiToShared(&this->mutex))
{
    // Seed the counter with the caller-supplied starting value.
    this->value_ = iIntialValue;

    // If neither pWaitOnAddress nor pNtCreateKeyedEvent has been populated yet,
    // run InitProcAddresses() before this object is used.
    const bool bHasAnyProcAddress = pWaitOnAddress || pNtCreateKeyedEvent;
    if (!bHasAnyProcAddress)
    {
        InitProcAddresses();
    }
}
SemaphoreImpl::~SemaphoreImpl()
@ -56,7 +63,7 @@ namespace Aurora::Threading::Primitives
}
AuUInt64 uStart = AuTime::SteadyClockNS();
AuUInt64 uEnd = uStart + uTimeout;
AuUInt64 uEnd = uTimeout ? uStart + uTimeout : 0;
if (pWaitOnAddress)
{
@ -66,56 +73,9 @@ namespace Aurora::Threading::Primitives
while (!((old != 0) &&
(AuAtomicCompareExchange(&this->value_, old - 1, old) == old)))
{
AuUInt32 dwTimeoutMs = INFINITE;
if (uTimeout != 0)
if (!InternalLTSWaitOnAddressHighRes(&this->value_, &old, sizeof(this->value_), uEnd))
{
uStart = Time::SteadyClockNS();
if (uStart >= uEnd)
{
return false;
}
dwTimeoutMs = AuNSToMS<AuInt64>(uEnd - uStart);
}
old = 0;
if (dwTimeoutMs == 0)
{
auto uIndex = uYieldCounter++;
if (uIndex == 0 || uIndex == 1)
{
for (AU_ITERATE_N(i, 8)) SMPPause();
{
auto old = this->value_;
if ((old != 0 && AuAtomicCompareExchange(&this->value_, old - 1, old) == old))
{
return true;
}
}
if (uIndex == 0)
{
continue;
}
if (uIndex == 1)
{
AuThreading::ContextYield();
continue;
}
}
{
auto old = this->value_;
return old != 0 && AuAtomicCompareExchange(&this->value_, old - 1, old) == old;
}
}
else
{
(void)pWaitOnAddress(&this->value_, &old, sizeof(this->value_), dwTimeoutMs);
return false;
}
old = this->value_;