[*] Re-optimize the primitives for Windows 8+ on top of a Windows XP+ core
This commit is contained in:
parent
28201db2d7
commit
d389f9dda3
@ -9,6 +9,7 @@
|
||||
#include "AuConditionMutex.Generic.hpp"
|
||||
#include "SMTYield.hpp"
|
||||
#include "AuProcAddresses.NT.hpp"
|
||||
#include "../AuWakeInternal.hpp"
|
||||
|
||||
#if !defined(_AURUNTIME_GENERICCM)
|
||||
|
||||
@ -17,15 +18,19 @@ namespace Aurora::Threading::Primitives
|
||||
Win32ConditionMutex::Win32ConditionMutex()
|
||||
{
|
||||
#if !defined(AURORA_FORCE_SRW_LOCKS)
|
||||
if (gKeyedEventHandle == INVALID_HANDLE_VALUE)
|
||||
if (!pWaitOnAddress && !pNtCreateKeyedEvent)
|
||||
{
|
||||
if (!pNtCreateKeyedEvent)
|
||||
{
|
||||
InitNTAddresses();
|
||||
InitProcAddresses();
|
||||
}
|
||||
|
||||
if (gKeyedEventHandle == INVALID_HANDLE_VALUE)
|
||||
{
|
||||
if (!gUseNativeWaitCondvar)
|
||||
{
|
||||
SysAssert(pNtCreateKeyedEvent);
|
||||
pNtCreateKeyedEvent(&gKeyedEventHandle, -1, NULL, 0);
|
||||
}
|
||||
}
|
||||
#else
|
||||
::InitializeSRWLock(&this->lock_);
|
||||
#endif
|
||||
@ -59,8 +64,16 @@ namespace Aurora::Threading::Primitives
|
||||
auto uValue = uValueRef | 1;
|
||||
|
||||
if (AuAtomicCompareExchange(&uValueRef, uValue + kFutexBitWait, uValue) == uValue)
|
||||
{
|
||||
if (gUseNativeWaitCondvar)
|
||||
{
|
||||
auto uCurrentValue = uValue + kFutexBitWait; // TODO: 1 bit?
|
||||
InternalLTSWaitOnAddressHighRes((void *)&uValueRef, &uCurrentValue, sizeof(uCurrentValue), 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
pNtWaitForKeyedEvent(gKeyedEventHandle, (void *)&uValueRef, 0, NULL);
|
||||
}
|
||||
|
||||
AuAtomicSub(&uValueRef, kFutexBitWake);
|
||||
}
|
||||
@ -113,8 +126,15 @@ namespace Aurora::Threading::Primitives
|
||||
}
|
||||
|
||||
if (AuAtomicCompareExchange(&uValueRef, uValue - kFutexBitWait + kFutexBitWake, uValue) == uValue)
|
||||
{
|
||||
if (gUseNativeWaitCondvar)
|
||||
{
|
||||
pWakeByAddressSingle((void *)&uValueRef);
|
||||
}
|
||||
else
|
||||
{
|
||||
pNtReleaseKeyedEvent(gKeyedEventHandle, (void *)&uValueRef, 0, NULL);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "AuConditionVariable.Generic.hpp"
|
||||
#include <Time/Time.hpp>
|
||||
#include "SMTYield.hpp"
|
||||
#include "../AuWakeInternal.hpp"
|
||||
|
||||
#if !defined(_AURUNTIME_GENERICCV)
|
||||
|
||||
@ -56,9 +57,9 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
if (qwTimeout)
|
||||
{
|
||||
#if defined(AU_TRUST_NT_KERNEL_SCHED_TIMEOUT)
|
||||
auto uEndTimeSteady = AuTime::SteadyClockNS() + qwTimeout;
|
||||
#endif
|
||||
//#if defined(AU_TRUST_NT_KERNEL_SCHED_TIMEOUT)
|
||||
auto uEndTimeSteady = gUseNativeWaitCondvar ? AuTime::SteadyClockNS() + qwTimeout : 0; // we could nuke this again, if i really wanted to
|
||||
// #endif
|
||||
auto uEndTimeWall = AuTime::CurrentClockNS() + qwTimeout;
|
||||
auto uTargetTimeNt = AuTime::ConvertTimestampNs(uEndTimeWall);
|
||||
bool bIOU {};
|
||||
@ -84,7 +85,22 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
this->mutex_->Unlock();
|
||||
|
||||
if (gUseNativeWaitCondvar)
|
||||
{
|
||||
// Reverted: 5b495f7fd9495aa55395666e166ac499955215dc
|
||||
if (!bIOU)
|
||||
{
|
||||
bool b = true;
|
||||
bIOU = CheckOut(b);
|
||||
}
|
||||
|
||||
AuUInt8 uBlockBit { 1 };
|
||||
bRet = InternalLTSWaitOnAddressHighRes(&this->wlist, &uBlockBit, 1, uEndTimeSteady);
|
||||
}
|
||||
else
|
||||
{
|
||||
bRet = pNtWaitForKeyedEvent(gKeyedEventHandle, &this->wlist, 0, &word) != NTSTATUS_TIMEOUT;
|
||||
}
|
||||
|
||||
this->mutex_->Lock();
|
||||
}
|
||||
@ -93,7 +109,18 @@ namespace Aurora::Threading::Primitives
|
||||
this->mutex_->Unlock();
|
||||
LARGE_INTEGER word;
|
||||
word.QuadPart = 0;
|
||||
|
||||
if (gUseNativeWaitCondvar)
|
||||
{
|
||||
AuUInt8 uBlockBit { 1 };
|
||||
pWakeByAddressAll(&this->wlist); // this is kinda sad
|
||||
bRet = InternalLTSWaitOnAddressHighRes(&this->wlist, &uBlockBit, 1, uEndTimeSteady); // why is this even being called?
|
||||
}
|
||||
else
|
||||
{
|
||||
bRet = pNtWaitForKeyedEvent(gKeyedEventHandle, &this->wlist, 0, nullptr) != NTSTATUS_TIMEOUT;
|
||||
}
|
||||
|
||||
this->mutex_->Lock();
|
||||
}
|
||||
|
||||
@ -169,7 +196,15 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
this->mutex_->Unlock();
|
||||
|
||||
if (gUseNativeWaitCondvar)
|
||||
{
|
||||
AuUInt8 uBlockBit { 1 };
|
||||
bRet = InternalLTSWaitOnAddressHighRes(&this->wlist, &uBlockBit, 1, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
pNtWaitForKeyedEvent(gKeyedEventHandle, &this->wlist, 0, nullptr);
|
||||
}
|
||||
|
||||
this->mutex_->Lock();
|
||||
|
||||
@ -266,8 +301,16 @@ namespace Aurora::Threading::Primitives
|
||||
while (expected)
|
||||
{
|
||||
if (AuAtomicCompareExchange(&this->wlist, ((expected - 1) << kShiftCountByBits) /*intentional clear*/, original) == original)
|
||||
{
|
||||
if (gUseNativeWaitCondvar)
|
||||
{
|
||||
pWakeByAddressSingle(&this->wlist);
|
||||
}
|
||||
else
|
||||
{
|
||||
pNtReleaseKeyedEvent(gKeyedEventHandle, &this->wlist, FALSE, nullptr);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@ -283,6 +326,36 @@ namespace Aurora::Threading::Primitives
|
||||
void ConditionVariableImpl::Broadcast()
|
||||
{
|
||||
#if !defined(AURORA_FORCE_SRW_LOCKS)
|
||||
|
||||
if (gUseNativeWaitCondvar)
|
||||
{
|
||||
auto original = this->wlist;
|
||||
auto expected = original;
|
||||
expected = expected >> kShiftCountByBits;
|
||||
|
||||
if (!expected)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
AuAtomicAdd(&this->signalCount, expected);
|
||||
|
||||
while (true)
|
||||
{
|
||||
if (AuAtomicCompareExchange(&this->wlist, 0u, original) == original)
|
||||
{
|
||||
pWakeByAddressAll(&this->signalCount);
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
original = this->wlist;
|
||||
expected = original >> kShiftCountByBits;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto original = this->wlist;
|
||||
auto expected = original;
|
||||
expected = expected >> kShiftCountByBits;
|
||||
@ -313,6 +386,7 @@ namespace Aurora::Threading::Primitives
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
::WakeAllConditionVariable(&this->winCond_);
|
||||
#endif
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <Source/RuntimeInternal.hpp>
|
||||
#include "AuMutex.Generic.hpp"
|
||||
#include "SMTYield.hpp"
|
||||
#include "../AuWakeInternal.hpp"
|
||||
|
||||
#if !defined(_AURUNTIME_GENERICMUTEX)
|
||||
#include "AuMutex.NT.hpp"
|
||||
@ -29,6 +30,7 @@ namespace Aurora::Threading::Primitives
|
||||
::InitializeConditionVariable(&this->wakeup_);
|
||||
#endif
|
||||
}
|
||||
|
||||
this->state_ = 0;
|
||||
}
|
||||
|
||||
@ -81,59 +83,19 @@ namespace Aurora::Threading::Primitives
|
||||
}
|
||||
|
||||
AuUInt64 uStartTime = Time::SteadyClockNS();
|
||||
AuUInt64 uEndTime = uStartTime + uTimeout;
|
||||
AuUInt64 uEndTime = uTimeout ? uStartTime + uTimeout : 0;
|
||||
int iYieldCounter {};
|
||||
|
||||
if (pWaitOnAddress &&
|
||||
!gRuntimeConfig.threadingConfig.bPreferNt51XpMutexesOver81)
|
||||
if (gUseNativeWaitMutex)
|
||||
{
|
||||
auto state = this->state_;
|
||||
while (::_interlockedbittestandset((volatile LONG *)&this->state_, 0) != 0)
|
||||
{
|
||||
AuUInt32 uTimeoutMS = INFINITE;
|
||||
|
||||
if (uTimeout != 0)
|
||||
{
|
||||
uStartTime = Time::SteadyClockNS();
|
||||
if (uStartTime >= uEndTime)
|
||||
if (!InternalLTSWaitOnAddressHighRes((void *)&this->state_, &state, sizeof(this->state_), uEndTime))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
uTimeoutMS = AuNSToMS<AuInt64>(uEndTime - uStartTime);
|
||||
}
|
||||
|
||||
if (!uTimeoutMS)
|
||||
{
|
||||
auto uIndex = iYieldCounter++;
|
||||
if (uIndex == 0 || uIndex == 1)
|
||||
{
|
||||
for (AU_ITERATE_N(i, 8)) SMPPause();
|
||||
|
||||
if (::_interlockedbittestandset((volatile LONG*)&this->state_, 0))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if (uIndex == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (uIndex == 1)
|
||||
{
|
||||
AuThreading::ContextYield();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
return _interlockedbittestandset((volatile LONG*)&this->state_, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
(void)pWaitOnAddress(&this->state_, &state, sizeof(this->state_), uTimeoutMS);
|
||||
}
|
||||
|
||||
state = this->state_;
|
||||
}
|
||||
|
||||
@ -254,8 +216,12 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
void MutexImpl::Unlock()
|
||||
{
|
||||
if (!pWaitOnAddress ||
|
||||
gRuntimeConfig.threadingConfig.bPreferNt51XpMutexesOver81)
|
||||
if (gUseNativeWaitMutex)
|
||||
{
|
||||
this->state_ = 0;
|
||||
pWakeByAddressSingle((void *)&this->state_);
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined(AURORA_FORCE_SRW_LOCKS)
|
||||
::AcquireSRWLockExclusive(&this->atomicHolder_);
|
||||
@ -267,7 +233,7 @@ namespace Aurora::Threading::Primitives
|
||||
auto &uValueRef = this->state_;
|
||||
|
||||
#if defined(AURORA_ARCH_X86) || defined(AURORA_ARCH_X64)
|
||||
// Intel 64 and IA - 32 Architectures Software Developer's Manual, Volume 3A: Section: 9.1.1
|
||||
// Intel 64 and IA - 32 Architectures Software Developer's Manual, Volume 3A: Section: 8.2.3.1
|
||||
*(AuUInt8 *)&uValueRef = 0;
|
||||
|
||||
// From this point onwards, our thread could be subject to StoreLoad re-ordering
|
||||
@ -285,7 +251,7 @@ namespace Aurora::Threading::Primitives
|
||||
// | [logic] | interlocked atomic set kFutexBitWait
|
||||
// | *byte = 0; | yield
|
||||
// | auto uValue =[acquire]= uValueRef
|
||||
// ...would result in the second thread missing the third threads atomic set kFutexBitWait (cst (?) on the account of 8.2.3.1, paragraph 3)
|
||||
// ...would result in the second thread missing the third thread's atomic set kFutexBitWait (cst (?) on the account of 8.2.3.1, 8.2.3.8, etc)
|
||||
|
||||
// Also note: mfence is far too expensive and the _ReadWriteBarrier() intrinsics do absolutely nothing
|
||||
#else
|
||||
@ -335,11 +301,6 @@ namespace Aurora::Threading::Primitives
|
||||
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
this->state_ = 0;
|
||||
pWakeByAddressSingle((void *)&this->state_);
|
||||
}
|
||||
}
|
||||
|
||||
AUKN_SYM IHyperWaitable *MutexNew()
|
||||
|
@ -9,14 +9,21 @@
|
||||
#include "AuSemaphore.Generic.hpp"
|
||||
#include "AuSemaphore.NT.hpp"
|
||||
#include "SMTYield.hpp"
|
||||
#include "../AuWakeInternal.hpp"
|
||||
|
||||
#if !defined(_AURUNTIME_GENERIC_SEMAPHORE)
|
||||
|
||||
namespace Aurora::Threading::Primitives
|
||||
{
|
||||
// Constructs the semaphore with a starting count of iIntialValue.
// The member `var` is initialised from AuUnsafeRaiiToShared(&this->mutex), and the
// starting count is stored in value_.
SemaphoreImpl::SemaphoreImpl(long iIntialValue) :
|
||||
var(AuUnsafeRaiiToShared(&this->mutex))
|
||||
{
|
||||
this->value_ = iIntialValue;
|
||||
|
||||
// InitProcAddresses() is called only when both pWaitOnAddress and pNtCreateKeyedEvent
// are still unset. NOTE(review): presumably that call resolves these entry points - confirm.
if (!pWaitOnAddress && !pNtCreateKeyedEvent)
|
||||
{
|
||||
InitProcAddresses();
|
||||
}
|
||||
}
|
||||
|
||||
SemaphoreImpl::~SemaphoreImpl()
|
||||
@ -56,7 +63,7 @@ namespace Aurora::Threading::Primitives
|
||||
}
|
||||
|
||||
AuUInt64 uStart = AuTime::SteadyClockNS();
|
||||
AuUInt64 uEnd = uStart + uTimeout;
|
||||
AuUInt64 uEnd = uTimeout ? uStart + uTimeout : 0;
|
||||
|
||||
if (pWaitOnAddress)
|
||||
{
|
||||
@ -66,58 +73,11 @@ namespace Aurora::Threading::Primitives
|
||||
while (!((old != 0) &&
|
||||
(AuAtomicCompareExchange(&this->value_, old - 1, old) == old)))
|
||||
{
|
||||
AuUInt32 dwTimeoutMs = INFINITE;
|
||||
|
||||
if (uTimeout != 0)
|
||||
{
|
||||
uStart = Time::SteadyClockNS();
|
||||
if (uStart >= uEnd)
|
||||
if (!InternalLTSWaitOnAddressHighRes(&this->value_, &old, sizeof(this->value_), uEnd))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
dwTimeoutMs = AuNSToMS<AuInt64>(uEnd - uStart);
|
||||
}
|
||||
|
||||
old = 0;
|
||||
|
||||
if (dwTimeoutMs == 0)
|
||||
{
|
||||
auto uIndex = uYieldCounter++;
|
||||
if (uIndex == 0 || uIndex == 1)
|
||||
{
|
||||
for (AU_ITERATE_N(i, 8)) SMPPause();
|
||||
|
||||
{
|
||||
auto old = this->value_;
|
||||
if ((old != 0 && AuAtomicCompareExchange(&this->value_, old - 1, old) == old))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (uIndex == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (uIndex == 1)
|
||||
{
|
||||
AuThreading::ContextYield();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
auto old = this->value_;
|
||||
return old != 0 && AuAtomicCompareExchange(&this->value_, old - 1, old) == old;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
(void)pWaitOnAddress(&this->value_, &old, sizeof(this->value_), dwTimeoutMs);
|
||||
}
|
||||
|
||||
old = this->value_;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user