[*] Optimize for Win8.1 scheduler that can bypass SRW CVs

[*] Update readme
This commit is contained in:
Reece Wilson 2022-12-28 10:22:44 +00:00
parent 6e78ce5e07
commit dbbcd14b57
3 changed files with 223 additions and 64 deletions

View File

@ -57,18 +57,21 @@ Discord: [Invite](https://discord.gg/XYjCGWWa4J)
| Platform | Support | | Platform | Support |
| ----------- | ------- | | ----------- | ------- |
| NT/XP | ❌ |
| NT/Win7 | ⚠️ | | NT/Win7 | ⚠️ |
| NT/Win8.1+ | ✅ | | NT/Win8.1+ | ⚠️ |
| NT/Win10 RS4+ | ✅ |
| NT/Win11 | ✅ |
| NT/UWP | 🕖 | | NT/UWP | 🕖 |
| NT/GameOS|❌ | | NT/GameOS| ❌ |
| Linux | ✅ | | Linux | ✅ |
| FreeBSD 9 | ❌ | | FreeBSD 9 | ❌ |
| FreeBSD 11 | ❌ | | FreeBSD 11 | ❌ |
| OpenBSD | ❌ | | OpenBSD | ❌ |
| XNU/NS-like | ❌ | | XNU/NS-like | ❌ |
Win7: some APIs are inherently limited before Win8.1. Win7/8: memory management (AuProcess) is limited. \
For client applications, Win7 should not be crippled. Applications that don't need ::mmap-like functionality **with pre-reserved address allocations** can set their minimum requirements to the Vista era of NT.
## Performance ## Performance

View File

@ -13,10 +13,34 @@
namespace Aurora::Threading::Primitives namespace Aurora::Threading::Primitives
{ {
static BOOL (_stdcall *WaitOnAddress_f)(
volatile VOID *Address,
PVOID CompareAddress,
SIZE_T AddressSize,
DWORD dwMilliseconds
) =
#if defined(AURORA_PLATFORM_WIN32)
decltype(WaitOnAddress_f)(GetProcAddress(LoadLibraryA("API-MS-Win-Core-Synch-l1-2-0.dll"), "WaitOnAddress"));
#else
WaitOnAddress;
#endif
static void(_stdcall *WakeByAddressSingle_f)(
PVOID Address
) =
#if defined(AURORA_PLATFORM_WIN32)
decltype(WakeByAddressSingle_f)(GetProcAddress(LoadLibraryA("API-MS-Win-Core-Synch-l1-2-0.dll"), "WakeByAddressSingle"));
#else
WakeByAddressSingle;
#endif
Mutex::Mutex() Mutex::Mutex()
{ {
InitializeSRWLock(&this->atomicHolder_); if (!WaitOnAddress_f)
InitializeConditionVariable(&this->wakeup_); {
::InitializeSRWLock(&this->atomicHolder_);
::InitializeConditionVariable(&this->wakeup_);
}
this->state_ = 0; this->state_ = 0;
} }
@ -32,7 +56,7 @@ namespace Aurora::Threading::Primitives
bool Mutex::TryLock() bool Mutex::TryLock()
{ {
return _interlockedbittestandset(&this->state_, 0) == 0; return ::_interlockedbittestandset(&this->state_, 0) == 0;
} }
bool Mutex::HasLockImplementation() bool Mutex::HasLockImplementation()
@ -50,48 +74,93 @@ namespace Aurora::Threading::Primitives
{ {
bool returnValue = false; bool returnValue = false;
AcquireSRWLockShared(&this->atomicHolder_); if (this->TryLock())
AuInt64 startTime = Time::SteadyClockMS();
AuInt64 endTime = startTime + timeout;
BOOL status = false;
while (!TryLock())
{ {
AuUInt32 timeoutMs = INFINITE; return true;
if (timeout != 0)
{
startTime = Time::SteadyClockMS();
if (startTime >= endTime)
{
goto exitWin32;
}
timeoutMs = endTime - startTime;
}
status = SleepConditionVariableSRW(&this->wakeup_, &this->atomicHolder_, timeoutMs, CONDITION_VARIABLE_LOCKMODE_SHARED);
if (!status)
{
SysAssertExp(GetLastError() == ERROR_TIMEOUT);
goto exitWin32;
}
} }
returnValue = true; AuInt64 uStartTime = Time::SteadyClockMS();
AuInt64 uEndTime = uStartTime + timeout;
exitWin32: if (WaitOnAddress_f)
ReleaseSRWLockShared(&this->atomicHolder_); {
return returnValue; auto state = this->state_;
while (::_interlockedbittestandset(&this->state_, 0) != 0)
{
AuUInt32 uTimeoutMS = INFINITE;
if (timeout != 0)
{
uStartTime = Time::SteadyClockMS();
if (uStartTime >= uEndTime)
{
return false;
}
uTimeoutMS = uEndTime - uStartTime;
}
if (!WaitOnAddress_f(&this->state_, &state, sizeof(this->state_), uTimeoutMS))
{
SysAssertExp(GetLastError() == ERROR_TIMEOUT);
return false;
}
state = this->state_;
}
return true;
}
else
{
::AcquireSRWLockShared(&this->atomicHolder_);
BOOL status = false;
while (!this->TryLock())
{
AuUInt32 uTimeoutMS = INFINITE;
if (timeout != 0)
{
uStartTime = Time::SteadyClockMS();
if (uStartTime >= uEndTime)
{
goto exitWin32;
}
uTimeoutMS = uEndTime - uStartTime;
}
status = ::SleepConditionVariableSRW(&this->wakeup_, &this->atomicHolder_, uTimeoutMS, CONDITION_VARIABLE_LOCKMODE_SHARED);
if (!status)
{
SysAssertExp(GetLastError() == ERROR_TIMEOUT);
goto exitWin32;
}
}
returnValue = true;
exitWin32:
::ReleaseSRWLockShared(&this->atomicHolder_);
return returnValue;
}
} }
void Mutex::Unlock() void Mutex::Unlock()
{ {
AcquireSRWLockExclusive(&this->atomicHolder_); if (!WaitOnAddress_f)
this->state_ = 0; {
ReleaseSRWLockExclusive(&this->atomicHolder_); ::AcquireSRWLockExclusive(&this->atomicHolder_);
WakeAllConditionVariable(&this->wakeup_); this->state_ = 0;
::ReleaseSRWLockExclusive(&this->atomicHolder_);
::WakeAllConditionVariable(&this->wakeup_);
}
else
{
this->state_ = 0;
WakeByAddressSingle_f((void *)&this->state_);
}
} }
AUKN_SYM IWaitable *MutexNew() AUKN_SYM IWaitable *MutexNew()

View File

@ -12,11 +12,44 @@
#if !defined(_AURUNTIME_GENERIC_SEMAPHORE) #if !defined(_AURUNTIME_GENERIC_SEMAPHORE)
namespace Aurora::Threading::Primitives namespace Aurora::Threading::Primitives
{ {
static BOOL(_stdcall *WaitOnAddress_f)(
volatile VOID *Address,
PVOID CompareAddress,
SIZE_T AddressSize,
DWORD dwMilliseconds
) =
#if defined(AURORA_PLATFORM_WIN32)
decltype(WaitOnAddress_f)(GetProcAddress(LoadLibraryA("API-MS-Win-Core-Synch-l1-2-0.dll"), "WaitOnAddress"));
#else
WaitOnAddress;
#endif
static void(_stdcall *WakeByAddressSingle_f)(
PVOID Address
) =
#if defined(AURORA_PLATFORM_WIN32)
decltype(WakeByAddressSingle_f)(GetProcAddress(LoadLibraryA("API-MS-Win-Core-Synch-l1-2-0.dll"), "WakeByAddressSingle"));
#else
WakeByAddressSingle;
#endif
static void(_stdcall *WakeByAddressAll_f)(
PVOID Address
) =
#if defined(AURORA_PLATFORM_WIN32)
decltype(WakeByAddressAll_f)(GetProcAddress(LoadLibraryA("API-MS-Win-Core-Synch-l1-2-0.dll"), "WakeByAddressAll"));
#else
WakeByAddressAll;
#endif
Semaphore::Semaphore(long iIntialValue) Semaphore::Semaphore(long iIntialValue)
{ {
this->value_ = iIntialValue; this->value_ = iIntialValue;
::InitializeSRWLock(&this->lock_); if (!WaitOnAddress_f)
::InitializeConditionVariable(&this->winCond_); {
::InitializeSRWLock(&this->lock_);
::InitializeConditionVariable(&this->winCond_);
}
} }
Semaphore::~Semaphore() Semaphore::~Semaphore()
@ -42,33 +75,72 @@ namespace Aurora::Threading::Primitives
bool Semaphore::Lock(AuUInt64 uTimeout) bool Semaphore::Lock(AuUInt64 uTimeout)
{ {
if (this->TryLock())
{
return true;
}
AuUInt64 uStart = AuTime::SteadyClockMS(); AuUInt64 uStart = AuTime::SteadyClockMS();
AuUInt64 uEnd = uStart + uTimeout; AuUInt64 uEnd = uStart + uTimeout;
::AcquireSRWLockShared(&this->lock_); // we use atomics. using shared is fine, let's not get congested early
while (!TryLock())
{
AuUInt32 dwTimeoutMs = INFINITE;
if (uTimeout != 0) if (WaitOnAddress_f)
{
auto old = this->value_;
//!tryLock (with old in a scope we can access)
while (!((old != 0) &&
(AuAtomicCompareExchange(&this->value_, old - 1, old) == old)))
{ {
uStart = Time::SteadyClockMS(); AuUInt32 timeoutMs = INFINITE;
if (uStart >= uEnd)
if (uTimeout != 0)
{
uStart = Time::SteadyClockMS();
if (uStart >= uEnd)
{
return false;
}
timeoutMs = uEnd - uStart;
}
if (!WaitOnAddress_f(&this->value_, &old, sizeof(this->value_), timeoutMs))
{
SysAssertExp(GetLastError() == ERROR_TIMEOUT);
return false;
}
old = this->value_;
}
return true;
}
else
{
::AcquireSRWLockShared(&this->lock_); // we use atomics. using shared is fine, let's not get congested early
while (!TryLock())
{
AuUInt32 dwTimeoutMs = INFINITE;
if (uTimeout != 0)
{
uStart = Time::SteadyClockMS();
if (uStart >= uEnd)
{
::ReleaseSRWLockShared(&this->lock_);
return false;
}
dwTimeoutMs = uEnd - uStart;
}
if (!::SleepConditionVariableSRW(&this->winCond_, &this->lock_, AuUInt32(dwTimeoutMs), CONDITION_VARIABLE_LOCKMODE_SHARED))
{ {
::ReleaseSRWLockShared(&this->lock_); ::ReleaseSRWLockShared(&this->lock_);
return false; return false;
} }
dwTimeoutMs = uEnd - uStart;
}
if (!::SleepConditionVariableSRW(&this->winCond_, &this->lock_, AuUInt32(dwTimeoutMs), CONDITION_VARIABLE_LOCKMODE_SHARED))
{
::ReleaseSRWLockShared(&this->lock_);
return false;
} }
::ReleaseSRWLockShared(&this->lock_);
} }
::ReleaseSRWLockShared(&this->lock_);
return true; return true;
} }
@ -81,10 +153,25 @@ namespace Aurora::Threading::Primitives
void Semaphore::Unlock(long count) void Semaphore::Unlock(long count)
{ {
::AcquireSRWLockShared(&this->lock_); if (!WaitOnAddress_f)
AuAtomicAdd<AuInt32>(&this->value_, count); {
::WakeAllConditionVariable(&this->winCond_); ::AcquireSRWLockShared(&this->lock_);
::ReleaseSRWLockShared(&this->lock_); AuAtomicAdd<AuInt32>(&this->value_, count);
::WakeAllConditionVariable(&this->winCond_);
::ReleaseSRWLockShared(&this->lock_);
}
else
{
AuAtomicAdd<AuInt32>(&this->value_, count);
if (count == 1)
{
WakeByAddressSingle_f(&this->value_);
}
else
{
WakeByAddressAll_f(&this->value_);
}
}
} }
void Semaphore::Unlock() void Semaphore::Unlock()