[*] Optimize for Win8.1 scheduler that can bypass SRW CVs

[*] Update readme
This commit is contained in:
Reece Wilson 2022-12-28 10:22:44 +00:00
parent 6e78ce5e07
commit dbbcd14b57
3 changed files with 223 additions and 64 deletions

View File

@ -57,18 +57,21 @@ Discord: [Invite](https://discord.gg/XYjCGWWa4J)
| Platform | Support |
| ----------- | ------- |
| NT/XP | ❌ |
| NT/Win7 | ⚠️ |
| NT/Win8.1+ | ✅ |
| NT/Win8.1+ | ⚠️ |
| NT/Win10 RS4+ | ✅ |
| NT/Win11 | ✅ |
| NT/UWP | 🕖 |
| NT/GameOS|❌ |
| NT/GameOS| ❌ |
| Linux | ✅ |
| FreeBSD 9 | ❌ |
| FreeBSD 11 | ❌ |
| OpenBSD | ❌ |
| XNU/NS-like | ❌ |
Win7: some APIs are inherently limited before Win8.1.
For client applications, Win7 support should not be crippled.
Win7/8: memory management (AuProcess) is limited. \
Applications that do not need ::mmap-like functionality **with pre-reserved address allocations** can set their minimum requirement to Vista-era NT releases.
## Performance

View File

@ -13,10 +13,34 @@
namespace Aurora::Threading::Primitives
{
// Pointer to WaitOnAddress.
// On AURORA_PLATFORM_WIN32 the symbol is resolved at load time from the synch API set so the
// binary still starts on systems that do not ship it; the pointer is null when the API is
// unavailable and callers must fall back to the SRW lock / condition variable path.
// On other targets the import is bound directly.
static BOOL (_stdcall *WaitOnAddress_f)(
    volatile VOID *Address,
    PVOID          CompareAddress,
    SIZE_T         AddressSize,
    DWORD          dwMilliseconds
) =
#if defined(AURORA_PLATFORM_WIN32)
    []() -> decltype(WaitOnAddress_f)
    {
        // LoadLibraryA returns null when the API set is missing; do not forward a null
        // module handle to GetProcAddress.
        const auto hSynch = ::LoadLibraryA("API-MS-Win-Core-Synch-l1-2-0.dll");
        if (!hSynch)
        {
            return nullptr;
        }
        return reinterpret_cast<decltype(WaitOnAddress_f)>(::GetProcAddress(hSynch, "WaitOnAddress"));
    }();
#else
    WaitOnAddress;
#endif
// Pointer to WakeByAddressSingle.
// On AURORA_PLATFORM_WIN32 the symbol is resolved at load time from the synch API set; the
// pointer is null when the API is unavailable. On other targets the import is bound directly.
static void (_stdcall *WakeByAddressSingle_f)(
    PVOID Address
) =
#if defined(AURORA_PLATFORM_WIN32)
    []() -> decltype(WakeByAddressSingle_f)
    {
        // LoadLibraryA returns null when the API set is missing; do not forward a null
        // module handle to GetProcAddress.
        const auto hSynch = ::LoadLibraryA("API-MS-Win-Core-Synch-l1-2-0.dll");
        if (!hSynch)
        {
            return nullptr;
        }
        return reinterpret_cast<decltype(WakeByAddressSingle_f)>(::GetProcAddress(hSynch, "WakeByAddressSingle"));
    }();
#else
    WakeByAddressSingle;
#endif
// Constructs the mutex in the unlocked state (state_ == 0).
// NOTE(review): this span is a diff hunk rendered without +/- markers; the two unqualified
// Initialize* calls appear to be the pre-change lines and the guarded pair their replacement.
Mutex::Mutex()
{
InitializeSRWLock(&this->atomicHolder_);
InitializeConditionVariable(&this->wakeup_);
// The SRW lock and condition variable are only initialized when WaitOnAddress was not resolved.
if (!WaitOnAddress_f)
{
::InitializeSRWLock(&this->atomicHolder_);
::InitializeConditionVariable(&this->wakeup_);
}
// Bit 0 of state_ is the bit TryLock tests and sets; clear it so the mutex starts unlocked.
this->state_ = 0;
}
@ -32,7 +56,7 @@ namespace Aurora::Threading::Primitives
// Attempts to take the mutex without blocking: atomically tests and sets bit 0 of state_
// and returns true when that bit was previously clear.
// NOTE(review): the two return lines look like the old and new side of the diff; they differ
// only in the leading `::` qualifier.
bool Mutex::TryLock()
{
return _interlockedbittestandset(&this->state_, 0) == 0;
return ::_interlockedbittestandset(&this->state_, 0) == 0;
}
bool Mutex::HasLockImplementation()
@ -50,48 +74,93 @@ namespace Aurora::Threading::Primitives
{
bool returnValue = false;
AcquireSRWLockShared(&this->atomicHolder_);
AuInt64 startTime = Time::SteadyClockMS();
AuInt64 endTime = startTime + timeout;
BOOL status = false;
while (!TryLock())
if (this->TryLock())
{
AuUInt32 timeoutMs = INFINITE;
if (timeout != 0)
{
startTime = Time::SteadyClockMS();
if (startTime >= endTime)
{
goto exitWin32;
}
timeoutMs = endTime - startTime;
}
status = SleepConditionVariableSRW(&this->wakeup_, &this->atomicHolder_, timeoutMs, CONDITION_VARIABLE_LOCKMODE_SHARED);
if (!status)
{
SysAssertExp(GetLastError() == ERROR_TIMEOUT);
goto exitWin32;
}
return true;
}
returnValue = true;
AuInt64 uStartTime = Time::SteadyClockMS();
AuInt64 uEndTime = uStartTime + timeout;
exitWin32:
ReleaseSRWLockShared(&this->atomicHolder_);
return returnValue;
if (WaitOnAddress_f)
{
auto state = this->state_;
while (::_interlockedbittestandset(&this->state_, 0) != 0)
{
AuUInt32 uTimeoutMS = INFINITE;
if (timeout != 0)
{
uStartTime = Time::SteadyClockMS();
if (uStartTime >= uEndTime)
{
return false;
}
uTimeoutMS = uEndTime - uStartTime;
}
if (!WaitOnAddress_f(&this->state_, &state, sizeof(this->state_), uTimeoutMS))
{
SysAssertExp(GetLastError() == ERROR_TIMEOUT);
return false;
}
state = this->state_;
}
return true;
}
else
{
::AcquireSRWLockShared(&this->atomicHolder_);
BOOL status = false;
while (!this->TryLock())
{
AuUInt32 uTimeoutMS = INFINITE;
if (timeout != 0)
{
uStartTime = Time::SteadyClockMS();
if (uStartTime >= uEndTime)
{
goto exitWin32;
}
uTimeoutMS = uEndTime - uStartTime;
}
status = ::SleepConditionVariableSRW(&this->wakeup_, &this->atomicHolder_, uTimeoutMS, CONDITION_VARIABLE_LOCKMODE_SHARED);
if (!status)
{
SysAssertExp(GetLastError() == ERROR_TIMEOUT);
goto exitWin32;
}
}
returnValue = true;
exitWin32:
::ReleaseSRWLockShared(&this->atomicHolder_);
return returnValue;
}
}
// Releases the mutex by clearing state_ and waking waiters.
// NOTE(review): this span is a diff hunk rendered without +/- markers; the first four
// unqualified calls appear to be the pre-change body, the if/else the replacement.
void Mutex::Unlock()
{
AcquireSRWLockExclusive(&this->atomicHolder_);
this->state_ = 0;
ReleaseSRWLockExclusive(&this->atomicHolder_);
WakeAllConditionVariable(&this->wakeup_);
// Fallback path (WaitOnAddress not resolved): clear state_ under the exclusive SRW lock,
// then wake every thread sleeping on the condition variable.
if (!WaitOnAddress_f)
{
::AcquireSRWLockExclusive(&this->atomicHolder_);
this->state_ = 0;
::ReleaseSRWLockExclusive(&this->atomicHolder_);
::WakeAllConditionVariable(&this->wakeup_);
}
else
{
// WaitOnAddress path: clear state_ and wake a single waiter on its address.
// NOTE(review): the store is a plain assignment; its ordering guarantees depend on the
// declaration of state_, which is not visible here — confirm.
this->state_ = 0;
WakeByAddressSingle_f((void *)&this->state_);
}
}
AUKN_SYM IWaitable *MutexNew()

View File

@ -12,11 +12,44 @@
#if !defined(_AURUNTIME_GENERIC_SEMAPHORE)
namespace Aurora::Threading::Primitives
{
// Pointer to WaitOnAddress.
// On AURORA_PLATFORM_WIN32 the symbol is resolved at load time from the synch API set so the
// binary still starts on systems that do not ship it; the pointer is null when the API is
// unavailable and callers must fall back to the SRW lock / condition variable path.
// On other targets the import is bound directly.
static BOOL (_stdcall *WaitOnAddress_f)(
    volatile VOID *Address,
    PVOID          CompareAddress,
    SIZE_T         AddressSize,
    DWORD          dwMilliseconds
) =
#if defined(AURORA_PLATFORM_WIN32)
    []() -> decltype(WaitOnAddress_f)
    {
        // LoadLibraryA returns null when the API set is missing; do not forward a null
        // module handle to GetProcAddress.
        const auto hSynch = ::LoadLibraryA("API-MS-Win-Core-Synch-l1-2-0.dll");
        if (!hSynch)
        {
            return nullptr;
        }
        return reinterpret_cast<decltype(WaitOnAddress_f)>(::GetProcAddress(hSynch, "WaitOnAddress"));
    }();
#else
    WaitOnAddress;
#endif
// Pointer to WakeByAddressSingle.
// On AURORA_PLATFORM_WIN32 the symbol is resolved at load time from the synch API set; the
// pointer is null when the API is unavailable. On other targets the import is bound directly.
static void (_stdcall *WakeByAddressSingle_f)(
    PVOID Address
) =
#if defined(AURORA_PLATFORM_WIN32)
    []() -> decltype(WakeByAddressSingle_f)
    {
        // LoadLibraryA returns null when the API set is missing; do not forward a null
        // module handle to GetProcAddress.
        const auto hSynch = ::LoadLibraryA("API-MS-Win-Core-Synch-l1-2-0.dll");
        if (!hSynch)
        {
            return nullptr;
        }
        return reinterpret_cast<decltype(WakeByAddressSingle_f)>(::GetProcAddress(hSynch, "WakeByAddressSingle"));
    }();
#else
    WakeByAddressSingle;
#endif
// Pointer to WakeByAddressAll.
// On AURORA_PLATFORM_WIN32 the symbol is resolved at load time from the synch API set; the
// pointer is null when the API is unavailable. On other targets the import is bound directly.
static void (_stdcall *WakeByAddressAll_f)(
    PVOID Address
) =
#if defined(AURORA_PLATFORM_WIN32)
    []() -> decltype(WakeByAddressAll_f)
    {
        // LoadLibraryA returns null when the API set is missing; do not forward a null
        // module handle to GetProcAddress.
        const auto hSynch = ::LoadLibraryA("API-MS-Win-Core-Synch-l1-2-0.dll");
        if (!hSynch)
        {
            return nullptr;
        }
        return reinterpret_cast<decltype(WakeByAddressAll_f)>(::GetProcAddress(hSynch, "WakeByAddressAll"));
    }();
#else
    WakeByAddressAll;
#endif
// Constructs the semaphore with an initial count of iIntialValue.
// NOTE(review): this span is a diff hunk rendered without +/- markers; the two unconditional
// Initialize* calls appear to be the pre-change lines and the guarded pair their replacement.
Semaphore::Semaphore(long iIntialValue)
{
this->value_ = iIntialValue;
::InitializeSRWLock(&this->lock_);
::InitializeConditionVariable(&this->winCond_);
// The SRW lock and condition variable are only initialized when WaitOnAddress was not resolved.
if (!WaitOnAddress_f)
{
::InitializeSRWLock(&this->lock_);
::InitializeConditionVariable(&this->winCond_);
}
}
Semaphore::~Semaphore()
@ -42,33 +75,72 @@ namespace Aurora::Threading::Primitives
// Acquires one count from the semaphore, blocking until one is available or the deadline passes.
// uTimeout is a duration in milliseconds measured with SteadyClockMS; 0 means no deadline
// (the wait calls are issued with INFINITE).
// Returns true once a count was taken (via TryLock or the compare-exchange below), false when
// the deadline elapsed or the underlying wait call reported failure.
// NOTE(review): this span is a diff hunk rendered without +/- markers, so pre-change and
// post-change lines are interleaved; the `if (WaitOnAddress_f) ... else ...` structure appears
// to be the new body and the remaining SRW lines the old one — confirm against the repository.
bool Semaphore::Lock(AuUInt64 uTimeout)
{
if (this->TryLock())
{
return true;
}
AuUInt64 uStart = AuTime::SteadyClockMS();
AuUInt64 uEnd = uStart + uTimeout;
::AcquireSRWLockShared(&this->lock_); // we use atomics. using shared is fine, let's not get congested early
while (!TryLock())
{
AuUInt32 dwTimeoutMs = INFINITE;
if (uTimeout != 0)
// WaitOnAddress path: no SRW lock is taken; the thread waits directly on value_.
if (WaitOnAddress_f)
{
auto old = this->value_;
//!tryLock (with old in a scope we can access)
while (!((old != 0) &&
(AuAtomicCompareExchange(&this->value_, old - 1, old) == old)))
{
uStart = Time::SteadyClockMS();
if (uStart >= uEnd)
AuUInt32 timeoutMs = INFINITE;
if (uTimeout != 0)
{
uStart = Time::SteadyClockMS();
if (uStart >= uEnd)
{
return false;
}
// NOTE(review): 64-bit difference assigned to AuUInt32 (presumably 32-bit) — very long
// timeouts would be truncated; confirm.
timeoutMs = uEnd - uStart;
}
// Waits while value_ still equals the snapshot in old; a failed wait is asserted to be a timeout.
if (!WaitOnAddress_f(&this->value_, &old, sizeof(this->value_), timeoutMs))
{
SysAssertExp(GetLastError() == ERROR_TIMEOUT);
return false;
}
// Refresh the snapshot before retrying the compare-exchange.
old = this->value_;
}
return true;
}
else
{
// Fallback path: sleep on the condition variable while holding the SRW lock in shared mode;
// every return below releases the shared lock first.
::AcquireSRWLockShared(&this->lock_); // we use atomics. using shared is fine, let's not get congested early
while (!TryLock())
{
AuUInt32 dwTimeoutMs = INFINITE;
if (uTimeout != 0)
{
uStart = Time::SteadyClockMS();
if (uStart >= uEnd)
{
::ReleaseSRWLockShared(&this->lock_);
return false;
}
dwTimeoutMs = uEnd - uStart;
}
if (!::SleepConditionVariableSRW(&this->winCond_, &this->lock_, AuUInt32(dwTimeoutMs), CONDITION_VARIABLE_LOCKMODE_SHARED))
{
::ReleaseSRWLockShared(&this->lock_);
return false;
}
dwTimeoutMs = uEnd - uStart;
}
if (!::SleepConditionVariableSRW(&this->winCond_, &this->lock_, AuUInt32(dwTimeoutMs), CONDITION_VARIABLE_LOCKMODE_SHARED))
{
::ReleaseSRWLockShared(&this->lock_);
return false;
}
::ReleaseSRWLockShared(&this->lock_);
}
::ReleaseSRWLockShared(&this->lock_);
return true;
}
@ -81,10 +153,25 @@ namespace Aurora::Threading::Primitives
// Releases `count` units back to the semaphore by atomically adding to value_, then wakes waiters.
// NOTE(review): this span is a diff hunk rendered without +/- markers; the first four
// unconditional calls appear to be the pre-change body, the if/else the replacement.
void Semaphore::Unlock(long count)
{
::AcquireSRWLockShared(&this->lock_);
AuAtomicAdd<AuInt32>(&this->value_, count);
::WakeAllConditionVariable(&this->winCond_);
::ReleaseSRWLockShared(&this->lock_);
// Fallback path (WaitOnAddress not resolved): add under the shared SRW lock and wake every
// thread sleeping on the condition variable.
if (!WaitOnAddress_f)
{
::AcquireSRWLockShared(&this->lock_);
AuAtomicAdd<AuInt32>(&this->value_, count);
::WakeAllConditionVariable(&this->winCond_);
::ReleaseSRWLockShared(&this->lock_);
}
else
{
// WaitOnAddress path: add without any lock, then wake one waiter when exactly one unit
// was released and all waiters otherwise.
AuAtomicAdd<AuInt32>(&this->value_, count);
if (count == 1)
{
WakeByAddressSingle_f(&this->value_);
}
else
{
WakeByAddressAll_f(&this->value_);
}
}
}
void Semaphore::Unlock()