From 28201db2d7faf34ebb5440813d21e0ab012ef57e Mon Sep 17 00:00:00 2001 From: Jamie Reece Wilson Date: Thu, 15 Jun 2023 20:44:27 +0100 Subject: [PATCH] [+] Improve WoA on Windows 8+ [+] AuThreading::WaitOnAddressSteady --- Include/Aurora/Runtime.hpp | 18 +- Include/Aurora/Threading/WakeOnAddress.hpp | 7 + Source/AuProcAddresses.NT.cpp | 11 + Source/AuProcAddresses.NT.hpp | 9 + Source/AuProcAddresses.cpp | 6 + Source/Threading/AuWakeInternal.hpp | 17 ++ Source/Threading/AuWakeOnAddress.cpp | 269 +++++++++++++++++---- Source/Threading/AuWakeOnAddress.hpp | 3 +- 8 files changed, 287 insertions(+), 53 deletions(-) create mode 100644 Source/Threading/AuWakeInternal.hpp diff --git a/Include/Aurora/Runtime.hpp b/Include/Aurora/Runtime.hpp index c2d2da63..54e96eb6 100644 --- a/Include/Aurora/Runtime.hpp +++ b/Include/Aurora/Runtime.hpp @@ -343,14 +343,16 @@ namespace Aurora struct ThreadingConfig { bool bNoThreadNames { false }; - bool bPlatformIsSMPProcessorOptimized { true }; // Whether to attempt to using mm_pause or similar before yielding into the kernel - AuUInt8 uSpinLoopPowerA { 7 }; // Nudgable spinloop power. This is our local userland niceness factor; where 1 << n is the amount of yield instructions to stall for - bool bPreferNt51XpMutexesOver81 { true }; // Fun Fact: Undocumented Windows XP APIs are still better than whatever the fuck shit fest they sharted out under Windows Vista and 8.1 - }; // Wth the former set of apis, we are still nothing more than a futex intended for nothing more than x86 bittestandset with undefined - // bahviour on the higher bits, and we're crippled by some annoying thread switch function. Windows Vista superseded the dumb kernel-io - // based switching apis everyone thought they had to use with bloat on top of this very same 5.1 era api. - // And to end it all off, Windows 8.1 wait/wake on address forces relative millisecond precision, in the first (?) MS OS to drop tick based [re]scheduling. 
- // Our main mutex is one edge case where undcoumented XP era scheduling apis are better than the garbage indiasoft wants you to use in . + bool bPlatformIsSMPProcessorOptimized { true }; // Whether to attempt to use mm_pause or similar before yielding into the kernel + AuUInt8 uSpinLoopPowerA { 7 }; // Nudgable spinloop power. This is our local userland niceness factor; where 1 << n is the amount of yield instructions to stall for + + bool bPreferNt51XpMutexesOver8 { false }; // Fun Fact: Undocumented Windows XP APIs are still better than whatever the fuck shit fest they sharted out under Windows Vista and maybe 8.1 + bool bPerferNt51XpCondvarsOver8 { false }; // With the former set of apis, we are still nothing more than a futex intended for nothing more than x86 bittestandset with undefined + }; // behaviour on the higher bits, and we're crippled by some annoying thread switch function. Windows Vista superseded the dumb kernel-io + // based switching apis everyone thought they had to use with bloat on top of this very same 5.1 era api. + // ~~ And to end it all off, Windows 8.1 wait/wake on address forces relative millisecond precision, in the first (?) MS OS to drop tick based [re]scheduling. ~~ (officially) + // Our main mutex is one edge case where undocumented XP era scheduling apis are better than the garbage indiasoft wants you to use in . + struct RuntimeStartInfo { diff --git a/Include/Aurora/Threading/WakeOnAddress.hpp b/Include/Aurora/Threading/WakeOnAddress.hpp index 9e9318de..d100c4d5 100644 --- a/Include/Aurora/Threading/WakeOnAddress.hpp +++ b/Include/Aurora/Threading/WakeOnAddress.hpp @@ -20,8 +20,15 @@ namespace Aurora::Threading void *pCompareAddress, AuUInt8 uWordSize); + // Relative timeout variant of nanosecond resolution WoA. Nanoseconds are in steady clock time. 
0 = indefinite AUKN_SYM bool WaitOnAddress(void *pTargetAddress, void *pCompareAddress, AuUInt8 uWordSize, AuUInt64 qwNanoseconds); + + // Absolute timeout variant of nanosecond resolution WoA. Nanoseconds are in steady clock time. 0 = indefinite + AUKN_SYM bool WaitOnAddressSteady(void *pTargetAddress, + void *pCompareAddress, + AuUInt8 uWordSize, + AuUInt64 qwNanoseconds); } \ No newline at end of file diff --git a/Source/AuProcAddresses.NT.cpp b/Source/AuProcAddresses.NT.cpp index e3542505..9442b4fc 100644 --- a/Source/AuProcAddresses.NT.cpp +++ b/Source/AuProcAddresses.NT.cpp @@ -51,6 +51,7 @@ namespace Aurora ADD_GET_PROC(Nt, NtReleaseKeyedEvent) ADD_GET_PROC(Nt, NtOpenKeyedEvent) ADD_GET_PROC(Nt, NtCreateKeyedEvent) + ADD_GET_PROC(Nt, RtlWaitOnAddress) ADD_GET_PROC_BI(Kernel32, KernelBase, VirtualAlloc2) ADD_GET_PROC_BI(Kernel32, KernelBase, MapViewOfFile3) @@ -79,5 +80,15 @@ namespace Aurora pNtDelayExecution = nullptr /* ... (you dont need it, but it'll help a ton) */; #endif + + gUseNativeWaitMutex = (pWaitOnAddress && + !gRuntimeConfig.threadingConfig.bPreferNt51XpMutexesOver8 && + (pRtlWaitOnAddress || AuBuild::kCurrentPlatform != AuBuild::EPlatform::ePlatformWin32)) || + !pNtWaitForKeyedEvent; + + gUseNativeWaitCondvar = (pWaitOnAddress && + !gRuntimeConfig.threadingConfig.bPerferNt51XpCondvarsOver8 && + (pRtlWaitOnAddress || AuBuild::kCurrentPlatform != AuBuild::EPlatform::ePlatformWin32)) || + !pNtWaitForKeyedEvent; } } \ No newline at end of file diff --git a/Source/AuProcAddresses.NT.hpp b/Source/AuProcAddresses.NT.hpp index e662fbab..fb29a9d9 100644 --- a/Source/AuProcAddresses.NT.hpp +++ b/Source/AuProcAddresses.NT.hpp @@ -91,10 +91,19 @@ namespace Aurora POBJECT_ATTRIBUTES Attr, ULONG Flags ); + + inline NTSTATUS(__stdcall *pRtlWaitOnAddress)( + const void *addr, + const void *cmp, + SIZE_T size, + const LARGE_INTEGER *timeout); #if defined(AURORA_PLATFORM_WIN32) inline NTSTATUS(_stdcall *pRtlGetVersion)( PRTL_OSVERSIONINFOW lpVersionInformation 
); #endif + + inline bool gUseNativeWaitMutex {}; + inline bool gUseNativeWaitCondvar {}; } \ No newline at end of file diff --git a/Source/AuProcAddresses.cpp b/Source/AuProcAddresses.cpp index 85b74bc6..ef7454bf 100644 --- a/Source/AuProcAddresses.cpp +++ b/Source/AuProcAddresses.cpp @@ -15,6 +15,12 @@ namespace Aurora { void InitProcAddresses() { + static bool gDumbInitOnce {}; + if (AuExchange(gDumbInitOnce, true)) + { + return; + } + #if defined(AURORA_IS_MODERNNT_DERIVED) InitNTAddresses(); #endif diff --git a/Source/Threading/AuWakeInternal.hpp b/Source/Threading/AuWakeInternal.hpp new file mode 100644 index 00000000..38278c97 --- /dev/null +++ b/Source/Threading/AuWakeInternal.hpp @@ -0,0 +1,17 @@ +/*** + Copyright (C) 2023 J Reece Wilson (a/k/a "Reece"). All rights reserved. + + File: AuWakeInternal.hpp + Date: 2023-6-15 + Author: Reece + Note: Defines an LTS prototype for nanosecond-resolution WoA API for Desktop class Win32 primitives +***/ +#pragma once + +namespace Aurora::Threading +{ + bool InternalLTSWaitOnAddressHighRes(void *pTargetAddress, + void *pCompareAddress, + AuUInt8 uWordSize, + AuUInt64 qwNanosecondsAbs); +} \ No newline at end of file diff --git a/Source/Threading/AuWakeOnAddress.cpp b/Source/Threading/AuWakeOnAddress.cpp index 6499e7a5..0ab72bcf 100644 --- a/Source/Threading/AuWakeOnAddress.cpp +++ b/Source/Threading/AuWakeOnAddress.cpp @@ -11,6 +11,7 @@ #if defined(AURORA_PLATFORM_WIN32) #include #endif +#include