From eef79aab73196eaac2c5b60c0c5f014b5fa16044 Mon Sep 17 00:00:00 2001 From: Jamie Reece Wilson Date: Sat, 17 Jun 2023 17:07:02 +0100 Subject: [PATCH] [*] Update Runtime and sync comments --- Aurora/Runtime | 2 +- Tests/Public/2. Hello Threading/Main.cpp | 48 +++++++++++------------- 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/Aurora/Runtime b/Aurora/Runtime index b60b580..451b902 160000 --- a/Aurora/Runtime +++ b/Aurora/Runtime @@ -1 +1 @@ -Subproject commit b60b580d62fb773e082e8ddd8d2e726148c0938e +Subproject commit 451b9025c073bc3af316fcb244627e16c02953e9 diff --git a/Tests/Public/2. Hello Threading/Main.cpp b/Tests/Public/2. Hello Threading/Main.cpp index 86e0f1b..633236e 100644 --- a/Tests/Public/2. Hello Threading/Main.cpp +++ b/Tests/Public/2. Hello Threading/Main.cpp @@ -36,7 +36,9 @@ TEST(Allocationless, SOO) static_assert(sizeof(AuThreadPrimitives::RWLockSOO) < (sizeof(std::shared_timed_mutex) / 2)); static_assert(230 < sizeof(std::shared_timed_mutex)); - static_assert(103 < sizeof(AuThreadPrimitives::RWLockSOO)); + static_assert(103 > sizeof(AuThreadPrimitives::RWLockSOO)); + static_assert(90 > sizeof(AuThreadPrimitives::RWLockSOO)); + static_assert(89 > sizeof(AuThreadPrimitives::RWLockSOO)); static_assert(sizeof(AuThreadPrimitives::MutexSOO) < sizeof(SRWLOCK) + sizeof(CONDITION_VARIABLE) + 4); // (SRWLOCKs are too dumb to used by themselves) static_assert(sizeof(AuThreadPrimitives::ConditionVariableSOO) < sizeof(std::condition_variable)); @@ -67,19 +69,6 @@ TEST(Allocationless, SOO) #endif } - -TEST(Allocationless, SOOShorthand) -{ - AuThreadPrimitives::ConditionMutex condMutex; - AuThreadPrimitives::ConditionVariable condVariable(AuUnsafeRaiiToShared(condMutex.AsPointer())); - AuThreadPrimitives::Mutex mutex; - AuThreadPrimitives::Event event(false, true, false); - AuThreadPrimitives::Semaphore semaphore; - AuThreadPrimitives::CriticalSection cs; - AuThreadPrimitives::RWLock rwLock; - AuThreadPrimitives::RWRenterableLock rwLock2; 
-} - /** * @brief Single threaded mutex lock test (rentrant mutexes are called critical sections in this subsystem) */ @@ -567,14 +556,16 @@ TEST(WaitOnAddress, WaitOnConstantForTimeout20SecondsUpdateAfter50ms) // ...Windows 7 again: expect to see a factor of 4-6x slowdown in time to wake // trust me bro numbers i9 9900k @ 5Ghz (emulated): // * rare undershoots: ~23'000 - // * more often than not ~24'700 - 38'000 - // * worst case peaks: ~55'000 - // * very very rarely: ~70'000 + // * more often than not ~11'100 - 15'000 + // * worst case peaks: ~20'000 + // * very very rarely: ~34'000 // and Windows 11 (8+ native), i7 12700KF @ 5Ghz: // * rare undershoots: ~4'800 // * more often than not ~5'100 // * worst case peaks: ~30'000 - // and Windows 11 (8+ native), i7 12700KF @ 5Ghz (emulated - patched to force it): + // and Windows 10 (8+ native), [approx] i5 4400k - i5 6400: + // * approx: ~10'000, ~12'000 + // and Windows 11 (8+ emulated - patched to force it), i7 12700KF @ 5Ghz: // * around: ~7'500 // and Microsofts game developer docs state: // * expect 5,000 nanosecond switches on the Xbox @@ -596,23 +587,26 @@ TEST(WaitOnAddress, WaitOnConstantForTimeout20SecondsUpdateAfter50ms) // > (1000 - (0.005 * 1000 * 1)) / 144 (Win11 best case) // 6.909722222222222 // - // > (1000 - (0.05 * 1000 * 1)) / 144 (Win 7 average ballpark) - // 6.597222222222222 + // > (1000 - (0.03 * 1000 * 1)) / 144 (Win 7 average ballpark) + // 6.736111111111111 // - // > (1000 - (0.02 * 1000 * 1)) / 144 (Win 7 average ballpark) - // 6.805555555555555 + // > (1000 - (0.011 * 1000 * 1)) / 144 (Win 7 average ballpark) + // 6.868055555555555 // - // > (1000 - (0.02 * 10000 * 1)) / 144 (0.02ms optimistic but reasonable, 10'000 iterations) - // 5.555555555555555 - // ~= 1.4ms for 10'000 switches (?) + // > (1000 - (0.017 * 10000 * 1)) / 144 (17'000 nanoseconds worst chuggier cases, 10'000 iterations) + // 5.763888888888889 + // ~= 1.18ms for 10'000 switches under Windows 7(?) 
// - // > (1000 - (0.005 * 10000 * 1)) / 144 (Windows 11 worst case at 10'000 iterations) + // > (1000 - (0.005 * 10000 * 1)) / 144 (Windows 11 expected performant case at 10'000 iterations) // 6.597222222222222 // ~= 0.347ms for 10'000 switches (?) - // ~4x slower in emulation mode UNDER a crippled kernel. Win10+ can be forced into emu mode and achieve much faster results. + // ~3.4x slower in emulation mode UNDER a crippled kernel. Win10+ can be forced into emu mode and achieve much faster results. // ...implying the 4x slowdown is the improvement between os generations? not the sync interface? // ...maybe hardware? // + // Note: to reiterate an important value stated previously, these values assume a 0.05ms time to wake on Windows 7. + // A more realistic time to wake is 12'000 nanoseconds on overpowered hardware if the API is only being lightly used as a polyfill. + // // -> No real game-breaking difference in CPU time allocation for any given 144 Hhz frame. Even an order of magnitude increase wouldn't make a difference. // Two orders, maybe so; just don't target hundreds of thousands of switches per second. It should be possible to still get ok performance out of Windows 7. // Saving ~1ms in a frame allocation would be nice, but i'll take that trade-off for a sync primitive that doesnt exist.