[*] Update Runtime and sync comments

This commit is contained in:
Reece Wilson 2023-06-17 17:07:02 +01:00
parent 3dac9c85fc
commit eef79aab73
2 changed files with 22 additions and 28 deletions

@ -1 +1 @@
Subproject commit b60b580d62fb773e082e8ddd8d2e726148c0938e Subproject commit 451b9025c073bc3af316fcb244627e16c02953e9

View File

@ -36,7 +36,9 @@ TEST(Allocationless, SOO)
static_assert(sizeof(AuThreadPrimitives::RWLockSOO) < (sizeof(std::shared_timed_mutex) / 2)); static_assert(sizeof(AuThreadPrimitives::RWLockSOO) < (sizeof(std::shared_timed_mutex) / 2));
static_assert(230 < sizeof(std::shared_timed_mutex)); static_assert(230 < sizeof(std::shared_timed_mutex));
static_assert(103 < sizeof(AuThreadPrimitives::RWLockSOO)); static_assert(103 > sizeof(AuThreadPrimitives::RWLockSOO));
static_assert(90 > sizeof(AuThreadPrimitives::RWLockSOO));
static_assert(89 > sizeof(AuThreadPrimitives::RWLockSOO));
static_assert(sizeof(AuThreadPrimitives::MutexSOO) < sizeof(SRWLOCK) + sizeof(CONDITION_VARIABLE) + 4); // (SRWLOCKs are too dumb to used by themselves) static_assert(sizeof(AuThreadPrimitives::MutexSOO) < sizeof(SRWLOCK) + sizeof(CONDITION_VARIABLE) + 4); // (SRWLOCKs are too dumb to used by themselves)
static_assert(sizeof(AuThreadPrimitives::ConditionVariableSOO) < sizeof(std::condition_variable)); static_assert(sizeof(AuThreadPrimitives::ConditionVariableSOO) < sizeof(std::condition_variable));
@ -67,19 +69,6 @@ TEST(Allocationless, SOO)
#endif #endif
} }
/**
 * @brief Declares one local instance of each shorthand primitive type; the body makes no assertions
 */
TEST(Allocationless, SOOShorthand)
{
    // The condition variable is built from the condition mutex's pointer, so the mutex is declared first
    AuThreadPrimitives::ConditionMutex cvMutex;
    AuThreadPrimitives::ConditionVariable cv(AuUnsafeRaiiToShared(cvMutex.AsPointer()));

    // Plain lock and signalling primitives
    AuThreadPrimitives::Mutex plainMutex;
    AuThreadPrimitives::Event signalEvent(false, true, false); // NOTE(review): flag meanings are not visible here - confirm against the Event constructor
    AuThreadPrimitives::Semaphore sema;
    AuThreadPrimitives::CriticalSection critSection;

    // Reader/writer lock variants
    AuThreadPrimitives::RWLock readWriteLock;
    AuThreadPrimitives::RWRenterableLock renterableReadWriteLock;
}
/** /**
* @brief Single-threaded mutex lock test (reentrant mutexes are called critical sections in this subsystem) * @brief Single-threaded mutex lock test (reentrant mutexes are called critical sections in this subsystem)
*/ */
@ -567,14 +556,16 @@ TEST(WaitOnAddress, WaitOnConstantForTimeout20SecondsUpdateAfter50ms)
// ...Windows 7 again: expect to see a factor of 4-6x slowdown in time to wake // ...Windows 7 again: expect to see a factor of 4-6x slowdown in time to wake
// trust me bro numbers i9 9900k @ 5Ghz (emulated): // trust me bro numbers i9 9900k @ 5Ghz (emulated):
// * rare undershoots: ~23'000 // * rare undershoots: ~23'000
// * more often than not ~24'700 - 38'000 // * more often than not ~11'100 - 15'000
// * worst case peaks: ~55'000 // * worst case peaks: ~20'000
// * very very rarely: ~70'000 // * very very rarely: ~34'000
// and Windows 11 (8+ native), i7 12700KF @ 5Ghz: // and Windows 11 (8+ native), i7 12700KF @ 5Ghz:
// * rare undershoots: ~4'800 // * rare undershoots: ~4'800
// * more often than not ~5'100 // * more often than not ~5'100
// * worst case peaks: ~30'000 // * worst case peaks: ~30'000
// and Windows 11 (8+ native), i7 12700KF @ 5Ghz (emulated - patched to force it): // and Windows 10 (8+ native), [approx] i5 4400k - i5 6400:
// * approx: ~10'000, ~12'000
// and Windows 11 (8+ emulated - patched to force it), i7 12700KF @ 5Ghz:
// * around: ~7'500 // * around: ~7'500
// and Microsoft's game developer docs state: // and Microsoft's game developer docs state:
// * expect 5,000 nanosecond switches on the Xbox // * expect 5,000 nanosecond switches on the Xbox
@ -596,23 +587,26 @@ TEST(WaitOnAddress, WaitOnConstantForTimeout20SecondsUpdateAfter50ms)
// > (1000 - (0.005 * 1000 * 1)) / 144 (Win11 best case) // > (1000 - (0.005 * 1000 * 1)) / 144 (Win11 best case)
// 6.909722222222222 // 6.909722222222222
// //
// > (1000 - (0.05 * 1000 * 1)) / 144 (Win 7 average ballpark) // > (1000 - (0.03 * 1000 * 1)) / 144 (Win 7 average ballpark)
// 6.597222222222222 // 6.736111111111111
// //
// > (1000 - (0.02 * 1000 * 1)) / 144 (Win 7 average ballpark) // > (1000 - (0.011 * 1000 * 1)) / 144 (Win 7 average ballpark)
// 6.805555555555555 // 6.868055555555555
// //
// > (1000 - (0.02 * 10000 * 1)) / 144 (0.02ms optimistic but reasonable, 10'000 iterations) // > (1000 - (0.017 * 10000 * 1)) / 144 (17'000 nanoseconds worst chuggier cases, 10'000 iterations)
// 5.555555555555555 // 5.763888888888889
// ~= 1.4ms for 10'000 switches (?) // ~= 1.18ms for 10'000 switches under Windows 7(?)
// //
// > (1000 - (0.005 * 10000 * 1)) / 144 (Windows 11 worst case at 10'000 iterations) // > (1000 - (0.005 * 10000 * 1)) / 144 (Windows 11 expected performant case at 10'000 iterations)
// 6.597222222222222 // 6.597222222222222
// ~= 0.347ms for 10'000 switches (?) // ~= 0.347ms for 10'000 switches (?)
// ~4x slower in emulation mode UNDER a crippled kernel. Win10+ can be forced into emu mode and achieve much faster results. // ~3.4x slower in emulation mode UNDER a crippled kernel. Win10+ can be forced into emu mode and achieve much faster results.
// ...implying the 4x slowdown is the improvement between os generations? not the sync interface? // ...implying the 4x slowdown is the improvement between os generations? not the sync interface?
// ...maybe hardware? // ...maybe hardware?
// //
// Note: reiterating on an important value stated previously, these values assume 0.05ms per time to wake on Windows 7.
// A more realistic time to wake is 12'000 nanoseconds on overpowered hardware if the API is only being lightly used as polyfill.
//
// -> No real game-breaking difference in CPU time allocation for any given 144 Hz frame. Even an order of magnitude increase wouldn't make a difference. // -> No real game-breaking difference in CPU time allocation for any given 144 Hz frame. Even an order of magnitude increase wouldn't make a difference.
// Two orders, maybe so; just don't target hundreds of thousands of switches per second. It should be possible to still get ok performance out of Windows 7. // Two orders, maybe so; just don't target hundreds of thousands of switches per second. It should be possible to still get ok performance out of Windows 7.
// Saving ~1ms in a frame allocation would be nice, but I'll take that trade-off for a sync primitive that doesn't exist. // Saving ~1ms in a frame allocation would be nice, but I'll take that trade-off for a sync primitive that doesn't exist.