[*] Handhold non-MSVC compilers

Reece Wilson 2023-08-23 16:38:22 +01:00
parent 0c5d140bd4
commit 524365b5da
2 changed files with 33 additions and 25 deletions


@@ -118,12 +118,16 @@ namespace Aurora::Threading::Primitives
     auto &uValueRef = this->lock_.uWaitCount;
+#if defined(AURORA_COMPILER_MSVC)
 #if defined(AURORA_ARCH_X86) || defined(AURORA_ARCH_X64)
     *(AuUInt8 *)&uValueRef = 0;
 #else
     InterlockedAndRelease((volatile LONG *)&uValueRef, ~0xFF);
 #endif
+#else
+    __sync_lock_release((AuUInt8 *)&uValueRef);
+#endif
     while (true)
     {
         auto uOld = uValueRef;
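For context on the builtin the new non-MSVC branch relies on: __sync_lock_release is the legacy GCC/Clang builtin for an atomic store of 0 with release ordering to the addressed location. The sketch below is illustrative only; the helper name and parameter are hypothetical (not from the Aurora sources), and it assumes, as the x86/x64 fast path does, that the held flag lives in the least-significant byte of the 32-bit wait-count word.

    #include <cstdint>

    // Hypothetical helper mirroring the non-MSVC unlock path added in this commit.
    // Assumption: the "held" flag occupies the low byte of a little-endian 32-bit
    // word, so clearing that byte releases the lock without disturbing the waiter
    // count packed into the upper bits.
    static inline void ReleaseHeldByte(std::uint32_t *pWaitCount)
    {
        // Legacy builtin: atomic store of 0 to *ptr with release semantics.
        __sync_lock_release(reinterpret_cast<std::uint8_t *>(pWaitCount));

        // Modern GCC/Clang spelling of the same store:
        // __atomic_store_n(reinterpret_cast<std::uint8_t *>(pWaitCount),
        //                  std::uint8_t { 0 }, __ATOMIC_RELEASE);
    }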


@@ -273,31 +273,35 @@ namespace Aurora::Threading::Primitives
     auto &uValueRef = this->state_;
+#if defined(AURORA_COMPILER_MSVC)
 #if defined(AURORA_ARCH_X86) || defined(AURORA_ARCH_X64)
     // Intel 64 and IA-32 Architectures Software Developer's Manual, Volume 3A, Section 8.2.3.1
     *(AuUInt8 *)&uValueRef = 0;
     // From this point onwards, our thread could be subject to StoreLoad re-ordering
     // ...but it should not matter.
     // Given the memory model of x86[64], we can only really expect to be out of order during an unfenced load operation, which, in this class, can only happen in this function before the CAS.
     // No other place reads.
     // Re-ordering race condition 1: one thread wins an atomic bit set that we don't catch until the CAS, resulting in: a slow implicit fence under the CAS, a mm_pause stall, a compare, and a return
     //                          alt: uValueRef reads zero, resulting in a preemptive return while no threads need to be awoken
     // Re-ordering race condition 2: we unlock, multiple threads enter ::Lock(), we somehow read `uValue = uValueRef` as zero, and then the first atomic bit-set-and-test winner thread signals the keyed mutex
     // I fail to see how:
     //     *byte = 0; |                                    |
     //                | interlocked atomicbitset           | interlocked atomicbitset fail
     //                | [logic]                            | interlocked atomic set kFutexBitWait
     //                | *byte = 0;                         | yield
     //                | auto uValue =[acquire]= uValueRef
     // ...would result in the second thread missing the third thread's atomic set of kFutexBitWait (cst (?) on account of 8.2.3.1, 8.2.3.8, etc)
     // Also note: mfence is far too expensive and the _ReadWriteBarrier() intrinsic does absolutely nothing
     _ReadWriteBarrier();
 #else
     InterlockedAndRelease((volatile LONG *)&uValueRef, ~0xFF);
 #endif
+#else
+    __sync_lock_release((AuUInt8 *)&uValueRef); // __atomic_store_explicit((AuUInt8 *)&uValueRef, 0, __ATOMIC_RELEASE)
+#endif
     while (true)
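To make the branch structure above easier to compare, here is a self-contained sketch of the three unlock flavors this hunk selects between, written against the standard MSVC/GCC predefined macros rather than the Aurora ones. The function and parameter names are hypothetical, and the low-byte layout is an assumption carried over from the x86/x64 fast path.

    #include <cstdint>

    #if defined(_MSC_VER)
        #include <intrin.h>   // _ReadWriteBarrier
        #include <windows.h>  // InterlockedAndRelease
    #endif

    // Hypothetical stand-alone rendition of the unlock paths in this hunk.
    static inline void UnlockStateWord(volatile std::uint32_t *pState)
    {
    #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
        // x86/x64 under MSVC: stores are not reordered with older stores
        // (Intel SDM Vol. 3A, 8.2.3.1), so a plain byte store plus a
        // compiler-only barrier suffices; no fence instruction is emitted.
        *reinterpret_cast<volatile std::uint8_t *>(pState) = 0;
        _ReadWriteBarrier();
    #elif defined(_MSC_VER)
        // Other architectures under MSVC: clear the low byte with a
        // release-ordered atomic AND.
        InterlockedAndRelease(reinterpret_cast<volatile long *>(pState), ~0xFFL);
    #else
        // GCC/Clang: release store of zero to the held byte.
        __atomic_store_n(reinterpret_cast<volatile std::uint8_t *>(pState),
                         std::uint8_t { 0 }, __ATOMIC_RELEASE);
    #endif
    }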