[*] Handhold non-MSVC compilers

Reece Wilson 2023-08-23 16:38:22 +01:00
parent 0c5d140bd4
commit 524365b5da
2 changed files with 33 additions and 25 deletions


@@ -118,12 +118,16 @@ namespace Aurora::Threading::Primitives
     auto &uValueRef = this->lock_.uWaitCount;
+#if defined(AURORA_COMPILER_MSVC)
 #if defined(AURORA_ARCH_X86) || defined(AURORA_ARCH_X64)
     *(AuUInt8 *)&uValueRef = 0;
 #else
     InterlockedAndRelease((volatile LONG *)&uValueRef, ~0xFF);
 #endif
+#else
+    __sync_lock_release((AuUInt8 *)&uValueRef);
+#endif
     while (true)
     {
         auto uOld = uValueRef;
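For context on the builtin the new non-MSVC branch relies on: __sync_lock_release is the legacy GCC/Clang builtin for an atomic store of 0 with release ordering to the addressed location. The sketch below is illustrative only; the helper name and parameter are hypothetical (not from the Aurora sources), and it assumes, as the x86/x64 fast path does, that the held flag lives in the least-significant byte of the 32-bit wait-count word.

    #include <cstdint>

    // Hypothetical helper mirroring the non-MSVC unlock path added in this commit.
    // Assumption: the "held" flag occupies the low byte of a little-endian 32-bit
    // word, so clearing that byte releases the lock without disturbing the waiter
    // count packed into the upper bits.
    static inline void ReleaseHeldByte(std::uint32_t *pWaitCount)
    {
        // Legacy builtin: atomic store of 0 to *ptr with release semantics.
        __sync_lock_release(reinterpret_cast<std::uint8_t *>(pWaitCount));

        // Modern GCC/Clang spelling of the same store:
        // __atomic_store_n(reinterpret_cast<std::uint8_t *>(pWaitCount),
        //                  std::uint8_t { 0 }, __ATOMIC_RELEASE);
    }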


@@ -273,31 +273,35 @@ namespace Aurora::Threading::Primitives
     auto &uValueRef = this->state_;
+#if defined(AURORA_COMPILER_MSVC)
 #if defined(AURORA_ARCH_X86) || defined(AURORA_ARCH_X64)
     // Intel 64 and IA-32 Architectures Software Developer's Manual, Volume 3A, Section 8.2.3.1
     *(AuUInt8 *)&uValueRef = 0;
     // From this point onwards, our thread could be subject to StoreLoad re-ordering
     // ...but it should not matter.
     // Given the memory model of x86[64], we can only really expect to be out of order during an unfenced load operation, which, in this class, can only happen in this function before the CAS.
     // No other place reads.
     // Re-ordering race condition 1: one thread wins an atomic bit set that we don't catch until the CAS, resulting in: a slow implicit fence under the CAS, a mm_pause stall, a compare, and a return
     //                          alt: uValueRef reads zero, resulting in a preemptive return while no threads need to be awoken
     // Re-ordering race condition 2: we unlock, multiple threads enter ::Lock(), we somehow read `uValue = uValueRef` as zero, and then the first atomic bit-set-and-test winner thread signals the keyed mutex
     // I fail to see how:
     //     *byte = 0; |                                    |
     //                | interlocked atomicbitset           | interlocked atomicbitset fail
     //                | [logic]                            | interlocked atomic set kFutexBitWait
     //                | *byte = 0;                         | yield
     //                | auto uValue =[acquire]= uValueRef
     // ...would result in the second thread missing the third thread's atomic set of kFutexBitWait (cst (?) on account of 8.2.3.1, 8.2.3.8, etc)
     // Also note: mfence is far too expensive and the _ReadWriteBarrier() intrinsic does absolutely nothing
     _ReadWriteBarrier();
 #else
     InterlockedAndRelease((volatile LONG *)&uValueRef, ~0xFF);
 #endif
+#else
+    __sync_lock_release((AuUInt8 *)&uValueRef); // __atomic_store_explicit((AuUInt8 *)&uValueRef, 0, __ATOMIC_RELEASE)
+#endif
     while (true)
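To make the branch structure above easier to compare, here is a self-contained sketch of the three unlock flavors this hunk selects between, written against the standard MSVC/GCC predefined macros rather than the Aurora ones. The function and parameter names are hypothetical, and the low-byte layout is an assumption carried over from the x86/x64 fast path.

    #include <cstdint>

    #if defined(_MSC_VER)
        #include <intrin.h>   // _ReadWriteBarrier
        #include <windows.h>  // InterlockedAndRelease
    #endif

    // Hypothetical stand-alone rendition of the unlock paths in this hunk.
    static inline void UnlockStateWord(volatile std::uint32_t *pState)
    {
    #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
        // x86/x64 under MSVC: stores are not reordered with older stores
        // (Intel SDM Vol. 3A, 8.2.3.1), so a plain byte store plus a
        // compiler-only barrier suffices; no fence instruction is emitted.
        *reinterpret_cast<volatile std::uint8_t *>(pState) = 0;
        _ReadWriteBarrier();
    #elif defined(_MSC_VER)
        // Other architectures under MSVC: clear the low byte with a
        // release-ordered atomic AND.
        InterlockedAndRelease(reinterpret_cast<volatile long *>(pState), ~0xFFL);
    #else
        // GCC/Clang: release store of zero to the held byte.
        __atomic_store_n(reinterpret_cast<volatile std::uint8_t *>(pState),
                         std::uint8_t { 0 }, __ATOMIC_RELEASE);
    #endif
    }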