diff --git a/Include/Aurora/Threading/Waitables/FutexCondWaitable.hpp b/Include/Aurora/Threading/Waitables/FutexCondWaitable.hpp index 3e23dbbe..abd6b67f 100644 --- a/Include/Aurora/Threading/Waitables/FutexCondWaitable.hpp +++ b/Include/Aurora/Threading/Waitables/FutexCondWaitable.hpp @@ -100,7 +100,7 @@ namespace Aurora::Threading::Waitables AuUInt32 uWaitCount {}; AuUInt32 uWaiters {}; - uWaiters = this->uAtomicSleeping; + uWaiters = AuAtomicLoad(&this->uAtomicSleeping); if (uWaiters > 0) { AuAtomicAdd(&this->uAtomicState, 1u); @@ -124,7 +124,7 @@ namespace Aurora::Threading::Waitables AuUInt32 uWaitCount {}; AuUInt32 uWaiters {}; - uWaiters = this->uAtomicSleeping; + uWaiters = AuAtomicLoad(&this->uAtomicSleeping); if (uWaiters > 0) { AuAtomicAdd(&this->uAtomicState, uWaiters); @@ -149,7 +149,7 @@ namespace Aurora::Threading::Waitables AuUInt32 uWaitCount {}; AuUInt32 uWaiters {}; - uWaiters = this->uAtomicSleeping; + uWaiters = AuAtomicLoad(&this->uAtomicSleeping); if (uWaiters > 0) { auto uMin = AuMin(uWaiters, uThreads); @@ -176,7 +176,7 @@ namespace Aurora::Threading::Waitables auline bool TryLock3() { - auto old = this->uAtomicState; + auto old = AuAtomicLoad(&this->uAtomicState); return ((old != 0 && AuAtomicCompareExchange(&this->uAtomicState, old - 1, old) == old)); } diff --git a/Include/Aurora/Threading/Waitables/FutexSemaphoreWaitable.hpp b/Include/Aurora/Threading/Waitables/FutexSemaphoreWaitable.hpp index 11fe5339..ffc5ca48 100644 --- a/Include/Aurora/Threading/Waitables/FutexSemaphoreWaitable.hpp +++ b/Include/Aurora/Threading/Waitables/FutexSemaphoreWaitable.hpp @@ -78,7 +78,7 @@ namespace Aurora::Threading::Waitables { AuAtomicAdd(&this->uAtomicState, AuUInt32(uThreads)); - if (auto uSleeping = this->uAtomicSleeping) + if (auto uSleeping = AuAtomicLoad(&this->uAtomicSleeping)) { WakeNOnAddress((const void *)&this->uAtomicState, uSleeping); } diff --git a/Include/Aurora/Threading/Waitables/FutexWaitable.hpp b/Include/Aurora/Threading/Waitables/FutexWaitable.hpp index 879cf2a4..cdf1f681 100644 --- a/Include/Aurora/Threading/Waitables/FutexWaitable.hpp +++ b/Include/Aurora/Threading/Waitables/FutexWaitable.hpp @@ -68,13 +68,9 @@ namespace Aurora::Threading::Waitables inline void Unlock() override { - #if defined(AURORA_COMPILER_MSVC) - this->uAtomicState = 0; - #else - __sync_lock_release(&this->uAtomicState); - #endif + AuAtomicClearU8Lock(&this->uAtomicState); - if (auto uSleeping = this->uAtomicSleeping) + if (auto uSleeping = AuAtomicLoad(&this->uAtomicSleeping)) { WakeOnAddress((const void *)&this->uAtomicState); } diff --git a/Source/Threading/Primitives/AuConditionEx.cpp b/Source/Threading/Primitives/AuConditionEx.cpp index 8812aa4b..8cff8529 100644 --- a/Source/Threading/Primitives/AuConditionEx.cpp +++ b/Source/Threading/Primitives/AuConditionEx.cpp @@ -117,7 +117,7 @@ namespace Aurora::Threading::Primitives AuUInt32 uWaitCount {}; AuUInt32 uWaiters {}; - uWaiters = this->uWaiters_; + uWaiters = AuAtomicLoad(&this->uWaiters_); if (uWaiters > 0) { this->s_.Unlock(); @@ -141,7 +141,7 @@ namespace Aurora::Threading::Primitives AuUInt32 uWaitCount {}; AuUInt32 uWaiters {}; - uWaiters = this->uWaiters_; + uWaiters = AuAtomicLoad(&this->uWaiters_); if (uWaiters > 0) { this->s_.Unlock(uWaiters); diff --git a/Source/Threading/Primitives/AuConditionMutex.Linux.cpp b/Source/Threading/Primitives/AuConditionMutex.Linux.cpp index ba00112a..ef9aedd2 100644 --- a/Source/Threading/Primitives/AuConditionMutex.Linux.cpp +++ b/Source/Threading/Primitives/AuConditionMutex.Linux.cpp @@ -14,9 +14,6 @@ namespace Aurora::Threading::Primitives { - #define barrier() __asm__ __volatile__("sfence": : :"memory") - #define compilerReorderBarrier() __asm__ __volatile__("": : :"memory") - LinuxConditionMutex::LinuxConditionMutex() { @@ -188,9 +185,8 @@ namespace Aurora::Threading::Primitives void LinuxConditionMutex::Unlock() { - __sync_lock_release(&this->uState_); - compilerReorderBarrier(); - if (this->uSleeping_) + AuAtomicClearU8Lock(&this->uState_); + if (AuAtomicLoad(&this->uSleeping_)) { futex_wake(&this->uState_, 1); } diff --git a/Source/Threading/Primitives/AuConditionMutex.NT.cpp b/Source/Threading/Primitives/AuConditionMutex.NT.cpp index 90ce1e74..cc98833d 100644 --- a/Source/Threading/Primitives/AuConditionMutex.NT.cpp +++ b/Source/Threading/Primitives/AuConditionMutex.NT.cpp @@ -118,15 +118,7 @@ namespace Aurora::Threading::Primitives auto &uValueRef = this->lock_.uWaitCount; - #if defined(AURORA_COMPILER_MSVC) - #if defined(AURORA_ARCH_X86) || defined(AURORA_ARCH_X64) - *(AuUInt8 *)&uValueRef = 0; - #else - InterlockedAndRelease((volatile LONG *)&uValueRef, ~0xFF); - #endif - #else - __sync_lock_release((AuUInt8 *)&uValueRef); - #endif + AuAtomicClearU8Lock(&uValueRef); while (true) { diff --git a/Source/Threading/Primitives/AuConditionVariable.Linux.cpp b/Source/Threading/Primitives/AuConditionVariable.Linux.cpp index c5ee7260..2eefb015 100644 --- a/Source/Threading/Primitives/AuConditionVariable.Linux.cpp +++ b/Source/Threading/Primitives/AuConditionVariable.Linux.cpp @@ -138,7 +138,7 @@ namespace Aurora::Threading::Primitives AuUInt32 uWaitCount {}; AuUInt32 uWaiters {}; - uWaiters = this->uSleeping_; + uWaiters = AuAtomicLoad(&this->uSleeping_); if (uWaiters > 0) { AuAtomicAdd(&this->uState_, 1u); @@ -163,7 +163,7 @@ namespace Aurora::Threading::Primitives AuUInt32 uWaitCount {}; AuUInt32 uWaiters {}; - uWaiters = this->uSleeping_; + uWaiters = AuAtomicLoad(&this->uSleeping_); if (uWaiters > 0) { AuAtomicAdd(&this->uState_, uWaiters); diff --git a/Source/Threading/Primitives/AuMutex.Linux.cpp b/Source/Threading/Primitives/AuMutex.Linux.cpp index 566fce39..86cbc293 100755 --- a/Source/Threading/Primitives/AuMutex.Linux.cpp +++ b/Source/Threading/Primitives/AuMutex.Linux.cpp @@ -17,9 +17,6 @@ namespace Aurora::Threading::Primitives { - #define barrier() __asm__ __volatile__("sfence": : :"memory") - #define compilerReorderBarrier() __asm__ __volatile__("": : :"memory") - MutexImpl::MutexImpl() { @@ -153,9 +150,9 @@ namespace Aurora::Threading::Primitives void MutexImpl::Unlock() { - __sync_lock_release(&this->state_); - compilerReorderBarrier(); - if (this->dwSleeping_) + AuAtomicClearU8Lock(&this->state_); + + if (AuAtomicLoad(&this->dwSleeping_)) { futex_wake(&this->state_, 1); } diff --git a/Source/Threading/Primitives/AuMutex.NT.cpp b/Source/Threading/Primitives/AuMutex.NT.cpp index c6814966..eb16987b 100644 --- a/Source/Threading/Primitives/AuMutex.NT.cpp +++ b/Source/Threading/Primitives/AuMutex.NT.cpp @@ -273,36 +273,38 @@ namespace Aurora::Threading::Primitives auto &uValueRef = this->state_; - #if defined(AURORA_COMPILER_MSVC) - #if defined(AURORA_ARCH_X86) || defined(AURORA_ARCH_X64) - // Intel 64 and IA - 32 Architectures Software Developer's Manual, Volume 3A: Section: 8.2.3.1 - *(AuUInt8 *)&uValueRef = 0; - - // From this point onwards, our thread could be subject to StoreLoad re-ordering - // ...but it should not matter. - - // Given the memory model of x86[64], we can only really expect to be out of order during an unfenced load operation, which in this class, can only be expected under this function before the CAS. - // No other place reads. - - // Re-ordering race condition 1: one thread wins an atomic bit set, that we dont catch until the CAS, resulting in: a slow implicit fence under the cas, a mm_pause stall, a compare, and a return - // alt: uValueRef reads zero, resulting in a preemptive return while no threads need to be awoken - // Re-ordering race condition 2: we unlock, multiple threads enter ::Lock(), we somehow read `uValue = uValueRef` as zero, and then the first atomic bitsetandtest winner thread signals the keyed mutex - // I fail to see how: - // *byte = 0; | | - // | interlocked atomicbitset | interlocked atomicbitset fail - // | [logic] | interlocked atomic set kFutexBitWait - // | *byte = 0; | yield - // | auto uValue =[acquire]= uValueRef - // ...would result in the second thread missing the third threads atomic set kFutexBitWait (cst (?) on the account of 8.2.3.1, 8.2.3.8, etc) - - // Also note: mfence is far too expensive and the _ReadWriteBarrier() intrinsics do absolutely nothing - _ReadWriteBarrier(); - #else - InterlockedAndRelease((volatile LONG *)&uValueRef, ~0xFF); - #endif - #else - __sync_lock_release((AuUInt8 *)&uValueRef); // __atomic_store_explicit((AuUInt8 *)&uValueRef, 0, __ATOMIC_RELEASE) - #endif + //#if defined(AURORA_COMPILER_MSVC) + // #if defined(AURORA_ARCH_X86) || defined(AURORA_ARCH_X64) + // // Intel 64 and IA - 32 Architectures Software Developer's Manual, Volume 3A: Section: 8.2.3.1 + // *(AuUInt8 *)&uValueRef = 0; + // + // // From this point onwards, our thread could be subject to StoreLoad re-ordering + // // ...but it should not matter. + // + // // Given the memory model of x86[64], we can only really expect to be out of order during an unfenced load operation, which in this class, can only be expected under this function before the CAS. + // // No other place reads. + // + // // Re-ordering race condition 1: one thread wins an atomic bit set, that we dont catch until the CAS, resulting in: a slow implicit fence under the cas, a mm_pause stall, a compare, and a return + // // alt: uValueRef reads zero, resulting in a preemptive return while no threads need to be awoken + // // Re-ordering race condition 2: we unlock, multiple threads enter ::Lock(), we somehow read `uValue = uValueRef` as zero, and then the first atomic bitsetandtest winner thread signals the keyed mutex + // // I fail to see how: + // // *byte = 0; | | + // // | interlocked atomicbitset | interlocked atomicbitset fail + // // | [logic] | interlocked atomic set kFutexBitWait + // // | *byte = 0; | yield + // // | auto uValue =[acquire]= uValueRef + // // ...would result in the second thread missing the third threads atomic set kFutexBitWait (cst (?) on the account of 8.2.3.1, 8.2.3.8, etc) + // + // // Also note: mfence is far too expensive and the _ReadWriteBarrier() intrinsics do absolutely nothing + // _ReadWriteBarrier(); + // #else + // InterlockedAndRelease((volatile LONG *)&uValueRef, ~0xFF); + // #endif + //#else + // __sync_lock_release((AuUInt8 *)&uValueRef); // __atomic_store_explicit((AuUInt8 *)&uValueRef, 0, __ATOMIC_RELEASE) + //#endif + // merged with ROXTL + AuAtomicClearU8Lock(&uValueRef); while (true) { diff --git a/Source/Threading/Primitives/AuRWLock.cpp b/Source/Threading/Primitives/AuRWLock.cpp index 168f6d1b..b309f24f 100644 --- a/Source/Threading/Primitives/AuRWLock.cpp +++ b/Source/Threading/Primitives/AuRWLock.cpp @@ -18,12 +18,6 @@ namespace Aurora::Threading::Primitives #define ViewParent ((T *)(((char *)this) - (bIsReadView ? RWLockImpl::kOffsetOfRead : RWLockImpl::kOffsetOfWrite))) #endif -#if defined(AURORA_COMPILER_MSVC) - #define RWLOCK_REORDER_BARRIER() ::MemoryBarrier(); -#else - #define RWLOCK_REORDER_BARRIER() -#endif - static const auto kRWThreadWriterHardContextSwitchBias = 15; template @@ -358,7 +352,7 @@ namespace Aurora::Threading::Primitives } else { - auto uOld = this->state_; + auto uOld = AuAtomicLoad(&this->state_); if (uOld < 0) { if (this->reentrantWriteLockHandle_ == GetThreadCookie()) @@ -399,7 +393,7 @@ namespace Aurora::Threading::Primitives while (true) { AuInt32 iCurState; - while ((iCurState = this->state_) != 0) + while ((iCurState = AuAtomicLoad(&this->state_)) != 0) { AuInt64 uSecondTimeout = 0; @@ -409,13 +403,12 @@ namespace Aurora::Threading::Primitives auto pSemaphore = this->GetFutexConditionWriter(); AuInt32 iCurState; - while ((iCurState = this->state_) != 0) + while ((iCurState = AuAtomicLoad(&this->state_)) != 0) { bool bStatusTwo {}; AuAtomicAdd(&this->writersPending_, 1); static const AuUInt32 kExpect { 0 }; - RWLOCK_REORDER_BARRIER(); - if ((iCurState = this->state_) == 0) + if ((iCurState = AuAtomicLoad(&this->state_)) == 0) { bStatus = true; bStatusTwo = true; @@ -655,8 +648,7 @@ namespace Aurora::Threading::Primitives } else { - /* atomic read */ - bElevation = this->writersPending_ > 0; + bElevation = AuAtomicLoad(&this->writersPending_) > 0; } if (bElevation) @@ -682,21 +674,13 @@ namespace Aurora::Threading::Primitives if (!gUseFutexRWLock) { AU_LOCK_GUARD(this->mutex_); - #if defined(AURORA_COMPILER_MSVC) - this->state_ = 0; - #else - __sync_lock_release(&this->state_); - #endif + AuAtomicStore(&this->state_, 0); bElevationPending = this->writersPending_ > 0; } else { - bElevationPending = this->writersPending_ > 0; - #if defined(AURORA_COMPILER_MSVC) - this->state_ = 0; - #else - __sync_lock_release(&this->state_); - #endif + AuAtomicStore(&this->state_, 0); + bElevationPending = AuAtomicLoad(&this->writersPending_) > 0; } if (bElevationPending) @@ -818,15 +802,14 @@ namespace Aurora::Threading::Primitives auto pSemaphore = this->GetFutexConditionWriter(); AuInt32 iCurState; - while ((iCurState = this->state_) != 1) + while ((iCurState = AuAtomicLoad(&this->state_)) != 1) { bool bStatusTwo {}; bool bStatus {}; AuAtomicAdd(&this->writersPending_, 1); static const AuUInt32 kExpect { 0 }; - RWLOCK_REORDER_BARRIER(); - if ((iCurState = this->state_) == 1) + if ((iCurState = AuAtomicLoad(&this->state_)) == 1) { bStatus = true; bStatusTwo = true;