From a3c3810c3b80bbc29dd8226439e46a2d787c9cef Mon Sep 17 00:00:00 2001
From: Jamie Reece Wilson
Date: Sun, 23 Jun 2024 04:40:51 +0100
Subject: [PATCH] [*] Harden weakly ordered systems: atomic stores (rare) and
 lock clears (common) should perform a full memory barrier (gross, but
 required) BEFORE the atomic store (allowing other threads to acquire)

---
 Include/auROXTL/auAtomic.hpp | 66 ++++++++++++++++++++++++------------
 1 file changed, 44 insertions(+), 22 deletions(-)

diff --git a/Include/auROXTL/auAtomic.hpp b/Include/auROXTL/auAtomic.hpp
index 2512cca..a9df9d5 100644
--- a/Include/auROXTL/auAtomic.hpp
+++ b/Include/auROXTL/auAtomic.hpp
@@ -57,8 +57,8 @@
 // AuAtomicUnset (ret: bool)
 // AuAtomicAndUnsetBit (ret: T)
 // -
-// AuAtomicLoadWeak (tbd)
-// (no weak store) (tbd)
+// AuAtomicLoadWeak
+// AuAtomicStoreWeak
 // -
 // AuAtomicLoad
 // AuAtomicStore
@@ -66,8 +66,8 @@
 // AuAtomicOrSetBit
 // AuAtomicOr
 // AuAtomicAnd
-// AuAtomicAdd
-// AuAtomicSub
+// AuAtomicAdd (warn: returns post op atomic value)
+// AuAtomicSub (warn: returns post op atomic value)
 
 template <class T>
 struct AuAtomicUtils
@@ -138,6 +138,8 @@ struct AuAtomicUtils
 
     //
     static T LoadWeak(T *in);
+
+    static void StoreWeak(T *in, T value);
 };
 
 #if defined(AURORA_COMPILER_MSVC)
@@ -419,6 +421,12 @@ inline auline T AuAtomicUtils<T>::LoadWeak(T *in)
     return AuAtomicUtils<T>::Load(in);
 }
 
+template <class T>
+inline void AuAtomicUtils<T>::StoreWeak(T *in, T val)
+{
+    *in = val;
+}
+
 #if defined(AURORA_COMPILER_CLANG)
     #define ATOMIC_PREFIX_HAX(name) __c11_ ## name
 #else
@@ -434,19 +442,11 @@ inline auline T AuAtomicUtils<T>::Load(T *in)
     }
     else
     {
-#if defined(AURORA_COMPILER_MSVC) && (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
-        const auto read = *in;
-        ::_ReadWriteBarrier();
-        return read;
-#elif defined(AURORA_COMPILER_MSVC)
-        ::MemoryBarrier(); // works on all legacy MSVC targets including AMD64, IA64, and POWER
-        return *in;
+#if defined(AURORA_COMPILER_MSVC)
+        return *(volatile T*)in;
 #else
 #if defined(AURORA_COMPILER_CLANG)
-#if !(defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
-        __sync_synchronize(); // brute force on unknown archs. gcc-like compilers will accept this
-#endif
         if constexpr (AuIsSame_v<AuUInt8, T>)
         {
             return ATOMIC_PREFIX_HAX(atomic_load)((_Atomic(AuUInt8) *)(in), __ATOMIC_ACQUIRE);
         }
@@ -501,12 +501,15 @@ inline auline void AuAtomicUtils<T>::Store(T *in, T val)
     else
     {
 #if defined(AURORA_COMPILER_MSVC) && (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
-        *in = val;
+        *(volatile T*)in = val;
 #elif defined(AURORA_COMPILER_MSVC)
     #if 0
-        * in = val; ::MemoryBarrier();
+        *(volatile T *)in = val;
     #else
+        // assume /volatile:iso
+        MemoryBarrier();
+
         if constexpr (AuIsSame_v<AuUInt8, T>)
         {
             ::InterlockedExchange8((CHAR volatile *)in, (CHAR)val);
         }
@@ -543,8 +546,12 @@ inline auline void AuAtomicUtils<T>::Store(T *in, T val)
         {
             static_assert(AuIsVoid_v<T>, "T");
         }
+    #endif
 #else
+    #if !(defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
+    __sync_synchronize();
+    #endif
         if constexpr (AuIsSame_v<AuUInt8, T>)
         {
             ATOMIC_PREFIX_HAX(atomic_store)((_Atomic(AuUInt8) *)(in), val, __ATOMIC_RELEASE);
         }
@@ -581,9 +588,6 @@ inline auline void AuAtomicUtils<T>::Store(T *in, T val)
         {
             static_assert(AuIsVoid_v<T>, "T");
         }
-#if !(defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
-        __sync_synchronize();
-#endif
 #endif
     }
 }
@@ -593,17 +597,21 @@ inline auline void AuAtomicUtils<T>::Store(T *in, T val)
 {
 #if defined(AURORA_COMPILER_MSVC) && (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
-    *in = 0;
-    ::_ReadWriteBarrier();
+    *(volatile AuUInt8 *)in = 0;
 #elif defined(AURORA_COMPILER_MSVC)
+    // assume /volatile:iso
+    MemoryBarrier();
     ::_InterlockedExchange8((volatile char *)in, 0); // i think this will work on aarch64 and most risc architectures
     //InterlockedAndRelease((volatile LONG *)in, ~0xFF);
 #else
-    ATOMIC_PREFIX_HAX(atomic_store)((_Atomic(AuUInt8) *)(in), 0, __ATOMIC_RELEASE);
+    // Flush non-atomic operations within L1 or other store/load queue
     #if !(defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
     __sync_synchronize();
     #endif
+
+    // Clear the lock
+    ATOMIC_PREFIX_HAX(atomic_store)((_Atomic(AuUInt8) *)(in), 0, __ATOMIC_RELEASE);
 #endif
 }
@@ -687,6 +695,13 @@ T AuAtomicLoadWeak(T *in)
     return AuAtomicUtils<T>::LoadWeak(in);
 }
 
+template <class T>
+auline
+void AuAtomicStoreWeak(T *in, T value)
+{
+    AuAtomicUtils<T>::StoreWeak(in, value);
+}
+
 template <class T>
 auline
 void AuAtomicClearU8Lock(T *in)
@@ -786,6 +801,13 @@ T AuAtomicLoadWeak(volatile T *in)
     return AuAtomicUtils<T>::LoadWeak((T *)in);
 }
 
+template <class T>
+auline
+void AuAtomicStoreWeak(volatile T *in, T value)
+{
+    AuAtomicUtils<T>::StoreWeak((T *)in, value);
+}
+
 template <class T>
 auline
 void AuAtomicClearU8Lock(volatile T *in)
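
Note (illustration only, not part of the diff above): the ordering this commit enforces can be sketched against standard C++11 <atomic> rather than the auROXTL/auAtomic.hpp API. The SpinLock type, its uLockByte member, and the Lock/Unlock names below are hypothetical; they only mirror the pattern the commit message describes, namely issuing a full barrier before the store that lets other threads acquire, so that writes made inside the critical section are flushed first. Under the plain C++ memory model a release store already orders earlier writes against a matching acquire; the explicit fence mirrors the belt-and-braces hardening applied to the MSVC /volatile:iso and non-x86 paths in the patch.

    #include <atomic>
    #include <cstdint>

    // Hypothetical lock type; NOT auROXTL API. Shown only to illustrate the
    // barrier-before-store ordering described in the commit message.
    struct SpinLock
    {
        std::atomic<std::uint8_t> uLockByte { 0 };

        void Lock()
        {
            // Acquiring RMW: once this observes 0 and publishes 1, the writes
            // of the previous critical section are visible to this thread.
            while (uLockByte.exchange(1, std::memory_order_acquire) != 0)
            {
                while (uLockByte.load(std::memory_order_relaxed) != 0)
                {
                    // spin; a real lock would pause/yield here
                }
            }
        }

        void Unlock()
        {
            // Full barrier BEFORE the store that releases the lock, mirroring
            // the MemoryBarrier()/__sync_synchronize() placement in the patch.
            std::atomic_thread_fence(std::memory_order_seq_cst);

            // Now clear the lock byte so other threads may acquire.
            uLockByte.store(0, std::memory_order_release);
        }
    };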