[*] Harden weakly ordered systems: atomic stores (rare) and lock clears (common) should perform a full memory barrier (gross, but required) BEFORE the atomic store (allowing other threads to acquire)

Reece Wilson 2024-06-23 04:40:51 +01:00
parent c9be4cc419
commit a3c3810c3b


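The pattern being enforced, as a minimal standalone sketch (std::atomic is used here purely for illustration; it is not Aurora's API, and the function name is hypothetical):

    #include <atomic>
    #include <cstdint>

    // Illustrative only: on weakly ordered CPUs a full barrier must precede the
    // store that releases a lock, so every write made inside the critical
    // section is visible before another thread can observe the lock as free.
    inline void ClearU8LockSketch(std::atomic<std::uint8_t> &lock)
    {
        std::atomic_thread_fence(std::memory_order_seq_cst); // flush prior writes first
        lock.store(0, std::memory_order_release);            // then publish the unlock
    }
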
@@ -57,8 +57,8 @@
 // AuAtomicUnset (ret: bool)
 // AuAtomicAndUnsetBit (ret: T)
 // -
-// AuAtomicLoadWeak (tbd)
-// (no weak store) (tbd)
+// AuAtomicLoadWeak
+// AuAtomicStoreWeak
 // -
 // AuAtomicLoad
 // AuAtomicStore
@@ -66,8 +66,8 @@
 // AuAtomicOrSetBit
 // AuAtomicOr
 // AuAtomicAnd
-// AuAtomicAdd
-// AuAtomicSub
+// AuAtomicAdd (warn: returns post op atomic value)
+// AuAtomicSub (warn: returns post op atomic value)
 template <class T>
 struct AuAtomicUtils
@@ -138,6 +138,8 @@ struct AuAtomicUtils
     //
     static T LoadWeak(T *in);
+    static void StoreWeak(T *in, T value);
 };
 #if defined(AURORA_COMPILER_MSVC)
@@ -419,6 +421,12 @@ inline auline T AuAtomicUtils<T>::LoadWeak(T *in)
     return AuAtomicUtils<T>::Load(in);
 }
+template <class T>
+inline void AuAtomicUtils<T>::StoreWeak(T *in, T val)
+{
+    *in = val;
+}
 #if defined(AURORA_COMPILER_CLANG)
 #define ATOMIC_PREFIX_HAX(name) __c11_ ## name
 #else
@@ -434,19 +442,11 @@ inline auline T AuAtomicUtils<T>::Load(T *in)
     }
     else
     {
-#if defined(AURORA_COMPILER_MSVC) && (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
-        const auto read = *in;
-        ::_ReadWriteBarrier();
-        return read;
-#elif defined(AURORA_COMPILER_MSVC)
-        ::MemoryBarrier(); // works on all legacy MSVC targets including AMD64, IA64, and POWER
-        return *in;
+#if defined(AURORA_COMPILER_MSVC)
+        return *(volatile T*)in;
 #else
 #if defined(AURORA_COMPILER_CLANG)
-#if !(defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
-        __sync_synchronize(); // brute force on unknown archs. gcc-like compilers will accept this
-#endif
         if constexpr (AuIsSame_v<AuUInt8, T>)
         {
             return ATOMIC_PREFIX_HAX(atomic_load)((_Atomic(AuUInt8) *)(in), __ATOMIC_ACQUIRE);
@@ -501,12 +501,15 @@ inline auline void AuAtomicUtils<T>::Store(T *in, T val)
     else
     {
 #if defined(AURORA_COMPILER_MSVC) && (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
-        *in = val;
+        *(volatile T*)in = val;
 #elif defined(AURORA_COMPILER_MSVC)
 #if 0
-        *in = val;
         ::MemoryBarrier();
+        *(volatile T *)in = val;
 #else
+        // assume /volatile:iso
+        MemoryBarrier();
         if constexpr (AuIsSame_v<AuUInt8, T>)
         {
             ::InterlockedExchange8((CHAR volatile *)in, (CHAR)val);
@@ -543,8 +546,12 @@ inline auline void AuAtomicUtils<T>::Store(T *in, T val)
         {
             static_assert(AuIsVoid_v<T>, "T");
         }
 #endif
 #else
+#if !(defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
+        __sync_synchronize();
+#endif
         if constexpr (AuIsSame_v<AuUInt8, T>)
         {
             ATOMIC_PREFIX_HAX(atomic_store)((_Atomic(AuUInt8) *)(in), val, __ATOMIC_RELEASE);
@@ -581,9 +588,6 @@ inline auline void AuAtomicUtils<T>::Store(T *in, T val)
         {
             static_assert(AuIsVoid_v<T>, "T");
         }
-#if !(defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
-        __sync_synchronize();
-#endif
 #endif
     }
 }
@@ -593,17 +597,21 @@ inline auline
 void AuAtomicUtils<AuUInt8>::ClearU8Lock(AuUInt8 *in)
 {
 #if defined(AURORA_COMPILER_MSVC) && (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
-    *in = 0;
-    ::_ReadWriteBarrier();
+    *(volatile AuUInt8 *)in = 0;
 #elif defined(AURORA_COMPILER_MSVC)
+    // assume /volatile:iso
+    MemoryBarrier();
     ::_InterlockedExchange8((volatile char *)in, 0);
     // i think this will work on aarch64 and most risc architectures
     //InterlockedAndRelease((volatile LONG *)in, ~0xFF);
 #else
-    ATOMIC_PREFIX_HAX(atomic_store)((_Atomic(AuUInt8) *)(in), 0, __ATOMIC_RELEASE);
+    // Flush non-atomic operations within L1 or other store/load queue
 #if !(defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
     __sync_synchronize();
 #endif
+    // Clear the lock
+    ATOMIC_PREFIX_HAX(atomic_store)((_Atomic(AuUInt8) *)(in), 0, __ATOMIC_RELEASE);
 #endif
 }
@@ -687,6 +695,13 @@ T AuAtomicLoadWeak(T *in)
     return AuAtomicUtils<T>::LoadWeak(in);
 }
+template <class T>
+auline
+void AuAtomicStoreWeak(T *in, T value)
+{
+    AuAtomicUtils<T>::StoreWeak(in, value);
+}
 template <class T>
 auline
 void AuAtomicClearU8Lock(T *in)
@@ -786,6 +801,13 @@ T AuAtomicLoadWeak(volatile T *in)
     return AuAtomicUtils<T>::LoadWeak((T *)in);
 }
+template <class T>
+auline
+void AuAtomicStoreWeak(volatile T *in, T value)
+{
+    AuAtomicUtils<T>::StoreWeak((T *)in, value);
+}
 template <class T>
 auline
 void AuAtomicClearU8Lock(volatile T *in)
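
For completeness, a hedged usage sketch of the wrappers this commit adds. Everything below is an assumption for illustration (the globals, the heartbeat idea, and the mix of calls are not from the Aurora sources). Per the diff, AuAtomicStoreWeak is a plain, unordered store and so only fits data that needs no happens-before relationship, while AuAtomicStore and AuAtomicClearU8Lock now issue the full barrier before the releasing store on weakly ordered targets.

    // Hypothetical example, not part of the repository.
    static AuUInt32 gHeartbeat {}; // tick counter polled by a watchdog thread
    static AuUInt8  gLock {};      // byte spinlock paired with AuAtomicClearU8Lock

    void TickOnce()
    {
        // No ordering needed: a tear-free plain store is enough for a heartbeat.
        AuAtomicStoreWeak(&gHeartbeat, AuAtomicLoadWeak(&gHeartbeat) + 1u);
    }

    void ReleaseLock()
    {
        // Release path: prior writes are flushed before other threads can
        // acquire, which is exactly the ordering this commit hardens.
        AuAtomicClearU8Lock(&gLock);
    }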