[*] Harden weakly ordered systems: atomic stores (rare) and lock clears (common) should perform a full memory barrier (gross, but required) BEFORE the atomic store (allowing other threads to acquire)

This commit is contained in:
Reece Wilson 2024-06-23 04:40:51 +01:00
parent c9be4cc419
commit a3c3810c3b

View File

@ -57,8 +57,8 @@
// AuAtomicUnset (ret: bool)
// AuAtomicAndUnsetBit (ret: T)
// -
// AuAtomicLoadWeak (tbd)
// (no weak store) (tbd)
// AuAtomicLoadWeak
// AuAtomicStoreWeak
// -
// AuAtomicLoad
// AuAtomicStore
@ -66,8 +66,8 @@
// AuAtomicOrSetBit
// AuAtomicOr
// AuAtomicAnd
// AuAtomicAdd
// AuAtomicSub
// AuAtomicAdd (warn: returns post op atomic value)
// AuAtomicSub (warn: returns post op atomic value)
template <class T>
struct AuAtomicUtils
@ -138,6 +138,8 @@ struct AuAtomicUtils
//
static T LoadWeak(T *in);
static void StoreWeak(T *in, T value);
};
#if defined(AURORA_COMPILER_MSVC)
@ -419,6 +421,12 @@ inline auline T AuAtomicUtils<T>::LoadWeak(T *in)
return AuAtomicUtils<T>::Load(in);
}
template <class T>
inline void AuAtomicUtils<T>::StoreWeak(T *in, T val)
{
// Weak store: a plain, non-atomic write. No fence is issued and no
// inter-thread ordering/visibility is guaranteed beyond whatever the
// compiler and ISA provide for an ordinary store. Callers that publish
// data to other threads must use AuAtomicUtils<T>::Store instead, which
// (per this commit) fences before the store on weakly ordered targets.
// NOTE(review): declared as StoreWeak(T *in, T value) in the class body;
// the parameter is named `val` here — harmless, but worth unifying.
*in = val;
}
#if defined(AURORA_COMPILER_CLANG)
#define ATOMIC_PREFIX_HAX(name) __c11_ ## name
#else
@ -434,19 +442,11 @@ inline auline T AuAtomicUtils<T>::Load(T *in)
}
else
{
#if defined(AURORA_COMPILER_MSVC) && (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
const auto read = *in;
::_ReadWriteBarrier();
return read;
#elif defined(AURORA_COMPILER_MSVC)
::MemoryBarrier(); // works on all legacy MSVC targets including AMD64, IA64, and POWER
return *in;
#if defined(AURORA_COMPILER_MSVC)
return *(volatile T*)in;
#else
#if defined(AURORA_COMPILER_CLANG)
#if !(defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
__sync_synchronize(); // brute force on unknown archs. gcc-like compilers will accept this
#endif
if constexpr (AuIsSame_v<AuUInt8, T>)
{
return ATOMIC_PREFIX_HAX(atomic_load)((_Atomic(AuUInt8) *)(in), __ATOMIC_ACQUIRE);
@ -501,12 +501,15 @@ inline auline void AuAtomicUtils<T>::Store(T *in, T val)
else
{
#if defined(AURORA_COMPILER_MSVC) && (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
*in = val;
*(volatile T*)in = val;
#elif defined(AURORA_COMPILER_MSVC)
#if 0
* in = val;
::MemoryBarrier();
*(volatile T *)in = val;
#else
// assume /volatile:iso
MemoryBarrier();
if constexpr (AuIsSame_v<AuUInt8, T>)
{
::InterlockedExchange8((CHAR volatile *)in, (CHAR)val);
@ -543,8 +546,12 @@ inline auline void AuAtomicUtils<T>::Store(T *in, T val)
{
static_assert(AuIsVoid_v<T>, "T");
}
#endif
#else
#if !(defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
__sync_synchronize();
#endif
if constexpr (AuIsSame_v<AuUInt8, T>)
{
ATOMIC_PREFIX_HAX(atomic_store)((_Atomic(AuUInt8) *)(in), val, __ATOMIC_RELEASE);
@ -581,9 +588,6 @@ inline auline void AuAtomicUtils<T>::Store(T *in, T val)
{
static_assert(AuIsVoid_v<T>, "T");
}
#if !(defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
__sync_synchronize();
#endif
#endif
}
}
@ -593,17 +597,21 @@ inline auline
void AuAtomicUtils<AuUInt8>::ClearU8Lock(AuUInt8 *in)
{
#if defined(AURORA_COMPILER_MSVC) && (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
*in = 0;
::_ReadWriteBarrier();
*(volatile AuUInt8 *)in = 0;
#elif defined(AURORA_COMPILER_MSVC)
// assume /volatile:iso
MemoryBarrier();
::_InterlockedExchange8((volatile char *)in, 0);
// i think this will work on aarch64 and most risc architectures
//InterlockedAndRelease((volatile LONG *)in, ~0xFF);
#else
ATOMIC_PREFIX_HAX(atomic_store)((_Atomic(AuUInt8) *)(in), 0, __ATOMIC_RELEASE);
// Flush non-atomic operations within L1 or other store/load queue
#if !(defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
__sync_synchronize();
#endif
// Clear the lock
ATOMIC_PREFIX_HAX(atomic_store)((_Atomic(AuUInt8) *)(in), 0, __ATOMIC_RELEASE);
#endif
}
@ -687,6 +695,13 @@ T AuAtomicLoadWeak(T *in)
return AuAtomicUtils<T>::LoadWeak(in);
}
// Free-function convenience wrapper over AuAtomicUtils<T>::StoreWeak.
// Performs a weak (plain, non-fencing) store of `value` into `*in`;
// no memory-ordering guarantee is implied — use AuAtomicStore when
// release semantics are required (e.g. publishing to another thread).
template <class T>
auline
void AuAtomicStoreWeak(T *in, T value)
{
AuAtomicUtils<T>::StoreWeak(in, value);
}
template <class T>
auline
void AuAtomicClearU8Lock(T *in)
@ -786,6 +801,13 @@ T AuAtomicLoadWeak(volatile T *in)
return AuAtomicUtils<T>::LoadWeak((T *)in);
}
// volatile-qualified overload of AuAtomicStoreWeak: strips the volatile
// qualifier via C-style cast and forwards to AuAtomicUtils<T>::StoreWeak.
// Semantics are identical to the non-volatile overload — a plain store
// with no fence and no ordering guarantee. The cast mirrors the pattern
// used by the sibling volatile overloads in this header (e.g.
// AuAtomicLoadWeak above).
template <class T>
auline
void AuAtomicStoreWeak(volatile T *in, T value)
{
AuAtomicUtils<T>::StoreWeak((T *)in, value);
}
template <class T>
auline
void AuAtomicClearU8Lock(volatile T *in)