From ff4d386563e0fa4748be391b13971843e0d9ab48 Mon Sep 17 00:00:00 2001 From: Jamie Reece Wilson Date: Thu, 14 Nov 2024 11:18:47 +0000 Subject: [PATCH] [+] Aurora::Threading::EWaitMethod::eAnd [+] Aurora::Threading::EWaitMethod::eAndNotMask --- Include/Aurora/Threading/WakeOnAddress.hpp | 22 ++++--- Source/Threading/AuWakeOnAddress.cpp | 72 ++++++++++++++++++++++ 2 files changed, 86 insertions(+), 8 deletions(-) diff --git a/Include/Aurora/Threading/WakeOnAddress.hpp b/Include/Aurora/Threading/WakeOnAddress.hpp index 3f07093d..e3e9d34f 100644 --- a/Include/Aurora/Threading/WakeOnAddress.hpp +++ b/Include/Aurora/Threading/WakeOnAddress.hpp @@ -50,26 +50,32 @@ require these. Further platform support can be added with this; only a semaphore or conditionvar/mutex pair is required to bootstrap this path. - Memory note: Weakly ordered memory is an alien concept. AuAtomicXXX operations ensure all previous stores are - visible across all cores (useful for semaphore increment and mutex-unlock operations), and that loads + Memory note: Weakly ordered memory architecture is an alien concept. AuAtomicXXX operations ensure all previous stores + are visible across all cores (useful for semaphore increment and mutex-unlock operations), and that loads are evaluated in order. For all intents and purposes, you should treat the au ecosystem like any other strongly ordered processor and program pair. For memeworthy lockless algorithms, you can use spec-of-the-year atomic word containers and related methods; we dont care about optimizing some midwits weakly-ordered cas spinning and ABA-hell container, thats genuinely believed to be the best thing ever. Sincerely, you are doing something wrong if you're write-locking a container for any notable length of - time, and more often than not, lock-free algorithms are bloated to all hell, just to end up losing in - most real world use cases. 
+ time, and more often than not, lock-free algorithms are bloated to all hell, just to end up losing to + read/write mutex guarded algorithms in most real world use cases - using an atomic pointer over lock bits + makes no difference besides the number of bugs you can expect to creep into your less flexible code. - tldr: Dont worry about memory ordering or ABA. Use the locks, atomic utilities, and primitives as expected. - (you'll be fine) + tldr: Don't worry about memory ordering or ABA. Use the provided locks, AuAtomic ops, and thread primitives as expected. + (you'll be fine. trust me bro.) + + Configuration reminder: + NT 6.2+ platforms may be optimized for the expected de facto case of EWaitMethod::eNotEqual / no "-Special". + If you're implementing special primitives or using AuFutexSemaphore with timeline acquisitions, remember to + set ThreadingConfig::bPreferEmulatedWakeOnAddress=true at Aurora::RuntimeStart ***/ #pragma once namespace Aurora::Threading { - // Break sleep when volatile pTargetAddress [...] constant pCompareAddress + // Specifies to break a thread context yield when volatile pTargetAddress [... EWaitMethod operation ...] 
constant pCompareAddress AUE_DEFINE(EWaitMethod, ( - eNotEqual, eEqual, eLessThanCompare, eGreaterThanCompare, eLessThanOrEqualsCompare, eGreaterThanOrEqualsCompare + eNotEqual, eEqual, eLessThanCompare, eGreaterThanCompare, eLessThanOrEqualsCompare, eGreaterThanOrEqualsCompare, eAnd, eAndNotMask )) AUKN_SYM void WakeAllOnAddress(const void *pTargetAddress); diff --git a/Source/Threading/AuWakeOnAddress.cpp b/Source/Threading/AuWakeOnAddress.cpp index d82847ca..274e5f2d 100644 --- a/Source/Threading/AuWakeOnAddress.cpp +++ b/Source/Threading/AuWakeOnAddress.cpp @@ -48,6 +48,12 @@ namespace Aurora::Threading case EWaitMethod::eLessThanOrEqualsCompare: \ preface DoOfMethodType(__VA_ARGS__); \ break; \ + case EWaitMethod::eAnd: \ + preface DoOfMethodType(__VA_ARGS__); \ + break; \ + case EWaitMethod::eAndNotMask: \ + preface DoOfMethodType(__VA_ARGS__); \ + break; \ } static const int gShouldSpinOnlyInCPU = 1; // TODO: havent decided @@ -376,6 +382,39 @@ namespace Aurora::Threading } } + if constexpr (eMethod == EWaitMethod::eAnd) + { + switch (uSize) + { + case 1: + return !((AuReadU8(pHot, 0) & uMask) & (AuReadU8(pBuf2, 0) & uMask)); + case 2: + return !((AuReadU16(pHot, 0) & uMask) & (AuReadU16(pBuf2, 0) & uMask)); + case 4: + return !((AuReadU32(pHot, 0) & uMask) & (AuReadU32(pBuf2, 0) & uMask)); + case 8: + return !((AuReadU64(pHot, 0) & uMask) & (AuReadU64(pBuf2, 0) & uMask)); + default: + return false; + } + } + + if constexpr (eMethod == EWaitMethod::eAndNotMask) + { + switch (uSize) + { + case 1: + return !((AuReadU8(pHot, 0) & uMask) & ~(AuReadU8(pBuf2, 0) & uMask)); + case 2: + return !((AuReadU16(pHot, 0) & uMask) & ~(AuReadU16(pBuf2, 0) & uMask)); + case 4: + return !((AuReadU32(pHot, 0) & uMask) & ~(AuReadU32(pBuf2, 0) & uMask)); + case 8: + return !((AuReadU64(pHot, 0) & uMask) & ~(AuReadU64(pBuf2, 0) & uMask)); + default: + return false; + } + } } else { @@ -482,6 +521,39 @@ namespace Aurora::Threading } } + if constexpr (eMethod == EWaitMethod::eAnd) 
+ { + switch (uSize) + { + case 1: + return !(AuReadU8(pHot, 0) & AuReadU8(pBuf2, 0)); + case 2: + return !(AuReadU16(pHot, 0) & AuReadU16(pBuf2, 0)); + case 4: + return !(AuReadU32(pHot, 0) & AuReadU32(pBuf2, 0)); + case 8: + return !(AuReadU64(pHot, 0) & AuReadU64(pBuf2, 0)); + default: + return false; + } + } + + if constexpr (eMethod == EWaitMethod::eAndNotMask) + { + switch (uSize) + { + case 1: + return !(AuReadU8(pHot, 0) & ~AuReadU8(pBuf2, 0)); + case 2: + return !(AuReadU16(pHot, 0) & ~AuReadU16(pBuf2, 0)); + case 4: + return !(AuReadU32(pHot, 0) & ~AuReadU32(pBuf2, 0)); + case 8: + return !(AuReadU64(pHot, 0) & ~AuReadU64(pBuf2, 0)); + default: + return false; + } + } } return false;