From 54f7ee0b64b16755d509c1eb8bc8dedf6b6d2a69 Mon Sep 17 00:00:00 2001
From: Jamie Reece Wilson <me@reece.sx>
Date: Thu, 14 Nov 2024 11:18:47 +0000
Subject: [PATCH] [+] Aurora::Threading::EWaitMethod::eAnd [+]
 Aurora::Threading::EWaitMethod::eNotAnd

(amended: replace eAndNotMask with eNotAnd)
---
 Include/Aurora/Threading/WakeOnAddress.hpp | 22 ++++---
 Source/Threading/AuWakeOnAddress.cpp       | 72 ++++++++++++++++++++++
 2 files changed, 86 insertions(+), 8 deletions(-)

diff --git a/Include/Aurora/Threading/WakeOnAddress.hpp b/Include/Aurora/Threading/WakeOnAddress.hpp
index 3f07093d..b36e94f2 100644
--- a/Include/Aurora/Threading/WakeOnAddress.hpp
+++ b/Include/Aurora/Threading/WakeOnAddress.hpp
@@ -50,26 +50,32 @@
                   require these. Further platform support can be added with this; only a semaphore or conditionvar/mutex
                   pair is required to bootstrap this path.
                 
-        Memory note: Weakly ordered memory is an alien concept. AuAtomicXXX operations ensure all previous stores are
-                     visible across all cores (useful for semaphore increment and mutex-unlock operations), and that loads
+        Memory note: Weakly ordered memory architecture is an alien concept. AuAtomicXXX operations ensure all previous stores
+                     are visible across all cores (useful for semaphore increment and mutex-unlock operations), and that loads
                      are evaluated in order. For all intents and purposes, you should treat the au ecosystem like any
                      other strongly ordered processor and program pair. For memeworthy lockless algorithms, you can use
                      spec-of-the-year atomic word containers and related methods; we dont care about optimizing some midwits
                      weakly-ordered cas spinning and ABA-hell container, thats genuinely believed to be the best thing ever.
                      Sincerely, you are doing something wrong if you're write-locking a container for any notable length of
-                     time, and more often than not, lock-free algorithms are bloated to all hell, just to end up losing in
-                     most real world use cases.
+                     time, and more often than not, lock-free algorithms are bloated to all hell, just to end up losing to
+                     read/write mutex guarded algorithms in most real world use cases - using an atomic pointer over lock bits
+                     makes no difference aside from the number of bugs you can expect to creep into your less flexible code.
 
-                     tldr: Dont worry about memory ordering or ABA. Use the locks, atomic utilities, and primitives as expected.
-                           (you'll be fine)
+               tldr: Don't worry about memory ordering or ABA. Use the provided locks, AuAtomic ops, and thread primitives as expected.
+                     (you'll be fine. trust me bro.)
+
+        Configuration reminder: 
+                    NT 6.2+ platforms may be optimized for the expected de facto case of EWaitMethod::eNotEqual / no "-Special".
+                    If you're implementing special primitives or using AuFutexSemaphore with timeline acquisitions, remember to
+                    set ThreadingConfig::bPreferEmulatedWakeOnAddress=true at Aurora::RuntimeStart.
 ***/
 #pragma once
 
 namespace Aurora::Threading
 {
-    // Break sleep when volatile pTargetAddress [...] constant pCompareAddress
+    // Specifies the condition that ends a thread's wait: volatile pTargetAddress [... EWaitMethod operation ...] constant pCompareAddress
     AUE_DEFINE(EWaitMethod, (
-        eNotEqual, eEqual, eLessThanCompare, eGreaterThanCompare, eLessThanOrEqualsCompare, eGreaterThanOrEqualsCompare
+        eNotEqual, eEqual, eLessThanCompare, eGreaterThanCompare, eLessThanOrEqualsCompare, eGreaterThanOrEqualsCompare, eAnd, eNotAnd
     ))
 
     AUKN_SYM void WakeAllOnAddress(const void *pTargetAddress);
diff --git a/Source/Threading/AuWakeOnAddress.cpp b/Source/Threading/AuWakeOnAddress.cpp
index d82847ca..354f2af1 100644
--- a/Source/Threading/AuWakeOnAddress.cpp
+++ b/Source/Threading/AuWakeOnAddress.cpp
@@ -48,6 +48,12 @@ namespace Aurora::Threading
     case EWaitMethod::eLessThanOrEqualsCompare:                                          \
         preface DoOfMethodType<EWaitMethod::eLessThanOrEqualsCompare>(__VA_ARGS__);      \
         break;                                                                           \
+    case EWaitMethod::eAnd:                                                              \
+        preface DoOfMethodType<EWaitMethod::eAnd>(__VA_ARGS__);                          \
+        break;                                                                           \
+    case EWaitMethod::eNotAnd:                                                           \
+        preface DoOfMethodType<EWaitMethod::eNotAnd>(__VA_ARGS__);                       \
+        break;                                                                           \
     }
 
     static const int gShouldSpinOnlyInCPU = 1; // TODO: havent decided
@@ -376,6 +382,39 @@ namespace Aurora::Threading
                 }
             }
 
+            if constexpr (eMethod == EWaitMethod::eAnd)
+            {
+                switch (uSize)
+                {
+                case 1:
+                    return !((AuReadU8(pHot, 0) & uMask) & (AuReadU8(pBuf2, 0) & uMask));
+                case 2:
+                    return !((AuReadU16(pHot, 0) & uMask) & (AuReadU16(pBuf2, 0) & uMask));
+                case 4:
+                    return !((AuReadU32(pHot, 0) & uMask) & (AuReadU32(pBuf2, 0) & uMask));
+                case 8:
+                    return !((AuReadU64(pHot, 0) & uMask) & (AuReadU64(pBuf2, 0) & uMask));
+                default:
+                    return false;
+                }
+            }
+
+            if constexpr (eMethod == EWaitMethod::eNotAnd)
+            {
+                switch (uSize)
+                {
+                case 1:
+                    return ((AuReadU8(pHot, 0) & uMask) & (AuReadU8(pBuf2, 0) & uMask));
+                case 2:
+                    return ((AuReadU16(pHot, 0) & uMask) & (AuReadU16(pBuf2, 0) & uMask));
+                case 4:
+                    return ((AuReadU32(pHot, 0) & uMask) & (AuReadU32(pBuf2, 0) & uMask));
+                case 8:
+                    return ((AuReadU64(pHot, 0) & uMask) & (AuReadU64(pBuf2, 0) & uMask));
+                default:
+                    return false;
+                }
+            }
         }
         else
         {
@@ -482,6 +521,39 @@ namespace Aurora::Threading
                 }
             }
 
+            if constexpr (eMethod == EWaitMethod::eAnd)
+            {
+                switch (uSize)
+                {
+                case 1:
+                    return !(AuReadU8(pHot, 0) & AuReadU8(pBuf2, 0));
+                case 2:
+                    return !(AuReadU16(pHot, 0) & AuReadU16(pBuf2, 0));
+                case 4:
+                    return !(AuReadU32(pHot, 0) & AuReadU32(pBuf2, 0));
+                case 8:
+                    return !(AuReadU64(pHot, 0) & AuReadU64(pBuf2, 0));
+                default:
+                    return false;
+                }
+            }
+
+            if constexpr (eMethod == EWaitMethod::eNotAnd)
+            {
+                switch (uSize)
+                {
+                case 1:
+                    return (AuReadU8(pHot, 0) & AuReadU8(pBuf2, 0));
+                case 2:
+                    return (AuReadU16(pHot, 0) & AuReadU16(pBuf2, 0));
+                case 4:
+                    return (AuReadU32(pHot, 0) & AuReadU32(pBuf2, 0));
+                case 8:
+                    return (AuReadU64(pHot, 0) & AuReadU64(pBuf2, 0));
+                default:
+                    return false;
+                }
+            }
         }
 
         return false;