/***
    Copyright (C) 2023 J Reece Wilson (a/k/a "Reece"). All rights reserved.

    File: WakeOnAddress.hpp
    Date: 2023-3-11
    Author: Reece
    Note: 
        This API can be configured to run in one of two modes - Emulation and Wrapper modes

        In Emulation Mode:
              1: Wakes occur in FIFO order so long as the thread is in the kernel
              2: uWordSize can be any length not exceeding 32 bytes
          otherwise Wrapper Mode:
              1: Wakes are orderless
              2: uWordSize must be less than or equal to 8 bytes (todo: no?)
              3: only the least significant 32bits are guaranteed to be used as wake signals
              4: The special EWaitMethod variants will suffer a performance hit
          in either mode:
              1: WaitOnAddress[...] can wake at any time the wakeup method is successful
              2: WaitOnAddress[...] can drop any wakeup if the wakeup method would fail

        By default:
            Windows XP - Windows 7 => Emulation Mode
            Windows 10+            => Wrapper Mode
            Linux                  => Emulation Mode; however, Wrapper Mode is available 
            **************************************************************************************
            All platforms : ThreadingConfig::bPreferEmulatedWakeOnAddress = !AuBuild::kIsNtDerived
            **************************************************************************************
          
        Also note: Alongside Wrapper Mode, there is an internal set of APIs that allow for 32-bit word WoA support for 
                   AuThread primitives. These are only used if the operating system has a futex interface available at
                   runtime. MacOS, iOS, and <= Windows 7 support requires these paths to be disabled. In other cases,
                   the internal wrapper and Wrapper Mode should use this path to quickly yield to the kernel.

                   Generally speaking, AuThreadPrimitives will use the futex layer or some OS specific mechanism to 
                   bail out into the kernel's thread scheduler as quickly as possible.
                   In any mode, AuThreadPrimitives will go from: Primitive -> kernel/platform; or
                                                                 Primitive -> WoA Internal Wrapper -> kernel/platform
                   In ThreadingConfig::bPreferEmulatedWakeOnAddress mode, AuThreading::WaitOnAddress -> Emulation Mode.
                   In !ThreadingConfig::bPreferEmulatedWakeOnAddress mode, AuThreading::WaitOnAddress -> Wrapper Mode -> [...] 
                                                                           [...] -> Internal Wrapper -> kernel/platform
                   In any mode, the futex reference primitives including AuBarrier, AuInitOnce, AuFutexMutex, etc, 
                    will always go from: inlined header template definition -> relinked symbol -> AuThreading::WaitOnAddress 
                    -> [...].

                  Note that some edge case platforms can follow AuThreadPrimitives *.Generic -> Internal Wrapper -> [...] 
                                                                [...] -> AuThreading::WaitOnAddress -> Emulation Mode.
                  This is only the case when we lack OS-specific wait paths for our primitives and also lack a native
                  wait-on-address interface on which to build the internal wrapper. Fortunately, only the more esoteric
                  UNIX machines require this path. Further platform support can be added this way; only a semaphore or a
                  condition-variable/mutex pair is required to bootstrap it.
                
        Memory note: Weakly ordered memory is an alien concept. AuAtomicXXX operations ensure all previous stores are
                     visible across all cores (useful for semaphore increment and mutex-unlock operations), and that loads
                     are evaluated in order. For all intents and purposes, you should treat the au ecosystem like any
                     other strongly ordered processor and program pair. For memeworthy lockless algorithms, you can use
                     spec-of-the-year atomic word containers and related methods; we don't care about optimizing some midwit's
                     weakly-ordered CAS-spinning, ABA-hell container that's genuinely believed to be the best thing ever.
                     Sincerely, you are doing something wrong if you're write-locking a container for any notable length of
                     time, and more often than not, lock-free algorithms are bloated to all hell, just to end up losing in
                     most real world use cases.

                     tl;dr: Don't worry about memory ordering or ABA. Use the locks, atomic utilities, and primitives as expected.
                           (you'll be fine)
***/
#pragma once

namespace Aurora::Threading
{
    // Comparison selector for the [...]Special wait variants.
    // Break sleep when volatile pTargetAddress [...] constant pCompareAddress, where [...] is the comparison named by
    //  the enumerator (eg: eNotEqual => break sleep once *pTargetAddress != *pCompareAddress).
    // WaitOnAddress and WaitOnAddressSteady are the eNotEqual forms of their [...]Special counterparts.
    AUE_DEFINE(EWaitMethod, (
        eNotEqual, eEqual, eLessThanCompare, eGreaterThanCompare, eLessThanOrEqualsCompare, eGreaterThanOrEqualsCompare
    ))

    // Signals the threads waiting on pTargetAddress.
    // NOTE(review): per the name, this targets every current waiter of the address - confirm against the implementation.
    AUKN_SYM void WakeAllOnAddress(const void *pTargetAddress);

    // Signals a waiter of pTargetAddress.
    // NOTE(review): presumably wakes at most one waiting thread (FIFO order in Emulation Mode, orderless in Wrapper Mode,
    //               per the file header) - confirm against the implementation.
    AUKN_SYM void WakeOnAddress(const void *pTargetAddress);

    // WakeAllOnAddress with a uNMaximumThreads which may or may not be respected
    // Note that uNMaximumThreads is an 8-bit count, so a single call cannot request more than 255 wakes.
    AUKN_SYM void WakeNOnAddress(const void *pTargetAddress,
                                 AuUInt8 uNMaximumThreads);

    // On systems with processors of shared execution pipelines, these try-series of operations will spin (eg: mm_pause) for a configurable
    //  amount of time, or enter a low power mode, so long as the process-wide state isn't overly contested. This means you can use these
    //  arbitrarily without worrying about an accidental thundering mm_pause herd. If you wish to call WaitOnAddress[...] afterwards, you should
    //  report you already spun via optAlreadySpun. If the application is configured to spin later on, this hint may be used to prevent a double spin.
    //
    //  pTargetAddress  - the volatile word being observed
    //  pCompareAddress - the constant word pTargetAddress is compared against
    //  uWordSize       - width of both words in bytes (see the file header for the per-mode limits)
    //  NOTE(review): the return value presumably reports whether the wake condition was observed while spinning - confirm.
    AUKN_SYM bool TryWaitOnAddress(const void *pTargetAddress,
                                   const void *pCompareAddress,
                                   AuUInt8 uWordSize);

    // See: TryWaitOnAddress
    // Same try-series operation, with the comparison chosen by eMethod instead of the implied eNotEqual.
    AUKN_SYM bool TryWaitOnAddressSpecial(EWaitMethod eMethod,
                                          const void *pTargetAddress,
                                          const void *pCompareAddress,
                                          AuUInt8 uWordSize);

    // On systems with processors of shared execution pipelines, these try-series of operations will spin (eg: mm_pause) for a configurable
    //  amount of time, or enter a low power mode, so long as the process-wide state isn't overly contested. This means you can use these
    //  arbitrarily without worrying about an accidental thundering mm_pause herd. If you wish to call WaitOnAddress[...] afterwards, you should
    //  report you already spun via optAlreadySpun. If the application is configured to spin later on, this hint may be used to prevent a double spin.
    //  In the case of a pTargetAddress != pCompareAddress condition, the optional check parameter is used to verify the wake condition.
    //  Otherwise, spinning will continue.
    //
    //  NOTE(review): check's arguments presumably mirror (pTargetAddress, pCompareAddress, uWordSize), and a true return
    //                presumably accepts the wake - confirm against the implementation.
    AUKN_SYM bool TryWaitOnAddressEx(const void *pTargetAddress,
                                     const void *pCompareAddress,
                                     AuUInt8 uWordSize,
                                     const AuFunction<bool(const void *, const void *, AuUInt8)> &check);

    // See: TryWaitOnAddressEx
    // Same operation, with the comparison chosen by eMethod.
    AUKN_SYM bool TryWaitOnAddressSpecialEx(EWaitMethod eMethod,
                                            const void *pTargetAddress,
                                            const void *pCompareAddress,
                                            AuUInt8 uWordSize,
                                            const AuFunction<bool(const void *, const void *, AuUInt8)> &check);

    // Relative timeout variant of nanosecond resolution eNotEqual WoA. 0 = indefinite.
    // In Wrapper Mode, it is possible to bypass the WoA implementation, and bail straight into the kernel.
    // For improved order and EWaitMethod, do not use Wrapper Mode.
    //
    //  uWordSize      - width of the compared words in bytes (see the file header for the per-mode limits)
    //  qwNanoseconds  - timeout relative to the time of the call; 0 waits indefinitely
    //  optAlreadySpun - set when a TryWaitOnAddress[...] call has already spun, so the spin is not repeated
    //  NOTE(review): the return value presumably distinguishes a satisfied wait from a timeout - confirm.
    AUKN_SYM bool WaitOnAddress(const void *pTargetAddress,
                                const void *pCompareAddress,
                                AuUInt8 uWordSize,
                                AuUInt64 qwNanoseconds,
                                AuOptional<bool> optAlreadySpun = {} /*hint: do not spin before switching. subject to global config.*/);
    
    // Relative timeout variant of nanosecond resolution WoA. 0 = indefinite
    // Emulation Mode over Wrapper Mode is recommended for applications that heavily depend on these wait functions.
    // (The file header notes that the special EWaitMethod variants suffer a performance hit in Wrapper Mode.)
    // Parameters are those of WaitOnAddress, plus eMethod to select the comparison.
    AUKN_SYM bool WaitOnAddressSpecial(EWaitMethod eMethod,
                                       const void *pTargetAddress,
                                       const void *pCompareAddress,
                                       AuUInt8 uWordSize,
                                       AuUInt64 qwNanoseconds,
                                       AuOptional<bool> optAlreadySpun = {} /*hint: do not spin before switching. subject to global config.*/);

    // Absolute timeout variant of nanosecond resolution eNotEqual WoA. Nanoseconds are in steady clock time. 0 = indefinite
    // In Wrapper Mode, it is possible to bypass the WoA implementation, and bail straight into the kernel.
    // For improved order and EWaitMethod, do not use Wrapper Mode.
    // Here qwNanoseconds is a deadline on the steady clock rather than a duration; the other parameters match WaitOnAddress.
    AUKN_SYM bool WaitOnAddressSteady(const void *pTargetAddress,
                                      const void *pCompareAddress,
                                      AuUInt8 uWordSize,
                                      AuUInt64 qwNanoseconds,
                                      AuOptional<bool> optAlreadySpun = {} /*hint: do not spin before switching. subject to global config.*/);

    // Absolute timeout variant of nanosecond resolution WoA. Nanoseconds are in steady clock time. 0 = indefinite
    // Emulation Mode over Wrapper Mode is recommended for applications that heavily depend on these wait functions.
    // Here qwNanoseconds is a deadline on the steady clock rather than a duration; eMethod selects the comparison.
    AUKN_SYM bool WaitOnAddressSpecialSteady(EWaitMethod eMethod,
                                             const void *pTargetAddress,
                                             const void *pCompareAddress,
                                             AuUInt8 uWordSize,
                                             AuUInt64 qwNanoseconds,
                                             AuOptional<bool> optAlreadySpun = {} /*hint: do not spin before switching. subject to global config.*/);
}