/*** Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved. File: ThreadPool.cpp Date: 2021-10-30 Author: Reece ***/ #include #include "Async.hpp" #include "ThreadPool.hpp" #include "AsyncApp.hpp" #include "WorkItem.hpp" #include "AuSchedular.hpp" #include "ThreadWorkerQueueShim.hpp" #include "IAsyncRunnable.hpp" #include "AuAsyncFuncRunnable.hpp" #include "AuAsyncFuncWorker.hpp" namespace Aurora::Async { //STATIC_TLS(WorkerId_t, tlsWorkerId); static thread_local AuWPtr tlsCurrentThreadPool; inline auto GetWorkerInternal(const AuSPtr &pool) { if (pool.get() == AuStaticCast(gAsyncApp)) { return AuUnsafeRaiiToShared(AuStaticCast(gAsyncApp)); } return AuStaticPointerCast(pool); } AUKN_SYM WorkerPId_t GetCurrentWorkerPId() { auto lkPool = tlsCurrentThreadPool.lock(); if (!lkPool) { return {}; } auto cpy = *lkPool->tlsWorkerId; if (auto pPool = AuTryLockMemoryType(cpy.pool)) { return WorkerPId_t(pPool, cpy); } else { return {}; } } // ThreadPool::ThreadPool() : shutdownEvent_(false, false, true) { this->pRWReadView = this->rwlock_->AsReadable(); } // internal pool interface bool ThreadPool::WaitFor(WorkerId_t unlocker, const AuSPtr &primitive, AuUInt32 timeoutMs) { return WaitFor(WorkerPId_t { AuAsync::GetCurrentWorkerPId().GetPool(), unlocker }, primitive, timeoutMs); } bool ThreadPool::WaitFor(WorkerPId_t unlocker, const AuSPtr &primitive, AuUInt32 timeoutMs) { AuUInt64 uEndTimeNS = timeoutMs ? AuTime::SteadyClockNS() + AuMSToNS(timeoutMs) : 0; if (auto pCurThread = GetThreadState()) { bool bStat {}; { bStat = !bool(unlocker); } if (!bStat) { // old shid (to clean up) bool bWorkerIdMatches = (unlocker.second == pCurThread->thread.id.second) || ((unlocker.second == Async::kThreadIdAny) && (GetThreadWorkersCount(unlocker.first) == 1)); bStat = (unlocker.first == pCurThread->thread.id.first) && (unlocker.GetPool().get() == this) && (bWorkerIdMatches); } if (bStat) { while (true) { AuUInt32 didntAsk; bool bTimedOut {}; if (primitive->TryLock()) { return true; } this->InternalRunOne(pCurThread, true, false, didntAsk); if (uEndTimeNS) { bTimedOut = AuTime::SteadyClockNS() >= uEndTimeNS; } if (primitive->TryLock()) { return true; } if (!AuAtomicLoad(&this->shuttingdown_) || bTimedOut) { return false; } } } } { AuSPtr pHandle; if (auto pPool = unlocker) { auto pPoolEx = AuStaticCast(unlocker.GetPool()); AU_LOCK_GLOBAL_GUARD(pPoolEx->rwlock_->AsReadable()); auto pShutdownLock = Aurora::Threading::GetShutdownReadLock(); if ((pHandle = AuStaticCast(unlocker.GetPool())->GetThreadHandle(unlocker))) { AU_LOCK_GUARD(pHandle->externalFencesLock); if (pHandle->exitingflag2) { pShutdownLock->Unlock(); bool bRet = primitive->TryLock(); pShutdownLock->Lock(); return bRet; } else { pHandle->externalFences.push_back(primitive.get()); } } else if (unlocker.GetPool().get() == this) { pShutdownLock->Unlock(); bool bRet = primitive->LockMS(timeoutMs); pShutdownLock->Lock(); return bRet; } } bool bRet = primitive->LockAbsNS(uEndTimeNS); if (pHandle) { AU_LOCK_GLOBAL_GUARD(pHandle->externalFencesLock); AuTryRemove(pHandle->externalFences, primitive.get()); } return bRet; } } void ThreadPool::Run(WorkerId_t target, AuSPtr runnable) { return this->Run(target, runnable, true); } void ThreadPool::Run(WorkerId_t target, AuSPtr runnable, bool bIncrement) { AuSPtr pWorker; auto pGroupState = GetGroup(target.first); SysAssert(static_cast(pGroupState), "couldn't dispatch a task to an offline group"); if (target.second != Async::kThreadIdAny) { pWorker = pGroupState->GetThreadByIndex(target.second); if (!pWorker) { runnable->CancelAsync(); return; } if (pWorker->shutdown.bDropSubmissions) { runnable->CancelAsync(); return; } } if (bIncrement) { AuAtomicAdd(&this->uAtomicCounter, 1u); } pGroupState->workQueue.AddWorkEntry(AuMakePair(target.second, runnable)); if (target.second == Async::kThreadIdAny) { pGroupState->SignalAll(); } else { pWorker->sync.SetEvent(true, true); } } IThreadPool *ThreadPool::ToThreadPool() { return this; } // ithreadpool size_t ThreadPool::GetThreadWorkersCount(ThreadGroup_t group) { return GetGroup(group)->workers.size(); } void ThreadPool::SetRunningMode(bool eventRunning) { this->runnersRunning_ = eventRunning; } bool ThreadPool::Spawn(WorkerId_t workerId) { return Spawn(workerId, false); } bool ThreadPool::Create(WorkerId_t workerId) { return Spawn(workerId, true); } bool ThreadPool::InRunnerMode() { return this->runnersRunning_; } #define ASYNC_THREADGROUP_TLS_PREP \ auto &weakTlsHandle = tlsCurrentThreadPool; \ auto tlsHandle = AuTryLockMemoryType(weakTlsHandle); \ if (!tlsHandle || tlsHandle.get() != this) \ { \ weakTlsHandle = AuSharedFromThis(); \ } #define ASYNC_THREADGROUP_TLS_UNSET \ if (tlsHandle && tlsHandle.get() != this) \ { \ tlsCurrentThreadPool = weakTlsHandle; \ } bool ThreadPool::Poll() { AuUInt32 uCount {}; ASYNC_THREADGROUP_TLS_PREP; auto bRet = InternalRunOne(GetThreadStateLocal(), false, false, uCount); ASYNC_THREADGROUP_TLS_UNSET; return bRet; } bool ThreadPool::RunOnce() { AuUInt32 uCount {}; ASYNC_THREADGROUP_TLS_PREP; auto bRet = InternalRunOne(GetThreadStateLocal(), true, false, uCount); ASYNC_THREADGROUP_TLS_UNSET; return bRet; } bool ThreadPool::Run() { bool ranOnce {}; ASYNC_THREADGROUP_TLS_PREP; auto pJobRunner = GetThreadStateLocal(); if (!pJobRunner) { ASYNC_THREADGROUP_TLS_UNSET; this->shutdownEvent_->LockMS(0); return true; } auto auThread = AuThreads::GetThread(); while ((!auThread->Exiting()) && ((AuAtomicLoad(&this->shuttingdown_) & 2) != 2) && (!pJobRunner->shutdown.bBreakMainLoop)) { AuUInt32 uCount {}; // Do work (blocking) if (!InternalRunOne(pJobRunner, true, true, uCount)) { if ((AuAtomicLoad(&this->shuttingdown_) & 2) == 2) { return ranOnce; } } ranOnce = true; } ASYNC_THREADGROUP_TLS_UNSET; return ranOnce; } bool ThreadPool::InternalRunOne(AuSPtr state, bool block, bool bUntilWork, AuUInt32 &uCount) { bool bSuccess {}; if (!state) { SysPushErrorUninitialized("Not an async thread"); return false; } EarlyExitTick(); { auto asyncLoop = state->asyncLoop; asyncLoop->OnFrame(); if (asyncLoop->GetSourceCount() > 1) { if (block) { asyncLoop->WaitAny(0); } else { asyncLoop->PumpNonblocking(); } bSuccess = PollInternal(state, false, bUntilWork, uCount); } else { bSuccess = PollInternal(state, block, bUntilWork, uCount); } } EarlyExitTick(); return bSuccess; } #if defined(__AUHAS_COROUTINES_CO_AWAIT) && defined(AU_LANG_CPP_20_) AuVoidTask ThreadPool::PollInternal_ForceCoRoutine(AuSPtr state, bool block, bool bUntilWork, AuUInt32 &uCount, bool &bRet) { bRet = PollInternal_Base(state, block, bUntilWork, uCount); co_return; } #endif bool ThreadPool::PollInternal(AuSPtr state, bool block, bool bUntilWork, AuUInt32 &uCount) { #if defined(__AUHAS_COROUTINES_CO_AWAIT) && defined(AU_LANG_CPP_20_) if (state->stackState.uStackCallDepth && gRuntimeConfig.async.bEnableCpp20RecursiveCallstack) { bool bRet {}; PollInternal_ForceCoRoutine(state, block, bUntilWork, uCount, bRet); return bRet; } #endif return PollInternal_Base(state, block, bUntilWork, uCount); } bool ThreadPool::PollInternal_Base(AuSPtr state, bool block, bool bUntilWork, AuUInt32 &uCount) { if (!state) { SysPushErrorUninitialized("Not an async thread"); return false; } auto group = state->parent.lock(); { AU_LOCK_GUARD(state->sync.cvWorkMutex); do { bool bFailedOOM = group->workQueue.Dequeue(state->pendingWorkItems, state->stackState.uWorkMultipopCount, state->thread.id.second); state->sync.UpdateCVState(state.get()); // Consider blocking for more work if (!block) { break; } // OOM: hardened: sleep for 0.01MS if the heap for task dequeue is full. // Until the mixed heap object is implemented, we can only dequeue 2^16 tasks globally at a time into a reserved heap. if (!bFailedOOM) { if (state->pendingWorkItems.empty()) { AuThreading::SleepNs(10'000); continue; } else { break; } } // Block if no work items are present if (state->pendingWorkItems.empty()) { // pre-wakeup thread terminating check if (state->thread.pThread->Exiting()) { break; } if (AuAtomicLoad(&this->shuttingdown_) & 2) { break; } // OOM: hardened: do not sleep after OOM re-try if (group->workQueue.IsEmpty(this, state->thread.id)) { state->sync.cvVariable->WaitForSignal(); } if (AuAtomicLoad(&this->shuttingdown_) & 2) { break; } // Post-wakeup thread terminating check if (state->thread.pThread->Exiting()) { break; } } if (state->pendingWorkItems.empty() && ( (this->GetThreadState()->asyncLoop->GetSourceCount() > 1) || this->GetThreadState()->asyncLoop->CommitPending())) //(this->ToKernelWorkQueue()->IsSignaledPeek())) { return false; } } while (state->pendingWorkItems.empty() && (block && bUntilWork)); if (!block && !(this->shuttingdown_ & 2)) // quick hack: is worthy of io reset by virtue of having polled externally (most likely for IO ticks, unlikely for intraprocess ticks) { AU_LOCK_GLOBAL_GUARD(group->workQueue.mutex); // dont atomically increment our work counters [signal under mutex group]... AU_LOCK_GUARD(group->workersMutex); // dont atomically increment our work counters [broadcast]... // ...these primitives are far less expensive to hit than resetting kernel primitives // AU_LOCK_GUARD(state->cvWorkMutex) used to protect us if (group->workQueue.IsEmpty(this, state->thread.id)) { state->sync.eventLs->Reset(); // ...until we're done AuAtomicStore(&state->sync.cvLSActive, 0u); } } } if (state->pendingWorkItems.empty()) { if (InRunnerMode()) { if ((AuAtomicLoad(&this->uAtomicCounter) == 0) && this->IsDepleted(state)) { Shutdown(); } } return false; } int runningTasks {}; auto uStartCookie = state->stackState.uStackCookie++; // Account for // while (AuAsync.GetCurrentPool()->runForever()); // in the first task (or deeper) if (InRunnerMode() && state->stackState.uStackCallDepth) // are we one call deep? { if ((AuAtomicLoad(&this->uAtomicCounter) == state->stackState.uStackCallDepth) && this->IsDepleted(state)) { return false; } } // for (auto itr = state->pendingWorkItems.begin(); itr != state->pendingWorkItems.end(); ) { if (state->thread.pThread->Exiting()) { break; } // Dispatch auto oops = itr->second; // Remove from our local job queue itr = state->pendingWorkItems.erase(itr); state->stackState.uStackCallDepth++; //SysBenchmark(fmt::format("RunAsync: {}", block)); // Dispatch if (oops) { oops->RunAsync(); } uCount++; // Atomically decrement global task counter runningTasks = AuAtomicSub(&this->uAtomicCounter, 1u); state->stackState.uStackCallDepth--; if (uStartCookie != state->stackState.uStackCookie) { uStartCookie = state->stackState.uStackCookie; itr = state->pendingWorkItems.begin(); } } // Return popped work back to the groups work pool when our -pump loops were preempted if (state->pendingWorkItems.size()) { AU_LOCK_GUARD(state->sync.cvWorkMutex); for (const auto &item : state->pendingWorkItems) { group->workQueue.AddWorkEntry(item); } state->pendingWorkItems.clear(); state->sync.cvVariable->Broadcast(); state->sync.eventLs->Set(); } // Account for // while (AuAsync.GetCurrentPool()->runForever()); // in the top most task if (InRunnerMode()) { if ((runningTasks == 0) && (AuAtomicLoad(&this->uAtomicCounter) == 0) && this->IsDepleted(state)) { Shutdown(); } } return true; } // While much of this subsystem needs good rewrite, under no circumstance should the shutdown process be "simpified" or "cleaned up" // This is our expected behaviour. Any changes will likely introduce hard to catch bugs across various softwares and exit conditions. void ThreadPool::Shutdown() { AU_DEBUG_MEMCRUNCH; auto trySelfPid = AuAsync::GetCurrentWorkerPId(); // Update shutting down flag // Specify the root-level shutdown flag for 'ok, u can work, but you're shutting down soon [microseconds, probably]' { if (AuAtomicTestAndSet(&this->shuttingdown_, 0) != 0) { return; } } auto pLocalRunner = this->GetThreadStateNoWarn(); AuList toBarrier; // wait for regular prio work to complete { for (auto pGroup : this->threadGroups_) { if (!pGroup) { continue; } AU_LOCK_GLOBAL_GUARD(pGroup->workersMutex); for (auto &[id, worker] : pGroup->workers) { if (trySelfPid == worker->thread.id) { continue; } toBarrier.push_back(worker->thread.id); } } for (const auto &id : toBarrier) { if (trySelfPid == id) { continue; } this->Barrier(id, 0, false, false /* no reject*/); // absolute safest point in shutdown; sync to already submitted work } } // increment abort cookies { for (const auto &id : toBarrier) { if (trySelfPid == id) { continue; } AuAtomicAdd(&this->uAtomicShutdownCookie, 1u); } } // set shutdown flags { AuAtomicTestAndSet(&this->shuttingdown_, 1); } // Finally set the shutdown flag on all of our thread contexts // then release them from the runners/workers list // then release all group contexts AuList threads; AuList> states; { AU_LOCK_GLOBAL_GUARD(this->pRWReadView); for (auto pGroup : this->threadGroups_) { if (!pGroup) { continue; } for (auto &[id, pState] : pGroup->workers) { // main loop: if (pState) { states.push_back(pState); pState->shuttingdown = true; } else { pState->shuttingdown = true; } // thread object: if (pState->thread.bOwnsThread) { pState->thread.pThread->SendExitSignal(); threads.push_back(pState->thread.pThread); } // unrefreeze signals: auto &event = pState->running; if (event) { event->Set(); } } } } // Break all condvar loops, just in case for (const auto &pState : states) { pState->sync.SetEvent(); } // Final sync to exit { for (const auto &id : toBarrier) { if (trySelfPid == id) { continue; } auto handle = this->GetThreadHandle(id); if (handle) { handle->shutdown.bDropSubmissions = false; handle->isDeadEvent->LockMS(250); } } } // Sync to shutdown threads to prevent a race condition whereby the async subsystem shuts down before the threads auto pSelf = AuThreads::GetThread(); for (const auto &thread : threads) { if (thread.get() != pSelf) { thread->Terminate(); } } // Is dead flag this->shutdownEvent_->Set(); // if (pLocalRunner) { pLocalRunner->shutdown.bIsKillerThread = true; } // Notify observing threads of our work exhaustion for (const auto &wOther : this->listWeakDepsParents_) { if (auto pThat = AuTryLockMemoryType(wOther)) { if (pThat->InRunnerMode()) { continue; } if (!pThat->IsSelfDepleted(nullptr)) { continue; } if (pThat->uAtomicCounter) { continue; } pThat->Shutdown(); } } } bool ThreadPool::Exiting() { return this->shuttingdown_ || GetThreadState()->exiting; } AuUInt32 ThreadPool::PollAndCount(bool bStrict) { AuUInt32 uCount {}; ASYNC_THREADGROUP_TLS_PREP; auto bRanAtLeastOne = this->InternalRunOne(this->GetThreadStateNoWarn(), false, false, uCount); ASYNC_THREADGROUP_TLS_UNSET; return uCount ? uCount : (bStrict ? bRanAtLeastOne : 0); } AuUInt32 ThreadPool::RunAllPending() { AuUInt32 uCount {}; AuUInt32 uCountTotal {}; ASYNC_THREADGROUP_TLS_PREP; do { uCount = 0; (void)this->InternalRunOne(this->GetThreadStateNoWarn(), false, true, uCount); uCountTotal += uCount; } while (uCount); ASYNC_THREADGROUP_TLS_UNSET; return uCountTotal; } AuSPtr ThreadPool::NewWorkItem(const WorkerId_t &worker, const AuSPtr &task) { // Error pass-through if (!task) { return {}; } return AuMakeShared(this, WorkerPId_t { this->SharedFromThis(), worker }, task); } AuSPtr ThreadPool::NewWorkFunction(const WorkerId_t &worker, AuVoidFunc callback) { SysAssert(callback); return AuMakeShared(this, WorkerPId_t { this->SharedFromThis(), worker }, AuMove(callback)); } AuSPtr ThreadPool::NewFence() { return AuMakeShared(this, AuAsync::GetCurrentWorkerPId(), AuSPtr{}); } AuThreads::ThreadShared_t ThreadPool::ResolveHandle(WorkerId_t id) { auto pState = GetThreadHandle(id); if (!pState) { return {}; } return pState->thread.pThread; } AuBST> ThreadPool::GetThreads() { AU_DEBUG_MEMCRUNCH; AuBST> ret; for (auto pGroup : this->threadGroups_) { AuList workers; if (!pGroup) { continue; } AU_LOCK_GUARD(pGroup->workersMutex); AuTryReserve(workers, pGroup->workers.size()); for (const auto &thread : pGroup->workers) { workers.push_back(thread.second->thread.id.second); } ret[pGroup->group] = workers; } return ret; } WorkerId_t ThreadPool::GetCurrentThread() { return tlsWorkerId; } AuSPtr ThreadPool::GetIOProcessor(WorkerId_t id) { if (auto pState = this->GetThreadHandle(id)) { return pState->singletons.GetIOProcessor({ this->SharedFromThis(), id }); } return {}; } AuSPtr ThreadPool::GetIOGroup(WorkerId_t id) { if (auto pState = this->GetThreadHandle(id)) { return pState->singletons.GetIOGroup({ this->SharedFromThis(), id }); } return {}; } AuSPtr ThreadPool::GetIONetInterface(WorkerId_t id) { if (auto pState = this->GetThreadHandle(id)) { return pState->singletons.GetIONetInterface({ this->SharedFromThis(), id }); } return {}; } AuSPtr ThreadPool::GetIONetWorker(WorkerId_t id) { if (auto pState = this->GetThreadHandle(id)) { return pState->singletons.GetIONetWorker({ this->SharedFromThis(), id }); } return {}; } bool ThreadPool::Sync(WorkerId_t workerId, AuUInt32 timeoutMs, bool requireSignal) { //AU_LOCK_GUARD(this->pRWReadView); auto currentWorkerId = GetCurrentThread().second; if (workerId.second == Async::kThreadIdAny) { decltype(GroupState::workers) workers; { AU_LOCK_GLOBAL_GUARD(this->pRWReadView); if (auto pGroup = GetGroup(workerId.first)) { workers = pGroup->workers; } } for (auto &jobWorker : workers) { if (!Barrier(jobWorker.second->thread.id, timeoutMs, requireSignal && jobWorker.second->thread.id.second != currentWorkerId, false)) // BAD!, should subtract time elapsed, clamp to, i dunno, 5ms min? { return false; } } } else { return Barrier(workerId, timeoutMs, requireSignal && workerId.second != currentWorkerId, false); } return true; } void ThreadPool::Signal(WorkerId_t workerId) { auto group = GetGroup(workerId.first); if (workerId.second == Async::kThreadIdAny) { AU_LOCK_GLOBAL_GUARD(group->workersMutex); for (auto &jobWorker : group->workers) { jobWorker.second->running->Set(); } } else if (auto pThread = GetThreadHandle(workerId)) { pThread->running->Set(); } } void ThreadPool::Wakeup(WorkerId_t workerId) { auto group = GetGroup(workerId.first); if (workerId.second == Async::kThreadIdAny) { group->SignalAll(false); } else if (auto pThread = GetThreadHandle(workerId)) { pThread->sync.SetEvent(true, false); } } AuSPtr ThreadPool::WorkerToLoopSource(WorkerId_t workerId) { auto a = GetThreadHandle(workerId); if (!a) { return {}; } return a->sync.eventLs; } void ThreadPool::SyncAllSafe() { AU_LOCK_GLOBAL_GUARD(this->pRWReadView); for (auto pGroup : this->threadGroups_) { if (!pGroup) { continue; } for (auto &jobWorker : pGroup->workers) { SysAssert(Barrier(jobWorker.second->thread.id, 0, false, false)); } } } void ThreadPool::AddFeature(WorkerId_t id, AuSPtr pFeature, bool bNonBlock) { auto pWorkItem = DispatchOn({ this->SharedFromThis(), id }, [=]() { auto pState = GetThreadState(); { AU_LOCK_GUARD(pState->tlsFeatures.mutex); pState->tlsFeatures.features.push_back(pFeature); } pFeature->Init(); }); if (!bNonBlock) { pWorkItem->BlockUntilComplete(); } } void ThreadPool::AssertInThreadGroup(ThreadGroup_t group) { SysAssert(static_cast(tlsWorkerId).first == group); } void ThreadPool::AssertWorker(WorkerId_t id) { SysAssert(static_cast(tlsWorkerId) == id); } AuSPtr ThreadPool::ToKernelWorkQueue() { return this->GetThreadState()->asyncLoop; } AuSPtr ThreadPool::ToKernelWorkQueue(WorkerId_t workerId) { auto worker = this->GetThreadHandle(workerId); if (!worker) { SysPushErrorGeneric("Couldn't find requested worker"); return {}; } return worker->asyncLoop; } bool ThreadPool::IsSelfDepleted(const AuSPtr &pState) { AuSPtr pLoopQueue; if (pState) { pLoopQueue = pState->asyncLoop; } else { if (auto pLocalThread = this->GetThreadStateLocal()) { pLoopQueue = pLocalThread->asyncLoop; } } if (!pLoopQueue) { return true; } return pLoopQueue->GetSourceCount() <= 1 + this->uAtomicIOProcessorsWorthlessSources + this->uAtomicIOProcessors; } bool ThreadPool::IsDepleted(const AuSPtr &state) { if (!IsSelfDepleted(state)) { return false; } for (const auto &wOther : this->listWeakDeps_) { if (auto pThat = AuTryLockMemoryType(wOther)) { if (!pThat->IsSelfDepleted(nullptr)) { return false; } if (AuAtomicLoad(&pThat->uAtomicCounter)) { return false; } } } return true; } void ThreadPool::AddDependency(AuSPtr pPool) { if (!pPool) { return; } auto pOther = AuStaticCast(pPool); this->listWeakDeps_.push_back(pOther); pOther->listWeakDepsParents_.push_back(AuSharedFromThis()); } AuSPtr ThreadPool::GetShutdownEvent() { return AuSPtr(AuSharedFromThis(), this->shutdownEvent_.AsPointer()); } // Unimplemented fiber hooks, 'twas used for science. no longer in use int ThreadPool::CtxPollPush() { // TOOD (Reece): implement a context switching library // Refer to the old implementation of this on pastebin return 0; } void ThreadPool::CtxPollReturn(const AuSPtr &state, int status, bool hitTask) { } bool ThreadPool::CtxYield() { bool ranAtLeastOne = false; // !!! auto pA = this->GetThreadStateNoWarn(); if (AuAtomicLoad(&this->shuttingdown_) & 2) // fast { if (pA->shutdown.bDropSubmissions) { return false; } } AuUInt32 uCount {}; #if 1 return this->InternalRunOne(pA, false, false, uCount); #else do { uCount = 0; ranAtLeastOne |= this->InternalRunOne(pA, false, uCount); } while (uCount); return uCount || ranAtLeastOne; #endif } // void ThreadPool::IncrementAbortFenceOnPool() { AuAtomicAdd(&this->uAtomicShutdownCookie, 1u); } void ThreadPool::IncrementAbortFenceOnWorker(WorkerId_t workerId) { auto group = GetGroup(workerId.first); if (workerId.second == kThreadIdAny) { AU_LOCK_GLOBAL_GUARD(group->workersMutex); for (auto &[jobWorker, pState]: group->workers) { AuAtomicAdd(&pState->shutdown.uShutdownFence, 1u); } } else { if (auto pState = this->GetThreadHandle(workerId)) { AuAtomicAdd(&pState->shutdown.uShutdownFence, 1u); } } } AuUInt64 ThreadPool::QueryAbortFence(AuOptional optWorkerId) { if (auto pState = this->GetThreadHandle(optWorkerId.value_or(GetCurrentWorkerPId()))) { return (AuUInt64(pState->shutdown.uShutdownFence) << 32ull) | AuUInt64(this->uAtomicShutdownCookie); } else { return this->uAtomicShutdownCookie; } } bool ThreadPool::QueryShouldAbort(AuOptional optWorkerId, AuUInt64 uFenceMagic) { auto uSelfCookie = AuBitsToLower(uFenceMagic); if (uSelfCookie != AuAtomicLoad(&this->uAtomicShutdownCookie)) { return true; } auto uThreadCookie = AuBitsToHigher(uFenceMagic); if (!uThreadCookie) { return false; } if (auto pState = this->GetThreadHandle(optWorkerId.value_or(GetCurrentWorkerPId()))) { return uThreadCookie != pState->shutdown.uShutdownFence; } else { return false; } } // internal api bool ThreadPool::Spawn(WorkerId_t workerId, bool create) { AU_LOCK_GLOBAL_GUARD(this->rwlock_->AsWritable()); if (create) { tlsCurrentThreadPool = AuSharedFromThis(); } AuSPtr pGroup; // Try fetch or allocate group { if (!(pGroup = threadGroups_[workerId.first])) { pGroup = AuMakeShared(); if (!pGroup->Init()) { SysPushErrorMemory("Not enough memory to intiialize a new group state"); return false; } pGroup->group = workerId.first; this->threadGroups_[workerId.first] = pGroup; } } // Assert worker does not already exist { AuSPtr* ret; if (AuTryFind(pGroup->workers, workerId.second, ret)) { SysPushErrorGeneric("Thread ID already exists"); return false; } } auto pThreadState = pGroup->CreateWorker(workerId, create); if (!pThreadState) { return {}; } if (this->pHeap) { AuSetAllocator(pThreadState->pendingWorkItems, this->pHeap.get()); } if (!create) { pThreadState->thread.pThread= AuThreads::ThreadShared(AuThreads::ThreadInfo( AuMakeShared(AuThreads::IThreadVectorsFunctional::OnEntry_t(std::bind(&ThreadPool::Entrypoint, this, workerId)), AuThreads::IThreadVectorsFunctional::OnExit_t{}), gRuntimeConfig.async.threadPoolDefaultStackSize )); if (!pThreadState->thread.pThread) { return {}; } pThreadState->thread.pThread->Run(); } else { pThreadState->thread.pThread = AuSPtr(AuThreads::GetThread(), [](AuThreads::IAuroraThread *){}); // TODO: this is just a hack // we should implement this properly pThreadState->thread.pThread->AddLastHopeTlsHook(AuMakeShared([]() -> void { }, []() -> void { auto pid = GetCurrentWorkerPId(); if (pid) { GetWorkerInternal(pid.GetPool())->ThisExiting(); } })); tlsCurrentThreadPool = AuWeakFromThis(); tlsWorkerId = WorkerPId_t(AuSharedFromThis(), workerId); } pGroup->AddWorker(workerId.second, pThreadState); return true; } // private api AU_NOINLINE bool ThreadPool::Barrier(WorkerId_t workerId, AuUInt32 ms, bool requireSignal, bool drop) { auto self = GetThreadState(); if (!self) { return {}; } auto &semaphore = self->syncSema; auto unsafeSemaphore = semaphore.AsPointer(); bool failed {}; auto work = AuMakeShared( [=]() { auto state = GetThreadState(); if (drop) { state->shutdown.bDropSubmissions = true; } if (requireSignal) { state->running->Reset(); } unsafeSemaphore->Unlock(1); if (requireSignal) { state->running->Lock(); } }, [&]() { unsafeSemaphore->Unlock(1); failed = true; } ); if (!work) { return false; } Run(workerId, work); return WaitFor(workerId, AuUnsafeRaiiToShared(semaphore.AsPointer()), ms) && !failed; } void ThreadPool::Entrypoint(WorkerId_t id) { { AU_LOCK_GLOBAL_GUARD(this->pRWReadView); } tlsCurrentThreadPool = AuWeakFromThis(); tlsWorkerId = WorkerPId_t(AuSharedFromThis(), id); auto job = GetThreadState(); Run(); if (id != WorkerId_t {0, 0}) { AU_LOCK_GLOBAL_GUARD(this->pRWReadView); if (!AuAtomicLoad(&this->shuttingdown_) && !job->shutdown.bDropSubmissions) { // Pump and barrier + reject all after atomically Barrier(id, 0, false, true); } } ThisExiting(); if (id == WorkerId_t {0, 0}) { CleanWorkerPoolReservedZeroFree(); } } void ThreadPool::EarlyExitTick() { if ((AuAtomicLoad(&this->shuttingdown_) & 2) != 2) { return; } auto jobWorker = GetThreadState(); auto state = jobWorker->parent.lock(); if (!jobWorker) { SysPushErrorUninitialized("Not an async thread"); return; } state->SignalAll(); { if (AuExchange(jobWorker->bAlreadyDoingExitTick, true)) { return; } AuUInt32 uCount {}; do { uCount = 0; this->PollInternal(jobWorker, false, false, uCount); } while (uCount); } AuList> features; { AU_LOCK_GUARD(jobWorker->tlsFeatures.mutex); features = AuExchange(jobWorker->tlsFeatures.features, {}); } { for (const auto &thread : features) { try { thread->Cleanup(); } catch (...) { SysPushErrorCatch("Couldn't clean up thread feature!"); } } jobWorker->isDeadEvent->Set(); jobWorker->bAlreadyDoingExitTick = false; jobWorker->shutdown.bBreakMainLoop = true; } } void ThreadPool::ThisExiting() { AU_DEBUG_MEMCRUNCH; auto id = GetCurrentThread(); auto state = GetGroup(id.first); auto pLocalState = state->GetThreadByIndex(id.second); AuList> features; { AU_LOCK_GLOBAL_GUARD(this->pRWReadView); pLocalState->isDeadEvent->Set(); CleanUpWorker(id); TerminateSceduledTasks(this, id); pLocalState->syncSema->Unlock(10); // prevent ::Barrier dead-locks { AU_LOCK_GUARD(pLocalState->externalFencesLock); pLocalState->exitingflag2 = true; for (const auto &pIWaitable : pLocalState->externalFences) { pIWaitable->Unlock(); } pLocalState->externalFences.clear(); } { AU_LOCK_GUARD(pLocalState->tlsFeatures.mutex); features = AuExchange(pLocalState->tlsFeatures.features, {}); } } { for (const auto &thread : features) { try { thread->Cleanup(); } catch (...) { SysPushErrorConcurrentRejected("Couldn't clean up thread feature!"); } } features.clear(); } { state->Decommit(id.second); } pLocalState->Deinit(); } AuSPtr ThreadPool::GetGroup(ThreadGroup_t type) { return this->threadGroups_[type]; } AuSPtr ThreadPool::GetThreadState() { auto thread = tlsCurrentThreadPool.lock(); if (!thread) { return {}; } #if defined(AU_CFG_ID_INTERNAL) || defined(AU_CFG_ID_DEBUG) if (thread.get() != this) { SysPushErrorGeneric("wrong thread"); return {}; } #endif auto worker = *tlsWorkerId; auto state = GetGroup(worker.first); if (!state) { return {}; } return state->GetThreadByIndex(worker.second); } AuSPtr ThreadPool::GetThreadStateNoWarn() { auto thread = tlsCurrentThreadPool.lock(); if (!thread) { return {}; } if (thread.get() != this) { return {}; } auto worker = *tlsWorkerId; auto state = GetGroup(worker.first); if (!state) { return {}; } return state->GetThreadByIndex(worker.second); } AuSPtr ThreadPool::GetThreadStateLocal() { auto worker = *tlsWorkerId; if (auto pSelf = AuTryLockMemoryType(worker.pool)) { auto state = GetGroup(worker.first); if (!state) { return {}; } return state->GetThreadByIndex(worker.second); } else { return {}; } } AuSPtr ThreadPool::GetThreadHandle(WorkerId_t id) { auto group = GetGroup(id.first); if (!group) { return {}; } return group->GetThreadByIndex(id.second); } AuList> ThreadPool::GetThreadHandles(WorkerId_t id) { auto group = GetGroup(id.first); if (!group) { return {}; } AuList> ret; if (id.second != Async::kThreadIdAny) { if (auto pPtr = group->GetThreadByIndex(id.second)) { ret.push_back(pPtr); } } else { AU_LOCK_GLOBAL_GUARD(group->workersMutex); for (const auto &[key, value] : group->workers) { ret.push_back(value); } } return AuMove(ret); } AUKN_SYM AuSPtr NewThreadPool() { // apps that don't require async shouldn't be burdened with the overhead of this litl spiner StartSched(); return AuMakeShared(); } }