[*] Optimize >4 thread wakeups on <= Win7, under Semaphore

This commit is contained in:
Reece Wilson 2023-08-26 18:08:33 +01:00
parent 4b1e3fd1e5
commit 610f2c73a0

View File

@ -224,8 +224,12 @@ namespace Aurora::Threading::Primitives
} }
else else
{ {
this->mutex.Lock(); // do not remove // realistically, we cant use the sleep counter optimization trick under windows 7
AuAtomicAdd<AuUInt32>(&this->dwState_, count); // we would have to expand our already oversized by 8, 24-byte x86_64 semaphore for a trivial perf boost
// we can't efficiently access the condition's state or atomic guarantees...
this->mutex.Lock(); // do not [re]move this lock fence
AuAtomicAdd<AuUInt32>(&this->dwState_, count); // this could be moved anywhere above the unlock, including above the lock.
this->mutex.Unlock(); this->mutex.Unlock();
if (count == 1) if (count == 1)
@ -234,6 +238,15 @@ namespace Aurora::Threading::Primitives
} }
else else
{ {
if (count >= 3) // ...this is the only optimization we can hope to achieve
{
// we can always save a few cycles by doing an atomic broadcast on a contended semaphore
// waking up the wrong number of threads probably doesn't matter at this point, on these target platforms
this->var.Broadcast();
}
else // ...otherwise, do the handshake just a few times
{
// doing the condvar handshake for the exact number of threads you need, once the cond is contended, can pay off
for (AU_ITERATE_N(i, count)) for (AU_ITERATE_N(i, count))
{ {
(void)i; (void)i;
@ -242,6 +255,7 @@ namespace Aurora::Threading::Primitives
} }
} }
} }
}
void SemaphoreImpl::Unlock() void SemaphoreImpl::Unlock()
{ {