[+] Zen3 on top of AlderLake optimizations

[*] Minor AlderLake adjustments
This commit is contained in:
Reece Wilson 2024-05-05 19:42:10 +01:00
parent 459a9a789b
commit f3ba901f71
3 changed files with 125 additions and 24 deletions

View File

@ -111,12 +111,43 @@ namespace Aurora::Threading
{
if (uEndTimeSteadyNS2 - uNowNS <= 100000ull)
{
for (AU_ITERATE_N(i, 32))
auto uNow = AuAtomicAdd(&AuThreadPrimitives::gSpinAdaptiveCurrentCount, 1u);
if (!AuThreadPrimitives::gSpinAdaptiveThreshold || uNow <= AuThreadPrimitives::gSpinAdaptiveThreshold)
{
AuThreadPrimitives::SMPPause();
}
#if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
if (AuThreadPrimitives::ThrdCfg::gIsIntelAlderLakeOrGreater)
{
_tpause(0, 10000);
}
else
#endif
{
#if 0
for (AU_ITERATE_N(i, 32))
{
AuThreadPrimitives::SMPPause();
}
#else
// the compiler won't unroll the 32-iteration loop above, so it is unrolled by hand
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
// 32 or 16?
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
#endif
}
continue;
AuAtomicSub(&AuThreadPrimitives::gSpinAdaptiveCurrentCount, 1u);
continue;
}
else
{
AuAtomicSub(&AuThreadPrimitives::gSpinAdaptiveCurrentCount, 1u);
}
}
}
@ -131,12 +162,44 @@ namespace Aurora::Threading
{
if (AuThreadPrimitives::ThrdCfg::gPlatformIsSMPProcessorOptimized)
{
for (AU_ITERATE_N(i, 32))
auto uNow = AuAtomicAdd(&AuThreadPrimitives::gSpinAdaptiveCurrentCount, 1u);
if (!AuThreadPrimitives::gSpinAdaptiveThreshold || uNow <= AuThreadPrimitives::gSpinAdaptiveThreshold)
{
AuThreadPrimitives::SMPPause();
}
#if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
if (AuThreadPrimitives::ThrdCfg::gIsIntelAlderLakeOrGreater)
{
_tpause(0, 1000);
}
else
#endif
{
#if 0
for (AU_ITERATE_N(i, 32))
{
AuThreadPrimitives::SMPPause();
}
#else
// the compiler won't unroll the 32-iteration loop above, so it is unrolled by hand
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
// 32 or 16?
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
#endif
}
continue;
AuAtomicSub(&AuThreadPrimitives::gSpinAdaptiveCurrentCount, 1u);
continue;
}
else
{
AuAtomicSub(&AuThreadPrimitives::gSpinAdaptiveCurrentCount, 1u);
break;
}
}
else
{

View File

@ -101,8 +101,14 @@ namespace Aurora::Threading::Primitives
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
// x86/x64 only: record the CPUID feature bits that select the specialised
// wait paths (_tpause/_umwait and _mm_monitorx/_mm_mwaitx) used by the spin code.
{
#if 0
// Disabled variant: queries CPUID leaf 7 directly and tests ECX bit 5.
auto cpuId = AuHwInfo::cpuid(7);
ThrdCfg::gIsIntelAlderLakeOrGreater = (cpuId.ecx >> 5) & 1;
#else
// Active variant: reads the CPUID words exposed by AuHwInfo::GetCPUInfo()
// instead of issuing a CPUID query here.
auto &cpuId = AuHwInfo::GetCPUInfo().cpuId;
// Bit 5 of f_7_ECX (presumably leaf 7 ECX, matching the disabled variant above;
// NOTE(review): this should be the WAITPKG feature bit - confirm against the Intel SDM).
ThrdCfg::gIsIntelAlderLakeOrGreater = AuBitTest(cpuId.f_7_ECX, 5);
// Bit 29 of f_81_ECX (presumably extended leaf 0x80000001 ECX;
// NOTE(review): this should be the MONITORX/MWAITX feature bit - confirm against the AMD APM).
// The flag name says "Zen3", but the test is a feature bit, not a model check.
ThrdCfg::gIsZen3OrGreater = AuBitTest(cpuId.f_81_ECX, 29);
#endif
}
#endif

View File

@ -85,6 +85,7 @@ namespace Aurora::Threading::Primitives
inline bool gAlwaysRWLockWriteBiasOnReadLock {};
inline bool gEnableRWLockWriteBiasOnReadLock {};
// Nonzero when bit 5 of the f_7_ECX CPUID word is set; selects the _tpause/_umwait wait paths.
inline AuUInt32 gIsIntelAlderLakeOrGreater {};
// Nonzero when bit 29 of the f_81_ECX CPUID word is set; selects the _mm_monitorx/_mm_mwaitx wait path.
inline AuUInt32 gIsZen3OrGreater {};
inline AuUInt8 gCountOfPCores {};
}
@ -204,7 +205,7 @@ namespace Aurora::Threading::Primitives
AuAtomicStore<SMTAtomic_t>(&gCoreTable[uWord], 1u);
auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u);
if (uNow <= gSpinAdaptiveThreshold)
if (!gSpinAdaptiveThreshold || uNow <= gSpinAdaptiveThreshold)
{
auto uCount = spin;
@ -217,22 +218,39 @@ namespace Aurora::Threading::Primitives
uCount += tlsSpinCountLocal;
}
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
auto perfCounter = __rdtsc() + uCount;
while (__rdtsc() < perfCounter)
#else
while (uCount > 0)
#endif
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
if (ThrdCfg::gIsIntelAlderLakeOrGreater)
{
if (callback())
{
bRet = true;
break;
}
else
{
SMPPause();
uCount--;
_umwait(1, __rdtsc() + uCount);
bRet = callback();
}
}
else
#endif
{
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
auto perfCounter = __rdtsc() + uCount;
while (__rdtsc() < perfCounter)
#else
while (uCount > 0)
#endif
{
if (callback())
{
bRet = true;
break;
}
else
{
SMPPause();
uCount--;
}
}
}
}
@ -279,7 +297,7 @@ namespace Aurora::Threading::Primitives
if (gHasThreadLocalTimeout)
{
auto uCount = tlsSpinCountLocal;
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
auto perfCounter = __rdtsc() + uCount;
while (__rdtsc() < perfCounter)
#else
@ -304,7 +322,7 @@ namespace Aurora::Threading::Primitives
else if (uNow <= (gSpinAdaptiveThreadCount / 4 * 3))
{
auto uCount = (spin) / 3;
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
auto perfCounter = __rdtsc() + uCount;
while (__rdtsc() < perfCounter)
#else
@ -403,7 +421,7 @@ namespace Aurora::Threading::Primitives
AuAtomicStore<SMTAtomic_t>(&gCoreTable[uWord], 1u);
auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u);
if (uNow <= gSpinAdaptiveThreshold)
if (!gSpinAdaptiveThreshold || uNow <= gSpinAdaptiveThreshold)
{
auto uCount = spin;
bool bSMTProbablyHit {};
@ -433,6 +451,20 @@ namespace Aurora::Threading::Primitives
bRet = callback();
}
}
else if (ThrdCfg::gIsZen3OrGreater)
{
_mm_monitorx((void *)pWord, 0, 0);
if (callback())
{
bRet = true;
}
else
{
_mm_mwaitx(2, 0, uCount);
bRet = callback();
}
}
else
#endif
{
@ -499,7 +531,7 @@ namespace Aurora::Threading::Primitives
if (gHasThreadLocalTimeout)
{
auto uCount = tlsSpinCountLocal;
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
auto perfCounter = __rdtsc() + uCount;
while (__rdtsc() < perfCounter)
#else
@ -524,7 +556,7 @@ namespace Aurora::Threading::Primitives
else if (uNow <= (gSpinAdaptiveThreadCount / 4 * 3))
{
auto uCount = (spin) / 3;
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
auto perfCounter = __rdtsc() + uCount;
while (__rdtsc() < perfCounter)
#else