[+] Zen3 on top of AlderLake optimizations

[*] Minor AlderLake adjustments
This commit is contained in:
Reece Wilson 2024-05-05 19:42:10 +01:00
parent 459a9a789b
commit f3ba901f71
3 changed files with 125 additions and 24 deletions

View File

@ -111,12 +111,43 @@ namespace Aurora::Threading
{ {
if (uEndTimeSteadyNS2 - uNowNS <= 100000ull) if (uEndTimeSteadyNS2 - uNowNS <= 100000ull)
{ {
for (AU_ITERATE_N(i, 32)) auto uNow = AuAtomicAdd(&AuThreadPrimitives::gSpinAdaptiveCurrentCount, 1u);
if (!AuThreadPrimitives::gSpinAdaptiveThreshold || uNow <= AuThreadPrimitives::gSpinAdaptiveThreshold)
{ {
AuThreadPrimitives::SMPPause(); #if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
} if (AuThreadPrimitives::ThrdCfg::gIsIntelAlderLakeOrGreater)
{
_tpause(0, 10000);
}
else
#endif
{
#if 0
for (AU_ITERATE_N(i, 32))
{
AuThreadPrimitives::SMPPause();
}
#else
// The compiler won't unroll the 32-iteration loop above, so it is unrolled by hand
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
// 32 or 16?
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
#endif
}
continue; AuAtomicSub(&AuThreadPrimitives::gSpinAdaptiveCurrentCount, 1u);
continue;
}
else
{
AuAtomicSub(&AuThreadPrimitives::gSpinAdaptiveCurrentCount, 1u);
}
} }
} }
@ -131,12 +162,44 @@ namespace Aurora::Threading
{ {
if (AuThreadPrimitives::ThrdCfg::gPlatformIsSMPProcessorOptimized) if (AuThreadPrimitives::ThrdCfg::gPlatformIsSMPProcessorOptimized)
{ {
for (AU_ITERATE_N(i, 32)) auto uNow = AuAtomicAdd(&AuThreadPrimitives::gSpinAdaptiveCurrentCount, 1u);
if (!AuThreadPrimitives::gSpinAdaptiveThreshold || uNow <= AuThreadPrimitives::gSpinAdaptiveThreshold)
{ {
AuThreadPrimitives::SMPPause(); #if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
} if (AuThreadPrimitives::ThrdCfg::gIsIntelAlderLakeOrGreater)
{
_tpause(0, 1000);
}
else
#endif
{
#if 0
for (AU_ITERATE_N(i, 32))
{
AuThreadPrimitives::SMPPause();
}
#else
// The compiler won't unroll the 32-iteration loop above, so it is unrolled by hand
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
// 32 or 16?
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
#endif
}
continue; AuAtomicSub(&AuThreadPrimitives::gSpinAdaptiveCurrentCount, 1u);
continue;
}
else
{
AuAtomicSub(&AuThreadPrimitives::gSpinAdaptiveCurrentCount, 1u);
break;
}
} }
else else
{ {

View File

@ -101,8 +101,14 @@ namespace Aurora::Threading::Primitives
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
{ {
#if 0
auto cpuId = AuHwInfo::cpuid(7); auto cpuId = AuHwInfo::cpuid(7);
ThrdCfg::gIsIntelAlderLakeOrGreater = (cpuId.ecx >> 5) & 1; ThrdCfg::gIsIntelAlderLakeOrGreater = (cpuId.ecx >> 5) & 1;
#else
auto &cpuId = AuHwInfo::GetCPUInfo().cpuId;
ThrdCfg::gIsIntelAlderLakeOrGreater = AuBitTest(cpuId.f_7_ECX, 5);
ThrdCfg::gIsZen3OrGreater = AuBitTest(cpuId.f_81_ECX, 29);
#endif
} }
#endif #endif

View File

@ -85,6 +85,7 @@ namespace Aurora::Threading::Primitives
inline bool gAlwaysRWLockWriteBiasOnReadLock {}; inline bool gAlwaysRWLockWriteBiasOnReadLock {};
inline bool gEnableRWLockWriteBiasOnReadLock {}; inline bool gEnableRWLockWriteBiasOnReadLock {};
inline AuUInt32 gIsIntelAlderLakeOrGreater {}; inline AuUInt32 gIsIntelAlderLakeOrGreater {};
inline AuUInt32 gIsZen3OrGreater {};
inline AuUInt8 gCountOfPCores {}; inline AuUInt8 gCountOfPCores {};
} }
@ -204,7 +205,7 @@ namespace Aurora::Threading::Primitives
AuAtomicStore<SMTAtomic_t>(&gCoreTable[uWord], 1u); AuAtomicStore<SMTAtomic_t>(&gCoreTable[uWord], 1u);
auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u); auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u);
if (uNow <= gSpinAdaptiveThreshold) if (!gSpinAdaptiveThreshold || uNow <= gSpinAdaptiveThreshold)
{ {
auto uCount = spin; auto uCount = spin;
@ -217,22 +218,39 @@ namespace Aurora::Threading::Primitives
uCount += tlsSpinCountLocal; uCount += tlsSpinCountLocal;
} }
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT) #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
auto perfCounter = __rdtsc() + uCount; if (ThrdCfg::gIsIntelAlderLakeOrGreater)
while (__rdtsc() < perfCounter)
#else
while (uCount > 0)
#endif
{ {
if (callback()) if (callback())
{ {
bRet = true; bRet = true;
break;
} }
else else
{ {
SMPPause(); _umwait(1, __rdtsc() + uCount);
uCount--; bRet = callback();
}
}
else
#endif
{
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
auto perfCounter = __rdtsc() + uCount;
while (__rdtsc() < perfCounter)
#else
while (uCount > 0)
#endif
{
if (callback())
{
bRet = true;
break;
}
else
{
SMPPause();
uCount--;
}
} }
} }
} }
@ -279,7 +297,7 @@ namespace Aurora::Threading::Primitives
if (gHasThreadLocalTimeout) if (gHasThreadLocalTimeout)
{ {
auto uCount = tlsSpinCountLocal; auto uCount = tlsSpinCountLocal;
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
auto perfCounter = __rdtsc() + uCount; auto perfCounter = __rdtsc() + uCount;
while (__rdtsc() < perfCounter) while (__rdtsc() < perfCounter)
#else #else
@ -304,7 +322,7 @@ namespace Aurora::Threading::Primitives
else if (uNow <= (gSpinAdaptiveThreadCount / 4 * 3)) else if (uNow <= (gSpinAdaptiveThreadCount / 4 * 3))
{ {
auto uCount = (spin) / 3; auto uCount = (spin) / 3;
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
auto perfCounter = __rdtsc() + uCount; auto perfCounter = __rdtsc() + uCount;
while (__rdtsc() < perfCounter) while (__rdtsc() < perfCounter)
#else #else
@ -403,7 +421,7 @@ namespace Aurora::Threading::Primitives
AuAtomicStore<SMTAtomic_t>(&gCoreTable[uWord], 1u); AuAtomicStore<SMTAtomic_t>(&gCoreTable[uWord], 1u);
auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u); auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u);
if (uNow <= gSpinAdaptiveThreshold) if (!gSpinAdaptiveThreshold || uNow <= gSpinAdaptiveThreshold)
{ {
auto uCount = spin; auto uCount = spin;
bool bSMTProbablyHit {}; bool bSMTProbablyHit {};
@ -433,6 +451,20 @@ namespace Aurora::Threading::Primitives
bRet = callback(); bRet = callback();
} }
} }
else if (ThrdCfg::gIsZen3OrGreater)
{
_mm_monitorx((void *)pWord, 0, 0);
if (callback())
{
bRet = true;
}
else
{
_mm_mwaitx(2, 0, uCount);
bRet = callback();
}
}
else else
#endif #endif
{ {
@ -499,7 +531,7 @@ namespace Aurora::Threading::Primitives
if (gHasThreadLocalTimeout) if (gHasThreadLocalTimeout)
{ {
auto uCount = tlsSpinCountLocal; auto uCount = tlsSpinCountLocal;
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
auto perfCounter = __rdtsc() + uCount; auto perfCounter = __rdtsc() + uCount;
while (__rdtsc() < perfCounter) while (__rdtsc() < perfCounter)
#else #else
@ -524,7 +556,7 @@ namespace Aurora::Threading::Primitives
else if (uNow <= (gSpinAdaptiveThreadCount / 4 * 3)) else if (uNow <= (gSpinAdaptiveThreadCount / 4 * 3))
{ {
auto uCount = (spin) / 3; auto uCount = (spin) / 3;
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
auto perfCounter = __rdtsc() + uCount; auto perfCounter = __rdtsc() + uCount;
while (__rdtsc() < perfCounter) while (__rdtsc() < perfCounter)
#else #else