[+] Zen3 on top of AlderLake optimizations
[*] Minor AlderLake adjustments
This commit is contained in:
parent
459a9a789b
commit
f3ba901f71
@ -111,12 +111,43 @@ namespace Aurora::Threading
|
|||||||
{
|
{
|
||||||
if (uEndTimeSteadyNS2 - uNowNS <= 100000ull)
|
if (uEndTimeSteadyNS2 - uNowNS <= 100000ull)
|
||||||
{
|
{
|
||||||
for (AU_ITERATE_N(i, 32))
|
auto uNow = AuAtomicAdd(&AuThreadPrimitives::gSpinAdaptiveCurrentCount, 1u);
|
||||||
|
if (!AuThreadPrimitives::gSpinAdaptiveThreshold || uNow <= AuThreadPrimitives::gSpinAdaptiveThreshold)
|
||||||
{
|
{
|
||||||
AuThreadPrimitives::SMPPause();
|
#if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
|
||||||
}
|
if (AuThreadPrimitives::ThrdCfg::gIsIntelAlderLakeOrGreater)
|
||||||
|
{
|
||||||
|
_tpause(0, 10000);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
#if 0
|
||||||
|
for (AU_ITERATE_N(i, 32))
|
||||||
|
{
|
||||||
|
AuThreadPrimitives::SMPPause();
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
// The compiler won't unroll the 32-iteration loop above, so the pauses are unrolled by hand
|
||||||
|
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
|
||||||
|
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
|
||||||
|
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
|
||||||
|
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
|
||||||
|
// TODO: 16 pauses so far; should the total be 32 or 16?
|
||||||
|
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
|
||||||
|
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
|
||||||
|
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
|
||||||
|
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
continue;
|
AuAtomicSub(&AuThreadPrimitives::gSpinAdaptiveCurrentCount, 1u);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
AuAtomicSub(&AuThreadPrimitives::gSpinAdaptiveCurrentCount, 1u);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -131,12 +162,44 @@ namespace Aurora::Threading
|
|||||||
{
|
{
|
||||||
if (AuThreadPrimitives::ThrdCfg::gPlatformIsSMPProcessorOptimized)
|
if (AuThreadPrimitives::ThrdCfg::gPlatformIsSMPProcessorOptimized)
|
||||||
{
|
{
|
||||||
for (AU_ITERATE_N(i, 32))
|
auto uNow = AuAtomicAdd(&AuThreadPrimitives::gSpinAdaptiveCurrentCount, 1u);
|
||||||
|
if (!AuThreadPrimitives::gSpinAdaptiveThreshold || uNow <= AuThreadPrimitives::gSpinAdaptiveThreshold)
|
||||||
{
|
{
|
||||||
AuThreadPrimitives::SMPPause();
|
#if (defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86))
|
||||||
}
|
if (AuThreadPrimitives::ThrdCfg::gIsIntelAlderLakeOrGreater)
|
||||||
|
{
|
||||||
|
_tpause(0, 1000);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
#if 0
|
||||||
|
for (AU_ITERATE_N(i, 32))
|
||||||
|
{
|
||||||
|
AuThreadPrimitives::SMPPause();
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
// The compiler won't unroll the 32-iteration loop above, so the pauses are unrolled by hand
|
||||||
|
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
|
||||||
|
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
|
||||||
|
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
|
||||||
|
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
|
||||||
|
// TODO: 16 pauses so far; should the total be 32 or 16?
|
||||||
|
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
|
||||||
|
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
|
||||||
|
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
|
||||||
|
AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause(); AuThreadPrimitives::SMPPause();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
continue;
|
AuAtomicSub(&AuThreadPrimitives::gSpinAdaptiveCurrentCount, 1u);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
AuAtomicSub(&AuThreadPrimitives::gSpinAdaptiveCurrentCount, 1u);
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -101,8 +101,14 @@ namespace Aurora::Threading::Primitives
|
|||||||
|
|
||||||
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
|
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
|
||||||
{
|
{
|
||||||
|
#if 0
|
||||||
auto cpuId = AuHwInfo::cpuid(7);
|
auto cpuId = AuHwInfo::cpuid(7);
|
||||||
ThrdCfg::gIsIntelAlderLakeOrGreater = (cpuId.ecx >> 5) & 1;
|
ThrdCfg::gIsIntelAlderLakeOrGreater = (cpuId.ecx >> 5) & 1;
|
||||||
|
#else
|
||||||
|
auto &cpuId = AuHwInfo::GetCPUInfo().cpuId;
|
||||||
|
ThrdCfg::gIsIntelAlderLakeOrGreater = AuBitTest(cpuId.f_7_ECX, 5);
|
||||||
|
ThrdCfg::gIsZen3OrGreater = AuBitTest(cpuId.f_81_ECX, 29);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -85,6 +85,7 @@ namespace Aurora::Threading::Primitives
|
|||||||
inline bool gAlwaysRWLockWriteBiasOnReadLock {};
|
inline bool gAlwaysRWLockWriteBiasOnReadLock {};
|
||||||
inline bool gEnableRWLockWriteBiasOnReadLock {};
|
inline bool gEnableRWLockWriteBiasOnReadLock {};
|
||||||
inline AuUInt32 gIsIntelAlderLakeOrGreater {};
|
inline AuUInt32 gIsIntelAlderLakeOrGreater {};
|
||||||
|
inline AuUInt32 gIsZen3OrGreater {};
|
||||||
inline AuUInt8 gCountOfPCores {};
|
inline AuUInt8 gCountOfPCores {};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -204,7 +205,7 @@ namespace Aurora::Threading::Primitives
|
|||||||
AuAtomicStore<SMTAtomic_t>(&gCoreTable[uWord], 1u);
|
AuAtomicStore<SMTAtomic_t>(&gCoreTable[uWord], 1u);
|
||||||
|
|
||||||
auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u);
|
auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u);
|
||||||
if (uNow <= gSpinAdaptiveThreshold)
|
if (!gSpinAdaptiveThreshold || uNow <= gSpinAdaptiveThreshold)
|
||||||
{
|
{
|
||||||
auto uCount = spin;
|
auto uCount = spin;
|
||||||
|
|
||||||
@ -217,22 +218,39 @@ namespace Aurora::Threading::Primitives
|
|||||||
uCount += tlsSpinCountLocal;
|
uCount += tlsSpinCountLocal;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
|
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
|
||||||
auto perfCounter = __rdtsc() + uCount;
|
if (ThrdCfg::gIsIntelAlderLakeOrGreater)
|
||||||
while (__rdtsc() < perfCounter)
|
|
||||||
#else
|
|
||||||
while (uCount > 0)
|
|
||||||
#endif
|
|
||||||
{
|
{
|
||||||
if (callback())
|
if (callback())
|
||||||
{
|
{
|
||||||
bRet = true;
|
bRet = true;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
SMPPause();
|
_umwait(1, __rdtsc() + uCount);
|
||||||
uCount--;
|
bRet = callback();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
|
||||||
|
auto perfCounter = __rdtsc() + uCount;
|
||||||
|
while (__rdtsc() < perfCounter)
|
||||||
|
#else
|
||||||
|
while (uCount > 0)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
if (callback())
|
||||||
|
{
|
||||||
|
bRet = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
SMPPause();
|
||||||
|
uCount--;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -279,7 +297,7 @@ namespace Aurora::Threading::Primitives
|
|||||||
if (gHasThreadLocalTimeout)
|
if (gHasThreadLocalTimeout)
|
||||||
{
|
{
|
||||||
auto uCount = tlsSpinCountLocal;
|
auto uCount = tlsSpinCountLocal;
|
||||||
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
|
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
|
||||||
auto perfCounter = __rdtsc() + uCount;
|
auto perfCounter = __rdtsc() + uCount;
|
||||||
while (__rdtsc() < perfCounter)
|
while (__rdtsc() < perfCounter)
|
||||||
#else
|
#else
|
||||||
@ -304,7 +322,7 @@ namespace Aurora::Threading::Primitives
|
|||||||
else if (uNow <= (gSpinAdaptiveThreadCount / 4 * 3))
|
else if (uNow <= (gSpinAdaptiveThreadCount / 4 * 3))
|
||||||
{
|
{
|
||||||
auto uCount = (spin) / 3;
|
auto uCount = (spin) / 3;
|
||||||
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
|
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
|
||||||
auto perfCounter = __rdtsc() + uCount;
|
auto perfCounter = __rdtsc() + uCount;
|
||||||
while (__rdtsc() < perfCounter)
|
while (__rdtsc() < perfCounter)
|
||||||
#else
|
#else
|
||||||
@ -403,7 +421,7 @@ namespace Aurora::Threading::Primitives
|
|||||||
AuAtomicStore<SMTAtomic_t>(&gCoreTable[uWord], 1u);
|
AuAtomicStore<SMTAtomic_t>(&gCoreTable[uWord], 1u);
|
||||||
|
|
||||||
auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u);
|
auto uNow = AuAtomicAdd(&gSpinAdaptiveCurrentCount, 1u);
|
||||||
if (uNow <= gSpinAdaptiveThreshold)
|
if (!gSpinAdaptiveThreshold || uNow <= gSpinAdaptiveThreshold)
|
||||||
{
|
{
|
||||||
auto uCount = spin;
|
auto uCount = spin;
|
||||||
bool bSMTProbablyHit {};
|
bool bSMTProbablyHit {};
|
||||||
@ -433,6 +451,20 @@ namespace Aurora::Threading::Primitives
|
|||||||
bRet = callback();
|
bRet = callback();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (ThrdCfg::gIsZen3OrGreater)
|
||||||
|
{
|
||||||
|
_mm_monitorx((void *)pWord, 0, 0);
|
||||||
|
|
||||||
|
if (callback())
|
||||||
|
{
|
||||||
|
bRet = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_mm_mwaitx(2, 0, uCount);
|
||||||
|
bRet = callback();
|
||||||
|
}
|
||||||
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
@ -499,7 +531,7 @@ namespace Aurora::Threading::Primitives
|
|||||||
if (gHasThreadLocalTimeout)
|
if (gHasThreadLocalTimeout)
|
||||||
{
|
{
|
||||||
auto uCount = tlsSpinCountLocal;
|
auto uCount = tlsSpinCountLocal;
|
||||||
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
|
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
|
||||||
auto perfCounter = __rdtsc() + uCount;
|
auto perfCounter = __rdtsc() + uCount;
|
||||||
while (__rdtsc() < perfCounter)
|
while (__rdtsc() < perfCounter)
|
||||||
#else
|
#else
|
||||||
@ -524,7 +556,7 @@ namespace Aurora::Threading::Primitives
|
|||||||
else if (uNow <= (gSpinAdaptiveThreadCount / 4 * 3))
|
else if (uNow <= (gSpinAdaptiveThreadCount / 4 * 3))
|
||||||
{
|
{
|
||||||
auto uCount = (spin) / 3;
|
auto uCount = (spin) / 3;
|
||||||
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
|
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) || defined(ALT_RDT)
|
||||||
auto perfCounter = __rdtsc() + uCount;
|
auto perfCounter = __rdtsc() + uCount;
|
||||||
while (__rdtsc() < perfCounter)
|
while (__rdtsc() < perfCounter)
|
||||||
#else
|
#else
|
||||||
|
Loading…
Reference in New Issue
Block a user