mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-25 12:11:10 +00:00
x86: Optimizing memcpy for AMD Zen architecture.
Modify the shareable cache size '__x86_shared_cache_size', which is a factor in computing the non-temporal threshold parameter '__x86_shared_non_temporal_threshold', to optimize memcpy for AMD Zen architectures. In the existing implementation, the shareable cache is computed as 'L3 per thread, L2 per core'. Recomputing this shareable cache as 'L3 per CCX (Core Complex)' brings performance gains. According to the large-bench variant results, this patch also addresses the regression on AMD Zen architectures. Reviewed-by: Premachandra Mallappa <premachandra.mallappa@amd.com>
This commit is contained in:
parent
641a124845
commit
59803e81f9
@ -320,7 +320,7 @@ init_cacheinfo (void)
|
||||
threads = 1 << ((ecx >> 12) & 0x0f);
|
||||
}
|
||||
|
||||
if (threads == 0)
|
||||
if (threads == 0 || cpu_features->basic.family >= 0x17)
|
||||
{
|
||||
/* If APIC ID width is not available, use logical
|
||||
processor count. */
|
||||
@ -335,13 +335,30 @@ init_cacheinfo (void)
|
||||
if (threads > 0)
|
||||
shared /= threads;
|
||||
|
||||
/* Get shared cache per ccx for Zen architectures. */
|
||||
if (cpu_features->basic.family >= 0x17)
|
||||
{
|
||||
unsigned int eax;
|
||||
|
||||
/* Get number of threads share the L3 cache in CCX. */
|
||||
__cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx);
|
||||
|
||||
unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1;
|
||||
shared *= threads_per_ccx;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Account for exclusive L2 and L3 caches. */
|
||||
shared += core;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (cpu_features->data_cache_size != 0)
|
||||
{
|
||||
if (data == 0 || cpu_features->basic.kind != arch_kind_amd)
|
||||
data = cpu_features->data_cache_size;
|
||||
}
|
||||
|
||||
if (data > 0)
|
||||
{
|
||||
@ -354,7 +371,10 @@ init_cacheinfo (void)
|
||||
}
|
||||
|
||||
if (cpu_features->shared_cache_size != 0)
|
||||
{
|
||||
if (shared == 0 || cpu_features->basic.kind != arch_kind_amd)
|
||||
shared = cpu_features->shared_cache_size;
|
||||
}
|
||||
|
||||
if (shared > 0)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user