mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-11 22:00:08 +00:00
cecbac5212
No bug.
This patch doubles the rep_movsb_threshold when using ERMS. Based on
benchmarks, the vector copy loop, especially now that it handles 4k
aliasing, is better for these medium-range sizes.
On Skylake with ERMS:
Size, Align1, Align2, dst>src, (rep movsb) / (vec copy)
4096, 0, 0, 0, 0.975
4096, 0, 0, 1, 0.953
4096, 12, 0, 0, 0.969
4096, 12, 0, 1, 0.872
4096, 44, 0, 0, 0.979
4096, 44, 0, 1, 0.83
4096, 0, 12, 0, 1.006
4096, 0, 12, 1, 0.989
4096, 0, 44, 0, 0.739
4096, 0, 44, 1, 0.942
4096, 12, 12, 0, 1.009
4096, 12, 12, 1, 0.973
4096, 44, 44, 0, 0.791
4096, 44, 44, 1, 0.961
4096, 2048, 0, 0, 0.978
4096, 2048, 0, 1, 0.951
4096, 2060, 0, 0, 0.986
4096, 2060, 0, 1, 0.963
4096, 2048, 12, 0, 0.971
4096, 2048, 12, 1, 0.941
4096, 2060, 12, 0, 0.977
4096, 2060, 12, 1, 0.949
8192, 0, 0, 0, 0.85
8192, 0, 0, 1, 0.845
8192, 13, 0, 0, 0.937
8192, 13, 0, 1, 0.939
8192, 45, 0, 0, 0.932
8192, 45, 0, 1, 0.927
8192, 0, 13, 0, 0.621
8192, 0, 13, 1, 0.62
8192, 0, 45, 0, 0.53
8192, 0, 45, 1, 0.516
8192, 13, 13, 0, 0.664
8192, 13, 13, 1, 0.659
8192, 45, 45, 0, 0.593
8192, 45, 45, 1, 0.575
8192, 2048, 0, 0, 0.854
8192, 2048, 0, 1, 0.834
8192, 2061, 0, 0, 0.863
8192, 2061, 0, 1, 0.857
8192, 2048, 13, 0, 0.63
8192, 2048, 13, 1, 0.629
8192, 2061, 13, 0, 0.627
8192, 2061, 13, 1, 0.62
Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
(cherry picked from commit
|
||
---|---|---|
.. | ||
bits | ||
fpu | ||
include | ||
nptl | ||
sys/platform | ||
__longjmp_cancel.S | ||
abi-note.c | ||
atomic-machine.h | ||
cacheinfo.c | ||
cacheinfo.h | ||
cet-control.h | ||
check-cet.awk | ||
configure | ||
configure.ac | ||
cpu-features-offsets.sym | ||
cpu-features.c | ||
cpu-tunables.c | ||
dl-cacheinfo.h | ||
dl-cet.c | ||
dl-diagnostics-cpu.c | ||
dl-get-cpu-features.c | ||
dl-hwcap.h | ||
dl-isa-level.h | ||
dl-lookupcfg.h | ||
dl-minsigstacksize.h | ||
dl-procinfo.c | ||
dl-procinfo.h | ||
dl-procruntime.c | ||
dl-prop.h | ||
dl-tunables.list | ||
elf-initfini.h | ||
elide.h | ||
float128-abi.h | ||
fpu_control.h | ||
get-cpuid-feature-leaf.c | ||
get-isa-level.h | ||
hp-timing.h | ||
init-arch.h | ||
isa-level.c | ||
jmp_buf-ssp.sym | ||
ldbl2mpn.c | ||
ldsodefs.h | ||
libc-start.c | ||
libc-start.h | ||
link_map.h | ||
linkmap.h | ||
longjmp.c | ||
Makeconfig | ||
Makefile | ||
string_private.h | ||
sysdep.h | ||
tininess.h | ||
tst-cet-legacy-1.c | ||
tst-cet-legacy-1a.c | ||
tst-cet-legacy-2.c | ||
tst-cet-legacy-2a.c | ||
tst-cet-legacy-3.c | ||
tst-cet-legacy-4.c | ||
tst-cet-legacy-4a.c | ||
tst-cet-legacy-4b.c | ||
tst-cet-legacy-4c.c | ||
tst-cet-legacy-5.c | ||
tst-cet-legacy-5a.c | ||
tst-cet-legacy-5b.c | ||
tst-cet-legacy-6.c | ||
tst-cet-legacy-6a.c | ||
tst-cet-legacy-6b.c | ||
tst-cet-legacy-7.c | ||
tst-cet-legacy-8.c | ||
tst-cet-legacy-9-static.c | ||
tst-cet-legacy-9.c | ||
tst-cet-legacy-10-static.c | ||
tst-cet-legacy-10.c | ||
tst-cet-legacy-mod-1.c | ||
tst-cet-legacy-mod-2.c | ||
tst-cet-legacy-mod-4.c | ||
tst-cet-legacy-mod-5.c | ||
tst-cet-legacy-mod-5a.c | ||
tst-cet-legacy-mod-5b.c | ||
tst-cet-legacy-mod-5c.c | ||
tst-cet-legacy-mod-6.c | ||
tst-cet-legacy-mod-6a.c | ||
tst-cet-legacy-mod-6b.c | ||
tst-cet-legacy-mod-6c.c | ||
tst-cet-legacy-mod-6d.c | ||
tst-cpu-features-cpuinfo-static.c | ||
tst-cpu-features-cpuinfo.c | ||
tst-cpu-features-supports-static.c | ||
tst-cpu-features-supports.c | ||
tst-get-cpu-features-static.c | ||
tst-get-cpu-features.c | ||
tst-ifunc-isa-1-static.c | ||
tst-ifunc-isa-1.c | ||
tst-ifunc-isa-2-static.c | ||
tst-ifunc-isa-2.c | ||
tst-ifunc-isa.h | ||
tst-isa-level-1.c | ||
tst-isa-level-mod-1-baseline.c | ||
tst-isa-level-mod-1-v2.c | ||
tst-isa-level-mod-1-v3.c | ||
tst-isa-level-mod-1-v4.c | ||
tst-isa-level-mod-1.c | ||
tst-ldbl-nonnormal-printf.c | ||
tst-memchr-rtm.c | ||
tst-memcmp-rtm.c | ||
tst-memmove-rtm.c | ||
tst-memrchr-rtm.c | ||
tst-memset-rtm.c | ||
tst-setjmp-cet.c | ||
tst-stack-align.h | ||
tst-strchr-rtm.c | ||
tst-strcpy-rtm.c | ||
tst-string-rtm.h | ||
tst-strlen-rtm.c | ||
tst-strncmp-rtm.c | ||
tst-strrchr-rtm.c | ||
tst-sysconf-cache-linesize-static.c | ||
tst-sysconf-cache-linesize.c | ||
tst-wcsncmp-rtm.c | ||
Versions |