x86-64: Use ZMM16-ZMM31 in AVX512 memmove family functions

Update ifunc-memmove.h to select the function optimized with AVX512
instructions using ZMM16-ZMM31 registers to avoid RTM abort with usable
AVX512VL since VZEROUPPER isn't needed at function exit.

(cherry picked from commit e4fda46310)
This commit is contained in:
H.J. Lu 2021-03-07 09:45:23 -08:00
parent a47bf3df2f
commit 1a66898afb
3 changed files with 35 additions and 19 deletions

View File

@@ -83,10 +83,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
HAS_ARCH_FEATURE (AVX512F_Usable), HAS_ARCH_FEATURE (AVX512F_Usable),
__memmove_chk_avx512_no_vzeroupper) __memmove_chk_avx512_no_vzeroupper)
IFUNC_IMPL_ADD (array, i, __memmove_chk, IFUNC_IMPL_ADD (array, i, __memmove_chk,
HAS_ARCH_FEATURE (AVX512F_Usable), HAS_ARCH_FEATURE (AVX512VL_Usable),
__memmove_chk_avx512_unaligned) __memmove_chk_avx512_unaligned)
IFUNC_IMPL_ADD (array, i, __memmove_chk, IFUNC_IMPL_ADD (array, i, __memmove_chk,
HAS_ARCH_FEATURE (AVX512F_Usable), HAS_ARCH_FEATURE (AVX512VL_Usable),
__memmove_chk_avx512_unaligned_erms) __memmove_chk_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, __memmove_chk, IFUNC_IMPL_ADD (array, i, __memmove_chk,
HAS_ARCH_FEATURE (AVX_Usable), HAS_ARCH_FEATURE (AVX_Usable),
@@ -148,10 +148,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
HAS_ARCH_FEATURE (AVX512F_Usable), HAS_ARCH_FEATURE (AVX512F_Usable),
__memmove_avx512_no_vzeroupper) __memmove_avx512_no_vzeroupper)
IFUNC_IMPL_ADD (array, i, memmove, IFUNC_IMPL_ADD (array, i, memmove,
HAS_ARCH_FEATURE (AVX512F_Usable), HAS_ARCH_FEATURE (AVX512VL_Usable),
__memmove_avx512_unaligned) __memmove_avx512_unaligned)
IFUNC_IMPL_ADD (array, i, memmove, IFUNC_IMPL_ADD (array, i, memmove,
HAS_ARCH_FEATURE (AVX512F_Usable), HAS_ARCH_FEATURE (AVX512VL_Usable),
__memmove_avx512_unaligned_erms) __memmove_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3), IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
__memmove_ssse3_back) __memmove_ssse3_back)
@@ -697,10 +697,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
HAS_ARCH_FEATURE (AVX512F_Usable), HAS_ARCH_FEATURE (AVX512F_Usable),
__memcpy_chk_avx512_no_vzeroupper) __memcpy_chk_avx512_no_vzeroupper)
IFUNC_IMPL_ADD (array, i, __memcpy_chk, IFUNC_IMPL_ADD (array, i, __memcpy_chk,
HAS_ARCH_FEATURE (AVX512F_Usable), HAS_ARCH_FEATURE (AVX512VL_Usable),
__memcpy_chk_avx512_unaligned) __memcpy_chk_avx512_unaligned)
IFUNC_IMPL_ADD (array, i, __memcpy_chk, IFUNC_IMPL_ADD (array, i, __memcpy_chk,
HAS_ARCH_FEATURE (AVX512F_Usable), HAS_ARCH_FEATURE (AVX512VL_Usable),
__memcpy_chk_avx512_unaligned_erms) __memcpy_chk_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, __memcpy_chk, IFUNC_IMPL_ADD (array, i, __memcpy_chk,
HAS_ARCH_FEATURE (AVX_Usable), HAS_ARCH_FEATURE (AVX_Usable),
@@ -766,10 +766,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
HAS_ARCH_FEATURE (AVX512F_Usable), HAS_ARCH_FEATURE (AVX512F_Usable),
__memcpy_avx512_no_vzeroupper) __memcpy_avx512_no_vzeroupper)
IFUNC_IMPL_ADD (array, i, memcpy, IFUNC_IMPL_ADD (array, i, memcpy,
HAS_ARCH_FEATURE (AVX512F_Usable), HAS_ARCH_FEATURE (AVX512VL_Usable),
__memcpy_avx512_unaligned) __memcpy_avx512_unaligned)
IFUNC_IMPL_ADD (array, i, memcpy, IFUNC_IMPL_ADD (array, i, memcpy,
HAS_ARCH_FEATURE (AVX512F_Usable), HAS_ARCH_FEATURE (AVX512VL_Usable),
__memcpy_avx512_unaligned_erms) __memcpy_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, memcpy, 1, IFUNC_IMPL_ADD (array, i, memcpy, 1,
@@ -783,10 +783,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
HAS_ARCH_FEATURE (AVX512F_Usable), HAS_ARCH_FEATURE (AVX512F_Usable),
__mempcpy_chk_avx512_no_vzeroupper) __mempcpy_chk_avx512_no_vzeroupper)
IFUNC_IMPL_ADD (array, i, __mempcpy_chk, IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
HAS_ARCH_FEATURE (AVX512F_Usable), HAS_ARCH_FEATURE (AVX512VL_Usable),
__mempcpy_chk_avx512_unaligned) __mempcpy_chk_avx512_unaligned)
IFUNC_IMPL_ADD (array, i, __mempcpy_chk, IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
HAS_ARCH_FEATURE (AVX512F_Usable), HAS_ARCH_FEATURE (AVX512VL_Usable),
__mempcpy_chk_avx512_unaligned_erms) __mempcpy_chk_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, __mempcpy_chk, IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
HAS_ARCH_FEATURE (AVX_Usable), HAS_ARCH_FEATURE (AVX_Usable),
@@ -828,10 +828,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
HAS_ARCH_FEATURE (AVX512F_Usable), HAS_ARCH_FEATURE (AVX512F_Usable),
__mempcpy_avx512_no_vzeroupper) __mempcpy_avx512_no_vzeroupper)
IFUNC_IMPL_ADD (array, i, mempcpy, IFUNC_IMPL_ADD (array, i, mempcpy,
HAS_ARCH_FEATURE (AVX512F_Usable), HAS_ARCH_FEATURE (AVX512VL_Usable),
__mempcpy_avx512_unaligned) __mempcpy_avx512_unaligned)
IFUNC_IMPL_ADD (array, i, mempcpy, IFUNC_IMPL_ADD (array, i, mempcpy,
HAS_ARCH_FEATURE (AVX512F_Usable), HAS_ARCH_FEATURE (AVX512VL_Usable),
__mempcpy_avx512_unaligned_erms) __mempcpy_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, mempcpy, IFUNC_IMPL_ADD (array, i, mempcpy,
HAS_ARCH_FEATURE (AVX_Usable), HAS_ARCH_FEATURE (AVX_Usable),

View File

@@ -56,15 +56,17 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
&& !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
{ {
if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable))
return OPTIMIZE (avx512_no_vzeroupper); {
if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
return OPTIMIZE (avx512_unaligned_erms); return OPTIMIZE (avx512_unaligned_erms);
return OPTIMIZE (avx512_unaligned); return OPTIMIZE (avx512_unaligned);
} }
return OPTIMIZE (avx512_no_vzeroupper);
}
if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
{ {
if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)) if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable))

View File

@@ -1,11 +1,25 @@
#if IS_IN (libc) #if IS_IN (libc)
# define VEC_SIZE 64 # define VEC_SIZE 64
# define VEC(i) zmm##i # define XMM0 xmm16
# define XMM1 xmm17
# define YMM0 ymm16
# define YMM1 ymm17
# define VEC0 zmm16
# define VEC1 zmm17
# define VEC2 zmm18
# define VEC3 zmm19
# define VEC4 zmm20
# define VEC5 zmm21
# define VEC6 zmm22
# define VEC7 zmm23
# define VEC8 zmm24
# define VEC(i) VEC##i
# define VMOVNT vmovntdq # define VMOVNT vmovntdq
# define VMOVU vmovdqu64 # define VMOVU vmovdqu64
# define VMOVA vmovdqa64 # define VMOVA vmovdqa64
# define VZEROUPPER
# define SECTION(p) p##.avx512 # define SECTION(p) p##.evex512
# define MEMMOVE_SYMBOL(p,s) p##_avx512_##s # define MEMMOVE_SYMBOL(p,s) p##_avx512_##s
# include "memmove-vec-unaligned-erms.S" # include "memmove-vec-unaligned-erms.S"