mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-27 15:30:07 +00:00
x86-64: Use ZMM16-ZMM31 in AVX512 memmove family functions
Update ifunc-memmove.h to select, when AVX512VL is usable, the functions
optimized with AVX512 instructions that use the ZMM16-ZMM31 registers. These
functions avoid RTM aborts because VZEROUPPER isn't needed at function exit.
(cherry picked from commit e4fda46310)
This commit is contained in:
parent
a47bf3df2f
commit
1a66898afb
@ -83,10 +83,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|||||||
HAS_ARCH_FEATURE (AVX512F_Usable),
|
HAS_ARCH_FEATURE (AVX512F_Usable),
|
||||||
__memmove_chk_avx512_no_vzeroupper)
|
__memmove_chk_avx512_no_vzeroupper)
|
||||||
IFUNC_IMPL_ADD (array, i, __memmove_chk,
|
IFUNC_IMPL_ADD (array, i, __memmove_chk,
|
||||||
HAS_ARCH_FEATURE (AVX512F_Usable),
|
HAS_ARCH_FEATURE (AVX512VL_Usable),
|
||||||
__memmove_chk_avx512_unaligned)
|
__memmove_chk_avx512_unaligned)
|
||||||
IFUNC_IMPL_ADD (array, i, __memmove_chk,
|
IFUNC_IMPL_ADD (array, i, __memmove_chk,
|
||||||
HAS_ARCH_FEATURE (AVX512F_Usable),
|
HAS_ARCH_FEATURE (AVX512VL_Usable),
|
||||||
__memmove_chk_avx512_unaligned_erms)
|
__memmove_chk_avx512_unaligned_erms)
|
||||||
IFUNC_IMPL_ADD (array, i, __memmove_chk,
|
IFUNC_IMPL_ADD (array, i, __memmove_chk,
|
||||||
HAS_ARCH_FEATURE (AVX_Usable),
|
HAS_ARCH_FEATURE (AVX_Usable),
|
||||||
@ -148,10 +148,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|||||||
HAS_ARCH_FEATURE (AVX512F_Usable),
|
HAS_ARCH_FEATURE (AVX512F_Usable),
|
||||||
__memmove_avx512_no_vzeroupper)
|
__memmove_avx512_no_vzeroupper)
|
||||||
IFUNC_IMPL_ADD (array, i, memmove,
|
IFUNC_IMPL_ADD (array, i, memmove,
|
||||||
HAS_ARCH_FEATURE (AVX512F_Usable),
|
HAS_ARCH_FEATURE (AVX512VL_Usable),
|
||||||
__memmove_avx512_unaligned)
|
__memmove_avx512_unaligned)
|
||||||
IFUNC_IMPL_ADD (array, i, memmove,
|
IFUNC_IMPL_ADD (array, i, memmove,
|
||||||
HAS_ARCH_FEATURE (AVX512F_Usable),
|
HAS_ARCH_FEATURE (AVX512VL_Usable),
|
||||||
__memmove_avx512_unaligned_erms)
|
__memmove_avx512_unaligned_erms)
|
||||||
IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
|
IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
|
||||||
__memmove_ssse3_back)
|
__memmove_ssse3_back)
|
||||||
@ -697,10 +697,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|||||||
HAS_ARCH_FEATURE (AVX512F_Usable),
|
HAS_ARCH_FEATURE (AVX512F_Usable),
|
||||||
__memcpy_chk_avx512_no_vzeroupper)
|
__memcpy_chk_avx512_no_vzeroupper)
|
||||||
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
|
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
|
||||||
HAS_ARCH_FEATURE (AVX512F_Usable),
|
HAS_ARCH_FEATURE (AVX512VL_Usable),
|
||||||
__memcpy_chk_avx512_unaligned)
|
__memcpy_chk_avx512_unaligned)
|
||||||
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
|
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
|
||||||
HAS_ARCH_FEATURE (AVX512F_Usable),
|
HAS_ARCH_FEATURE (AVX512VL_Usable),
|
||||||
__memcpy_chk_avx512_unaligned_erms)
|
__memcpy_chk_avx512_unaligned_erms)
|
||||||
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
|
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
|
||||||
HAS_ARCH_FEATURE (AVX_Usable),
|
HAS_ARCH_FEATURE (AVX_Usable),
|
||||||
@ -766,10 +766,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|||||||
HAS_ARCH_FEATURE (AVX512F_Usable),
|
HAS_ARCH_FEATURE (AVX512F_Usable),
|
||||||
__memcpy_avx512_no_vzeroupper)
|
__memcpy_avx512_no_vzeroupper)
|
||||||
IFUNC_IMPL_ADD (array, i, memcpy,
|
IFUNC_IMPL_ADD (array, i, memcpy,
|
||||||
HAS_ARCH_FEATURE (AVX512F_Usable),
|
HAS_ARCH_FEATURE (AVX512VL_Usable),
|
||||||
__memcpy_avx512_unaligned)
|
__memcpy_avx512_unaligned)
|
||||||
IFUNC_IMPL_ADD (array, i, memcpy,
|
IFUNC_IMPL_ADD (array, i, memcpy,
|
||||||
HAS_ARCH_FEATURE (AVX512F_Usable),
|
HAS_ARCH_FEATURE (AVX512VL_Usable),
|
||||||
__memcpy_avx512_unaligned_erms)
|
__memcpy_avx512_unaligned_erms)
|
||||||
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
|
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
|
||||||
IFUNC_IMPL_ADD (array, i, memcpy, 1,
|
IFUNC_IMPL_ADD (array, i, memcpy, 1,
|
||||||
@ -783,10 +783,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|||||||
HAS_ARCH_FEATURE (AVX512F_Usable),
|
HAS_ARCH_FEATURE (AVX512F_Usable),
|
||||||
__mempcpy_chk_avx512_no_vzeroupper)
|
__mempcpy_chk_avx512_no_vzeroupper)
|
||||||
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
|
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
|
||||||
HAS_ARCH_FEATURE (AVX512F_Usable),
|
HAS_ARCH_FEATURE (AVX512VL_Usable),
|
||||||
__mempcpy_chk_avx512_unaligned)
|
__mempcpy_chk_avx512_unaligned)
|
||||||
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
|
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
|
||||||
HAS_ARCH_FEATURE (AVX512F_Usable),
|
HAS_ARCH_FEATURE (AVX512VL_Usable),
|
||||||
__mempcpy_chk_avx512_unaligned_erms)
|
__mempcpy_chk_avx512_unaligned_erms)
|
||||||
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
|
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
|
||||||
HAS_ARCH_FEATURE (AVX_Usable),
|
HAS_ARCH_FEATURE (AVX_Usable),
|
||||||
@ -828,10 +828,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|||||||
HAS_ARCH_FEATURE (AVX512F_Usable),
|
HAS_ARCH_FEATURE (AVX512F_Usable),
|
||||||
__mempcpy_avx512_no_vzeroupper)
|
__mempcpy_avx512_no_vzeroupper)
|
||||||
IFUNC_IMPL_ADD (array, i, mempcpy,
|
IFUNC_IMPL_ADD (array, i, mempcpy,
|
||||||
HAS_ARCH_FEATURE (AVX512F_Usable),
|
HAS_ARCH_FEATURE (AVX512VL_Usable),
|
||||||
__mempcpy_avx512_unaligned)
|
__mempcpy_avx512_unaligned)
|
||||||
IFUNC_IMPL_ADD (array, i, mempcpy,
|
IFUNC_IMPL_ADD (array, i, mempcpy,
|
||||||
HAS_ARCH_FEATURE (AVX512F_Usable),
|
HAS_ARCH_FEATURE (AVX512VL_Usable),
|
||||||
__mempcpy_avx512_unaligned_erms)
|
__mempcpy_avx512_unaligned_erms)
|
||||||
IFUNC_IMPL_ADD (array, i, mempcpy,
|
IFUNC_IMPL_ADD (array, i, mempcpy,
|
||||||
HAS_ARCH_FEATURE (AVX_Usable),
|
HAS_ARCH_FEATURE (AVX_Usable),
|
||||||
|
@ -56,15 +56,17 @@ IFUNC_SELECTOR (void)
|
|||||||
if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
|
if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
|
||||||
&& !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
|
&& !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
|
||||||
{
|
{
|
||||||
if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable))
|
||||||
return OPTIMIZE (avx512_no_vzeroupper);
|
{
|
||||||
|
|
||||||
if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
|
if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
|
||||||
return OPTIMIZE (avx512_unaligned_erms);
|
return OPTIMIZE (avx512_unaligned_erms);
|
||||||
|
|
||||||
return OPTIMIZE (avx512_unaligned);
|
return OPTIMIZE (avx512_unaligned);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return OPTIMIZE (avx512_no_vzeroupper);
|
||||||
|
}
|
||||||
|
|
||||||
if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
||||||
{
|
{
|
||||||
if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable))
|
if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable))
|
||||||
|
@ -1,11 +1,25 @@
|
|||||||
#if IS_IN (libc)
|
#if IS_IN (libc)
|
||||||
# define VEC_SIZE 64
|
# define VEC_SIZE 64
|
||||||
# define VEC(i) zmm##i
|
# define XMM0 xmm16
|
||||||
|
# define XMM1 xmm17
|
||||||
|
# define YMM0 ymm16
|
||||||
|
# define YMM1 ymm17
|
||||||
|
# define VEC0 zmm16
|
||||||
|
# define VEC1 zmm17
|
||||||
|
# define VEC2 zmm18
|
||||||
|
# define VEC3 zmm19
|
||||||
|
# define VEC4 zmm20
|
||||||
|
# define VEC5 zmm21
|
||||||
|
# define VEC6 zmm22
|
||||||
|
# define VEC7 zmm23
|
||||||
|
# define VEC8 zmm24
|
||||||
|
# define VEC(i) VEC##i
|
||||||
# define VMOVNT vmovntdq
|
# define VMOVNT vmovntdq
|
||||||
# define VMOVU vmovdqu64
|
# define VMOVU vmovdqu64
|
||||||
# define VMOVA vmovdqa64
|
# define VMOVA vmovdqa64
|
||||||
|
# define VZEROUPPER
|
||||||
|
|
||||||
# define SECTION(p) p##.avx512
|
# define SECTION(p) p##.evex512
|
||||||
# define MEMMOVE_SYMBOL(p,s) p##_avx512_##s
|
# define MEMMOVE_SYMBOL(p,s) p##_avx512_##s
|
||||||
|
|
||||||
# include "memmove-vec-unaligned-erms.S"
|
# include "memmove-vec-unaligned-erms.S"
|
||||||
|
Loading…
Reference in New Issue
Block a user