glibc/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
H.J. Lu 1b968b6b9b x86-64: Add memset family functions with 256-bit EVEX
Update ifunc-memset.h/ifunc-wmemset.h to select the function optimized
with 256-bit EVEX instructions using YMM16-YMM31 registers to avoid RTM
abort with usable AVX512VL and AVX512BW since VZEROUPPER isn't needed at
function exit.
2021-03-29 07:40:17 -07:00

25 lines
550 B
ArmAsm

#if IS_IN (libc)
# define VEC_SIZE 32
# define XMM0 xmm16
# define YMM0 ymm16
# define VEC0 ymm16
# define VEC(i) VEC##i
# define VMOVU vmovdqu64
# define VMOVA vmovdqa64
# define VZEROUPPER
# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
movq r, %rax; \
vpbroadcastb d, %VEC0
# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
movq r, %rax; \
vpbroadcastd d, %VEC0
# define SECTION(p) p##.evex
# define MEMSET_SYMBOL(p,s) p##_evex_##s
# define WMEMSET_SYMBOL(p,s) p##_evex_##s
# include "memset-vec-unaligned-erms.S"
#endif