mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-27 07:20:11 +00:00
3d9f171bfb
Calling memset with zero as the fill value is by far the most common case (99%+ of calls in Python3 and GCC). bzero can be optimized slightly further for this case by using a zero-idiom xor to produce the fill value in a register (vector or GPR) instead of broadcasting it. Co-developed-by: Noah Goldstein <goldstein.w.n@gmail.com>
41 lines
865 B
ArmAsm
41 lines
865 B
ArmAsm
#if IS_IN (libc)
/* Parameter set for the AVX512 (EVEX, 64-byte vector) build of the
   memset template that is included at the end of this block.  Every
   line here is a preprocessor definition; the instructions inside the
   macro bodies are emitted only where the template expands them.
   Syntax is GAS AT&T (source operand first, `%' on registers).  */

/* Marks this build as the AVX512 variant.  */
# define USE_WITH_AVX512 1

/* Width in bytes of one vector register (zmm = 64 bytes).  */
# define VEC_SIZE 64
/* NOTE(review): presumably the encoded byte lengths of one vector move
   and of `ret', used by the template for code-size/alignment math --
   confirm against memset-vec-unaligned-erms.S.  */
# define MOV_SIZE 6
# define RET_SIZE 1

/* The fill value lives in register 16, named here by its 128-, 256-
   and 512-bit views.  Registers 16-31 can only be encoded with EVEX.  */
# define XMM0 xmm16
# define YMM0 ymm16
# define VEC0 zmm16
/* VEC(i) pastes to VECi, which is then expanded again; only VEC0 is
   defined in this file, so only VEC(0) resolves to a register.  */
# define VEC(i) VEC##i

/* Unaligned and aligned full-vector moves (EVEX forms).  */
# define VMOVU vmovdqu64
# define VMOVA vmovdqa64

/* Expands to nothing in this variant.
   NOTE(review): presumably safe because only register 16 is written
   and ymm0-15 upper halves are never dirtied -- confirm against the
   template.  */
# define VZEROUPPER

/* memset entry setup.
   d: source operand whose byte is replicated into every byte of VEC0.
   r: operand copied into %rax (the return value, going by the macro
      name).  */
# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
  vpbroadcastb d, %VEC0; \
  movq r, %rax

/* bzero entry setup: clear the fill register by xoring it with itself.
   The xor is written on the 128-bit view; an EVEX-encoded write to an
   xmm destination also zeroes the remaining upper bits of the zmm
   register, so all VEC_SIZE bytes of VEC0 end up zero.  The xmm16 form
   of this instruction needs AVX512VL.  */
# define BZERO_ZERO_VEC0() \
  vpxorq %XMM0, %XMM0, %XMM0

/* wmemset entry setup: same as the memset macro above, but replicates
   a 32-bit element (vpbroadcastd) instead of a byte.  */
# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
  vpbroadcastd d, %VEC0; \
  movq r, %rax

/* These four expand to nothing: the broadcasts above already fill the
   whole of VEC0.
   NOTE(review): presumably the template invokes them so that other
   variants can finish their broadcast in separate steps -- confirm.  */
# define MEMSET_VDUP_TO_VEC0_HIGH()
# define MEMSET_VDUP_TO_VEC0_LOW()

# define WMEMSET_VDUP_TO_VEC0_HIGH()
# define WMEMSET_VDUP_TO_VEC0_LOW()

/* Name builders: SECTION appends `.evex512' to the section prefix p;
   the symbol macros insert `_avx512_' between prefix p and suffix s.  */
# define SECTION(p) p##.evex512
# define MEMSET_SYMBOL(p,s) p##_avx512_##s
# define WMEMSET_SYMBOL(p,s) p##_avx512_##s
/* NOTE(review): going by the name, asks the template to use masked
   stores for sizes below VEC_SIZE -- confirm against the template.  */
# define USE_LESS_VEC_MASK_STORE 1
/* Instantiate the shared implementation with the settings above.  */
# include "memset-vec-unaligned-erms.S"
#endif