mirror of
https://sourceware.org/git/glibc.git
synced 2025-01-08 10:20:15 +00:00
5cb6329652
memset with zero as the value to set is by far the most common case
(99%+ for Python3 and GCC).
bzero can be optimized slightly further for this case by using a zero-idiom
xor to broadcast the set value to a register (vector or GPR).
Co-developed-by: Noah Goldstein <goldstein.w.n@gmail.com>
(cherry picked from commit 3d9f171bfb)
45 lines
1015 B
ArmAsm
45 lines
1015 B
ArmAsm
/* AVX2 parameters for the shared memset template
   "memset-vec-unaligned-erms.S".

   Target: x86-64, GNU as (AT&T operand order), run through the C
   preprocessor.  This file only defines macros and then includes the
   template; it contains no instructions outside macro bodies.  */
#if IS_IN (libc)
# define USE_WITH_AVX2 1

/* One vector is a 32-byte YMM register (see VEC below).  */
# define VEC_SIZE 32
/* NOTE(review): presumably the encoded byte sizes of one vector move
   and of the template's return sequence -- confirm against
   memset-vec-unaligned-erms.S, which is where they are consumed.  */
# define MOV_SIZE 4
# define RET_SIZE 4

/* VEC(i) names the i-th vector register, e.g. VEC(0) -> ymm0.  */
# define VEC(i) ymm##i

/* Unaligned and aligned vector move mnemonics.  */
# define VMOVU vmovdqu
# define VMOVA vmovdqa

/* memset entry: copy the 32-bit fill value D into the low dword of
   %xmm0 (broadcast happens later, see MEMSET_VDUP_TO_VEC0_*) and copy
   R into %rax.  */
# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
  vmovd d, %xmm0; \
  movq r, %rax;

/* bzero entry: the fill value is always zero, so clear %xmm0 with the
   xor zeroing idiom instead of moving a value in.  Being VEX-encoded,
   this also clears the upper half of %ymm0.  */
# define BZERO_ZERO_VEC0() \
  vpxor %xmm0, %xmm0, %xmm0

/* wmemset entry: same setup as memset; only the broadcast width
   differs (see WMEMSET_VDUP_TO_VEC0_*).  */
# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
  MEMSET_SET_VEC0_AND_SET_RETURN(d, r)

/* Replicate the low byte of %xmm0 across the full 32-byte %ymm0 (HIGH)
   or across the 16-byte %xmm0 only (LOW).  */
# define MEMSET_VDUP_TO_VEC0_HIGH() vpbroadcastb %xmm0, %ymm0
# define MEMSET_VDUP_TO_VEC0_LOW() vpbroadcastb %xmm0, %xmm0

/* As above, but replicate the low 32-bit element (wmemset).  */
# define WMEMSET_VDUP_TO_VEC0_HIGH() vpbroadcastd %xmm0, %ymm0
# define WMEMSET_VDUP_TO_VEC0_LOW() vpbroadcastd %xmm0, %xmm0

/* Section and symbol naming.  Each is only a default: a file that
   defines the macro before including this one overrides it.  */
# ifndef SECTION
#  define SECTION(p) p##.avx
# endif
# ifndef MEMSET_SYMBOL
#  define MEMSET_SYMBOL(p,s) p##_avx2_##s
# endif
# ifndef BZERO_SYMBOL
#  define BZERO_SYMBOL(p,s) p##_avx2_##s
# endif
# ifndef WMEMSET_SYMBOL
#  define WMEMSET_SYMBOL(p,s) p##_avx2_##s
# endif

/* NOTE(review): presumably tells the template to use XMM stores for
   sizes below VEC_SIZE (hence the *_LOW broadcasts above) -- confirm in
   memset-vec-unaligned-erms.S.  */
# define USE_XMM_LESS_VEC

/* Instantiate the template with the parameters defined above.  */
# include "memset-vec-unaligned-erms.S"
#endif