mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-30 00:31:08 +00:00
3d9f171bfb
Calls to memset with zero as the value to set are by far the majority case (99%+ for Python3 and GCC). bzero can be slightly better optimized for this case by using a zero-idiom xor to broadcast the set value to a register (vector or GPR). Co-developed-by: Noah Goldstein <goldstein.w.n@gmail.com>
45 lines
1015 B
ArmAsm
45 lines
1015 B
ArmAsm
/* AVX2 parameterisation of the generic memset implementation.

   Syntax: GNU as, AT&T operand order (source, destination), x86-64,
   preprocessed by cpp.

   This file emits no code of its own.  It defines the vector width, the
   vector move mnemonics, the value-broadcast sequences and the symbol and
   section naming macros, then includes "memset-vec-unaligned-erms.S",
   which is expected to consume them.  */

#if IS_IN (libc)

/* Marker that this is the AVX2 flavour.
   NOTE(review): presumably tested by the included file -- confirm.  */
# define USE_WITH_AVX2	1

/* One vector register is a 32-byte ymm register.  */
# define VEC_SIZE	32
/* NOTE(review): MOV_SIZE and RET_SIZE look like encoded instruction sizes
   in bytes (vector move / return sequence) used by the included file for
   code-layout decisions -- confirm against memset-vec-unaligned-erms.S.  */
# define MOV_SIZE	4
# define RET_SIZE	4

/* VEC(i) names the i-th vector register, e.g. VEC(0) is ymm0.  */
# define VEC(i)		ymm##i

/* Unaligned and aligned vector move mnemonics.  */
# define VMOVU		vmovdqu
# define VMOVA		vmovdqa

/* Copy the fill value D into the low dword of xmm0 and the value R into
   rax.  Clobbers xmm0 and rax.  The broadcast to the full vector is done
   separately by the *_VDUP_TO_VEC0_* macros.  */
# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
  vmovd d, %xmm0; \
  movq r, %rax;

/* Zero vector 0 with the xor zeroing idiom.  Being VEX-encoded, the write
   to xmm0 also clears the upper half of ymm0, so no broadcast is needed
   afterwards.  */
# define BZERO_ZERO_VEC0() \
  vpxor %xmm0, %xmm0, %xmm0

/* wmemset stages its fill value exactly like memset; only the broadcast
   element width differs (see the WMEMSET_VDUP_* macros).  */
# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
  MEMSET_SET_VEC0_AND_SET_RETURN(d, r)

/* Replicate the low byte of xmm0 into every byte of ymm0 (HIGH, full
   32-byte vector) or of xmm0 (LOW, 16-byte vector).  */
# define MEMSET_VDUP_TO_VEC0_HIGH() vpbroadcastb %xmm0, %ymm0
# define MEMSET_VDUP_TO_VEC0_LOW() vpbroadcastb %xmm0, %xmm0

/* Same as above but replicating the low dword, for wmemset.  */
# define WMEMSET_VDUP_TO_VEC0_HIGH() vpbroadcastd %xmm0, %ymm0
# define WMEMSET_VDUP_TO_VEC0_LOW() vpbroadcastd %xmm0, %xmm0

/* Section and symbol naming.  Each macro is only a default: a file that
   includes this one may define it beforehand to override the name.  */
# ifndef SECTION
#  define SECTION(p)		p##.avx
# endif
# ifndef MEMSET_SYMBOL
#  define MEMSET_SYMBOL(p,s)	p##_avx2_##s
# endif
# ifndef BZERO_SYMBOL
#  define BZERO_SYMBOL(p,s)	p##_avx2_##s
# endif
# ifndef WMEMSET_SYMBOL
#  define WMEMSET_SYMBOL(p,s)	p##_avx2_##s
# endif

/* NOTE(review): presumably lets the included file use xmm registers for
   sizes smaller than VEC_SIZE -- confirm against its use there.  */
# define USE_XMM_LESS_VEC
# include "memset-vec-unaligned-erms.S"
#endif