mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-26 06:50:07 +00:00
830566307f
Implement x86-64 memset with unaligned store and rep movsb. Support 16-byte, 32-byte and 64-byte vector register sizes. A single file provides 2 implementations of memset, one with rep stosb and the other without rep stosb. They share the same codes when size is between 2 times of vector register size and REP_STOSB_THRESHOLD which defaults to 2KB. Key features: 1. Use overlapping store to avoid branch. 2. For size <= 4 times of vector register size, fully unroll the loop. 3. For size > 4 times of vector register size, store 4 times of vector register size at a time. [BZ #19881] * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add memset-sse2-unaligned-erms, memset-avx2-unaligned-erms and memset-avx512-unaligned-erms. * sysdeps/x86_64/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Test __memset_chk_sse2_unaligned, __memset_chk_sse2_unaligned_erms, __memset_chk_avx2_unaligned, __memset_chk_avx2_unaligned_erms, __memset_chk_avx512_unaligned, __memset_chk_avx512_unaligned_erms, __memset_sse2_unaligned, __memset_sse2_unaligned_erms, __memset_erms, __memset_avx2_unaligned, __memset_avx2_unaligned_erms, __memset_avx512_unaligned_erms and __memset_avx512_unaligned. * sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S: New file. * sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S: Likewise. * sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S: Likewise. * sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S: Likewise.
18 lines
395 B
ArmAsm
18 lines
395 B
ArmAsm
#ifdef HAVE_AVX512_ASM_SUPPORT
|
|
# define VEC_SIZE 64
|
|
# define VEC(i) zmm##i
|
|
# define VMOVU vmovdqu64
|
|
# define VMOVA vmovdqa64
|
|
|
|
# define VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
|
|
vmovd d, %xmm0; \
|
|
movq r, %rax; \
|
|
vpbroadcastb %xmm0, %xmm0; \
|
|
vpbroadcastq %xmm0, %zmm0
|
|
|
|
# define SECTION(p) p##.avx512
|
|
# define MEMSET_SYMBOL(p,s) p##_avx512_##s
|
|
|
|
# include "memset-vec-unaligned-erms.S"
|
|
#endif
|