Mirror of https://sourceware.org/git/glibc.git — synced 2024-11-23 13:30:06 +00:00, at commit a6b7502ec0.
No bug. The optimizations are as follows: 1) Always align entry to 64 bytes. This makes behavior more predictable and makes other frontend optimizations easier. 2) Make the L(more_8x_vec) cases 4k aliasing aware. This can have significant benefits in the case that: 0 < (dst - src) < [256, 512] 3) Align before `rep movsb`. For ERMS this is roughly a [0, 30%] improvement and for FSRM [-10%, 25%]. In addition to these primary changes there is general cleanup throughout to optimize the aligning routines and control flow logic. Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com> Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
18 lines · 412 B · (site-detected as "ArmAsm", but the content is actually x86-64 AVX assembly with C-preprocessor macros)
/* memmove/memcpy: AVX (256-bit ymm) variant with RTM-safe epilogues.
   This wrapper only defines the vector-width, instruction-selection,
   section and symbol-naming macros, then includes the generic
   unaligned/ERMS memmove template which provides the actual code.  */

#if IS_IN (libc)
/* Operate on 32-byte AVX vectors.  */
# define VEC_SIZE	32
/* VEC(i) expands (via token pasting) to the ymm register ymm<i>.  */
# define VEC(i)		ymm##i
/* Non-temporal (streaming) store used by the template for copies
   large enough to bypass the cache.  */
# define VMOVNT		vmovntdq
/* Unaligned vector load/store.  */
# define VMOVU		vmovdqu
/* Aligned vector load/store.  */
# define VMOVA		vmovdqa
/* Encoded size in bytes of one vector move; the template uses this
   for code-size/padding computations.  */
# define MOV_SIZE	4
/* RTM variant: return through the XTEST-aware sequence, which only
   executes vzeroupper outside a transaction (NOTE: vzeroupper inside
   an RTM transaction would abort it — hence the _rtm variant).  */
# define ZERO_UPPER_VEC_REGISTERS_RETURN \
  ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST

/* Route all plain vzeroupper-returns through the shared L(return)
   epilogue instead of an inline vzeroupper; ret.  */
# define VZEROUPPER_RETURN	jmp	 L(return)

/* Emit the code into the .avx.rtm text subsection.  */
# define SECTION(p)		p##.avx.rtm
/* Name the generated symbols <p>_avx_<s>_rtm,
   e.g. __memmove_avx_unaligned_rtm.  */
# define MEMMOVE_SYMBOL(p,s)	p##_avx_##s##_rtm

# include "memmove-vec-unaligned-erms.S"
#endif