mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-21 12:30:06 +00:00
x86: Only align destination to 1x VEC_SIZE in memset 4x loop
Current code aligns to 2x VEC_SIZE. Aligning to 2x has no effect on performance other than potentially resulting in an additional iteration of the loop. Aligning to 1x maintains aligned stores (the only reason to align in this case) and doesn't incur any unnecessary loop iterations. Reviewed-by: Sunil K Pandey <skpgkp2@gmail.com>
This commit is contained in:
parent
3921c5b40f
commit
9469261cf1
@@ -293,7 +293,7 @@ L(more_2x_vec):
|
||||
leaq (VEC_SIZE * 4)(%rax), %LOOP_REG
|
||||
#endif
|
||||
/* Align dst for loop. */
|
||||
andq $(VEC_SIZE * -2), %LOOP_REG
|
||||
andq $(VEC_SIZE * -1), %LOOP_REG
|
||||
.p2align 4
|
||||
L(loop):
|
||||
VMOVA %VMM(0), LOOP_4X_OFFSET(%LOOP_REG)
|
||||
|
Loading…
Reference in New Issue
Block a user