x86: Replace sse2 instructions with avx in memcmp-evex-movbe.S

This commit replaces two usages of SSE2 'movups' with AVX 'vmovdqu'.

it could potentially be dangerous to use SSE2 if this function is ever
called without using 'vzeroupper' beforehand. While compilers appear
to use 'vzeroupper' before function calls if AVX2 has been used, using
SSE2 here is more brittle. Since it is not absolutely necessary it
should be avoided.

It costs 2-extra bytes but the extra bytes should only eat into
alignment padding.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
This commit is contained in:
Noah Goldstein 2021-10-23 01:26:47 -04:00
parent d8e7d06381
commit bad852b61b

View File

@ -561,13 +561,13 @@ L(between_16_31):
/* From 16 to 31 bytes. No branch when size == 16. */
/* Use movups to save code size. */
movups (%rsi), %xmm2
vmovdqu (%rsi), %xmm2
VPCMP $4, (%rdi), %xmm2, %k1
kmovd %k1, %eax
testl %eax, %eax
jnz L(return_vec_0_lv)
/* Use overlapping loads to avoid branches. */
movups -16(%rsi, %rdx, CHAR_SIZE), %xmm2
vmovdqu -16(%rsi, %rdx, CHAR_SIZE), %xmm2
VPCMP $4, -16(%rdi, %rdx, CHAR_SIZE), %xmm2, %k1
addl $(CHAR_PER_VEC - (16 / CHAR_SIZE)), %edx
kmovd %k1, %eax