x86: Optimize SSE2 memchr overflow calculation

SSE2 memchr computes "edx + ecx - 16" where ecx is less than 16.  Use
"edx - (16 - ecx)", instead of saturated math, to avoid possible addition
overflow.  This replaces

	add	%ecx, %edx
	sbb	%eax, %eax
	or	%eax, %edx
	sub	$16, %edx

with

	neg	%ecx
	add	$16, %ecx
	sub	%ecx, %edx

The x86_64 change is the same, except that it uses rcx/rdx instead of
ecx/edx.

	* sysdeps/i386/i686/multiarch/memchr-sse2.S (MEMCHR): Compute
	"edx + ecx - 16" as "edx - (16 - ecx)" to avoid possible addition
	overflow.
	* sysdeps/x86_64/memchr.S (memchr): Likewise.
This commit is contained in:
H.J. Lu 2017-05-19 10:46:29 -07:00
parent 1d71a63153
commit 402bf06952
3 changed files with 18 additions and 16 deletions

View File

@ -1,3 +1,9 @@
2017-05-19 H.J. Lu <hongjiu.lu@intel.com>
* sysdeps/i386/i686/multiarch/memchr-sse2.S (MEMCHR): Compute
"edx + ecx - 16" as "edx - (16 - ecx)" to avoid possible addition
overflow.
* sysdeps/x86_64/memchr.S (memchr): Likewise.
2017-05-19 Adhemerval Zanella <adhemerval.zanella@linaro.org>
* misc/Makefile (CFLAGS-vmsplice.c): Remove rule.

View File

@ -117,14 +117,12 @@ L(crosscache):
# ifndef USE_AS_RAWMEMCHR
jnz L(match_case2_prolog1)
/* Calculate the last acceptable address and check for possible
addition overflow by using saturated math:
edx = ecx + edx
edx |= -(edx < ecx) */
add %ecx, %edx
sbb %eax, %eax
or %eax, %edx
sub $16, %edx
/* "ecx" is less than 16. Calculate "edx + ecx - 16" by using
"edx - (16 - ecx)" instead of "(edx + ecx) - 16" to avoid
possible addition overflow. */
neg %ecx
add $16, %ecx
sub %ecx, %edx
jbe L(return_null)
lea 16(%edi), %edi
# else

View File

@ -76,14 +76,12 @@ L(crosscache):
.p2align 4
L(unaligned_no_match):
/* Calculate the last acceptable address and check for possible
addition overflow by using saturated math:
rdx = rcx + rdx
rdx |= -(rdx < rcx) */
add %rcx, %rdx
sbb %rax, %rax
or %rax, %rdx
sub $16, %rdx
/* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using
"rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to avoid
possible addition overflow. */
neg %rcx
add $16, %rcx
sub %rcx, %rdx
jbe L(return_null)
add $16, %rdi
sub $64, %rdx