Add optimized x86-64 wcscmp

This commit is contained in:
Ulrich Drepper 2011-09-05 14:08:23 -04:00
parent 5fc11f0d64
commit 49d42c37ba
3 changed files with 940 additions and 1 deletions

View File

@ -1,5 +1,7 @@
2011-08-29 Liubov Dmitrieva <liubov.dmitrieva@gmail.com> 2011-08-29 Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
* sysdeps/x86_64/wcscmp.S: New file.
* sysdeps/i386/i686/multiarch/Makefile: (sysdep_routines): Add * sysdeps/i386/i686/multiarch/Makefile: (sysdep_routines): Add
wcscmp-c wcscmp-sse2 wcscmp-c wcscmp-sse2
* sysdeps/i386/i686/multiarch/wcscmp-c.c: New file. * sysdeps/i386/i686/multiarch/wcscmp-c.c: New file.

3
NEWS
View File

@ -25,7 +25,8 @@ Version 2.15
* Improved strcpy, strncpy, stpcpy, stpncpy for SSE2 and SSSE3 on x86-64. * Improved strcpy, strncpy, stpcpy, stpncpy for SSE2 and SSSE3 on x86-64.
Contributed by HJ Lu. Contributed by HJ Lu.
* Optimized strcat and strncat on x86-64 and optimized wcscmp on x86-32. * Optimized strcat and strncat on x86-64 and optimized wcscmp on x86-32 and
x86-64.
Contributed by Liubov Dmitrieva. Contributed by Liubov Dmitrieva.
* New interfaces: scandirat, scandirat64 * New interfaces: scandirat, scandirat64

936
sysdeps/x86_64/wcscmp.S Normal file
View File

@ -0,0 +1,936 @@
/* Optimized wcscmp for x86-64 with SSE2.
Copyright (C) 2011 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <sysdep.h>
#include <init-arch.h>
/* Define multiple versions only for the definition in libc and for
the DSO. In static binaries we need wcscmp before the initialization
happened. */
.text
ENTRY (wcscmp)
/*
* This implementation uses SSE to compare up to 16 bytes at a time.
*/
mov %esi, %eax
mov %edi, %edx
pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
mov %al, %ch
mov %dl, %cl
and $63, %eax /* rsi alignment in cache line */
and $63, %edx /* rdi alignment in cache line */
and $15, %cl
jz L(continue_00)
cmp $16, %edx
jb L(continue_0)
cmp $32, %edx
jb L(continue_16)
cmp $48, %edx
jb L(continue_32)
L(continue_48):
and $15, %ch
jz L(continue_48_00)
cmp $16, %eax
jb L(continue_0_48)
cmp $32, %eax
jb L(continue_16_48)
cmp $48, %eax
jb L(continue_32_48)
.p2align 4
L(continue_48_48):
mov (%rsi), %ecx
cmp %ecx, (%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 4(%rsi), %ecx
cmp %ecx, 4(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 8(%rsi), %ecx
cmp %ecx, 8(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 12(%rsi), %ecx
cmp %ecx, 12(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
movdqu 16(%rdi), %xmm1
movdqu 16(%rsi), %xmm2
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_16)
movdqu 32(%rdi), %xmm1
movdqu 32(%rsi), %xmm2
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_32)
movdqu 48(%rdi), %xmm1
movdqu 48(%rsi), %xmm2
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_48)
add $64, %rsi
add $64, %rdi
jmp L(continue_48_48)
L(continue_0):
and $15, %ch
jz L(continue_0_00)
cmp $16, %eax
jb L(continue_0_0)
cmp $32, %eax
jb L(continue_0_16)
cmp $48, %eax
jb L(continue_0_32)
.p2align 4
L(continue_0_48):
mov (%rsi), %ecx
cmp %ecx, (%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 4(%rsi), %ecx
cmp %ecx, 4(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 8(%rsi), %ecx
cmp %ecx, 8(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 12(%rsi), %ecx
cmp %ecx, 12(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
movdqu 16(%rdi), %xmm1
movdqu 16(%rsi), %xmm2
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_16)
movdqu 32(%rdi), %xmm1
movdqu 32(%rsi), %xmm2
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_32)
mov 48(%rsi), %ecx
cmp %ecx, 48(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 52(%rsi), %ecx
cmp %ecx, 52(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 56(%rsi), %ecx
cmp %ecx, 56(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 60(%rsi), %ecx
cmp %ecx, 60(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
add $64, %rsi
add $64, %rdi
jmp L(continue_0_48)
.p2align 4
L(continue_00):
and $15, %ch
jz L(continue_00_00)
cmp $16, %eax
jb L(continue_00_0)
cmp $32, %eax
jb L(continue_00_16)
cmp $48, %eax
jb L(continue_00_32)
.p2align 4
L(continue_00_48):
pcmpeqd (%rdi), %xmm0
mov (%rdi), %eax
pmovmskb %xmm0, %ecx
test %ecx, %ecx
jnz L(less4_double_words1)
sub (%rsi), %eax
jnz L(return)
mov 4(%rdi), %eax
sub 4(%rsi), %eax
jnz L(return)
mov 8(%rdi), %eax
sub 8(%rsi), %eax
jnz L(return)
mov 12(%rdi), %eax
sub 12(%rsi), %eax
jnz L(return)
movdqu 16(%rsi), %xmm2
pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
pmovmskb %xmm2, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_16)
movdqu 32(%rsi), %xmm2
pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
pcmpeqd 32(%rdi), %xmm2 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
pmovmskb %xmm2, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_32)
movdqu 48(%rsi), %xmm2
pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
pcmpeqd 48(%rdi), %xmm2 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
pmovmskb %xmm2, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_48)
add $64, %rsi
add $64, %rdi
jmp L(continue_00_48)
.p2align 4
L(continue_32):
and $15, %ch
jz L(continue_32_00)
cmp $16, %eax
jb L(continue_0_32)
cmp $32, %eax
jb L(continue_16_32)
cmp $48, %eax
jb L(continue_32_32)
.p2align 4
L(continue_32_48):
mov (%rsi), %ecx
cmp %ecx, (%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 4(%rsi), %ecx
cmp %ecx, 4(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 8(%rsi), %ecx
cmp %ecx, 8(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 12(%rsi), %ecx
cmp %ecx, 12(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 16(%rsi), %ecx
cmp %ecx, 16(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 20(%rsi), %ecx
cmp %ecx, 20(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 24(%rsi), %ecx
cmp %ecx, 24(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 28(%rsi), %ecx
cmp %ecx, 28(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
movdqu 32(%rdi), %xmm1
movdqu 32(%rsi), %xmm2
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_32)
movdqu 48(%rdi), %xmm1
movdqu 48(%rsi), %xmm2
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_48)
add $64, %rsi
add $64, %rdi
jmp L(continue_32_48)
.p2align 4
L(continue_16):
and $15, %ch
jz L(continue_16_00)
cmp $16, %eax
jb L(continue_0_16)
cmp $32, %eax
jb L(continue_16_16)
cmp $48, %eax
jb L(continue_16_32)
.p2align 4
L(continue_16_48):
mov (%rsi), %ecx
cmp %ecx, (%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 4(%rsi), %ecx
cmp %ecx, 4(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 8(%rsi), %ecx
cmp %ecx, 8(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 12(%rsi), %ecx
cmp %ecx, 12(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
movdqu 16(%rdi), %xmm1
movdqu 16(%rsi), %xmm2
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_16)
mov 32(%rsi), %ecx
cmp %ecx, 32(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 36(%rsi), %ecx
cmp %ecx, 36(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 40(%rsi), %ecx
cmp %ecx, 40(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 44(%rsi), %ecx
cmp %ecx, 44(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
movdqu 48(%rdi), %xmm1
movdqu 48(%rsi), %xmm2
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_48)
add $64, %rsi
add $64, %rdi
jmp L(continue_16_48)
.p2align 4
L(continue_00_00):
movdqa (%rdi), %xmm1
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words)
movdqa 16(%rdi), %xmm3
pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
pcmpeqd 16(%rsi), %xmm3 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm3 /* packed sub of comparison results*/
pmovmskb %xmm3, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_16)
movdqa 32(%rdi), %xmm5
pcmpeqd %xmm5, %xmm0 /* Any null double_word? */
pcmpeqd 32(%rsi), %xmm5 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm5 /* packed sub of comparison results*/
pmovmskb %xmm5, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_32)
movdqa 48(%rdi), %xmm1
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd 48(%rsi), %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_48)
add $64, %rsi
add $64, %rdi
jmp L(continue_00_00)
.p2align 4
L(continue_00_32):
movdqu (%rsi), %xmm2
pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
pmovmskb %xmm2, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words)
add $16, %rsi
add $16, %rdi
jmp L(continue_00_48)
.p2align 4
L(continue_00_16):
movdqu (%rsi), %xmm2
pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
pmovmskb %xmm2, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words)
movdqu 16(%rsi), %xmm2
pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
pmovmskb %xmm2, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_16)
add $32, %rsi
add $32, %rdi
jmp L(continue_00_48)
.p2align 4
L(continue_00_0):
movdqu (%rsi), %xmm2
pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
pmovmskb %xmm2, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words)
movdqu 16(%rsi), %xmm2
pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
pmovmskb %xmm2, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_16)
movdqu 32(%rsi), %xmm2
pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
pcmpeqd 32(%rdi), %xmm2 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
pmovmskb %xmm2, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_32)
add $48, %rsi
add $48, %rdi
jmp L(continue_00_48)
.p2align 4
L(continue_48_00):
pcmpeqd (%rsi), %xmm0
mov (%rdi), %eax
pmovmskb %xmm0, %ecx
test %ecx, %ecx
jnz L(less4_double_words1)
sub (%rsi), %eax
jnz L(return)
mov 4(%rdi), %eax
sub 4(%rsi), %eax
jnz L(return)
mov 8(%rdi), %eax
sub 8(%rsi), %eax
jnz L(return)
mov 12(%rdi), %eax
sub 12(%rsi), %eax
jnz L(return)
movdqu 16(%rdi), %xmm1
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_16)
movdqu 32(%rdi), %xmm1
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd 32(%rsi), %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_32)
movdqu 48(%rdi), %xmm1
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd 48(%rsi), %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_48)
add $64, %rsi
add $64, %rdi
jmp L(continue_48_00)
.p2align 4
L(continue_32_00):
movdqu (%rdi), %xmm1
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words)
add $16, %rsi
add $16, %rdi
jmp L(continue_48_00)
.p2align 4
L(continue_16_00):
movdqu (%rdi), %xmm1
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words)
movdqu 16(%rdi), %xmm1
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_16)
add $32, %rsi
add $32, %rdi
jmp L(continue_48_00)
.p2align 4
L(continue_0_00):
movdqu (%rdi), %xmm1
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words)
movdqu 16(%rdi), %xmm1
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_16)
movdqu 32(%rdi), %xmm1
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd 32(%rsi), %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_32)
add $48, %rsi
add $48, %rdi
jmp L(continue_48_00)
.p2align 4
L(continue_32_32):
movdqu (%rdi), %xmm1
movdqu (%rsi), %xmm2
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words)
add $16, %rsi
add $16, %rdi
jmp L(continue_48_48)
.p2align 4
L(continue_16_16):
movdqu (%rdi), %xmm1
movdqu (%rsi), %xmm2
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words)
movdqu 16(%rdi), %xmm3
movdqu 16(%rsi), %xmm4
pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm3 /* packed sub of comparison results*/
pmovmskb %xmm3, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_16)
add $32, %rsi
add $32, %rdi
jmp L(continue_48_48)
.p2align 4
L(continue_0_0):
movdqu (%rdi), %xmm1
movdqu (%rsi), %xmm2
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words)
movdqu 16(%rdi), %xmm3
movdqu 16(%rsi), %xmm4
pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm3 /* packed sub of comparison results*/
pmovmskb %xmm3, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_16)
movdqu 32(%rdi), %xmm1
movdqu 32(%rsi), %xmm2
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_32)
add $48, %rsi
add $48, %rdi
jmp L(continue_48_48)
.p2align 4
L(continue_0_16):
movdqu (%rdi), %xmm1
movdqu (%rsi), %xmm2
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words)
movdqu 16(%rdi), %xmm1
movdqu 16(%rsi), %xmm2
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words_16)
add $32, %rsi
add $32, %rdi
jmp L(continue_32_48)
.p2align 4
L(continue_0_32):
movdqu (%rdi), %xmm1
movdqu (%rsi), %xmm2
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words)
add $16, %rsi
add $16, %rdi
jmp L(continue_16_48)
.p2align 4
L(continue_16_32):
movdqu (%rdi), %xmm1
movdqu (%rsi), %xmm2
pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
jnz L(less4_double_words)
add $16, %rsi
add $16, %rdi
jmp L(continue_32_48)
.p2align 4
L(less4_double_words1):
cmp (%rsi), %eax
jne L(nequal)
test %eax, %eax
jz L(equal)
mov 4(%rsi), %ecx
cmp %ecx, 4(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 8(%rsi), %ecx
cmp %ecx, 8(%rdi)
jne L(nequal)
test %ecx, %ecx
jz L(equal)
mov 12(%rsi), %edx
mov 12(%rdi), %eax
sub %edx, %eax
ret
.p2align 4
L(less4_double_words):
test %dl, %dl
jz L(next_two_double_words)
and $15, %dl
jz L(second_double_word)
mov (%rdi), %eax
sub (%rsi), %eax
ret
.p2align 4
L(second_double_word):
mov 4(%rdi), %eax
sub 4(%rsi), %eax
ret
.p2align 4
L(next_two_double_words):
and $15, %dh
jz L(fourth_double_word)
mov 8(%rdi), %eax
sub 8(%rsi), %eax
ret
.p2align 4
L(fourth_double_word):
mov 12(%rdi), %eax
sub 12(%rsi), %eax
ret
.p2align 4
L(less4_double_words_16):
test %dl, %dl
jz L(next_two_double_words_16)
and $15, %dl
jz L(second_double_word_16)
mov 16(%rdi), %eax
sub 16(%rsi), %eax
ret
.p2align 4
L(second_double_word_16):
mov 20(%rdi), %eax
sub 20(%rsi), %eax
ret
.p2align 4
L(next_two_double_words_16):
and $15, %dh
jz L(fourth_double_word_16)
mov 24(%rdi), %eax
sub 24(%rsi), %eax
ret
.p2align 4
L(fourth_double_word_16):
mov 28(%rdi), %eax
sub 28(%rsi), %eax
ret
.p2align 4
L(less4_double_words_32):
test %dl, %dl
jz L(next_two_double_words_32)
and $15, %dl
jz L(second_double_word_32)
mov 32(%rdi), %eax
sub 32(%rsi), %eax
ret
.p2align 4
L(second_double_word_32):
mov 36(%rdi), %eax
sub 36(%rsi), %eax
ret
.p2align 4
L(next_two_double_words_32):
and $15, %dh
jz L(fourth_double_word_32)
mov 40(%rdi), %eax
sub 40(%rsi), %eax
ret
.p2align 4
L(fourth_double_word_32):
mov 44(%rdi), %eax
sub 44(%rsi), %eax
ret
.p2align 4
L(less4_double_words_48):
test %dl, %dl
jz L(next_two_double_words_48)
and $15, %dl
jz L(second_double_word_48)
mov 48(%rdi), %eax
sub 48(%rsi), %eax
ret
.p2align 4
L(second_double_word_48):
mov 52(%rdi), %eax
sub 52(%rsi), %eax
ret
.p2align 4
L(next_two_double_words_48):
and $15, %dh
jz L(fourth_double_word_48)
mov 56(%rdi), %eax
sub 56(%rsi), %eax
ret
.p2align 4
L(fourth_double_word_48):
mov 60(%rdi), %eax
sub 60(%rsi), %eax
ret
.p2align 4
L(return):
ret
.p2align 4
L(nequal):
mov $1, %eax
ja L(nequal_bigger)
neg %eax
L(nequal_bigger):
ret
.p2align 4
L(equal):
xor %rax, %rax
ret
END (wcscmp)
libc_hidden_def (wcscmp)