Add unaligned strcmp.

This commit is contained in:
Ondřej Bílka 2013-09-03 16:21:38 +02:00
parent d34202f674
commit 8f02859f17
5 changed files with 231 additions and 2 deletions

View File

@ -1,3 +1,12 @@
2013-09-03 Ondřej Bílka <neleai@seznam.cz>
* sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S: New file.
* sysdeps/x86_64/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list):
Add __strcmp_sse2_unaligned.
* sysdeps/x86_64/multiarch/Makefile (sysdep_routines):
Add strcmp-sse2-unaligned.
* sysdeps/x86_64/multiarch/strcmp.S (strcmp): Select __strcmp_sse2_unaligned when Fast_Unaligned_Load is set.
2013-09-02 Mike Frysinger <vapier@gentoo.org>
* Versions.def (libc): Add GLIBC_2.19.

View File

@ -6,8 +6,10 @@ endif
ifeq ($(subdir),string)
sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
strend-sse4 memcmp-sse4 memcpy-ssse3 memcpy-sse2-unaligned mempcpy-ssse3 \
sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
strcmp-sse2-unaligned strncmp-ssse3 \
strend-sse4 memcmp-sse4 memcpy-ssse3 \
memcpy-sse2-unaligned mempcpy-ssse3 \
memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
strncase_l-ssse3 strcat-ssse3 strncat-ssse3\

View File

@ -118,6 +118,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, strcmp,
IFUNC_IMPL_ADD (array, i, strcmp, HAS_SSE4_2, __strcmp_sse42)
IFUNC_IMPL_ADD (array, i, strcmp, HAS_SSSE3, __strcmp_ssse3)
IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2))
/* Support sysdeps/x86_64/multiarch/strcpy.S. */

View File

@ -0,0 +1,210 @@
/* strcmp with unaligned loads
Copyright (C) 2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include "sysdep.h"
#define ALIGN(x) .p2align x
/* __strcmp_sse2_unaligned: strcmp built on unaligned 16-byte SSE2 loads.

   Syntax: GAS, AT&T operand order.
   NOTE(review): register roles below assume the System V AMD64 calling
   convention (first argument in %rdi, second in %rsi, result in %eax).

   In:   %rdi = first string, %rsi = second string.
   Out:  %eax = (first differing or terminating byte of the first string)
		minus (byte at the same index of the second string), both
		zero-extended; 0 when the strings are equal.
   Uses: %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9, %r10,
	 %xmm0-%xmm7, %xmm9, flags.  No stack usage.

   Core trick, used in every vector path: for a 16-byte chunk A of the
   first string and B of the second,
	pcmpeqb A, B   -> B[i] = 0xff where A[i] == B[i], else 0
	pminub  A, B   -> B[i] = 0 exactly where the bytes differ or A[i] == 0
   Comparing that result against an all-zero register and taking
   pmovmskb gives one bit per byte, set at "mismatch or end of string".

   Outline:
     1. If a 64-byte read from either pointer could run past a
	4096-byte boundary, compare the first 64 bytes one at a time
	(L(cross_page)); otherwise compare them with four vector steps.
     2. L(main_loop_header): advance to the next 64-byte-aligned address
	of the first string and loop 64 bytes per iteration.
     3. L(loop_cross_page): when the next 64-byte read of the second
	string would straddle a 4096-byte boundary, first examine the
	aligned 64-byte block that ends at the boundary.  */
ENTRY ( __strcmp_sse2_unaligned)
/* %rdx = 0: byte index used by the L(cross_page) path.  */
movl %edi, %eax
xorl %edx, %edx
/* %xmm7 stays all-zero for the whole function.  */
pxor %xmm7, %xmm7
/* (s1 | s2) & 4095 is >= the offset of each pointer within its
   4096-byte block, so if it is <= 4032 then reading 64 bytes from
   either pointer stays inside that block (presumably the page).  */
orl %esi, %eax
andl $4095, %eax
cmpl $4032, %eax
jg L(cross_page)
/* Bytes 0..15: build the "mismatch or NUL" mask in %eax.  */
movdqu (%rdi), %xmm1
movdqu (%rsi), %xmm0
pcmpeqb %xmm1, %xmm0
pminub %xmm1, %xmm0
/* %xmm1 becomes the zero vector; L(next_48_bytes) relies on it.  */
pxor %xmm1, %xmm1
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
testq %rax, %rax
je L(next_48_bytes)
/* In: %rax = non-zero mask, bit i set when byte i (relative to
   %rdi/%rsi) differs or terminates.  Returns the byte difference at
   the lowest such index.  */
L(return):
bsfq %rax, %rdx
movzbl (%rdi, %rdx), %eax
movzbl (%rsi, %rdx), %edx
subl %edx, %eax
ret
/* ALIGN (4) expands to .p2align 4, i.e. a 16-byte boundary.  */
ALIGN (4)
/* Bytes 16..63, same test as above with the three chunks interleaved.
   %xmm1 is still zero.  The three 16-bit masks are shifted to bit
   positions 16, 32 and 48 of %rax so that a bit index equals a byte
   index; bits 0..15 stay clear because bytes 0..15 already passed.  */
L(next_48_bytes):
movdqu 16(%rdi), %xmm6
movdqu 16(%rsi), %xmm3
movdqu 32(%rdi), %xmm5
pcmpeqb %xmm6, %xmm3
movdqu 32(%rsi), %xmm2
pminub %xmm6, %xmm3
pcmpeqb %xmm1, %xmm3
movdqu 48(%rdi), %xmm4
pcmpeqb %xmm5, %xmm2
pmovmskb %xmm3, %edx
movdqu 48(%rsi), %xmm0
pminub %xmm5, %xmm2
pcmpeqb %xmm1, %xmm2
pcmpeqb %xmm4, %xmm0
pmovmskb %xmm2, %eax
salq $16, %rdx
pminub %xmm4, %xmm0
pcmpeqb %xmm1, %xmm0
salq $32, %rax
orq %rdx, %rax
pmovmskb %xmm0, %ecx
movq %rcx, %rdx
salq $48, %rdx
/* The final orq sets ZF for the jne below.  */
orq %rdx, %rax
jne L(return)
/* Reached when the first 64 bytes of both strings are equal and
   contain no NUL (by fall-through, or from L(cross_page_loop)).
   Sets up:
     %rax = first-string cursor, rounded up to the next multiple of 64
	    above %rdi (advance k is in 1..64, so only bytes already
	    checked are skipped);
     %rdx = second-string cursor, advanced by the same k (any alignment);
     %rsi = number of whole 64-byte steps before the second-string
	    cursor reaches the next 4096-byte boundary.  */
L(main_loop_header):
leaq 64(%rdi), %rdx
movl $4096, %ecx
/* NOTE(review): %xmm9 is zeroed here but never read in this function.  */
pxor %xmm9, %xmm9
andq $-64, %rdx
subq %rdi, %rdx
leaq (%rdi, %rdx), %rax
addq %rsi, %rdx
movq %rdx, %rsi
andl $4095, %esi
subq %rsi, %rcx
shrq $6, %rcx
movq %rcx, %rsi
jmp L(loop_start)
ALIGN (4)
/* Advance both cursors by one 64-byte block.  */
L(loop):
addq $64, %rax
addq $64, %rdx
L(loop_start):
/* Branch on the counter value before the decrement: leaq does not
   modify flags, so je still sees the result of testq.  */
testq %rsi, %rsi
leaq -1(%rsi), %rsi
je L(loop_cross_page)
/* Compare 64 bytes: aligned loads (movdqa) from the first string at
   %rax, unaligned loads (movdqu) from the second at %rdx.  The four
   per-chunk results are folded with pminub into %xmm0 so that one
   pcmpeqb/pmovmskb detects a mismatch or NUL anywhere in the block.
   Chunks 1, 2 and 3 remain separately in %xmm1, %xmm5 and %xmm6.  */
L(back_to_loop):
movdqu (%rdx), %xmm0
movdqu 16(%rdx), %xmm1
movdqa (%rax), %xmm2
movdqa 16(%rax), %xmm3
pcmpeqb %xmm2, %xmm0
movdqu 32(%rdx), %xmm5
pcmpeqb %xmm3, %xmm1
pminub %xmm2, %xmm0
movdqu 48(%rdx), %xmm6
pminub %xmm3, %xmm1
movdqa 32(%rax), %xmm2
pminub %xmm1, %xmm0
movdqa 48(%rax), %xmm3
pcmpeqb %xmm2, %xmm5
pcmpeqb %xmm3, %xmm6
pminub %xmm2, %xmm5
pminub %xmm3, %xmm6
pminub %xmm5, %xmm0
pminub %xmm6, %xmm0
pcmpeqb %xmm7, %xmm0
pmovmskb %xmm0, %ecx
testl %ecx, %ecx
je L(loop)
/* Something was found in this block.  Chunk 0 was overwritten by the
   fold above, so recompute it; turn all four chunks into masks and
   assemble a 64-bit mask in %rcx (chunk n at bits 16n..16n+15).
   %rdi and %rsi are reused as scratch from here on.  */
pcmpeqb %xmm7, %xmm5
movdqu (%rdx), %xmm0
pcmpeqb %xmm7, %xmm1
movdqa (%rax), %xmm2
pcmpeqb %xmm2, %xmm0
pminub %xmm2, %xmm0
pcmpeqb %xmm7, %xmm6
pcmpeqb %xmm7, %xmm0
pmovmskb %xmm1, %ecx
pmovmskb %xmm5, %r8d
pmovmskb %xmm0, %edi
salq $16, %rcx
salq $32, %r8
pmovmskb %xmm6, %esi
orq %r8, %rcx
orq %rdi, %rcx
salq $48, %rsi
orq %rsi, %rcx
/* Lowest set bit = index of the deciding byte within the block.  */
bsfq %rcx, %rcx
movzbl (%rax, %rcx), %eax
movzbl (%rdx, %rcx), %edx
subl %edx, %eax
ret
ALIGN (4)
/* The block counter reached zero.  Instead of reading 64 bytes at
   %rdx, step both cursors back by %r9 = %rdx & 63 (%r10 = -%r9) so the
   second-string loads start on a multiple of 64 (hence movdqa); the
   first-string loads at %rax + %r10 are then unaligned (movdqu).
   The cursors themselves are not modified.  */
L(loop_cross_page):
xor %r10, %r10
movq %rdx, %r9
and $63, %r9
subq %r9, %r10
movdqa (%rdx, %r10), %xmm0
movdqa 16(%rdx, %r10), %xmm1
movdqu (%rax, %r10), %xmm2
movdqu 16(%rax, %r10), %xmm3
pcmpeqb %xmm2, %xmm0
movdqa 32(%rdx, %r10), %xmm5
pcmpeqb %xmm3, %xmm1
pminub %xmm2, %xmm0
movdqa 48(%rdx, %r10), %xmm6
pminub %xmm3, %xmm1
movdqu 32(%rax, %r10), %xmm2
movdqu 48(%rax, %r10), %xmm3
pcmpeqb %xmm2, %xmm5
pcmpeqb %xmm3, %xmm6
pminub %xmm2, %xmm5
pminub %xmm3, %xmm6
pcmpeqb %xmm7, %xmm0
pcmpeqb %xmm7, %xmm1
pcmpeqb %xmm7, %xmm5
pcmpeqb %xmm7, %xmm6
/* Assemble the 64-bit mask in %rdi (chunk n at bits 16n..16n+15).  */
pmovmskb %xmm1, %ecx
pmovmskb %xmm5, %r8d
pmovmskb %xmm0, %edi
salq $16, %rcx
salq $32, %r8
pmovmskb %xmm6, %esi
orq %r8, %rdi
orq %rcx, %rdi
salq $48, %rsi
orq %rsi, %rdi
/* Shift out the %r9 low bits, which belong to bytes before the
   cursors; afterwards bit i corresponds to byte i at %rax/%rdx.
   %rsi = 63 reloads the block counter tested at L(loop_start).  */
movq %r9, %rcx
movq $63, %rsi
shrq %cl, %rdi
test %rdi, %rdi
/* Nothing found up to the end of the aligned block: resume the
   regular 64-byte compare at the unchanged cursors.  */
je L(back_to_loop)
bsfq %rdi, %rcx
movzbl (%rax, %rcx), %eax
movzbl (%rdx, %rcx), %edx
subl %edx, %eax
ret
ALIGN (4)
/* Byte-at-a-time compare of the first 64 bytes, entered at
   L(cross_page) with %rdx = 0.  %eax / %ecx hold the current byte of
   the first / second string.  */
L(cross_page_loop):
cmpb %cl, %al
jne L(different)
addq $1, %rdx
cmpq $64, %rdx
/* 64 equal, non-NUL bytes seen: continue with the vector loop.  */
je L(main_loop_header)
L(cross_page):
movzbl (%rdi, %rdx), %eax
movzbl (%rsi, %rdx), %ecx
testb %al, %al
jne L(cross_page_loop)
/* First string ended here: the result is 0 minus the second
   string's byte.  */
xorl %eax, %eax
L(different):
subl %ecx, %eax
ret
END (__strcmp_sse2_unaligned)

View File

@ -66,6 +66,7 @@
# define STRCMP_SSE2 __strncasecmp_l_sse2
# define __GI_STRCMP __GI___strncasecmp_l
#else
# define USE_AS_STRCMP
# define UPDATE_STRNCMP_COUNTER
# ifndef STRCMP
# define STRCMP strcmp
@ -88,11 +89,17 @@ ENTRY(STRCMP)
jne 1f
call __init_cpu_features
1:
#ifdef USE_AS_STRCMP
leaq __strcmp_sse2_unaligned(%rip), %rax
testl $bit_Fast_Unaligned_Load, __cpu_features+CPUID_OFFSET+index_Fast_Unaligned_Load(%rip)
jnz 3f
#else
testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
jnz 2f
leaq STRCMP_SSE42(%rip), %rax
testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
jnz 3f
#endif
2: leaq STRCMP_SSSE3(%rip), %rax
testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
jnz 3f