mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-21 20:40:05 +00:00
Add unaligned strcmp.
This commit is contained in:
parent
d34202f674
commit
8f02859f17
@ -1,3 +1,12 @@
|
||||
2013-09-03 Ondřej Bílka <neleai@seznam.cz>
|
||||
|
||||
* sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S: New file.
|
||||
* sysdeps/x86_64/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list):
|
||||
Add ifunc.
|
||||
* sysdeps/x86_64/multiarch/Makefile (sysdep_routines):
|
||||
Add strcmp-sse2-unaligned.
|
||||
* sysdeps/x86_64/multiarch/strcmp.S (strcmp): Add ifunc.
|
||||
|
||||
2013-09-02 Mike Frysinger <vapier@gentoo.org>
|
||||
|
||||
* Versions.def (libc): Add GLIBC_2.19.
|
||||
|
@ -6,8 +6,10 @@ endif
|
||||
|
||||
ifeq ($(subdir),string)
|
||||
|
||||
sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
|
||||
strend-sse4 memcmp-sse4 memcpy-ssse3 memcpy-sse2-unaligned mempcpy-ssse3 \
|
||||
sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
|
||||
strcmp-sse2-unaligned strncmp-ssse3 \
|
||||
strend-sse4 memcmp-sse4 memcpy-ssse3 \
|
||||
memcpy-sse2-unaligned mempcpy-ssse3 \
|
||||
memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
|
||||
memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
|
||||
strncase_l-ssse3 strcat-ssse3 strncat-ssse3\
|
||||
|
@ -118,6 +118,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
IFUNC_IMPL (i, name, strcmp,
|
||||
IFUNC_IMPL_ADD (array, i, strcmp, HAS_SSE4_2, __strcmp_sse42)
|
||||
IFUNC_IMPL_ADD (array, i, strcmp, HAS_SSSE3, __strcmp_ssse3)
|
||||
IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2_unaligned)
|
||||
IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2))
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/strcpy.S. */
|
||||
|
210
sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
Normal file
210
sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
Normal file
@ -0,0 +1,210 @@
|
||||
/* strcmp with unaligned loads
|
||||
Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "sysdep.h"
|
||||
#define ALIGN(x) .p2align x
|
||||
|
||||
/* int __strcmp_sse2_unaligned (const char *s1, const char *s2)

   strcmp implemented with unaligned 16-byte SSE2 loads.

   In:       %rdi = s1, %rsi = s2.
   Out:      %eax = (unsigned char) s1[i] - (unsigned char) s2[i] at the
	     first index i where the bytes differ or s1[i] is NUL.
   Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9, %r10,
	     %xmm0-%xmm7, flags.  No stack is used.

   Core trick, applied to each 16-byte pair A (from s1) and B (from s2):
	pcmpeqb A, B	B[i] = 0xff where A[i] == B[i], else 0
	pminub  A, B	B[i] = A[i] where equal, else 0
   so B[i] == 0 exactly when the bytes differ or s1 has its terminating
   NUL there.  Comparing B against an all-zero register and taking
   pmovmskb turns that into one mask bit per byte; bsf then yields the
   index of the first "interesting" byte.

   Page safety: a 64-byte read is only issued when it is known not to
   run past the end of a 4096-byte page, or when all bytes before the
   page boundary have already been checked and contain no NUL.  */
ENTRY (__strcmp_sse2_unaligned)
	movl	%edi, %eax
	xorl	%edx, %edx		/* Byte index used by L(cross_page).  */
	pxor	%xmm7, %xmm7		/* %xmm7 stays zero for the whole function.  */
	orl	%esi, %eax
	andl	$4095, %eax
	/* If the OR of both page offsets is <= 4032 (4096 - 64), each
	   offset is too, so 64 bytes can be read from both pointers
	   without leaving their pages.  Otherwise compare byte-wise.  */
	cmpl	$4032, %eax
	jg	L(cross_page)

	/* First 16 bytes.  */
	movdqu	(%rdi), %xmm1
	movdqu	(%rsi), %xmm0
	pcmpeqb	%xmm1, %xmm0
	pminub	%xmm1, %xmm0
	pxor	%xmm1, %xmm1		/* %xmm1 = 0; reused by L(next_48_bytes).  */
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %eax
	testq	%rax, %rax
	je	L(next_48_bytes)

	/* %rax = mask with bit i set when byte i differs or s1[i] == 0;
	   %rdi/%rsi still point at the start of the strings.  */
L(return):
	bsfq	%rax, %rdx
	movzbl	(%rdi, %rdx), %eax
	movzbl	(%rsi, %rdx), %edx
	subl	%edx, %eax
	ret

	ALIGN (4)
	/* Bytes 16..63.  The three 16-bit masks are shifted to bits
	   16-31, 32-47 and 48-63 of %rax so that L(return) can use a
	   single bsf; bits 0-15 are zero because the first 16 bytes
	   were already found clean.  */
L(next_48_bytes):
	movdqu	16(%rdi), %xmm6
	movdqu	16(%rsi), %xmm3
	movdqu	32(%rdi), %xmm5
	pcmpeqb	%xmm6, %xmm3
	movdqu	32(%rsi), %xmm2
	pminub	%xmm6, %xmm3
	pcmpeqb	%xmm1, %xmm3
	movdqu	48(%rdi), %xmm4
	pcmpeqb	%xmm5, %xmm2
	pmovmskb %xmm3, %edx
	movdqu	48(%rsi), %xmm0
	pminub	%xmm5, %xmm2
	pcmpeqb	%xmm1, %xmm2
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm2, %eax
	salq	$16, %rdx
	pminub	%xmm4, %xmm0
	pcmpeqb	%xmm1, %xmm0
	salq	$32, %rax
	orq	%rdx, %rax
	pmovmskb %xmm0, %ecx
	movq	%rcx, %rdx
	salq	$48, %rdx
	orq	%rdx, %rax		/* Sets ZF for the branch below.  */
	jne	L(return)

	/* Reached only after the first 64 bytes of both strings were
	   compared (vector path above or the byte loop below) without
	   finding a difference or a NUL.

	   Set up:
	     %rax = (s1 + 64) & -64, the 64-byte-aligned s1 cursor
	     %rdx = s2 + (%rax - s1), the matching (unaligned) s2 cursor
	     %rsi = number of whole 64-byte blocks that can be read from
		    %rdx before the end of its 4096-byte page.
	   The cursors may step back over bytes that were already
	   compared; re-checking them is harmless.  */
L(main_loop_header):
	leaq	64(%rdi), %rdx
	movl	$4096, %ecx
	andq	$-64, %rdx
	subq	%rdi, %rdx
	leaq	(%rdi, %rdx), %rax
	addq	%rsi, %rdx
	movq	%rdx, %rsi
	andl	$4095, %esi
	subq	%rsi, %rcx
	shrq	$6, %rcx
	movq	%rcx, %rsi
	jmp	L(loop_start)

	ALIGN (4)
L(loop):
	addq	$64, %rax
	addq	$64, %rdx
L(loop_start):
	/* Branch on the counter value *before* the decrement: lea does
	   not modify the flags set by test.  */
	testq	%rsi, %rsi
	leaq	-1(%rsi), %rsi
	je	L(loop_cross_page)
L(back_to_loop):
	/* Compare 64 bytes: aligned loads from s1 (%rax), unaligned
	   loads from s2 (%rdx).  The four per-block results are folded
	   with pminub so that one compare + pmovmskb decides whether
	   anything in the 64 bytes needs attention.  */
	movdqu	(%rdx), %xmm0
	movdqu	16(%rdx), %xmm1
	movdqa	(%rax), %xmm2
	movdqa	16(%rax), %xmm3
	pcmpeqb	%xmm2, %xmm0
	movdqu	32(%rdx), %xmm5
	pcmpeqb	%xmm3, %xmm1
	pminub	%xmm2, %xmm0
	movdqu	48(%rdx), %xmm6
	pminub	%xmm3, %xmm1
	movdqa	32(%rax), %xmm2
	pminub	%xmm1, %xmm0
	movdqa	48(%rax), %xmm3
	pcmpeqb	%xmm2, %xmm5
	pcmpeqb	%xmm3, %xmm6
	pminub	%xmm2, %xmm5
	pminub	%xmm3, %xmm6
	pminub	%xmm5, %xmm0
	pminub	%xmm6, %xmm0
	pcmpeqb	%xmm7, %xmm0
	pmovmskb %xmm0, %ecx
	testl	%ecx, %ecx
	je	L(loop)

	/* Something was found.  %xmm1, %xmm5 and %xmm6 still hold the
	   per-block results for bytes 16-31, 32-47 and 48-63; block 0
	   was overwritten by the fold and is recomputed.  Build the
	   64-bit mask and locate the first set bit.  %rdi and %rsi are
	   free to reuse as scratch from here on.  */
	pcmpeqb	%xmm7, %xmm5
	movdqu	(%rdx), %xmm0
	pcmpeqb	%xmm7, %xmm1
	movdqa	(%rax), %xmm2
	pcmpeqb	%xmm2, %xmm0
	pminub	%xmm2, %xmm0
	pcmpeqb	%xmm7, %xmm6
	pcmpeqb	%xmm7, %xmm0
	pmovmskb %xmm1, %ecx
	pmovmskb %xmm5, %r8d
	pmovmskb %xmm0, %edi
	salq	$16, %rcx
	salq	$32, %r8
	pmovmskb %xmm6, %esi
	orq	%r8, %rcx
	orq	%rdi, %rcx
	salq	$48, %rsi
	orq	%rsi, %rcx
	bsfq	%rcx, %rcx
	movzbl	(%rax, %rcx), %eax
	movzbl	(%rdx, %rcx), %edx
	subl	%edx, %eax
	ret

	ALIGN (4)
	/* The next 64-byte read from %rdx is not known to stay inside
	   the current page.  With %r9 = %rdx & 63 and %r10 = -%r9, step
	   both cursors back by %r9 so the s2 side becomes 64-byte
	   aligned (hence movdqa on %rdx and movdqu on %rax here), and
	   compare that block.  Mask bits below %r9 belong to bytes in
	   front of the cursors and are shifted out.  If the remaining
	   bytes are clean, no NUL lies between %rdx and the 64-byte
	   boundary, so the string continues past it and the unaligned
	   read at L(back_to_loop) is safe.  */
L(loop_cross_page):
	xor	%r10, %r10
	movq	%rdx, %r9
	and	$63, %r9
	subq	%r9, %r10

	movdqa	(%rdx, %r10), %xmm0
	movdqa	16(%rdx, %r10), %xmm1
	movdqu	(%rax, %r10), %xmm2
	movdqu	16(%rax, %r10), %xmm3
	pcmpeqb	%xmm2, %xmm0
	movdqa	32(%rdx, %r10), %xmm5
	pcmpeqb	%xmm3, %xmm1
	pminub	%xmm2, %xmm0
	movdqa	48(%rdx, %r10), %xmm6
	pminub	%xmm3, %xmm1
	movdqu	32(%rax, %r10), %xmm2
	movdqu	48(%rax, %r10), %xmm3
	pcmpeqb	%xmm2, %xmm5
	pcmpeqb	%xmm3, %xmm6
	pminub	%xmm2, %xmm5
	pminub	%xmm3, %xmm6

	pcmpeqb	%xmm7, %xmm0
	pcmpeqb	%xmm7, %xmm1
	pcmpeqb	%xmm7, %xmm5
	pcmpeqb	%xmm7, %xmm6

	pmovmskb %xmm1, %ecx
	pmovmskb %xmm5, %r8d
	pmovmskb %xmm0, %edi
	salq	$16, %rcx
	salq	$32, %r8
	pmovmskb %xmm6, %esi
	orq	%r8, %rdi
	orq	%rcx, %rdi
	salq	$48, %rsi
	orq	%rsi, %rdi
	movq	%r9, %rcx
	movq	$63, %rsi		/* Reload the block counter for the loop.  */
	shrq	%cl, %rdi		/* Bit i now refers to byte i at %rax/%rdx.  */
	test	%rdi, %rdi
	je	L(back_to_loop)
	bsfq	%rdi, %rcx
	movzbl	(%rax, %rcx), %eax
	movzbl	(%rdx, %rcx), %edx
	subl	%edx, %eax
	ret

	ALIGN (4)
	/* Byte-wise comparison of the first 64 bytes, entered at
	   L(cross_page) with %rdx = 0 when a 64-byte vector read at the
	   start could leave a page.  %al = s1[%rdx], %cl = s2[%rdx].  */
L(cross_page_loop):
	cmpb	%cl, %al
	jne	L(different)
	addq	$1, %rdx
	cmpq	$64, %rdx
	je	L(main_loop_header)
L(cross_page):
	movzbl	(%rdi, %rdx), %eax
	movzbl	(%rsi, %rdx), %ecx
	testb	%al, %al
	jne	L(cross_page_loop)
	/* s1 ended here: the result is 0 - s2[%rdx].  */
	xorl	%eax, %eax
L(different):
	subl	%ecx, %eax
	ret
END (__strcmp_sse2_unaligned)
|
@ -66,6 +66,7 @@
|
||||
# define STRCMP_SSE2 __strncasecmp_l_sse2
|
||||
# define __GI_STRCMP __GI___strncasecmp_l
|
||||
#else
|
||||
# define USE_AS_STRCMP
|
||||
# define UPDATE_STRNCMP_COUNTER
|
||||
# ifndef STRCMP
|
||||
# define STRCMP strcmp
|
||||
@ -88,11 +89,17 @@ ENTRY(STRCMP)
|
||||
jne 1f
|
||||
call __init_cpu_features
|
||||
1:
|
||||
#ifdef USE_AS_STRCMP
|
||||
leaq __strcmp_sse2_unaligned(%rip), %rax
|
||||
testl $bit_Fast_Unaligned_Load, __cpu_features+CPUID_OFFSET+index_Fast_Unaligned_Load(%rip)
|
||||
jnz 3f
|
||||
#else
|
||||
testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
|
||||
jnz 2f
|
||||
leaq STRCMP_SSE42(%rip), %rax
|
||||
testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
|
||||
jnz 3f
|
||||
#endif
|
||||
2: leaq STRCMP_SSSE3(%rip), %rax
|
||||
testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
|
||||
jnz 3f
|
||||
|
Loading…
Reference in New Issue
Block a user