mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-22 02:40:08 +00:00
Optimized memcmp and wmemcmp for x86-64 and x86-32
This commit is contained in:
parent
556a200797
commit
be13f7bff6
29
ChangeLog
29
ChangeLog
@ -1,3 +1,32 @@
|
||||
2011-09-27 Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
|
||||
|
||||
* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
|
||||
memcmp-ssse3 wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c
|
||||
* sysdeps/x86_64/multiarch/memcmp-ssse3.S: New file.
|
||||
* sysdeps/x86_64/multiarch/memcmp.S: Update. Add __memcmp_ssse3.
|
||||
* sysdeps/x86_64/multiarch/memcmp-sse4.S: Update.
|
||||
(USE_AS_WMEMCMP): New macro.
|
||||
Fixing indents.
|
||||
* sysdeps/x86_64/multiarch/wmemcmp.S: New file.
|
||||
* sysdeps/x86_64/multiarch/wmemcmp-ssse3.S: New file.
|
||||
* sysdeps/x86_64/multiarch/wmemcmp-sse4.S: New file.
|
||||
* sysdeps/x86_64/multiarch/wmemcmp-c.c: New file.
|
||||
* sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add
|
||||
wmemcmp-ssse3 wmemcmp-sse4 wmemcmp-c
|
||||
* sysdeps/i386/i686/multiarch/wmemcmp.S: New file.
|
||||
* sysdeps/i386/i686/multiarch/wmemcmp-c.c: New file.
|
||||
* sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S: New file.
|
||||
* sysdeps/i386/i686/multiarch/wmemcmp-sse4.S: New file.
|
||||
* sysdeps/i386/i686/multiarch/memcmp-sse4.S: Update.
|
||||
(USE_AS_WMEMCMP): New macro.
|
||||
* sysdeps/i386/i686/multiarch/memcmp-ssse3.S: Likewise.
|
||||
* sysdeps/string/test-memcmp.c: Update.
|
||||
Fix simple_wmemcmp.
|
||||
Add new tests.
|
||||
* wcsmbs/wmemcmp.c: Update.
|
||||
(WMEMCMP): New macro.
|
||||
Fix overflow bug.
|
||||
|
||||
2011-10-12 Andreas Jaeger <aj@suse.de>
|
||||
|
||||
[BZ #13268]
|
||||
|
2
NEWS
2
NEWS
@ -33,7 +33,7 @@ Version 2.15
|
||||
* Optimized strchr and strrchr for SSE on x86-32.
|
||||
Contributed by Liubov Dmitrieva.
|
||||
|
||||
* Optimized memchr, memrchr, rawmemchr for x86-64 and x86-32.
|
||||
* Optimized memchr, memrchr, rawmemchr, memcmp, wmemcmp for x86-64 and x86-32.
|
||||
Contributed by Liubov Dmitrieva.
|
||||
|
||||
* New interfaces: scandirat, scandirat64
|
||||
|
@ -29,9 +29,21 @@
|
||||
# define MEMCPY wmemcpy
|
||||
# define SIMPLE_MEMCMP simple_wmemcmp
|
||||
# define CHAR wchar_t
|
||||
# define MAX_CHAR 256000
|
||||
# define UCHAR uint32_t
|
||||
# define UCHAR wchar_t
|
||||
# define CHARBYTES 4
|
||||
# define CHAR__MIN WCHAR_MIN
|
||||
# define CHAR__MAX WCHAR_MAX
|
||||
/* Reference implementation of wmemcmp for the test: SIMPLE_MEMCMP is
   defined to this function in the wide-character build and is used to
   compute the expected result.  Compares at most N wchar_t elements of
   S1 and S2 in order and returns -1 or 1 for the first pair that
   differs (using the native ordering of wchar_t), or 0 if no pair
   differs or N is 0.  */
int
|
||||
simple_wmemcmp (const wchar_t *s1, const wchar_t *s2, size_t n)
|
||||
{
|
||||
int ret = 0;
|
||||
/* Warning!
|
||||
wmemcmp has to use SIGNED comparison for elements.
|
||||
memcmp has to use UNSIGNED comparison for elements.
|
||||
*/
|
||||
/* The pointers advance only while the current pair compares equal,
   so the loop stops at the first difference with RET already set.  */
while (n-- && (ret = *s1 < *s2 ? -1 : *s1 == *s2 ? 0 : 1) == 0) {s1++; s2++;}
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
# define MEMCMP memcmp
|
||||
# define MEMCPY memcpy
|
||||
@ -40,18 +52,20 @@
|
||||
# define MAX_CHAR 255
|
||||
# define UCHAR unsigned char
|
||||
# define CHARBYTES 1
|
||||
#endif
|
||||
|
||||
typedef int (*proto_t) (const CHAR *, const CHAR *, size_t);
|
||||
# define CHAR__MIN CHAR_MIN
|
||||
# define CHAR__MAX CHAR_MAX
|
||||
|
||||
int
|
||||
SIMPLE_MEMCMP (const CHAR *s1, const CHAR *s2, size_t n)
|
||||
simple_memcmp (const char *s1, const char *s2, size_t n)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
while (n-- && (ret = *(UCHAR *) s1++ - *(UCHAR *) s2++) == 0);
|
||||
while (n-- && (ret = *(unsigned char *) s1++ - *(unsigned char *) s2++) == 0);
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
typedef int (*proto_t) (const CHAR *, const CHAR *, size_t);
|
||||
|
||||
IMPL (SIMPLE_MEMCMP, 0)
|
||||
IMPL (MEMCMP, 1)
|
||||
@ -121,7 +135,7 @@ do_test (size_t align1, size_t align2, size_t len, int exp_result)
|
||||
s2 = (CHAR *) (buf2 + align2);
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
s1[i] = s2[i] = 1 + (23 << ((CHARBYTES - 1) * 8)) * i % MAX_CHAR;
|
||||
s1[i] = s2[i] = 1 + (23 << ((CHARBYTES - 1) * 8)) * i % CHAR__MAX;
|
||||
|
||||
s1[len] = align1;
|
||||
s2[len] = align2;
|
||||
@ -412,8 +426,8 @@ check1 (void)
|
||||
s2[99] = 1;
|
||||
s1[100] = 116;
|
||||
s2[100] = 116;
|
||||
s1[101] = -13;
|
||||
s2[101] = -13;
|
||||
s1[101] = CHAR__MIN;
|
||||
s2[101] = CHAR__MAX;
|
||||
s1[102] = -109;
|
||||
s2[102] = -109;
|
||||
s1[103] = 1;
|
||||
@ -434,8 +448,8 @@ check1 (void)
|
||||
s2[110] = -109;
|
||||
s1[111] = 1;
|
||||
s2[111] = 1;
|
||||
s1[112] = 20;
|
||||
s2[112] = 20;
|
||||
s1[112] = CHAR__MAX;
|
||||
s2[112] = CHAR__MIN;
|
||||
s1[113] = -13;
|
||||
s2[113] = -13;
|
||||
s1[114] = -109;
|
||||
@ -444,9 +458,12 @@ check1 (void)
|
||||
s2[115] = 1;
|
||||
|
||||
n = 116;
|
||||
exp_result = SIMPLE_MEMCMP (s1, s2, n);
|
||||
FOR_EACH_IMPL (impl, 0)
|
||||
check_result (impl, s1, s2, n, exp_result);
|
||||
for (size_t i = 0; i < n; i++)
|
||||
{
|
||||
exp_result = SIMPLE_MEMCMP (s1 + i, s2 + i, n - i);
|
||||
FOR_EACH_IMPL (impl, 0)
|
||||
check_result (impl, s1 + i, s2 + i, n - i, exp_result);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -17,7 +17,8 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
|
||||
strchr-sse2 strrchr-sse2 strchr-sse2-bsf strrchr-sse2-bsf \
|
||||
wcscmp-sse2 wcscmp-c memchr-sse2 memchr-sse2-bsf \
|
||||
memrchr-sse2 memrchr-sse2-bsf memrchr-c \
|
||||
rawmemchr-sse2 rawmemchr-sse2-bsf
|
||||
rawmemchr-sse2 rawmemchr-sse2-bsf \
|
||||
wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c
|
||||
ifeq (yes,$(config-cflags-sse4))
|
||||
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
|
||||
CFLAGS-varshift.c += -msse4
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* memcmp with SSE4.2
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
/* memcmp with SSE4.2, wmemcmp with SSE4.2
|
||||
Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||
Contributed by Intel Corporation.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
@ -20,84 +20,97 @@
|
||||
|
||||
#ifndef NOT_IN_libc
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "asm-syntax.h"
|
||||
# include <sysdep.h>
|
||||
|
||||
#ifndef MEMCMP
|
||||
# define MEMCMP __memcmp_sse4_2
|
||||
#endif
|
||||
# ifndef MEMCMP
|
||||
# define MEMCMP __memcmp_sse4_2
|
||||
# endif
|
||||
|
||||
#define CFI_PUSH(REG) \
|
||||
cfi_adjust_cfa_offset (4); \
|
||||
cfi_rel_offset (REG, 0)
|
||||
# define CFI_PUSH(REG) \
|
||||
cfi_adjust_cfa_offset (4); \
|
||||
cfi_rel_offset (REG, 0)
|
||||
|
||||
#define CFI_POP(REG) \
|
||||
cfi_adjust_cfa_offset (-4); \
|
||||
cfi_restore (REG)
|
||||
# define CFI_POP(REG) \
|
||||
cfi_adjust_cfa_offset (-4); \
|
||||
cfi_restore (REG)
|
||||
|
||||
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
||||
#define POP(REG) popl REG; CFI_POP (REG)
|
||||
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
||||
# define POP(REG) popl REG; CFI_POP (REG)
|
||||
|
||||
#define PARMS 4
|
||||
#define BLK1 PARMS
|
||||
#define BLK2 BLK1+4
|
||||
#define LEN BLK2+4
|
||||
#define RETURN POP (%ebx); ret; CFI_PUSH (%ebx)
|
||||
# define PARMS 4
|
||||
# define BLK1 PARMS
|
||||
# define BLK2 BLK1 + 4
|
||||
# define LEN BLK2 + 4
|
||||
# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx)
|
||||
|
||||
|
||||
#ifdef SHARED
|
||||
# define JMPTBL(I, B) I - B
|
||||
# ifdef SHARED
|
||||
# define JMPTBL(I, B) I - B
|
||||
|
||||
/* Load an entry in a jump table into EBX and branch to it. TABLE is a
|
||||
jump table with relative offsets. INDEX is a register that contains the
|
||||
index into the jump table. SCALE is the scale of INDEX. */
|
||||
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
|
||||
/* We first load PC into EBX. */ \
|
||||
call __i686.get_pc_thunk.bx; \
|
||||
/* Get the address of the jump table. */ \
|
||||
addl $(TABLE - .), %ebx; \
|
||||
/* Get the entry and convert the relative offset to the \
|
||||
absolute address. */ \
|
||||
addl (%ebx,INDEX,SCALE), %ebx; \
|
||||
/* We loaded the jump table and adjusted EDX/ESI. Go. */ \
|
||||
jmp *%ebx
|
||||
jump table with relative offsets. INDEX is a register that contains the
|
||||
index into the jump table. SCALE is the scale of INDEX. */
|
||||
|
||||
.section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
|
||||
.globl __i686.get_pc_thunk.bx
|
||||
.hidden __i686.get_pc_thunk.bx
|
||||
ALIGN (4)
|
||||
.type __i686.get_pc_thunk.bx,@function
|
||||
__i686.get_pc_thunk.bx:
|
||||
movl (%esp), %ebx
|
||||
ret
|
||||
#else
|
||||
# define JMPTBL(I, B) I
|
||||
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
|
||||
/* We first load PC into EBX. */ \
|
||||
call __i686.get_pc_thunk.bx; \
|
||||
/* Get the address of the jump table. */ \
|
||||
addl $(TABLE - .), %ebx; \
|
||||
/* Get the entry and convert the relative offset to the \
|
||||
absolute address. */ \
|
||||
addl (%ebx,INDEX,SCALE), %ebx; \
|
||||
/* We loaded the jump table and adjusted EDX/ESI. Go. */ \
|
||||
jmp *%ebx
|
||||
# else
|
||||
# define JMPTBL(I, B) I
|
||||
|
||||
/* Load an entry in a jump table into EBX and branch to it. TABLE is a
|
||||
jump table with relative offsets. INDEX is a register that contains the
|
||||
index into the jump table. SCALE is the scale of INDEX. */
|
||||
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
|
||||
jmp *TABLE(,INDEX,SCALE)
|
||||
#endif
|
||||
jump table with relative offsets. INDEX is a register that contains the
|
||||
index into the jump table. SCALE is the scale of INDEX. */
|
||||
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
|
||||
jmp *TABLE(,INDEX,SCALE)
|
||||
# endif
|
||||
|
||||
|
||||
/* Warning!
|
||||
wmemcmp has to use SIGNED comparison for elements.
|
||||
memcmp has to use UNSIGNED comparison for elements.
|
||||
*/
|
||||
|
||||
.section .text.sse4.2,"ax",@progbits
|
||||
ENTRY (MEMCMP)
|
||||
movl BLK1(%esp), %eax
|
||||
movl BLK2(%esp), %edx
|
||||
movl LEN(%esp), %ecx
|
||||
|
||||
# ifdef USE_AS_WMEMCMP
|
||||
shl $2, %ecx
|
||||
test %ecx, %ecx
|
||||
jz L(return0)
|
||||
# else
|
||||
cmp $1, %ecx
|
||||
jbe L(less1bytes)
|
||||
# endif
|
||||
|
||||
pxor %xmm0, %xmm0
|
||||
cmp $64, %ecx
|
||||
ja L(64bytesormore)
|
||||
cmp $8, %ecx
|
||||
PUSH (%ebx)
|
||||
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
PUSH (%ebx)
|
||||
jb L(less8bytes)
|
||||
# else
|
||||
jb L(less8bytes)
|
||||
PUSH (%ebx)
|
||||
# endif
|
||||
|
||||
add %ecx, %edx
|
||||
add %ecx, %eax
|
||||
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
|
||||
|
||||
ALIGN (4)
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
.p2align 4
|
||||
L(less8bytes):
|
||||
mov (%eax), %bl
|
||||
cmpb (%edx), %bl
|
||||
@ -141,22 +154,49 @@ L(less8bytes):
|
||||
mov 6(%eax), %bl
|
||||
cmpb 6(%edx), %bl
|
||||
je L(0bytes)
|
||||
|
||||
L(nonzero):
|
||||
POP (%ebx)
|
||||
POP (%ebx)
|
||||
mov $1, %eax
|
||||
ja L(above)
|
||||
neg %eax
|
||||
L(above):
|
||||
ret
|
||||
CFI_PUSH (%ebx)
|
||||
# endif
|
||||
|
||||
ALIGN (4)
|
||||
.p2align 4
|
||||
L(0bytes):
|
||||
POP (%ebx)
|
||||
POP (%ebx)
|
||||
xor %eax, %eax
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
# ifdef USE_AS_WMEMCMP
|
||||
|
||||
/* for wmemcmp, case N == 1 */
|
||||
|
||||
.p2align 4
|
||||
L(less8bytes):
|
||||
mov (%eax), %ecx
|
||||
cmp (%edx), %ecx
|
||||
je L(return0)
|
||||
mov $1, %eax
|
||||
jg L(find_diff_bigger)
|
||||
neg %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(find_diff_bigger):
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(return0):
|
||||
xor %eax, %eax
|
||||
ret
|
||||
# endif
|
||||
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
.p2align 4
|
||||
L(less1bytes):
|
||||
jb L(0bytesend)
|
||||
movzbl (%eax), %eax
|
||||
@ -164,14 +204,14 @@ L(less1bytes):
|
||||
sub %edx, %eax
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
.p2align 4
|
||||
L(0bytesend):
|
||||
xor %eax, %eax
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
# endif
|
||||
.p2align 4
|
||||
L(64bytesormore):
|
||||
PUSH (%ebx)
|
||||
PUSH (%ebx)
|
||||
mov %ecx, %ebx
|
||||
mov $64, %ecx
|
||||
sub $64, %ebx
|
||||
@ -208,7 +248,14 @@ L(64bytesormore_loop):
|
||||
add %ecx, %eax
|
||||
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
|
||||
|
||||
ALIGN (4)
|
||||
# ifdef USE_AS_WMEMCMP
|
||||
|
||||
/* This label is needed only to fill the unused slots of table_64bytes.  */
|
||||
L(unreal_case):
|
||||
/* no code here */
|
||||
|
||||
# endif
|
||||
.p2align 4
|
||||
L(find_16diff):
|
||||
sub $16, %ecx
|
||||
L(find_32diff):
|
||||
@ -218,9 +265,9 @@ L(find_48diff):
|
||||
L(find_64diff):
|
||||
add %ecx, %edx
|
||||
add %ecx, %eax
|
||||
jmp L(16bytes)
|
||||
|
||||
ALIGN (4)
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
.p2align 4
|
||||
L(16bytes):
|
||||
mov -16(%eax), %ecx
|
||||
mov -16(%edx), %ebx
|
||||
@ -243,8 +290,30 @@ L(4bytes):
|
||||
mov $0, %eax
|
||||
jne L(find_diff)
|
||||
RETURN
|
||||
# else
|
||||
.p2align 4
|
||||
L(16bytes):
|
||||
mov -16(%eax), %ecx
|
||||
cmp -16(%edx), %ecx
|
||||
jne L(find_diff)
|
||||
L(12bytes):
|
||||
mov -12(%eax), %ecx
|
||||
cmp -12(%edx), %ecx
|
||||
jne L(find_diff)
|
||||
L(8bytes):
|
||||
mov -8(%eax), %ecx
|
||||
cmp -8(%edx), %ecx
|
||||
jne L(find_diff)
|
||||
L(4bytes):
|
||||
mov -4(%eax), %ecx
|
||||
cmp -4(%edx), %ecx
|
||||
mov $0, %eax
|
||||
jne L(find_diff)
|
||||
RETURN
|
||||
# endif
|
||||
|
||||
ALIGN (4)
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
.p2align 4
|
||||
L(49bytes):
|
||||
movdqu -49(%eax), %xmm1
|
||||
movdqu -49(%edx), %xmm2
|
||||
@ -285,7 +354,7 @@ L(5bytes):
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
.p2align 4
|
||||
L(50bytes):
|
||||
mov $-50, %ebx
|
||||
movdqu -50(%eax), %xmm1
|
||||
@ -330,7 +399,7 @@ L(2bytes):
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
.p2align 4
|
||||
L(51bytes):
|
||||
mov $-51, %ebx
|
||||
movdqu -51(%eax), %xmm1
|
||||
@ -378,8 +447,8 @@ L(1bytes):
|
||||
mov $0, %eax
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
# endif
|
||||
.p2align 4
|
||||
L(52bytes):
|
||||
movdqu -52(%eax), %xmm1
|
||||
movdqu -52(%edx), %xmm2
|
||||
@ -402,13 +471,18 @@ L(20bytes):
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
mov -4(%eax), %ecx
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
mov -4(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
# else
|
||||
cmp -4(%edx), %ecx
|
||||
# endif
|
||||
mov $0, %eax
|
||||
jne L(find_diff)
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
.p2align 4
|
||||
L(53bytes):
|
||||
movdqu -53(%eax), %xmm1
|
||||
movdqu -53(%edx), %xmm2
|
||||
@ -440,7 +514,7 @@ L(21bytes):
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
.p2align 4
|
||||
L(54bytes):
|
||||
movdqu -54(%eax), %xmm1
|
||||
movdqu -54(%edx), %xmm2
|
||||
@ -476,7 +550,7 @@ L(22bytes):
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
.p2align 4
|
||||
L(55bytes):
|
||||
movdqu -55(%eax), %xmm1
|
||||
movdqu -55(%edx), %xmm2
|
||||
@ -513,8 +587,8 @@ L(23bytes):
|
||||
mov $0, %eax
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
# endif
|
||||
.p2align 4
|
||||
L(56bytes):
|
||||
movdqu -56(%eax), %xmm1
|
||||
movdqu -56(%edx), %xmm2
|
||||
@ -538,18 +612,27 @@ L(24bytes):
|
||||
jnc L(less16bytes)
|
||||
|
||||
mov -8(%eax), %ecx
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
mov -8(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
# else
|
||||
cmp -8(%edx), %ecx
|
||||
# endif
|
||||
jne L(find_diff)
|
||||
|
||||
mov -4(%eax), %ecx
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
mov -4(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
# else
|
||||
cmp -4(%edx), %ecx
|
||||
# endif
|
||||
mov $0, %eax
|
||||
jne L(find_diff)
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
.p2align 4
|
||||
L(57bytes):
|
||||
movdqu -57(%eax), %xmm1
|
||||
movdqu -57(%edx), %xmm2
|
||||
@ -585,7 +668,7 @@ L(25bytes):
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
.p2align 4
|
||||
L(58bytes):
|
||||
movdqu -58(%eax), %xmm1
|
||||
movdqu -58(%edx), %xmm2
|
||||
@ -627,7 +710,7 @@ L(26bytes):
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
.p2align 4
|
||||
L(59bytes):
|
||||
movdqu -59(%eax), %xmm1
|
||||
movdqu -59(%edx), %xmm2
|
||||
@ -668,8 +751,8 @@ L(27bytes):
|
||||
mov $0, %eax
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
# endif
|
||||
.p2align 4
|
||||
L(60bytes):
|
||||
movdqu -60(%eax), %xmm1
|
||||
movdqu -60(%edx), %xmm2
|
||||
@ -691,22 +774,38 @@ L(28bytes):
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
|
||||
mov -12(%eax), %ecx
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
mov -12(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
# else
|
||||
cmp -12(%edx), %ecx
|
||||
# endif
|
||||
jne L(find_diff)
|
||||
|
||||
mov -8(%eax), %ecx
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
mov -8(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
# else
|
||||
cmp -8(%edx), %ecx
|
||||
# endif
|
||||
jne L(find_diff)
|
||||
|
||||
mov -4(%eax), %ecx
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
mov -4(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
# else
|
||||
cmp -4(%edx), %ecx
|
||||
# endif
|
||||
mov $0, %eax
|
||||
jne L(find_diff)
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
.p2align 4
|
||||
L(61bytes):
|
||||
movdqu -61(%eax), %xmm1
|
||||
movdqu -61(%edx), %xmm2
|
||||
@ -749,7 +848,7 @@ L(29bytes):
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
.p2align 4
|
||||
L(62bytes):
|
||||
movdqu -62(%eax), %xmm1
|
||||
movdqu -62(%edx), %xmm2
|
||||
@ -792,7 +891,7 @@ L(30bytes):
|
||||
jne L(end)
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
.p2align 4
|
||||
L(63bytes):
|
||||
movdqu -63(%eax), %xmm1
|
||||
movdqu -63(%edx), %xmm2
|
||||
@ -838,8 +937,9 @@ L(31bytes):
|
||||
mov $0, %eax
|
||||
jne L(end)
|
||||
RETURN
|
||||
# endif
|
||||
|
||||
ALIGN (4)
|
||||
.p2align 4
|
||||
L(64bytes):
|
||||
movdqu -64(%eax), %xmm1
|
||||
movdqu -64(%edx), %xmm2
|
||||
@ -863,28 +963,45 @@ L(32bytes):
|
||||
jnc L(less16bytes)
|
||||
|
||||
mov -16(%eax), %ecx
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
mov -16(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
# else
|
||||
cmp -16(%edx), %ecx
|
||||
# endif
|
||||
jne L(find_diff)
|
||||
|
||||
mov -12(%eax), %ecx
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
mov -12(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
# else
|
||||
cmp -12(%edx), %ecx
|
||||
# endif
|
||||
jne L(find_diff)
|
||||
|
||||
mov -8(%eax), %ecx
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
mov -8(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
# else
|
||||
cmp -8(%edx), %ecx
|
||||
# endif
|
||||
jne L(find_diff)
|
||||
|
||||
mov -4(%eax), %ecx
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
mov -4(%edx), %ebx
|
||||
cmp %ebx, %ecx
|
||||
# else
|
||||
cmp -4(%edx), %ecx
|
||||
# endif
|
||||
mov $0, %eax
|
||||
jne L(find_diff)
|
||||
RETURN
|
||||
|
||||
ALIGN (4)
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
.p2align 4
|
||||
L(less16bytes):
|
||||
add %ebx, %eax
|
||||
add %ebx, %edx
|
||||
@ -910,9 +1027,35 @@ L(less16bytes):
|
||||
mov $0, %eax
|
||||
jne L(find_diff)
|
||||
RETURN
|
||||
# else
|
||||
.p2align 4
|
||||
L(less16bytes):
|
||||
add %ebx, %eax
|
||||
add %ebx, %edx
|
||||
|
||||
ALIGN (4)
|
||||
mov (%eax), %ecx
|
||||
cmp (%edx), %ecx
|
||||
jne L(find_diff)
|
||||
|
||||
mov 4(%eax), %ecx
|
||||
cmp 4(%edx), %ecx
|
||||
jne L(find_diff)
|
||||
|
||||
mov 8(%eax), %ecx
|
||||
cmp 8(%edx), %ecx
|
||||
jne L(find_diff)
|
||||
|
||||
mov 12(%eax), %ecx
|
||||
cmp 12(%edx), %ecx
|
||||
|
||||
mov $0, %eax
|
||||
jne L(find_diff)
|
||||
RETURN
|
||||
# endif
|
||||
|
||||
.p2align 4
|
||||
L(find_diff):
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
cmpb %bl, %cl
|
||||
jne L(end)
|
||||
cmp %bx, %cx
|
||||
@ -923,17 +1066,29 @@ L(find_diff):
|
||||
jne L(end)
|
||||
cmp %bx, %cx
|
||||
L(end):
|
||||
POP (%ebx)
|
||||
POP (%ebx)
|
||||
mov $1, %eax
|
||||
ja L(bigger)
|
||||
neg %eax
|
||||
L(bigger):
|
||||
ret
|
||||
# else
|
||||
POP (%ebx)
|
||||
mov $1, %eax
|
||||
jg L(bigger)
|
||||
neg %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(bigger):
|
||||
ret
|
||||
# endif
|
||||
END (MEMCMP)
|
||||
|
||||
.section .rodata.sse4.2,"a",@progbits
|
||||
ALIGN (2)
|
||||
.p2align 2
|
||||
.type L(table_64bytes), @object
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
L(table_64bytes):
|
||||
.int JMPTBL (L(0bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(1bytes), L(table_64bytes))
|
||||
@ -1000,5 +1155,72 @@ L(table_64bytes):
|
||||
.int JMPTBL (L(62bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(63bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(64bytes), L(table_64bytes))
|
||||
.size L(table_64bytes), .-L(table_64bytes)
|
||||
# else
|
||||
L(table_64bytes):
|
||||
.int JMPTBL (L(0bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(4bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(8bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(12bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(16bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(20bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(24bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(28bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(32bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(36bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(40bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(44bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(48bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(52bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(56bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(60bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(64bytes), L(table_64bytes))
|
||||
# endif
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
5
sysdeps/i386/i686/multiarch/wmemcmp-c.c
Normal file
5
sysdeps/i386/i686/multiarch/wmemcmp-c.c
Normal file
@ -0,0 +1,5 @@
|
||||
/* When built as part of libc, compile the generic C wmemcmp from
   wcsmbs/wmemcmp.c under the name __wmemcmp_ia32, which is the default
   implementation the wmemcmp resolver in wmemcmp.S falls back to.
   WMEMCMP is presumably the function-name macro honoured by
   wcsmbs/wmemcmp.c (the ChangeLog lists it as a new macro there) --
   confirm against that file.  */
#ifndef NOT_IN_libc
|
||||
# define WMEMCMP __wmemcmp_ia32
|
||||
#endif
|
||||
|
||||
#include "wcsmbs/wmemcmp.c"
|
4
sysdeps/i386/i686/multiarch/wmemcmp-sse4.S
Normal file
4
sysdeps/i386/i686/multiarch/wmemcmp-sse4.S
Normal file
@ -0,0 +1,4 @@
|
||||
/* Build the wide-character variant from the memcmp source: the file
   included below is assembled with USE_AS_WMEMCMP defined and with its
   entry point (the MEMCMP macro) named __wmemcmp_sse4_2.  */
#define USE_AS_WMEMCMP 1
|
||||
#define MEMCMP __wmemcmp_sse4_2
|
||||
|
||||
#include "memcmp-sse4.S"
|
4
sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S
Normal file
4
sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S
Normal file
@ -0,0 +1,4 @@
|
||||
/* Build the wide-character variant from the SSSE3 memcmp source, with
   USE_AS_WMEMCMP defined and MEMCMP set to __wmemcmp_ssse3.
   NOTE(review): assumes memcmp-ssse3.S keys on these two macros the
   same way memcmp-sse4.S does -- confirm in that file.  */
#define USE_AS_WMEMCMP 1
|
||||
#define MEMCMP __wmemcmp_ssse3
|
||||
|
||||
#include "memcmp-ssse3.S"
|
59
sysdeps/i386/i686/multiarch/wmemcmp.S
Normal file
59
sysdeps/i386/i686/multiarch/wmemcmp.S
Normal file
@ -0,0 +1,59 @@
|
||||
/* Multiple versions of wmemcmp
|
||||
Copyright (C) 2011 Free Software Foundation, Inc.
|
||||
Contributed by Intel Corporation.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <init-arch.h>
|
||||
|
||||
/* Define multiple versions only for the definition in libc. */
|
||||
|
||||
#ifndef NOT_IN_libc
|
||||
/* PC thunk: loads the word at the top of the stack on entry (the
   caller's return address, i.e. the address of the instruction after
   the `call') into %ebx and returns.  The symbol is global but hidden
   and lives in its own .gnu.linkonce section.  */
.section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
|
||||
.globl __i686.get_pc_thunk.bx
|
||||
.hidden __i686.get_pc_thunk.bx
|
||||
.p2align 4
|
||||
.type __i686.get_pc_thunk.bx,@function
|
||||
__i686.get_pc_thunk.bx:
|
||||
movl (%esp), %ebx
|
||||
ret
|
||||
|
||||
.text
|
||||
/* Resolver for wmemcmp.  The symbol is typed as a GNU indirect
   function just below; this code returns in %eax the address of one
   of __wmemcmp_ia32, __wmemcmp_ssse3 or __wmemcmp_sse4_2, selected
   from the SSSE3 and SSE4_2 bits stored in __cpu_features.  */
ENTRY(wmemcmp)
|
||||
.type wmemcmp, @gnu_indirect_function
|
||||
/* %ebx is used as the GOT base for the @GOTOFF references below, so
   save it first and describe the push to the unwinder.  */
pushl %ebx
|
||||
cfi_adjust_cfa_offset (4)
|
||||
cfi_rel_offset (ebx, 0)
|
||||
/* %ebx = address of the next instruction, then adjusted to the GOT.  */
call __i686.get_pc_thunk.bx
|
||||
addl $_GLOBAL_OFFSET_TABLE_, %ebx
|
||||
/* Call __init_cpu_features only while the field at KIND_OFFSET is
   zero (presumably meaning "not initialized yet" -- confirm in
   init-arch.h).  */
cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
|
||||
jne 1f
|
||||
call __init_cpu_features
|
||||
/* Default choice; overridden below when the feature bits allow.  */
1: leal __wmemcmp_ia32@GOTOFF(%ebx), %eax
|
||||
testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
|
||||
jz 2f
|
||||
/* SSSE3 bit is set.  */
leal __wmemcmp_ssse3@GOTOFF(%ebx), %eax
|
||||
/* SSE4_2 is tested only after the SSSE3 test succeeded, so the SSE4.2
   version is selected only when both bits are set.  */
testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
|
||||
jz 2f
|
||||
leal __wmemcmp_sse4_2@GOTOFF(%ebx), %eax
|
||||
/* Restore %ebx and return the selected address in %eax.  */
2: popl %ebx
|
||||
cfi_adjust_cfa_offset (-4)
|
||||
cfi_restore (ebx)
|
||||
ret
|
||||
END(wmemcmp)
|
||||
#endif
|
@ -15,7 +15,8 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
|
||||
stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
|
||||
strcat-sse2-unaligned strncat-sse2-unaligned \
|
||||
strcat-ssse3 strncat-ssse3 strlen-sse2-pminub \
|
||||
strrchr-sse2-no-bsf strchr-sse2-no-bsf
|
||||
strrchr-sse2-no-bsf strchr-sse2-no-bsf \
|
||||
memcmp-ssse3 wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c
|
||||
ifeq (yes,$(config-cflags-sse4))
|
||||
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
|
||||
CFLAGS-varshift.c += -msse4
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* memcmp with SSE4.1
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
/* memcmp with SSE4.1, wmemcmp with SSE4.1
|
||||
Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||
Contributed by Intel Corporation.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
@ -20,43 +20,54 @@
|
||||
|
||||
#ifndef NOT_IN_libc
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "asm-syntax.h"
|
||||
# include <sysdep.h>
|
||||
|
||||
#ifndef MEMCMP
|
||||
# define MEMCMP __memcmp_sse4_1
|
||||
#endif
|
||||
# ifndef MEMCMP
|
||||
# define MEMCMP __memcmp_sse4_1
|
||||
# endif
|
||||
|
||||
#ifndef ALIGN
|
||||
# define ALIGN(n) .p2align n
|
||||
#endif
|
||||
# ifndef ALIGN
|
||||
# define ALIGN(n) .p2align n
|
||||
# endif
|
||||
|
||||
#define JMPTBL(I, B) (I - B)
|
||||
# define JMPTBL(I, B) (I - B)
|
||||
|
||||
#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
|
||||
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
|
||||
lea TABLE(%rip), %r11; \
|
||||
movslq (%r11, INDEX, SCALE), %rcx; \
|
||||
add %r11, %rcx; \
|
||||
jmp *%rcx; \
|
||||
ud2
|
||||
|
||||
/* Warning!
|
||||
wmemcmp has to use SIGNED comparison for elements.
|
||||
memcmp has to use UNSIGNED comparison for elements.
|
||||
*/
|
||||
|
||||
.section .text.sse4.1,"ax",@progbits
|
||||
ENTRY (MEMCMP)
|
||||
# ifdef USE_AS_WMEMCMP
|
||||
shl $2, %rdx
|
||||
# endif
|
||||
pxor %xmm0, %xmm0
|
||||
cmp $79, %rdx
|
||||
ja L(79bytesormore)
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
cmp $1, %rdx
|
||||
je L(firstbyte)
|
||||
# endif
|
||||
add %rdx, %rsi
|
||||
add %rdx, %rdi
|
||||
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
|
||||
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
ALIGN (4)
|
||||
L(firstbyte):
|
||||
movzbl (%rdi), %eax
|
||||
movzbl (%rsi), %ecx
|
||||
sub %ecx, %eax
|
||||
ret
|
||||
# endif
|
||||
|
||||
ALIGN (4)
|
||||
L(79bytesormore):
|
||||
@ -308,11 +319,11 @@ L(less32bytesin256):
|
||||
|
||||
ALIGN (4)
|
||||
L(512bytesormore):
|
||||
#ifdef DATA_CACHE_SIZE_HALF
|
||||
# ifdef DATA_CACHE_SIZE_HALF
|
||||
mov $DATA_CACHE_SIZE_HALF, %r8
|
||||
#else
|
||||
# else
|
||||
mov __x86_64_data_cache_size_half(%rip), %r8
|
||||
#endif
|
||||
# endif
|
||||
mov %r8, %r9
|
||||
shr $1, %r8
|
||||
add %r9, %r8
|
||||
@ -624,11 +635,11 @@ L(less32bytesin256in2alinged):
|
||||
|
||||
ALIGN (4)
|
||||
L(512bytesormorein2aligned):
|
||||
#ifdef DATA_CACHE_SIZE_HALF
|
||||
# ifdef DATA_CACHE_SIZE_HALF
|
||||
mov $DATA_CACHE_SIZE_HALF, %r8
|
||||
#else
|
||||
# else
|
||||
mov __x86_64_data_cache_size_half(%rip), %r8
|
||||
#endif
|
||||
# endif
|
||||
mov %r8, %r9
|
||||
shr $1, %r8
|
||||
add %r9, %r8
|
||||
@ -667,6 +678,7 @@ L(64bytesormore_loopin2aligned):
|
||||
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
|
||||
L(L2_L3_cache_aglined):
|
||||
sub $64, %rdx
|
||||
|
||||
ALIGN (4)
|
||||
L(L2_L3_aligned_128bytes_loop):
|
||||
prefetchnta 0x1c0(%rdi)
|
||||
@ -803,13 +815,19 @@ L(12bytes):
|
||||
jne L(diffin8bytes)
|
||||
L(4bytes):
|
||||
mov -4(%rsi), %ecx
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
mov -4(%rdi), %eax
|
||||
cmp %eax, %ecx
|
||||
# else
|
||||
cmp -4(%rdi), %ecx
|
||||
# endif
|
||||
jne L(diffin4bytes)
|
||||
L(0bytes):
|
||||
xor %eax, %eax
|
||||
ret
|
||||
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
/* unreal case for wmemcmp */
|
||||
ALIGN (4)
|
||||
L(65bytes):
|
||||
movdqu -65(%rdi), %xmm1
|
||||
@ -1017,6 +1035,7 @@ L(1bytes):
|
||||
movzbl -1(%rsi), %ecx
|
||||
sub %ecx, %eax
|
||||
ret
|
||||
# endif
|
||||
|
||||
ALIGN (4)
|
||||
L(68bytes):
|
||||
@ -1047,13 +1066,20 @@ L(20bytes):
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
mov -4(%rdi), %eax
|
||||
mov -4(%rsi), %ecx
|
||||
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
mov -4(%rdi), %eax
|
||||
cmp %eax, %ecx
|
||||
# else
|
||||
cmp -4(%rdi), %ecx
|
||||
# endif
|
||||
jne L(diffin4bytes)
|
||||
xor %eax, %eax
|
||||
ret
|
||||
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
/* unreal cases for wmemcmp */
|
||||
ALIGN (4)
|
||||
L(69bytes):
|
||||
movdqu -69(%rsi), %xmm1
|
||||
@ -1161,6 +1187,7 @@ L(23bytes):
|
||||
jne L(diffin8bytes)
|
||||
xor %eax, %eax
|
||||
ret
|
||||
# endif
|
||||
|
||||
ALIGN (4)
|
||||
L(72bytes):
|
||||
@ -1191,13 +1218,16 @@ L(24bytes):
|
||||
pxor %xmm1, %xmm2
|
||||
ptest %xmm2, %xmm0
|
||||
jnc L(less16bytes)
|
||||
mov -8(%rdi), %rax
|
||||
|
||||
mov -8(%rsi), %rcx
|
||||
mov -8(%rdi), %rax
|
||||
cmp %rax, %rcx
|
||||
jne L(diffin8bytes)
|
||||
xor %eax, %eax
|
||||
ret
|
||||
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
/* unreal cases for wmemcmp */
|
||||
ALIGN (4)
|
||||
L(73bytes):
|
||||
movdqu -73(%rsi), %xmm1
|
||||
@ -1312,7 +1342,7 @@ L(27bytes):
|
||||
jne L(diffin4bytes)
|
||||
xor %eax, %eax
|
||||
ret
|
||||
|
||||
# endif
|
||||
ALIGN (4)
|
||||
L(76bytes):
|
||||
movdqu -76(%rsi), %xmm1
|
||||
@ -1346,13 +1376,19 @@ L(28bytes):
|
||||
mov -12(%rsi), %rcx
|
||||
cmp %rax, %rcx
|
||||
jne L(diffin8bytes)
|
||||
mov -4(%rdi), %eax
|
||||
mov -4(%rsi), %ecx
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
mov -4(%rdi), %eax
|
||||
cmp %eax, %ecx
|
||||
# else
|
||||
cmp -4(%rdi), %ecx
|
||||
# endif
|
||||
jne L(diffin4bytes)
|
||||
xor %eax, %eax
|
||||
ret
|
||||
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
/* unreal cases for wmemcmp */
|
||||
ALIGN (4)
|
||||
L(77bytes):
|
||||
movdqu -77(%rsi), %xmm1
|
||||
@ -1474,7 +1510,7 @@ L(31bytes):
|
||||
jne L(diffin8bytes)
|
||||
xor %eax, %eax
|
||||
ret
|
||||
|
||||
# endif
|
||||
ALIGN (4)
|
||||
L(64bytes):
|
||||
movdqu -64(%rdi), %xmm2
|
||||
@ -1527,7 +1563,17 @@ L(diffin8bytes):
|
||||
jne L(diffin4bytes)
|
||||
shr $32, %rcx
|
||||
shr $32, %rax
|
||||
|
||||
# ifdef USE_AS_WMEMCMP
|
||||
/* for wmemcmp */
|
||||
cmp %eax, %ecx
|
||||
jne L(diffin4bytes)
|
||||
xor %eax, %eax
|
||||
ret
|
||||
# endif
|
||||
|
||||
L(diffin4bytes):
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
cmp %cx, %ax
|
||||
jne L(diffin2bytes)
|
||||
shr $16, %ecx
|
||||
@ -1546,11 +1592,28 @@ L(end):
|
||||
and $0xff, %ecx
|
||||
sub %ecx, %eax
|
||||
ret
|
||||
# else
|
||||
|
||||
/* for wmemcmp */
|
||||
mov $1, %eax
|
||||
jl L(nequal_bigger)
|
||||
neg %eax
|
||||
ret
|
||||
|
||||
ALIGN (4)
|
||||
L(nequal_bigger):
|
||||
ret
|
||||
|
||||
L(unreal_case):
|
||||
xor %eax, %eax
|
||||
ret
|
||||
# endif
|
||||
|
||||
END (MEMCMP)
|
||||
|
||||
.section .rodata.sse4.1,"a",@progbits
|
||||
ALIGN (3)
|
||||
# ifndef USE_AS_WMEMCMP
|
||||
L(table_64bytes):
|
||||
.int JMPTBL (L(0bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(1bytes), L(table_64bytes))
|
||||
@ -1632,4 +1695,87 @@ L(table_64bytes):
|
||||
.int JMPTBL (L(77bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(78bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(79bytes), L(table_64bytes))
|
||||
# else
|
||||
L(table_64bytes):
|
||||
.int JMPTBL (L(0bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(4bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(8bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(12bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(16bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(20bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(24bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(28bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(32bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(36bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(40bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(44bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(48bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(52bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(56bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(60bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(64bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(68bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(72bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(76bytes), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||
# endif
|
||||
#endif
|
||||
|
1997
sysdeps/x86_64/multiarch/memcmp-ssse3.S
Normal file
1997
sysdeps/x86_64/multiarch/memcmp-ssse3.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
/* Multiple versions of memcmp
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||
Contributed by Intel Corporation.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
@ -29,11 +29,20 @@ ENTRY(memcmp)
|
||||
cmpl $0, KIND_OFFSET+__cpu_features(%rip)
|
||||
jne 1f
|
||||
call __init_cpu_features
|
||||
1: leaq __memcmp_sse2(%rip), %rax
|
||||
testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
|
||||
jz 2f
|
||||
|
||||
1: testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
|
||||
jnz 2f
|
||||
leaq __memcmp_sse2(%rip), %rax
|
||||
ret
|
||||
|
||||
2: testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
|
||||
jz 3f
|
||||
leaq __memcmp_sse4_1(%rip), %rax
|
||||
2: ret
|
||||
ret
|
||||
|
||||
3: leaq __memcmp_ssse3(%rip), %rax
|
||||
ret
|
||||
|
||||
END(memcmp)
|
||||
|
||||
# undef ENTRY
|
||||
|
5
sysdeps/x86_64/multiarch/wmemcmp-c.c
Normal file
5
sysdeps/x86_64/multiarch/wmemcmp-c.c
Normal file
@ -0,0 +1,5 @@
|
||||
#ifndef NOT_IN_libc
|
||||
# define WMEMCMP __wmemcmp_sse2
|
||||
#endif
|
||||
|
||||
#include "wcsmbs/wmemcmp.c"
|
4
sysdeps/x86_64/multiarch/wmemcmp-sse4.S
Normal file
4
sysdeps/x86_64/multiarch/wmemcmp-sse4.S
Normal file
@ -0,0 +1,4 @@
|
||||
#define USE_AS_WMEMCMP 1
|
||||
#define MEMCMP __wmemcmp_sse4_1
|
||||
|
||||
#include "memcmp-sse4.S"
|
4
sysdeps/x86_64/multiarch/wmemcmp-ssse3.S
Normal file
4
sysdeps/x86_64/multiarch/wmemcmp-ssse3.S
Normal file
@ -0,0 +1,4 @@
|
||||
#define USE_AS_WMEMCMP 1
|
||||
#define MEMCMP __wmemcmp_ssse3
|
||||
|
||||
#include "memcmp-ssse3.S"
|
47
sysdeps/x86_64/multiarch/wmemcmp.S
Normal file
47
sysdeps/x86_64/multiarch/wmemcmp.S
Normal file
@ -0,0 +1,47 @@
|
||||
/* Multiple versions of wmemcmp
|
||||
Copyright (C) 2011 Free Software Foundation, Inc.
|
||||
Contributed by Intel Corporation.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <init-arch.h>
|
||||
|
||||
/* Define multiple versions only for the definition in libc. */
|
||||
#ifndef NOT_IN_libc
|
||||
.text
|
||||
/* Selector for wmemcmp: declared below as a GNU indirect function, it
   leaves in %rax the address of one of __wmemcmp_sse2, __wmemcmp_ssse3
   or __wmemcmp_sse4_1, chosen from the bits recorded in __cpu_features.  */
ENTRY(wmemcmp)
|
||||
.type wmemcmp, @gnu_indirect_function
|
||||
/* Call __init_cpu_features only while the field at KIND_OFFSET is still zero.  */
cmpl $0, KIND_OFFSET+__cpu_features(%rip)
|
||||
jne 1f
|
||||
call __init_cpu_features
|
||||
|
||||
/* SSSE3 bit clear: return the __wmemcmp_sse2 version.  */
1: testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
|
||||
jnz 2f
|
||||
leaq __wmemcmp_sse2(%rip), %rax
|
||||
ret
|
||||
|
||||
/* SSSE3 bit set: return the SSE4.1 version when that bit is set too.  */
2: testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
|
||||
jz 3f
|
||||
leaq __wmemcmp_sse4_1(%rip), %rax
|
||||
ret
|
||||
|
||||
/* SSSE3 bit set but SSE4.1 bit clear: return the SSSE3 version.  */
3: leaq __wmemcmp_ssse3(%rip), %rax
|
||||
ret
|
||||
|
||||
END(wmemcmp)
|
||||
#endif
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 1996, 1997, 2011 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
|
||||
|
||||
@ -19,9 +19,12 @@
|
||||
|
||||
#include <wchar.h>
|
||||
|
||||
#ifndef WMEMCMP
|
||||
/* Default the function name to wmemcmp when the including file has not
   already chosen one by defining WMEMCMP.  */
# define WMEMCMP wmemcmp
|
||||
#endif
|
||||
|
||||
int
|
||||
wmemcmp (s1, s2, n)
|
||||
WMEMCMP (s1, s2, n)
|
||||
const wchar_t *s1;
|
||||
const wchar_t *s2;
|
||||
size_t n;
|
||||
@ -34,19 +37,19 @@ wmemcmp (s1, s2, n)
|
||||
c1 = (wint_t) s1[0];
|
||||
c2 = (wint_t) s2[0];
|
||||
if (c1 - c2 != 0)
|
||||
return c1 - c2;
|
||||
return c1 > c2 ? 1 : -1;
|
||||
c1 = (wint_t) s1[1];
|
||||
c2 = (wint_t) s2[1];
|
||||
if (c1 - c2 != 0)
|
||||
return c1 - c2;
|
||||
return c1 > c2 ? 1 : -1;
|
||||
c1 = (wint_t) s1[2];
|
||||
c2 = (wint_t) s2[2];
|
||||
if (c1 - c2 != 0)
|
||||
return c1 - c2;
|
||||
return c1 > c2 ? 1 : -1;
|
||||
c1 = (wint_t) s1[3];
|
||||
c2 = (wint_t) s2[3];
|
||||
if (c1 - c2 != 0)
|
||||
return c1 - c2;
|
||||
return c1 > c2 ? 1 : -1;
|
||||
s1 += 4;
|
||||
s2 += 4;
|
||||
n -= 4;
|
||||
@ -57,7 +60,7 @@ wmemcmp (s1, s2, n)
|
||||
c1 = (wint_t) s1[0];
|
||||
c2 = (wint_t) s2[0];
|
||||
if (c1 - c2 != 0)
|
||||
return c1 - c2;
|
||||
return c1 > c2 ? 1 : -1;
|
||||
++s1;
|
||||
++s2;
|
||||
--n;
|
||||
@ -67,7 +70,7 @@ wmemcmp (s1, s2, n)
|
||||
c1 = (wint_t) s1[0];
|
||||
c2 = (wint_t) s2[0];
|
||||
if (c1 - c2 != 0)
|
||||
return c1 - c2;
|
||||
return c1 > c2 ? 1 : -1;
|
||||
++s1;
|
||||
++s2;
|
||||
--n;
|
||||
@ -77,7 +80,7 @@ wmemcmp (s1, s2, n)
|
||||
c1 = (wint_t) s1[0];
|
||||
c2 = (wint_t) s2[0];
|
||||
if (c1 - c2 != 0)
|
||||
return c1 - c2;
|
||||
return c1 > c2 ? 1 : -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
Loading…
Reference in New Issue
Block a user