mirror of
https://sourceware.org/git/glibc.git
synced 2025-01-03 08:11:08 +00:00
Optimized memcmp and wmemcmp for x86-64 and x86-32
This commit is contained in:
parent
556a200797
commit
be13f7bff6
29
ChangeLog
29
ChangeLog
@ -1,3 +1,32 @@
|
|||||||
|
2011-09-27 Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
|
||||||
|
|
||||||
|
* sysdeps/x86_64/multiarch/Makefile: (sysdep_routines): Add
|
||||||
|
memcmp-ssse3 wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c
|
||||||
|
* sysdeps/x86_64/multiarch/memcmp-ssse3: New file.
|
||||||
|
* sysdeps/x86_64/multiarch/memcmp.S: Update. Add __memcmp_ssse3.
|
||||||
|
* sysdeps/x86_64/multiarch/memcmp-sse4.S: Update.
|
||||||
|
(USE_AS_WMEMCMP): New macro.
|
||||||
|
Fixing indents.
|
||||||
|
* sysdeps/x86_64/multiarch/wmemcmp.S: New file.
|
||||||
|
* sysdeps/x86_64/multiarch/wmemcmp-ssse3.S: New file.
|
||||||
|
* sysdeps/x86_64/multiarch/wmemcmp-sse4.S: New file.
|
||||||
|
* sysdeps/x86_64/multiarch/wmemcmp-c.S: New file.
|
||||||
|
* sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add
|
||||||
|
wmemcmp-ssse3 wmemcmp-sse4 wmemcmp-c
|
||||||
|
* sysdeps/i386/i686/multiarch/wmemcmp.S: New file.
|
||||||
|
* sysdeps/i386/i686/multiarch/wmemcmp-c.c: New file.
|
||||||
|
* sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S: New file.
|
||||||
|
* sysdeps/i386/i686/multiarch/wmemcmp-sse4.S: New file.
|
||||||
|
* sysdeps/i386/i686/multiarch/memcmp-sse4.S: Update.
|
||||||
|
(USE_AS_WMEMCMP): New macro.
|
||||||
|
* sysdeps/i386/i686/multiarch/memcmp-ssse3: Likewise.
|
||||||
|
* sysdeps/string/test-memcmp.c: Update.
|
||||||
|
Fix simple_wmemcmp.
|
||||||
|
Add new tests.
|
||||||
|
* wcsmbs/wmemcmp.c: Update.
|
||||||
|
(WMEMCMP): New macro.
|
||||||
|
Fix overflow bug.
|
||||||
|
|
||||||
2011-10-12 Andreas Jaeger <aj@suse.de>
|
2011-10-12 Andreas Jaeger <aj@suse.de>
|
||||||
|
|
||||||
[BZ #13268]
|
[BZ #13268]
|
||||||
|
2
NEWS
2
NEWS
@ -33,7 +33,7 @@ Version 2.15
|
|||||||
* Optimized strchr and strrchr for SSE on x86-32.
|
* Optimized strchr and strrchr for SSE on x86-32.
|
||||||
Contributed by Liubov Dmitrieva.
|
Contributed by Liubov Dmitrieva.
|
||||||
|
|
||||||
* Optimized memchr, memrchr, rawmemchr for x86-64 and x86-32.
|
* Optimized memchr, memrchr, rawmemchr, memcmp, wmemcmp for x86-64 and x86-32.
|
||||||
Contributed by Liubov Dmitrieva.
|
Contributed by Liubov Dmitrieva.
|
||||||
|
|
||||||
* New interfaces: scandirat, scandirat64
|
* New interfaces: scandirat, scandirat64
|
||||||
|
@ -29,9 +29,21 @@
|
|||||||
# define MEMCPY wmemcpy
|
# define MEMCPY wmemcpy
|
||||||
# define SIMPLE_MEMCMP simple_wmemcmp
|
# define SIMPLE_MEMCMP simple_wmemcmp
|
||||||
# define CHAR wchar_t
|
# define CHAR wchar_t
|
||||||
# define MAX_CHAR 256000
|
# define UCHAR wchar_t
|
||||||
# define UCHAR uint32_t
|
|
||||||
# define CHARBYTES 4
|
# define CHARBYTES 4
|
||||||
|
# define CHAR__MIN WCHAR_MIN
|
||||||
|
# define CHAR__MAX WCHAR_MAX
|
||||||
|
int
|
||||||
|
simple_wmemcmp (const wchar_t *s1, const wchar_t *s2, size_t n)
|
||||||
|
{
|
||||||
|
int ret = 0;
|
||||||
|
/* Warning!
|
||||||
|
wmemcmp has to use SIGNED comparison for elements.
|
||||||
|
memcmp has to use UNSIGNED comparison for elements.
|
||||||
|
*/
|
||||||
|
while (n-- && (ret = *s1 < *s2 ? -1 : *s1 == *s2 ? 0 : 1) == 0) {s1++; s2++;}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
# define MEMCMP memcmp
|
# define MEMCMP memcmp
|
||||||
# define MEMCPY memcpy
|
# define MEMCPY memcpy
|
||||||
@ -40,18 +52,20 @@
|
|||||||
# define MAX_CHAR 255
|
# define MAX_CHAR 255
|
||||||
# define UCHAR unsigned char
|
# define UCHAR unsigned char
|
||||||
# define CHARBYTES 1
|
# define CHARBYTES 1
|
||||||
#endif
|
# define CHAR__MIN CHAR_MIN
|
||||||
|
# define CHAR__MAX CHAR_MAX
|
||||||
typedef int (*proto_t) (const CHAR *, const CHAR *, size_t);
|
|
||||||
|
|
||||||
int
|
int
|
||||||
SIMPLE_MEMCMP (const CHAR *s1, const CHAR *s2, size_t n)
|
simple_memcmp (const char *s1, const char *s2, size_t n)
|
||||||
{
|
{
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
while (n-- && (ret = *(UCHAR *) s1++ - *(UCHAR *) s2++) == 0);
|
while (n-- && (ret = *(unsigned char *) s1++ - *(unsigned char *) s2++) == 0);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef int (*proto_t) (const CHAR *, const CHAR *, size_t);
|
||||||
|
|
||||||
IMPL (SIMPLE_MEMCMP, 0)
|
IMPL (SIMPLE_MEMCMP, 0)
|
||||||
IMPL (MEMCMP, 1)
|
IMPL (MEMCMP, 1)
|
||||||
@ -121,7 +135,7 @@ do_test (size_t align1, size_t align2, size_t len, int exp_result)
|
|||||||
s2 = (CHAR *) (buf2 + align2);
|
s2 = (CHAR *) (buf2 + align2);
|
||||||
|
|
||||||
for (i = 0; i < len; i++)
|
for (i = 0; i < len; i++)
|
||||||
s1[i] = s2[i] = 1 + (23 << ((CHARBYTES - 1) * 8)) * i % MAX_CHAR;
|
s1[i] = s2[i] = 1 + (23 << ((CHARBYTES - 1) * 8)) * i % CHAR__MAX;
|
||||||
|
|
||||||
s1[len] = align1;
|
s1[len] = align1;
|
||||||
s2[len] = align2;
|
s2[len] = align2;
|
||||||
@ -412,8 +426,8 @@ check1 (void)
|
|||||||
s2[99] = 1;
|
s2[99] = 1;
|
||||||
s1[100] = 116;
|
s1[100] = 116;
|
||||||
s2[100] = 116;
|
s2[100] = 116;
|
||||||
s1[101] = -13;
|
s1[101] = CHAR__MIN;
|
||||||
s2[101] = -13;
|
s2[101] = CHAR__MAX;
|
||||||
s1[102] = -109;
|
s1[102] = -109;
|
||||||
s2[102] = -109;
|
s2[102] = -109;
|
||||||
s1[103] = 1;
|
s1[103] = 1;
|
||||||
@ -434,8 +448,8 @@ check1 (void)
|
|||||||
s2[110] = -109;
|
s2[110] = -109;
|
||||||
s1[111] = 1;
|
s1[111] = 1;
|
||||||
s2[111] = 1;
|
s2[111] = 1;
|
||||||
s1[112] = 20;
|
s1[112] = CHAR__MAX;
|
||||||
s2[112] = 20;
|
s2[112] = CHAR__MIN;
|
||||||
s1[113] = -13;
|
s1[113] = -13;
|
||||||
s2[113] = -13;
|
s2[113] = -13;
|
||||||
s1[114] = -109;
|
s1[114] = -109;
|
||||||
@ -444,9 +458,12 @@ check1 (void)
|
|||||||
s2[115] = 1;
|
s2[115] = 1;
|
||||||
|
|
||||||
n = 116;
|
n = 116;
|
||||||
exp_result = SIMPLE_MEMCMP (s1, s2, n);
|
for (size_t i = 0; i < n; i++)
|
||||||
FOR_EACH_IMPL (impl, 0)
|
{
|
||||||
check_result (impl, s1, s2, n, exp_result);
|
exp_result = SIMPLE_MEMCMP (s1 + i, s2 + i, n - i);
|
||||||
|
FOR_EACH_IMPL (impl, 0)
|
||||||
|
check_result (impl, s1 + i, s2 + i, n - i, exp_result);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
@ -17,7 +17,8 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
|
|||||||
strchr-sse2 strrchr-sse2 strchr-sse2-bsf strrchr-sse2-bsf \
|
strchr-sse2 strrchr-sse2 strchr-sse2-bsf strrchr-sse2-bsf \
|
||||||
wcscmp-sse2 wcscmp-c memchr-sse2 memchr-sse2-bsf \
|
wcscmp-sse2 wcscmp-c memchr-sse2 memchr-sse2-bsf \
|
||||||
memrchr-sse2 memrchr-sse2-bsf memrchr-c \
|
memrchr-sse2 memrchr-sse2-bsf memrchr-c \
|
||||||
rawmemchr-sse2 rawmemchr-sse2-bsf
|
rawmemchr-sse2 rawmemchr-sse2-bsf \
|
||||||
|
wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c
|
||||||
ifeq (yes,$(config-cflags-sse4))
|
ifeq (yes,$(config-cflags-sse4))
|
||||||
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
|
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
|
||||||
CFLAGS-varshift.c += -msse4
|
CFLAGS-varshift.c += -msse4
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/* memcmp with SSE4.2
|
/* memcmp with SSE4.2, wmemcmp with SSE4.2
|
||||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||||
Contributed by Intel Corporation.
|
Contributed by Intel Corporation.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
@ -20,84 +20,97 @@
|
|||||||
|
|
||||||
#ifndef NOT_IN_libc
|
#ifndef NOT_IN_libc
|
||||||
|
|
||||||
#include <sysdep.h>
|
# include <sysdep.h>
|
||||||
#include "asm-syntax.h"
|
|
||||||
|
|
||||||
#ifndef MEMCMP
|
# ifndef MEMCMP
|
||||||
# define MEMCMP __memcmp_sse4_2
|
# define MEMCMP __memcmp_sse4_2
|
||||||
#endif
|
# endif
|
||||||
|
|
||||||
#define CFI_PUSH(REG) \
|
# define CFI_PUSH(REG) \
|
||||||
cfi_adjust_cfa_offset (4); \
|
cfi_adjust_cfa_offset (4); \
|
||||||
cfi_rel_offset (REG, 0)
|
cfi_rel_offset (REG, 0)
|
||||||
|
|
||||||
#define CFI_POP(REG) \
|
# define CFI_POP(REG) \
|
||||||
cfi_adjust_cfa_offset (-4); \
|
cfi_adjust_cfa_offset (-4); \
|
||||||
cfi_restore (REG)
|
cfi_restore (REG)
|
||||||
|
|
||||||
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
|
||||||
#define POP(REG) popl REG; CFI_POP (REG)
|
# define POP(REG) popl REG; CFI_POP (REG)
|
||||||
|
|
||||||
#define PARMS 4
|
# define PARMS 4
|
||||||
#define BLK1 PARMS
|
# define BLK1 PARMS
|
||||||
#define BLK2 BLK1+4
|
# define BLK2 BLK1 + 4
|
||||||
#define LEN BLK2+4
|
# define LEN BLK2 + 4
|
||||||
#define RETURN POP (%ebx); ret; CFI_PUSH (%ebx)
|
# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx)
|
||||||
|
|
||||||
|
|
||||||
#ifdef SHARED
|
# ifdef SHARED
|
||||||
# define JMPTBL(I, B) I - B
|
# define JMPTBL(I, B) I - B
|
||||||
|
|
||||||
/* Load an entry in a jump table into EBX and branch to it. TABLE is a
|
/* Load an entry in a jump table into EBX and branch to it. TABLE is a
|
||||||
jump table with relative offsets. INDEX is a register containing the
|
jump table with relative offsets. INDEX is a register containing the
|
||||||
index into the jump table. SCALE is the scale of INDEX. */
|
index into the jump table. SCALE is the scale of INDEX. */
|
||||||
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
|
|
||||||
/* We first load PC into EBX. */ \
|
|
||||||
call __i686.get_pc_thunk.bx; \
|
|
||||||
/* Get the address of the jump table. */ \
|
|
||||||
addl $(TABLE - .), %ebx; \
|
|
||||||
/* Get the entry and convert the relative offset to the \
|
|
||||||
absolute address. */ \
|
|
||||||
addl (%ebx,INDEX,SCALE), %ebx; \
|
|
||||||
/* We loaded the jump table and adjusted EDX/ESI. Go. */ \
|
|
||||||
jmp *%ebx
|
|
||||||
|
|
||||||
.section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
|
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
|
||||||
.globl __i686.get_pc_thunk.bx
|
/* We first load PC into EBX. */ \
|
||||||
.hidden __i686.get_pc_thunk.bx
|
call __i686.get_pc_thunk.bx; \
|
||||||
ALIGN (4)
|
/* Get the address of the jump table. */ \
|
||||||
.type __i686.get_pc_thunk.bx,@function
|
addl $(TABLE - .), %ebx; \
|
||||||
__i686.get_pc_thunk.bx:
|
/* Get the entry and convert the relative offset to the \
|
||||||
movl (%esp), %ebx
|
absolute address. */ \
|
||||||
ret
|
addl (%ebx,INDEX,SCALE), %ebx; \
|
||||||
#else
|
/* We loaded the jump table and adjusted EDX/ESI. Go. */ \
|
||||||
# define JMPTBL(I, B) I
|
jmp *%ebx
|
||||||
|
# else
|
||||||
|
# define JMPTBL(I, B) I
|
||||||
|
|
||||||
/* Load an entry in a jump table into EBX and branch to it. TABLE is a
|
/* Load an entry in a jump table into EBX and branch to it. TABLE is a
|
||||||
jump table with relative offsets. INDEX is a register containing the
|
jump table with relative offsets. INDEX is a register containing the
|
||||||
index into the jump table. SCALE is the scale of INDEX. */
|
index into the jump table. SCALE is the scale of INDEX. */
|
||||||
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
|
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
|
||||||
jmp *TABLE(,INDEX,SCALE)
|
jmp *TABLE(,INDEX,SCALE)
|
||||||
#endif
|
# endif
|
||||||
|
|
||||||
|
|
||||||
|
/* Warning!
|
||||||
|
wmemcmp has to use SIGNED comparison for elements.
|
||||||
|
memcmp has to use UNSIGNED comparison for elements.
|
||||||
|
*/
|
||||||
|
|
||||||
.section .text.sse4.2,"ax",@progbits
|
.section .text.sse4.2,"ax",@progbits
|
||||||
ENTRY (MEMCMP)
|
ENTRY (MEMCMP)
|
||||||
movl BLK1(%esp), %eax
|
movl BLK1(%esp), %eax
|
||||||
movl BLK2(%esp), %edx
|
movl BLK2(%esp), %edx
|
||||||
movl LEN(%esp), %ecx
|
movl LEN(%esp), %ecx
|
||||||
|
|
||||||
|
# ifdef USE_AS_WMEMCMP
|
||||||
|
shl $2, %ecx
|
||||||
|
test %ecx, %ecx
|
||||||
|
jz L(return0)
|
||||||
|
# else
|
||||||
cmp $1, %ecx
|
cmp $1, %ecx
|
||||||
jbe L(less1bytes)
|
jbe L(less1bytes)
|
||||||
|
# endif
|
||||||
|
|
||||||
pxor %xmm0, %xmm0
|
pxor %xmm0, %xmm0
|
||||||
cmp $64, %ecx
|
cmp $64, %ecx
|
||||||
ja L(64bytesormore)
|
ja L(64bytesormore)
|
||||||
cmp $8, %ecx
|
cmp $8, %ecx
|
||||||
PUSH (%ebx)
|
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
|
PUSH (%ebx)
|
||||||
jb L(less8bytes)
|
jb L(less8bytes)
|
||||||
|
# else
|
||||||
|
jb L(less8bytes)
|
||||||
|
PUSH (%ebx)
|
||||||
|
# endif
|
||||||
|
|
||||||
add %ecx, %edx
|
add %ecx, %edx
|
||||||
add %ecx, %eax
|
add %ecx, %eax
|
||||||
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
|
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
|
||||||
|
|
||||||
ALIGN (4)
|
# ifndef USE_AS_WMEMCMP
|
||||||
|
.p2align 4
|
||||||
L(less8bytes):
|
L(less8bytes):
|
||||||
mov (%eax), %bl
|
mov (%eax), %bl
|
||||||
cmpb (%edx), %bl
|
cmpb (%edx), %bl
|
||||||
@ -141,22 +154,49 @@ L(less8bytes):
|
|||||||
mov 6(%eax), %bl
|
mov 6(%eax), %bl
|
||||||
cmpb 6(%edx), %bl
|
cmpb 6(%edx), %bl
|
||||||
je L(0bytes)
|
je L(0bytes)
|
||||||
|
|
||||||
L(nonzero):
|
L(nonzero):
|
||||||
POP (%ebx)
|
POP (%ebx)
|
||||||
mov $1, %eax
|
mov $1, %eax
|
||||||
ja L(above)
|
ja L(above)
|
||||||
neg %eax
|
neg %eax
|
||||||
L(above):
|
L(above):
|
||||||
ret
|
ret
|
||||||
CFI_PUSH (%ebx)
|
CFI_PUSH (%ebx)
|
||||||
|
# endif
|
||||||
|
|
||||||
ALIGN (4)
|
.p2align 4
|
||||||
L(0bytes):
|
L(0bytes):
|
||||||
POP (%ebx)
|
POP (%ebx)
|
||||||
xor %eax, %eax
|
xor %eax, %eax
|
||||||
ret
|
ret
|
||||||
|
|
||||||
ALIGN (4)
|
# ifdef USE_AS_WMEMCMP
|
||||||
|
|
||||||
|
/* for wmemcmp, case N == 1 */
|
||||||
|
|
||||||
|
.p2align 4
|
||||||
|
L(less8bytes):
|
||||||
|
mov (%eax), %ecx
|
||||||
|
cmp (%edx), %ecx
|
||||||
|
je L(return0)
|
||||||
|
mov $1, %eax
|
||||||
|
jg L(find_diff_bigger)
|
||||||
|
neg %eax
|
||||||
|
ret
|
||||||
|
|
||||||
|
.p2align 4
|
||||||
|
L(find_diff_bigger):
|
||||||
|
ret
|
||||||
|
|
||||||
|
.p2align 4
|
||||||
|
L(return0):
|
||||||
|
xor %eax, %eax
|
||||||
|
ret
|
||||||
|
# endif
|
||||||
|
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
|
.p2align 4
|
||||||
L(less1bytes):
|
L(less1bytes):
|
||||||
jb L(0bytesend)
|
jb L(0bytesend)
|
||||||
movzbl (%eax), %eax
|
movzbl (%eax), %eax
|
||||||
@ -164,14 +204,14 @@ L(less1bytes):
|
|||||||
sub %edx, %eax
|
sub %edx, %eax
|
||||||
ret
|
ret
|
||||||
|
|
||||||
ALIGN (4)
|
.p2align 4
|
||||||
L(0bytesend):
|
L(0bytesend):
|
||||||
xor %eax, %eax
|
xor %eax, %eax
|
||||||
ret
|
ret
|
||||||
|
# endif
|
||||||
ALIGN (4)
|
.p2align 4
|
||||||
L(64bytesormore):
|
L(64bytesormore):
|
||||||
PUSH (%ebx)
|
PUSH (%ebx)
|
||||||
mov %ecx, %ebx
|
mov %ecx, %ebx
|
||||||
mov $64, %ecx
|
mov $64, %ecx
|
||||||
sub $64, %ebx
|
sub $64, %ebx
|
||||||
@ -208,7 +248,14 @@ L(64bytesormore_loop):
|
|||||||
add %ecx, %eax
|
add %ecx, %eax
|
||||||
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
|
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
|
||||||
|
|
||||||
ALIGN (4)
|
# ifdef USE_AS_WMEMCMP
|
||||||
|
|
||||||
|
/* Label is needed only to fill table_64bytes */
|
||||||
|
L(unreal_case):
|
||||||
|
/* no code here */
|
||||||
|
|
||||||
|
# endif
|
||||||
|
.p2align 4
|
||||||
L(find_16diff):
|
L(find_16diff):
|
||||||
sub $16, %ecx
|
sub $16, %ecx
|
||||||
L(find_32diff):
|
L(find_32diff):
|
||||||
@ -218,9 +265,9 @@ L(find_48diff):
|
|||||||
L(find_64diff):
|
L(find_64diff):
|
||||||
add %ecx, %edx
|
add %ecx, %edx
|
||||||
add %ecx, %eax
|
add %ecx, %eax
|
||||||
jmp L(16bytes)
|
|
||||||
|
|
||||||
ALIGN (4)
|
# ifndef USE_AS_WMEMCMP
|
||||||
|
.p2align 4
|
||||||
L(16bytes):
|
L(16bytes):
|
||||||
mov -16(%eax), %ecx
|
mov -16(%eax), %ecx
|
||||||
mov -16(%edx), %ebx
|
mov -16(%edx), %ebx
|
||||||
@ -243,8 +290,30 @@ L(4bytes):
|
|||||||
mov $0, %eax
|
mov $0, %eax
|
||||||
jne L(find_diff)
|
jne L(find_diff)
|
||||||
RETURN
|
RETURN
|
||||||
|
# else
|
||||||
|
.p2align 4
|
||||||
|
L(16bytes):
|
||||||
|
mov -16(%eax), %ecx
|
||||||
|
cmp -16(%edx), %ecx
|
||||||
|
jne L(find_diff)
|
||||||
|
L(12bytes):
|
||||||
|
mov -12(%eax), %ecx
|
||||||
|
cmp -12(%edx), %ecx
|
||||||
|
jne L(find_diff)
|
||||||
|
L(8bytes):
|
||||||
|
mov -8(%eax), %ecx
|
||||||
|
cmp -8(%edx), %ecx
|
||||||
|
jne L(find_diff)
|
||||||
|
L(4bytes):
|
||||||
|
mov -4(%eax), %ecx
|
||||||
|
cmp -4(%edx), %ecx
|
||||||
|
mov $0, %eax
|
||||||
|
jne L(find_diff)
|
||||||
|
RETURN
|
||||||
|
# endif
|
||||||
|
|
||||||
ALIGN (4)
|
# ifndef USE_AS_WMEMCMP
|
||||||
|
.p2align 4
|
||||||
L(49bytes):
|
L(49bytes):
|
||||||
movdqu -49(%eax), %xmm1
|
movdqu -49(%eax), %xmm1
|
||||||
movdqu -49(%edx), %xmm2
|
movdqu -49(%edx), %xmm2
|
||||||
@ -285,7 +354,7 @@ L(5bytes):
|
|||||||
jne L(end)
|
jne L(end)
|
||||||
RETURN
|
RETURN
|
||||||
|
|
||||||
ALIGN (4)
|
.p2align 4
|
||||||
L(50bytes):
|
L(50bytes):
|
||||||
mov $-50, %ebx
|
mov $-50, %ebx
|
||||||
movdqu -50(%eax), %xmm1
|
movdqu -50(%eax), %xmm1
|
||||||
@ -330,7 +399,7 @@ L(2bytes):
|
|||||||
jne L(end)
|
jne L(end)
|
||||||
RETURN
|
RETURN
|
||||||
|
|
||||||
ALIGN (4)
|
.p2align 4
|
||||||
L(51bytes):
|
L(51bytes):
|
||||||
mov $-51, %ebx
|
mov $-51, %ebx
|
||||||
movdqu -51(%eax), %xmm1
|
movdqu -51(%eax), %xmm1
|
||||||
@ -378,8 +447,8 @@ L(1bytes):
|
|||||||
mov $0, %eax
|
mov $0, %eax
|
||||||
jne L(end)
|
jne L(end)
|
||||||
RETURN
|
RETURN
|
||||||
|
# endif
|
||||||
ALIGN (4)
|
.p2align 4
|
||||||
L(52bytes):
|
L(52bytes):
|
||||||
movdqu -52(%eax), %xmm1
|
movdqu -52(%eax), %xmm1
|
||||||
movdqu -52(%edx), %xmm2
|
movdqu -52(%edx), %xmm2
|
||||||
@ -402,13 +471,18 @@ L(20bytes):
|
|||||||
ptest %xmm2, %xmm0
|
ptest %xmm2, %xmm0
|
||||||
jnc L(less16bytes)
|
jnc L(less16bytes)
|
||||||
mov -4(%eax), %ecx
|
mov -4(%eax), %ecx
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
mov -4(%edx), %ebx
|
mov -4(%edx), %ebx
|
||||||
cmp %ebx, %ecx
|
cmp %ebx, %ecx
|
||||||
|
# else
|
||||||
|
cmp -4(%edx), %ecx
|
||||||
|
# endif
|
||||||
mov $0, %eax
|
mov $0, %eax
|
||||||
jne L(find_diff)
|
jne L(find_diff)
|
||||||
RETURN
|
RETURN
|
||||||
|
|
||||||
ALIGN (4)
|
# ifndef USE_AS_WMEMCMP
|
||||||
|
.p2align 4
|
||||||
L(53bytes):
|
L(53bytes):
|
||||||
movdqu -53(%eax), %xmm1
|
movdqu -53(%eax), %xmm1
|
||||||
movdqu -53(%edx), %xmm2
|
movdqu -53(%edx), %xmm2
|
||||||
@ -440,7 +514,7 @@ L(21bytes):
|
|||||||
jne L(end)
|
jne L(end)
|
||||||
RETURN
|
RETURN
|
||||||
|
|
||||||
ALIGN (4)
|
.p2align 4
|
||||||
L(54bytes):
|
L(54bytes):
|
||||||
movdqu -54(%eax), %xmm1
|
movdqu -54(%eax), %xmm1
|
||||||
movdqu -54(%edx), %xmm2
|
movdqu -54(%edx), %xmm2
|
||||||
@ -476,7 +550,7 @@ L(22bytes):
|
|||||||
jne L(end)
|
jne L(end)
|
||||||
RETURN
|
RETURN
|
||||||
|
|
||||||
ALIGN (4)
|
.p2align 4
|
||||||
L(55bytes):
|
L(55bytes):
|
||||||
movdqu -55(%eax), %xmm1
|
movdqu -55(%eax), %xmm1
|
||||||
movdqu -55(%edx), %xmm2
|
movdqu -55(%edx), %xmm2
|
||||||
@ -513,8 +587,8 @@ L(23bytes):
|
|||||||
mov $0, %eax
|
mov $0, %eax
|
||||||
jne L(end)
|
jne L(end)
|
||||||
RETURN
|
RETURN
|
||||||
|
# endif
|
||||||
ALIGN (4)
|
.p2align 4
|
||||||
L(56bytes):
|
L(56bytes):
|
||||||
movdqu -56(%eax), %xmm1
|
movdqu -56(%eax), %xmm1
|
||||||
movdqu -56(%edx), %xmm2
|
movdqu -56(%edx), %xmm2
|
||||||
@ -538,18 +612,27 @@ L(24bytes):
|
|||||||
jnc L(less16bytes)
|
jnc L(less16bytes)
|
||||||
|
|
||||||
mov -8(%eax), %ecx
|
mov -8(%eax), %ecx
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
mov -8(%edx), %ebx
|
mov -8(%edx), %ebx
|
||||||
cmp %ebx, %ecx
|
cmp %ebx, %ecx
|
||||||
|
# else
|
||||||
|
cmp -8(%edx), %ecx
|
||||||
|
# endif
|
||||||
jne L(find_diff)
|
jne L(find_diff)
|
||||||
|
|
||||||
mov -4(%eax), %ecx
|
mov -4(%eax), %ecx
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
mov -4(%edx), %ebx
|
mov -4(%edx), %ebx
|
||||||
cmp %ebx, %ecx
|
cmp %ebx, %ecx
|
||||||
|
# else
|
||||||
|
cmp -4(%edx), %ecx
|
||||||
|
# endif
|
||||||
mov $0, %eax
|
mov $0, %eax
|
||||||
jne L(find_diff)
|
jne L(find_diff)
|
||||||
RETURN
|
RETURN
|
||||||
|
|
||||||
ALIGN (4)
|
# ifndef USE_AS_WMEMCMP
|
||||||
|
.p2align 4
|
||||||
L(57bytes):
|
L(57bytes):
|
||||||
movdqu -57(%eax), %xmm1
|
movdqu -57(%eax), %xmm1
|
||||||
movdqu -57(%edx), %xmm2
|
movdqu -57(%edx), %xmm2
|
||||||
@ -585,7 +668,7 @@ L(25bytes):
|
|||||||
jne L(end)
|
jne L(end)
|
||||||
RETURN
|
RETURN
|
||||||
|
|
||||||
ALIGN (4)
|
.p2align 4
|
||||||
L(58bytes):
|
L(58bytes):
|
||||||
movdqu -58(%eax), %xmm1
|
movdqu -58(%eax), %xmm1
|
||||||
movdqu -58(%edx), %xmm2
|
movdqu -58(%edx), %xmm2
|
||||||
@ -627,7 +710,7 @@ L(26bytes):
|
|||||||
jne L(end)
|
jne L(end)
|
||||||
RETURN
|
RETURN
|
||||||
|
|
||||||
ALIGN (4)
|
.p2align 4
|
||||||
L(59bytes):
|
L(59bytes):
|
||||||
movdqu -59(%eax), %xmm1
|
movdqu -59(%eax), %xmm1
|
||||||
movdqu -59(%edx), %xmm2
|
movdqu -59(%edx), %xmm2
|
||||||
@ -668,8 +751,8 @@ L(27bytes):
|
|||||||
mov $0, %eax
|
mov $0, %eax
|
||||||
jne L(end)
|
jne L(end)
|
||||||
RETURN
|
RETURN
|
||||||
|
# endif
|
||||||
ALIGN (4)
|
.p2align 4
|
||||||
L(60bytes):
|
L(60bytes):
|
||||||
movdqu -60(%eax), %xmm1
|
movdqu -60(%eax), %xmm1
|
||||||
movdqu -60(%edx), %xmm2
|
movdqu -60(%edx), %xmm2
|
||||||
@ -691,22 +774,38 @@ L(28bytes):
|
|||||||
pxor %xmm1, %xmm2
|
pxor %xmm1, %xmm2
|
||||||
ptest %xmm2, %xmm0
|
ptest %xmm2, %xmm0
|
||||||
jnc L(less16bytes)
|
jnc L(less16bytes)
|
||||||
|
|
||||||
mov -12(%eax), %ecx
|
mov -12(%eax), %ecx
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
mov -12(%edx), %ebx
|
mov -12(%edx), %ebx
|
||||||
cmp %ebx, %ecx
|
cmp %ebx, %ecx
|
||||||
|
# else
|
||||||
|
cmp -12(%edx), %ecx
|
||||||
|
# endif
|
||||||
jne L(find_diff)
|
jne L(find_diff)
|
||||||
|
|
||||||
mov -8(%eax), %ecx
|
mov -8(%eax), %ecx
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
mov -8(%edx), %ebx
|
mov -8(%edx), %ebx
|
||||||
cmp %ebx, %ecx
|
cmp %ebx, %ecx
|
||||||
|
# else
|
||||||
|
cmp -8(%edx), %ecx
|
||||||
|
# endif
|
||||||
jne L(find_diff)
|
jne L(find_diff)
|
||||||
|
|
||||||
mov -4(%eax), %ecx
|
mov -4(%eax), %ecx
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
mov -4(%edx), %ebx
|
mov -4(%edx), %ebx
|
||||||
cmp %ebx, %ecx
|
cmp %ebx, %ecx
|
||||||
|
# else
|
||||||
|
cmp -4(%edx), %ecx
|
||||||
|
# endif
|
||||||
mov $0, %eax
|
mov $0, %eax
|
||||||
jne L(find_diff)
|
jne L(find_diff)
|
||||||
RETURN
|
RETURN
|
||||||
|
|
||||||
ALIGN (4)
|
# ifndef USE_AS_WMEMCMP
|
||||||
|
.p2align 4
|
||||||
L(61bytes):
|
L(61bytes):
|
||||||
movdqu -61(%eax), %xmm1
|
movdqu -61(%eax), %xmm1
|
||||||
movdqu -61(%edx), %xmm2
|
movdqu -61(%edx), %xmm2
|
||||||
@ -749,7 +848,7 @@ L(29bytes):
|
|||||||
jne L(end)
|
jne L(end)
|
||||||
RETURN
|
RETURN
|
||||||
|
|
||||||
ALIGN (4)
|
.p2align 4
|
||||||
L(62bytes):
|
L(62bytes):
|
||||||
movdqu -62(%eax), %xmm1
|
movdqu -62(%eax), %xmm1
|
||||||
movdqu -62(%edx), %xmm2
|
movdqu -62(%edx), %xmm2
|
||||||
@ -792,7 +891,7 @@ L(30bytes):
|
|||||||
jne L(end)
|
jne L(end)
|
||||||
RETURN
|
RETURN
|
||||||
|
|
||||||
ALIGN (4)
|
.p2align 4
|
||||||
L(63bytes):
|
L(63bytes):
|
||||||
movdqu -63(%eax), %xmm1
|
movdqu -63(%eax), %xmm1
|
||||||
movdqu -63(%edx), %xmm2
|
movdqu -63(%edx), %xmm2
|
||||||
@ -838,8 +937,9 @@ L(31bytes):
|
|||||||
mov $0, %eax
|
mov $0, %eax
|
||||||
jne L(end)
|
jne L(end)
|
||||||
RETURN
|
RETURN
|
||||||
|
# endif
|
||||||
|
|
||||||
ALIGN (4)
|
.p2align 4
|
||||||
L(64bytes):
|
L(64bytes):
|
||||||
movdqu -64(%eax), %xmm1
|
movdqu -64(%eax), %xmm1
|
||||||
movdqu -64(%edx), %xmm2
|
movdqu -64(%edx), %xmm2
|
||||||
@ -863,28 +963,45 @@ L(32bytes):
|
|||||||
jnc L(less16bytes)
|
jnc L(less16bytes)
|
||||||
|
|
||||||
mov -16(%eax), %ecx
|
mov -16(%eax), %ecx
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
mov -16(%edx), %ebx
|
mov -16(%edx), %ebx
|
||||||
cmp %ebx, %ecx
|
cmp %ebx, %ecx
|
||||||
|
# else
|
||||||
|
cmp -16(%edx), %ecx
|
||||||
|
# endif
|
||||||
jne L(find_diff)
|
jne L(find_diff)
|
||||||
|
|
||||||
mov -12(%eax), %ecx
|
mov -12(%eax), %ecx
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
mov -12(%edx), %ebx
|
mov -12(%edx), %ebx
|
||||||
cmp %ebx, %ecx
|
cmp %ebx, %ecx
|
||||||
|
# else
|
||||||
|
cmp -12(%edx), %ecx
|
||||||
|
# endif
|
||||||
jne L(find_diff)
|
jne L(find_diff)
|
||||||
|
|
||||||
mov -8(%eax), %ecx
|
mov -8(%eax), %ecx
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
mov -8(%edx), %ebx
|
mov -8(%edx), %ebx
|
||||||
cmp %ebx, %ecx
|
cmp %ebx, %ecx
|
||||||
|
# else
|
||||||
|
cmp -8(%edx), %ecx
|
||||||
|
# endif
|
||||||
jne L(find_diff)
|
jne L(find_diff)
|
||||||
|
|
||||||
mov -4(%eax), %ecx
|
mov -4(%eax), %ecx
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
mov -4(%edx), %ebx
|
mov -4(%edx), %ebx
|
||||||
cmp %ebx, %ecx
|
cmp %ebx, %ecx
|
||||||
|
# else
|
||||||
|
cmp -4(%edx), %ecx
|
||||||
|
# endif
|
||||||
mov $0, %eax
|
mov $0, %eax
|
||||||
jne L(find_diff)
|
jne L(find_diff)
|
||||||
RETURN
|
RETURN
|
||||||
|
|
||||||
ALIGN (4)
|
# ifndef USE_AS_WMEMCMP
|
||||||
|
.p2align 4
|
||||||
L(less16bytes):
|
L(less16bytes):
|
||||||
add %ebx, %eax
|
add %ebx, %eax
|
||||||
add %ebx, %edx
|
add %ebx, %edx
|
||||||
@ -910,9 +1027,35 @@ L(less16bytes):
|
|||||||
mov $0, %eax
|
mov $0, %eax
|
||||||
jne L(find_diff)
|
jne L(find_diff)
|
||||||
RETURN
|
RETURN
|
||||||
|
# else
|
||||||
|
.p2align 4
|
||||||
|
L(less16bytes):
|
||||||
|
add %ebx, %eax
|
||||||
|
add %ebx, %edx
|
||||||
|
|
||||||
ALIGN (4)
|
mov (%eax), %ecx
|
||||||
|
cmp (%edx), %ecx
|
||||||
|
jne L(find_diff)
|
||||||
|
|
||||||
|
mov 4(%eax), %ecx
|
||||||
|
cmp 4(%edx), %ecx
|
||||||
|
jne L(find_diff)
|
||||||
|
|
||||||
|
mov 8(%eax), %ecx
|
||||||
|
cmp 8(%edx), %ecx
|
||||||
|
jne L(find_diff)
|
||||||
|
|
||||||
|
mov 12(%eax), %ecx
|
||||||
|
cmp 12(%edx), %ecx
|
||||||
|
|
||||||
|
mov $0, %eax
|
||||||
|
jne L(find_diff)
|
||||||
|
RETURN
|
||||||
|
# endif
|
||||||
|
|
||||||
|
.p2align 4
|
||||||
L(find_diff):
|
L(find_diff):
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
cmpb %bl, %cl
|
cmpb %bl, %cl
|
||||||
jne L(end)
|
jne L(end)
|
||||||
cmp %bx, %cx
|
cmp %bx, %cx
|
||||||
@ -923,17 +1066,29 @@ L(find_diff):
|
|||||||
jne L(end)
|
jne L(end)
|
||||||
cmp %bx, %cx
|
cmp %bx, %cx
|
||||||
L(end):
|
L(end):
|
||||||
POP (%ebx)
|
POP (%ebx)
|
||||||
mov $1, %eax
|
mov $1, %eax
|
||||||
ja L(bigger)
|
ja L(bigger)
|
||||||
neg %eax
|
neg %eax
|
||||||
L(bigger):
|
L(bigger):
|
||||||
ret
|
ret
|
||||||
|
# else
|
||||||
|
POP (%ebx)
|
||||||
|
mov $1, %eax
|
||||||
|
jg L(bigger)
|
||||||
|
neg %eax
|
||||||
|
ret
|
||||||
|
|
||||||
|
.p2align 4
|
||||||
|
L(bigger):
|
||||||
|
ret
|
||||||
|
# endif
|
||||||
END (MEMCMP)
|
END (MEMCMP)
|
||||||
|
|
||||||
.section .rodata.sse4.2,"a",@progbits
|
.section .rodata.sse4.2,"a",@progbits
|
||||||
ALIGN (2)
|
.p2align 2
|
||||||
.type L(table_64bytes), @object
|
.type L(table_64bytes), @object
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
L(table_64bytes):
|
L(table_64bytes):
|
||||||
.int JMPTBL (L(0bytes), L(table_64bytes))
|
.int JMPTBL (L(0bytes), L(table_64bytes))
|
||||||
.int JMPTBL (L(1bytes), L(table_64bytes))
|
.int JMPTBL (L(1bytes), L(table_64bytes))
|
||||||
@ -1000,5 +1155,72 @@ L(table_64bytes):
|
|||||||
.int JMPTBL (L(62bytes), L(table_64bytes))
|
.int JMPTBL (L(62bytes), L(table_64bytes))
|
||||||
.int JMPTBL (L(63bytes), L(table_64bytes))
|
.int JMPTBL (L(63bytes), L(table_64bytes))
|
||||||
.int JMPTBL (L(64bytes), L(table_64bytes))
|
.int JMPTBL (L(64bytes), L(table_64bytes))
|
||||||
.size L(table_64bytes), .-L(table_64bytes)
|
# else
|
||||||
|
L(table_64bytes):
|
||||||
|
.int JMPTBL (L(0bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(4bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(8bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(12bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(16bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(20bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(24bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(28bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(32bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(36bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(40bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(44bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(48bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(52bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(56bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(60bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(64bytes), L(table_64bytes))
|
||||||
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
File diff suppressed because it is too large
Load Diff
5
sysdeps/i386/i686/multiarch/wmemcmp-c.c
Normal file
5
sysdeps/i386/i686/multiarch/wmemcmp-c.c
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
#ifndef NOT_IN_libc
|
||||||
|
# define WMEMCMP __wmemcmp_ia32
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "wcsmbs/wmemcmp.c"
|
4
sysdeps/i386/i686/multiarch/wmemcmp-sse4.S
Normal file
4
sysdeps/i386/i686/multiarch/wmemcmp-sse4.S
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
#define USE_AS_WMEMCMP 1
|
||||||
|
#define MEMCMP __wmemcmp_sse4_2
|
||||||
|
|
||||||
|
#include "memcmp-sse4.S"
|
4
sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S
Normal file
4
sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
#define USE_AS_WMEMCMP 1
|
||||||
|
#define MEMCMP __wmemcmp_ssse3
|
||||||
|
|
||||||
|
#include "memcmp-ssse3.S"
|
59
sysdeps/i386/i686/multiarch/wmemcmp.S
Normal file
59
sysdeps/i386/i686/multiarch/wmemcmp.S
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
/* Multiple versions of wmemcmp
|
||||||
|
Copyright (C) 2011 Free Software Foundation, Inc.
|
||||||
|
Contributed by Intel Corporation.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, write to the Free
|
||||||
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||||
|
02111-1307 USA. */
|
||||||
|
|
||||||
|
#include <sysdep.h>
|
||||||
|
#include <init-arch.h>
|
||||||
|
|
||||||
|
/* Define multiple versions only for the definition in libc. */
|
||||||
|
|
||||||
|
#ifndef NOT_IN_libc
|
||||||
|
/* PC thunk: copies the word at the top of the stack (the return
   address pushed by the caller's `call') into %ebx and returns, so the
   caller obtains the address of the instruction following its call.
   Emitted as a hidden global symbol in its own link-once section.  */
.section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
|
||||||
|
.globl __i686.get_pc_thunk.bx
|
||||||
|
.hidden __i686.get_pc_thunk.bx
|
||||||
|
.p2align 4
|
||||||
|
.type __i686.get_pc_thunk.bx,@function
|
||||||
|
__i686.get_pc_thunk.bx:
|
||||||
|
/* %ebx = return address of this call.  */
movl (%esp), %ebx
|
||||||
|
ret
|
||||||
|
|
||||||
|
.text
|
||||||
|
/* Indirect-function entry for wmemcmp (i386).  The symbol is typed
   @gnu_indirect_function; this code leaves in %eax the address of the
   implementation to use:
     __wmemcmp_ia32    when the SSSE3 bit is clear,
     __wmemcmp_ssse3   when SSSE3 is set but SSE4_2 is clear,
     __wmemcmp_sse4_2  when both bits are set.
   %ebx is saved on entry and restored before returning.  */
ENTRY(wmemcmp)
|
||||||
|
.type wmemcmp, @gnu_indirect_function
|
||||||
|
/* Preserve %ebx; it is used below as the base for @GOTOFF accesses.  */
pushl %ebx
|
||||||
|
cfi_adjust_cfa_offset (4)
|
||||||
|
cfi_rel_offset (ebx, 0)
|
||||||
|
/* %ebx = address of the next instruction, then adjusted by
   _GLOBAL_OFFSET_TABLE_ so that sym@GOTOFF(%ebx) addresses sym.  */
call __i686.get_pc_thunk.bx
|
||||||
|
addl $_GLOBAL_OFFSET_TABLE_, %ebx
|
||||||
|
/* Call __init_cpu_features only while the field at KIND_OFFSET in
   __cpu_features is still zero (presumably meaning "not yet
   initialized" -- confirm in init-arch.h).  */
cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
|
||||||
|
jne 1f
|
||||||
|
call __init_cpu_features
|
||||||
|
/* Default choice: the generic version.  */
1: leal __wmemcmp_ia32@GOTOFF(%ebx), %eax
|
||||||
|
testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
|
||||||
|
/* No SSSE3: keep the default.  */
jz 2f
|
||||||
|
leal __wmemcmp_ssse3@GOTOFF(%ebx), %eax
|
||||||
|
testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
|
||||||
|
/* SSSE3 but no SSE4_2: keep the SSSE3 version.  */
jz 2f
|
||||||
|
leal __wmemcmp_sse4_2@GOTOFF(%ebx), %eax
|
||||||
|
/* Common exit: restore %ebx and return the selected address in %eax.  */
2: popl %ebx
|
||||||
|
cfi_adjust_cfa_offset (-4)
|
||||||
|
cfi_restore (ebx)
|
||||||
|
ret
|
||||||
|
END(wmemcmp)
|
||||||
|
#endif
|
@ -15,7 +15,8 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
|
|||||||
stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
|
stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
|
||||||
strcat-sse2-unaligned strncat-sse2-unaligned \
|
strcat-sse2-unaligned strncat-sse2-unaligned \
|
||||||
strcat-ssse3 strncat-ssse3 strlen-sse2-pminub \
|
strcat-ssse3 strncat-ssse3 strlen-sse2-pminub \
|
||||||
strrchr-sse2-no-bsf strchr-sse2-no-bsf
|
strrchr-sse2-no-bsf strchr-sse2-no-bsf \
|
||||||
|
memcmp-ssse3 wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c
|
||||||
ifeq (yes,$(config-cflags-sse4))
|
ifeq (yes,$(config-cflags-sse4))
|
||||||
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
|
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
|
||||||
CFLAGS-varshift.c += -msse4
|
CFLAGS-varshift.c += -msse4
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/* memcmp with SSE4.1
|
/* memcmp with SSE4.1, wmemcmp with SSE4.1
|
||||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||||
Contributed by Intel Corporation.
|
Contributed by Intel Corporation.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
@ -20,43 +20,54 @@
|
|||||||
|
|
||||||
#ifndef NOT_IN_libc
|
#ifndef NOT_IN_libc
|
||||||
|
|
||||||
#include <sysdep.h>
|
# include <sysdep.h>
|
||||||
#include "asm-syntax.h"
|
|
||||||
|
|
||||||
#ifndef MEMCMP
|
# ifndef MEMCMP
|
||||||
# define MEMCMP __memcmp_sse4_1
|
# define MEMCMP __memcmp_sse4_1
|
||||||
#endif
|
# endif
|
||||||
|
|
||||||
#ifndef ALIGN
|
# ifndef ALIGN
|
||||||
# define ALIGN(n) .p2align n
|
# define ALIGN(n) .p2align n
|
||||||
#endif
|
# endif
|
||||||
|
|
||||||
#define JMPTBL(I, B) (I - B)
|
# define JMPTBL(I, B) (I - B)
|
||||||
|
|
||||||
#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
|
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
|
||||||
lea TABLE(%rip), %r11; \
|
lea TABLE(%rip), %r11; \
|
||||||
movslq (%r11, INDEX, SCALE), %rcx; \
|
movslq (%r11, INDEX, SCALE), %rcx; \
|
||||||
add %r11, %rcx; \
|
add %r11, %rcx; \
|
||||||
jmp *%rcx; \
|
jmp *%rcx; \
|
||||||
ud2
|
ud2
|
||||||
|
|
||||||
|
/* Warning!
|
||||||
|
wmemcmp has to use SIGNED comparison for elements.
|
||||||
|
memcmp has to use UNSIGNED comparison for elements.
|
||||||
|
*/
|
||||||
|
|
||||||
.section .text.sse4.1,"ax",@progbits
|
.section .text.sse4.1,"ax",@progbits
|
||||||
ENTRY (MEMCMP)
|
ENTRY (MEMCMP)
|
||||||
|
# ifdef USE_AS_WMEMCMP
|
||||||
|
shl $2, %rdx
|
||||||
|
# endif
|
||||||
pxor %xmm0, %xmm0
|
pxor %xmm0, %xmm0
|
||||||
cmp $79, %rdx
|
cmp $79, %rdx
|
||||||
ja L(79bytesormore)
|
ja L(79bytesormore)
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
cmp $1, %rdx
|
cmp $1, %rdx
|
||||||
je L(firstbyte)
|
je L(firstbyte)
|
||||||
|
# endif
|
||||||
add %rdx, %rsi
|
add %rdx, %rsi
|
||||||
add %rdx, %rdi
|
add %rdx, %rdi
|
||||||
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
|
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
|
||||||
|
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
ALIGN (4)
|
ALIGN (4)
|
||||||
L(firstbyte):
|
L(firstbyte):
|
||||||
movzbl (%rdi), %eax
|
movzbl (%rdi), %eax
|
||||||
movzbl (%rsi), %ecx
|
movzbl (%rsi), %ecx
|
||||||
sub %ecx, %eax
|
sub %ecx, %eax
|
||||||
ret
|
ret
|
||||||
|
# endif
|
||||||
|
|
||||||
ALIGN (4)
|
ALIGN (4)
|
||||||
L(79bytesormore):
|
L(79bytesormore):
|
||||||
@ -308,11 +319,11 @@ L(less32bytesin256):
|
|||||||
|
|
||||||
ALIGN (4)
|
ALIGN (4)
|
||||||
L(512bytesormore):
|
L(512bytesormore):
|
||||||
#ifdef DATA_CACHE_SIZE_HALF
|
# ifdef DATA_CACHE_SIZE_HALF
|
||||||
mov $DATA_CACHE_SIZE_HALF, %r8
|
mov $DATA_CACHE_SIZE_HALF, %r8
|
||||||
#else
|
# else
|
||||||
mov __x86_64_data_cache_size_half(%rip), %r8
|
mov __x86_64_data_cache_size_half(%rip), %r8
|
||||||
#endif
|
# endif
|
||||||
mov %r8, %r9
|
mov %r8, %r9
|
||||||
shr $1, %r8
|
shr $1, %r8
|
||||||
add %r9, %r8
|
add %r9, %r8
|
||||||
@ -624,11 +635,11 @@ L(less32bytesin256in2alinged):
|
|||||||
|
|
||||||
ALIGN (4)
|
ALIGN (4)
|
||||||
L(512bytesormorein2aligned):
|
L(512bytesormorein2aligned):
|
||||||
#ifdef DATA_CACHE_SIZE_HALF
|
# ifdef DATA_CACHE_SIZE_HALF
|
||||||
mov $DATA_CACHE_SIZE_HALF, %r8
|
mov $DATA_CACHE_SIZE_HALF, %r8
|
||||||
#else
|
# else
|
||||||
mov __x86_64_data_cache_size_half(%rip), %r8
|
mov __x86_64_data_cache_size_half(%rip), %r8
|
||||||
#endif
|
# endif
|
||||||
mov %r8, %r9
|
mov %r8, %r9
|
||||||
shr $1, %r8
|
shr $1, %r8
|
||||||
add %r9, %r8
|
add %r9, %r8
|
||||||
@ -667,6 +678,7 @@ L(64bytesormore_loopin2aligned):
|
|||||||
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
|
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
|
||||||
L(L2_L3_cache_aglined):
|
L(L2_L3_cache_aglined):
|
||||||
sub $64, %rdx
|
sub $64, %rdx
|
||||||
|
|
||||||
ALIGN (4)
|
ALIGN (4)
|
||||||
L(L2_L3_aligned_128bytes_loop):
|
L(L2_L3_aligned_128bytes_loop):
|
||||||
prefetchnta 0x1c0(%rdi)
|
prefetchnta 0x1c0(%rdi)
|
||||||
@ -803,13 +815,19 @@ L(12bytes):
|
|||||||
jne L(diffin8bytes)
|
jne L(diffin8bytes)
|
||||||
L(4bytes):
|
L(4bytes):
|
||||||
mov -4(%rsi), %ecx
|
mov -4(%rsi), %ecx
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
mov -4(%rdi), %eax
|
mov -4(%rdi), %eax
|
||||||
cmp %eax, %ecx
|
cmp %eax, %ecx
|
||||||
|
# else
|
||||||
|
cmp -4(%rdi), %ecx
|
||||||
|
# endif
|
||||||
jne L(diffin4bytes)
|
jne L(diffin4bytes)
|
||||||
L(0bytes):
|
L(0bytes):
|
||||||
xor %eax, %eax
|
xor %eax, %eax
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
|
/* unreal case for wmemcmp */
|
||||||
ALIGN (4)
|
ALIGN (4)
|
||||||
L(65bytes):
|
L(65bytes):
|
||||||
movdqu -65(%rdi), %xmm1
|
movdqu -65(%rdi), %xmm1
|
||||||
@ -1017,6 +1035,7 @@ L(1bytes):
|
|||||||
movzbl -1(%rsi), %ecx
|
movzbl -1(%rsi), %ecx
|
||||||
sub %ecx, %eax
|
sub %ecx, %eax
|
||||||
ret
|
ret
|
||||||
|
# endif
|
||||||
|
|
||||||
ALIGN (4)
|
ALIGN (4)
|
||||||
L(68bytes):
|
L(68bytes):
|
||||||
@ -1047,13 +1066,20 @@ L(20bytes):
|
|||||||
pxor %xmm1, %xmm2
|
pxor %xmm1, %xmm2
|
||||||
ptest %xmm2, %xmm0
|
ptest %xmm2, %xmm0
|
||||||
jnc L(less16bytes)
|
jnc L(less16bytes)
|
||||||
mov -4(%rdi), %eax
|
|
||||||
mov -4(%rsi), %ecx
|
mov -4(%rsi), %ecx
|
||||||
|
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
|
mov -4(%rdi), %eax
|
||||||
cmp %eax, %ecx
|
cmp %eax, %ecx
|
||||||
|
# else
|
||||||
|
cmp -4(%rdi), %ecx
|
||||||
|
# endif
|
||||||
jne L(diffin4bytes)
|
jne L(diffin4bytes)
|
||||||
xor %eax, %eax
|
xor %eax, %eax
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
|
/* unreal cases for wmemcmp */
|
||||||
ALIGN (4)
|
ALIGN (4)
|
||||||
L(69bytes):
|
L(69bytes):
|
||||||
movdqu -69(%rsi), %xmm1
|
movdqu -69(%rsi), %xmm1
|
||||||
@ -1161,6 +1187,7 @@ L(23bytes):
|
|||||||
jne L(diffin8bytes)
|
jne L(diffin8bytes)
|
||||||
xor %eax, %eax
|
xor %eax, %eax
|
||||||
ret
|
ret
|
||||||
|
# endif
|
||||||
|
|
||||||
ALIGN (4)
|
ALIGN (4)
|
||||||
L(72bytes):
|
L(72bytes):
|
||||||
@ -1191,13 +1218,16 @@ L(24bytes):
|
|||||||
pxor %xmm1, %xmm2
|
pxor %xmm1, %xmm2
|
||||||
ptest %xmm2, %xmm0
|
ptest %xmm2, %xmm0
|
||||||
jnc L(less16bytes)
|
jnc L(less16bytes)
|
||||||
mov -8(%rdi), %rax
|
|
||||||
mov -8(%rsi), %rcx
|
mov -8(%rsi), %rcx
|
||||||
|
mov -8(%rdi), %rax
|
||||||
cmp %rax, %rcx
|
cmp %rax, %rcx
|
||||||
jne L(diffin8bytes)
|
jne L(diffin8bytes)
|
||||||
xor %eax, %eax
|
xor %eax, %eax
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
|
/* unreal cases for wmemcmp */
|
||||||
ALIGN (4)
|
ALIGN (4)
|
||||||
L(73bytes):
|
L(73bytes):
|
||||||
movdqu -73(%rsi), %xmm1
|
movdqu -73(%rsi), %xmm1
|
||||||
@ -1312,7 +1342,7 @@ L(27bytes):
|
|||||||
jne L(diffin4bytes)
|
jne L(diffin4bytes)
|
||||||
xor %eax, %eax
|
xor %eax, %eax
|
||||||
ret
|
ret
|
||||||
|
# endif
|
||||||
ALIGN (4)
|
ALIGN (4)
|
||||||
L(76bytes):
|
L(76bytes):
|
||||||
movdqu -76(%rsi), %xmm1
|
movdqu -76(%rsi), %xmm1
|
||||||
@ -1346,13 +1376,19 @@ L(28bytes):
|
|||||||
mov -12(%rsi), %rcx
|
mov -12(%rsi), %rcx
|
||||||
cmp %rax, %rcx
|
cmp %rax, %rcx
|
||||||
jne L(diffin8bytes)
|
jne L(diffin8bytes)
|
||||||
mov -4(%rdi), %eax
|
|
||||||
mov -4(%rsi), %ecx
|
mov -4(%rsi), %ecx
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
|
mov -4(%rdi), %eax
|
||||||
cmp %eax, %ecx
|
cmp %eax, %ecx
|
||||||
|
# else
|
||||||
|
cmp -4(%rdi), %ecx
|
||||||
|
# endif
|
||||||
jne L(diffin4bytes)
|
jne L(diffin4bytes)
|
||||||
xor %eax, %eax
|
xor %eax, %eax
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
|
/* unreal cases for wmemcmp */
|
||||||
ALIGN (4)
|
ALIGN (4)
|
||||||
L(77bytes):
|
L(77bytes):
|
||||||
movdqu -77(%rsi), %xmm1
|
movdqu -77(%rsi), %xmm1
|
||||||
@ -1474,7 +1510,7 @@ L(31bytes):
|
|||||||
jne L(diffin8bytes)
|
jne L(diffin8bytes)
|
||||||
xor %eax, %eax
|
xor %eax, %eax
|
||||||
ret
|
ret
|
||||||
|
# endif
|
||||||
ALIGN (4)
|
ALIGN (4)
|
||||||
L(64bytes):
|
L(64bytes):
|
||||||
movdqu -64(%rdi), %xmm2
|
movdqu -64(%rdi), %xmm2
|
||||||
@ -1527,7 +1563,17 @@ L(diffin8bytes):
|
|||||||
jne L(diffin4bytes)
|
jne L(diffin4bytes)
|
||||||
shr $32, %rcx
|
shr $32, %rcx
|
||||||
shr $32, %rax
|
shr $32, %rax
|
||||||
|
|
||||||
|
# ifdef USE_AS_WMEMCMP
|
||||||
|
/* for wmemcmp */
|
||||||
|
cmp %eax, %ecx
|
||||||
|
jne L(diffin4bytes)
|
||||||
|
xor %eax, %eax
|
||||||
|
ret
|
||||||
|
# endif
|
||||||
|
|
||||||
L(diffin4bytes):
|
L(diffin4bytes):
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
cmp %cx, %ax
|
cmp %cx, %ax
|
||||||
jne L(diffin2bytes)
|
jne L(diffin2bytes)
|
||||||
shr $16, %ecx
|
shr $16, %ecx
|
||||||
@ -1546,11 +1592,28 @@ L(end):
|
|||||||
and $0xff, %ecx
|
and $0xff, %ecx
|
||||||
sub %ecx, %eax
|
sub %ecx, %eax
|
||||||
ret
|
ret
|
||||||
|
# else
|
||||||
|
|
||||||
|
/* for wmemcmp */
|
||||||
|
mov $1, %eax
|
||||||
|
jl L(nequal_bigger)
|
||||||
|
neg %eax
|
||||||
|
ret
|
||||||
|
|
||||||
|
ALIGN (4)
|
||||||
|
L(nequal_bigger):
|
||||||
|
ret
|
||||||
|
|
||||||
|
L(unreal_case):
|
||||||
|
xor %eax, %eax
|
||||||
|
ret
|
||||||
|
# endif
|
||||||
|
|
||||||
END (MEMCMP)
|
END (MEMCMP)
|
||||||
|
|
||||||
.section .rodata.sse4.1,"a",@progbits
|
.section .rodata.sse4.1,"a",@progbits
|
||||||
ALIGN (3)
|
ALIGN (3)
|
||||||
|
# ifndef USE_AS_WMEMCMP
|
||||||
L(table_64bytes):
|
L(table_64bytes):
|
||||||
.int JMPTBL (L(0bytes), L(table_64bytes))
|
.int JMPTBL (L(0bytes), L(table_64bytes))
|
||||||
.int JMPTBL (L(1bytes), L(table_64bytes))
|
.int JMPTBL (L(1bytes), L(table_64bytes))
|
||||||
@ -1632,4 +1695,87 @@ L(table_64bytes):
|
|||||||
.int JMPTBL (L(77bytes), L(table_64bytes))
|
.int JMPTBL (L(77bytes), L(table_64bytes))
|
||||||
.int JMPTBL (L(78bytes), L(table_64bytes))
|
.int JMPTBL (L(78bytes), L(table_64bytes))
|
||||||
.int JMPTBL (L(79bytes), L(table_64bytes))
|
.int JMPTBL (L(79bytes), L(table_64bytes))
|
||||||
|
# else
|
||||||
|
L(table_64bytes):
|
||||||
|
.int JMPTBL (L(0bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(4bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(8bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(12bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(16bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(20bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(24bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(28bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(32bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(36bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(40bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(44bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(48bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(52bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(56bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(60bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(64bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(68bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(72bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(76bytes), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
.int JMPTBL (L(unreal_case), L(table_64bytes))
|
||||||
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
1997
sysdeps/x86_64/multiarch/memcmp-ssse3.S
Normal file
1997
sysdeps/x86_64/multiarch/memcmp-ssse3.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
|||||||
/* Multiple versions of memcmp
|
/* Multiple versions of memcmp
|
||||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||||
Contributed by Intel Corporation.
|
Contributed by Intel Corporation.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
@ -29,11 +29,20 @@ ENTRY(memcmp)
|
|||||||
cmpl $0, KIND_OFFSET+__cpu_features(%rip)
|
cmpl $0, KIND_OFFSET+__cpu_features(%rip)
|
||||||
jne 1f
|
jne 1f
|
||||||
call __init_cpu_features
|
call __init_cpu_features
|
||||||
1: leaq __memcmp_sse2(%rip), %rax
|
|
||||||
testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
|
1: testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
|
||||||
jz 2f
|
jnz 2f
|
||||||
|
leaq __memcmp_sse2(%rip), %rax
|
||||||
|
ret
|
||||||
|
|
||||||
|
2: testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
|
||||||
|
jz 3f
|
||||||
leaq __memcmp_sse4_1(%rip), %rax
|
leaq __memcmp_sse4_1(%rip), %rax
|
||||||
2: ret
|
ret
|
||||||
|
|
||||||
|
3: leaq __memcmp_ssse3(%rip), %rax
|
||||||
|
ret
|
||||||
|
|
||||||
END(memcmp)
|
END(memcmp)
|
||||||
|
|
||||||
# undef ENTRY
|
# undef ENTRY
|
||||||
|
5
sysdeps/x86_64/multiarch/wmemcmp-c.c
Normal file
5
sysdeps/x86_64/multiarch/wmemcmp-c.c
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
/* Inside libc, compile the generic C implementation from
   wcsmbs/wmemcmp.c under the name __wmemcmp_sse2 by predefining the
   WMEMCMP macro that file uses as its function name.  Outside libc
   WMEMCMP is left undefined here and the included file picks its own
   default name.  */
#ifndef NOT_IN_libc
|
||||||
|
# define WMEMCMP __wmemcmp_sse2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "wcsmbs/wmemcmp.c"
|
4
sysdeps/x86_64/multiarch/wmemcmp-sse4.S
Normal file
4
sysdeps/x86_64/multiarch/wmemcmp-sse4.S
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
/* Assemble memcmp-sse4.S in its USE_AS_WMEMCMP configuration, with
   the entry point macro MEMCMP renamed to __wmemcmp_sse4_1.  */
#define USE_AS_WMEMCMP 1
|
||||||
|
#define MEMCMP __wmemcmp_sse4_1
|
||||||
|
|
||||||
|
#include "memcmp-sse4.S"
|
4
sysdeps/x86_64/multiarch/wmemcmp-ssse3.S
Normal file
4
sysdeps/x86_64/multiarch/wmemcmp-ssse3.S
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
/* Assemble memcmp-ssse3.S with USE_AS_WMEMCMP set and with its entry
   point macro MEMCMP renamed to __wmemcmp_ssse3.
   NOTE(review): USE_AS_WMEMCMP presumably switches the included file
   to its wide-character (4-byte element) code paths -- confirm against
   memcmp-ssse3.S in this directory.  */
#define USE_AS_WMEMCMP 1
|
||||||
|
#define MEMCMP __wmemcmp_ssse3
|
||||||
|
|
||||||
|
#include "memcmp-ssse3.S"
|
47
sysdeps/x86_64/multiarch/wmemcmp.S
Normal file
47
sysdeps/x86_64/multiarch/wmemcmp.S
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
/* Multiple versions of wmemcmp
|
||||||
|
Copyright (C) 2011 Free Software Foundation, Inc.
|
||||||
|
Contributed by Intel Corporation.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, write to the Free
|
||||||
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||||
|
02111-1307 USA. */
|
||||||
|
|
||||||
|
#include <sysdep.h>
|
||||||
|
#include <init-arch.h>
|
||||||
|
|
||||||
|
/* Define multiple versions only for the definition in libc. */
|
||||||
|
#ifndef NOT_IN_libc
|
||||||
|
.text
|
||||||
|
/* Indirect-function entry for wmemcmp (x86-64).  The symbol is typed
   @gnu_indirect_function; this code leaves in %rax the address of the
   implementation to use:
     __wmemcmp_sse2    when the SSSE3 bit is clear,
     __wmemcmp_sse4_1  when both SSSE3 and SSE4_1 are set,
     __wmemcmp_ssse3   when SSSE3 is set but SSE4_1 is clear.  */
ENTRY(wmemcmp)
|
||||||
|
.type wmemcmp, @gnu_indirect_function
|
||||||
|
/* Call __init_cpu_features only while the field at KIND_OFFSET in
   __cpu_features is still zero (presumably meaning "not yet
   initialized" -- confirm in init-arch.h).  */
cmpl $0, KIND_OFFSET+__cpu_features(%rip)
|
||||||
|
jne 1f
|
||||||
|
call __init_cpu_features
|
||||||
|
|
||||||
|
1: testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
|
||||||
|
/* SSSE3 present: go on to the SSE4_1 test at 2.  */
jnz 2f
|
||||||
|
/* No SSSE3: use the version built from the generic C code.  */
leaq __wmemcmp_sse2(%rip), %rax
|
||||||
|
ret
|
||||||
|
|
||||||
|
2: testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
|
||||||
|
/* SSSE3 without SSE4_1: fall back to the SSSE3 version at 3.  */
jz 3f
|
||||||
|
leaq __wmemcmp_sse4_1(%rip), %rax
|
||||||
|
ret
|
||||||
|
|
||||||
|
3: leaq __wmemcmp_ssse3(%rip), %rax
|
||||||
|
ret
|
||||||
|
|
||||||
|
END(wmemcmp)
|
||||||
|
#endif
|
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
|
/* Copyright (C) 1996, 1997, 2011 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
|
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
|
||||||
|
|
||||||
@ -19,9 +19,12 @@
|
|||||||
|
|
||||||
#include <wchar.h>
|
#include <wchar.h>
|
||||||
|
|
||||||
|
/* Name under which the implementation below is compiled.  Multiarch
   wrappers predefine WMEMCMP (for example to __wmemcmp_sse2 or
   __wmemcmp_ia32) before including this file; when nothing is
   predefined the function must be the public wmemcmp.  The fallback
   therefore has to define WMEMCMP itself: defining `wmemcmp' as an
   empty macro would leave WMEMCMP unexpanded and emit a function
   literally named WMEMCMP.  */
#ifndef WMEMCMP
# define WMEMCMP wmemcmp
#endif
|
||||||
|
|
||||||
int
|
int
|
||||||
wmemcmp (s1, s2, n)
|
WMEMCMP (s1, s2, n)
|
||||||
const wchar_t *s1;
|
const wchar_t *s1;
|
||||||
const wchar_t *s2;
|
const wchar_t *s2;
|
||||||
size_t n;
|
size_t n;
|
||||||
@ -34,19 +37,19 @@ wmemcmp (s1, s2, n)
|
|||||||
c1 = (wint_t) s1[0];
|
c1 = (wint_t) s1[0];
|
||||||
c2 = (wint_t) s2[0];
|
c2 = (wint_t) s2[0];
|
||||||
if (c1 - c2 != 0)
|
if (c1 - c2 != 0)
|
||||||
return c1 - c2;
|
return c1 > c2 ? 1 : -1;
|
||||||
c1 = (wint_t) s1[1];
|
c1 = (wint_t) s1[1];
|
||||||
c2 = (wint_t) s2[1];
|
c2 = (wint_t) s2[1];
|
||||||
if (c1 - c2 != 0)
|
if (c1 - c2 != 0)
|
||||||
return c1 - c2;
|
return c1 > c2 ? 1 : -1;
|
||||||
c1 = (wint_t) s1[2];
|
c1 = (wint_t) s1[2];
|
||||||
c2 = (wint_t) s2[2];
|
c2 = (wint_t) s2[2];
|
||||||
if (c1 - c2 != 0)
|
if (c1 - c2 != 0)
|
||||||
return c1 - c2;
|
return c1 > c2 ? 1 : -1;
|
||||||
c1 = (wint_t) s1[3];
|
c1 = (wint_t) s1[3];
|
||||||
c2 = (wint_t) s2[3];
|
c2 = (wint_t) s2[3];
|
||||||
if (c1 - c2 != 0)
|
if (c1 - c2 != 0)
|
||||||
return c1 - c2;
|
return c1 > c2 ? 1 : -1;
|
||||||
s1 += 4;
|
s1 += 4;
|
||||||
s2 += 4;
|
s2 += 4;
|
||||||
n -= 4;
|
n -= 4;
|
||||||
@ -57,7 +60,7 @@ wmemcmp (s1, s2, n)
|
|||||||
c1 = (wint_t) s1[0];
|
c1 = (wint_t) s1[0];
|
||||||
c2 = (wint_t) s2[0];
|
c2 = (wint_t) s2[0];
|
||||||
if (c1 - c2 != 0)
|
if (c1 - c2 != 0)
|
||||||
return c1 - c2;
|
return c1 > c2 ? 1 : -1;
|
||||||
++s1;
|
++s1;
|
||||||
++s2;
|
++s2;
|
||||||
--n;
|
--n;
|
||||||
@ -67,7 +70,7 @@ wmemcmp (s1, s2, n)
|
|||||||
c1 = (wint_t) s1[0];
|
c1 = (wint_t) s1[0];
|
||||||
c2 = (wint_t) s2[0];
|
c2 = (wint_t) s2[0];
|
||||||
if (c1 - c2 != 0)
|
if (c1 - c2 != 0)
|
||||||
return c1 - c2;
|
return c1 > c2 ? 1 : -1;
|
||||||
++s1;
|
++s1;
|
||||||
++s2;
|
++s2;
|
||||||
--n;
|
--n;
|
||||||
@ -77,7 +80,7 @@ wmemcmp (s1, s2, n)
|
|||||||
c1 = (wint_t) s1[0];
|
c1 = (wint_t) s1[0];
|
||||||
c2 = (wint_t) s2[0];
|
c2 = (wint_t) s2[0];
|
||||||
if (c1 - c2 != 0)
|
if (c1 - c2 != 0)
|
||||||
return c1 - c2;
|
return c1 > c2 ? 1 : -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user