mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-10 07:10:06 +00:00
Implement optimized strcasecmp for x86-64.
This commit is contained in:
parent
fe36dd025e
commit
42e08a5438
11
ChangeLog
11
ChangeLog
@ -1,5 +1,16 @@
|
|||||||
2010-07-30 Ulrich Drepper <drepper@redhat.com>
|
2010-07-30 Ulrich Drepper <drepper@redhat.com>
|
||||||
|
|
||||||
|
* string/Makefile (strop-tests): Add strcasecmp.
|
||||||
|
* sysdeps/x86_64/Makefile [subdir=string] (sysdep_routines): Add
|
||||||
|
strcasecmp_l-nonascii.
|
||||||
|
(gen-as-const-headers): Add locale-defines.sym.
|
||||||
|
* sysdeps/x86_64/strcmp.S: Add support for strcasecmp implementation.
|
||||||
|
* sysdeps/x86_64/strcasecmp.S: New file.
|
||||||
|
* sysdeps/x86_64/strcasecmp_l.S: New file.
|
||||||
|
* sysdeps/x86_64/strcasecmp_l-nonascii.c: New file.
|
||||||
|
* sysdeps/x86_64/locale-defines.sym: New file.
|
||||||
|
* string/test-strcasecmp.c: New file.
|
||||||
|
|
||||||
* string/test-strcasestr.c: Test both ends of the range of characters.
|
* string/test-strcasestr.c: Test both ends of the range of characters.
|
||||||
* sysdeps/x86_64/multiarch/strstr.c: Fix UCHIGH definition.
|
* sysdeps/x86_64/multiarch/strstr.c: Fix UCHIGH definition.
|
||||||
|
|
||||||
|
2
NEWS
2
NEWS
@ -13,7 +13,7 @@ Version 2.13
|
|||||||
|
|
||||||
* POWER7 optimizations: memset, memcmp, strncmp
|
* POWER7 optimizations: memset, memcmp, strncmp
|
||||||
|
|
||||||
* New optimized string functions for x86-64: strnlen
|
* New optimized string functions for x86-64: strnlen, strcasecmp
|
||||||
Implemented by Ulrich Drepper.
|
Implemented by Ulrich Drepper.
|
||||||
|
|
||||||
Version 2.12
|
Version 2.12
|
||||||
|
@ -49,7 +49,7 @@ o-objects.ob := memcpy.o memset.o memchr.o
|
|||||||
strop-tests := memchr memcmp memcpy memmove mempcpy memset memccpy \
|
strop-tests := memchr memcmp memcpy memmove mempcpy memset memccpy \
|
||||||
stpcpy stpncpy strcat strchr strcmp strcpy strcspn \
|
stpcpy stpncpy strcat strchr strcmp strcpy strcspn \
|
||||||
strlen strncmp strncpy strpbrk strrchr strspn memmem \
|
strlen strncmp strncpy strpbrk strrchr strspn memmem \
|
||||||
strstr strcasestr strnlen
|
strstr strcasestr strnlen strcasecmp
|
||||||
tests := tester inl-tester noinl-tester testcopy test-ffs \
|
tests := tester inl-tester noinl-tester testcopy test-ffs \
|
||||||
tst-strlen stratcliff tst-svc tst-inlcall \
|
tst-strlen stratcliff tst-svc tst-inlcall \
|
||||||
bug-strncat1 bug-strspn1 bug-strpbrk1 tst-bswap \
|
bug-strncat1 bug-strspn1 bug-strpbrk1 tst-bswap \
|
||||||
|
276
string/test-strcasecmp.c
Normal file
276
string/test-strcasecmp.c
Normal file
@ -0,0 +1,276 @@
|
|||||||
|
/* Test and measure strcasecmp functions.
|
||||||
|
Copyright (C) 1999, 2002, 2003, 2005, 2010 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
Written by Jakub Jelinek <jakub@redhat.com>, 1999.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, write to the Free
|
||||||
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||||
|
02111-1307 USA. */
|
||||||
|
|
||||||
|
#include <ctype.h>
|
||||||
|
#define TEST_MAIN
|
||||||
|
#include "test-string.h"
|
||||||
|
|
||||||
|
typedef int (*proto_t) (const char *, const char *);
|
||||||
|
static int simple_strcasecmp (const char *, const char *);
|
||||||
|
static int stupid_strcasecmp (const char *, const char *);
|
||||||
|
|
||||||
|
IMPL (stupid_strcasecmp, 0)
|
||||||
|
IMPL (simple_strcasecmp, 0)
|
||||||
|
IMPL (strcasecmp, 1)
|
||||||
|
|
||||||
|
/* Reference implementation: walk S1 and S2 in lock step, comparing the
   lowercased bytes.  Returns the difference of the first pair of
   lowercased bytes that differ, or zero if both strings end without a
   difference.  */
static int
simple_strcasecmp (const char *s1, const char *s2)
{
  int ret;

  /* Convert each byte to unsigned char before handing it to tolower:
     passing a negative plain char to a <ctype.h> function is
     undefined, and the tests feed bytes >= 0x80.  */
  while ((ret = ((unsigned char) tolower ((unsigned char) *s1)
                 - (unsigned char) tolower ((unsigned char) *s2))) == 0
         && *s1++)
    ++s2;
  return ret;
}
|
||||||
|
|
||||||
|
/* Deliberately naive reference implementation: measure both strings
   first, then compare the lowercased bytes over the shorter length
   including its terminating NUL.  Returns the difference of the first
   pair of lowercased bytes that differ, or zero if none do.  */
static int
stupid_strcasecmp (const char *s1, const char *s2)
{
  size_t ns1 = strlen (s1) + 1, ns2 = strlen (s2) + 1;
  size_t n = ns1 < ns2 ? ns1 : ns2;
  int ret = 0;

  while (n--)
    {
      /* Cast to unsigned char before calling tolower; a negative
         plain char argument is undefined for <ctype.h> functions.  */
      if ((ret = ((unsigned char) tolower ((unsigned char) *s1)
                  - (unsigned char) tolower ((unsigned char) *s2))) != 0)
        break;
      ++s1;
      ++s2;
    }
  return ret;
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
do_one_test (impl_t *impl, const char *s1, const char *s2, int exp_result)
|
||||||
|
{
|
||||||
|
int result = CALL (impl, s1, s2);
|
||||||
|
if ((exp_result == 0 && result != 0)
|
||||||
|
|| (exp_result < 0 && result >= 0)
|
||||||
|
|| (exp_result > 0 && result <= 0))
|
||||||
|
{
|
||||||
|
error (0, 0, "Wrong result in function %s %d %d", impl->name,
|
||||||
|
result, exp_result);
|
||||||
|
ret = 1;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (HP_TIMING_AVAIL)
|
||||||
|
{
|
||||||
|
hp_timing_t start __attribute ((unused));
|
||||||
|
hp_timing_t stop __attribute ((unused));
|
||||||
|
hp_timing_t best_time = ~ (hp_timing_t) 0;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
for (i = 0; i < 32; ++i)
|
||||||
|
{
|
||||||
|
HP_TIMING_NOW (start);
|
||||||
|
CALL (impl, s1, s2);
|
||||||
|
HP_TIMING_NOW (stop);
|
||||||
|
HP_TIMING_BEST (best_time, start, stop);
|
||||||
|
}
|
||||||
|
|
||||||
|
printf ("\t%zd", (size_t) best_time);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
do_test (size_t align1, size_t align2, size_t len, int max_char,
|
||||||
|
int exp_result)
|
||||||
|
{
|
||||||
|
size_t i;
|
||||||
|
char *s1, *s2;
|
||||||
|
|
||||||
|
if (len == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
align1 &= 7;
|
||||||
|
if (align1 + len + 1 >= page_size)
|
||||||
|
return;
|
||||||
|
|
||||||
|
align2 &= 7;
|
||||||
|
if (align2 + len + 1 >= page_size)
|
||||||
|
return;
|
||||||
|
|
||||||
|
s1 = (char *) (buf1 + align1);
|
||||||
|
s2 = (char *) (buf2 + align2);
|
||||||
|
|
||||||
|
for (i = 0; i < len; i++)
|
||||||
|
{
|
||||||
|
s1[i] = toupper (1 + 23 * i % max_char);
|
||||||
|
s2[i] = tolower (s1[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
s1[len] = s2[len] = 0;
|
||||||
|
s1[len + 1] = 23;
|
||||||
|
s2[len + 1] = 24 + exp_result;
|
||||||
|
if ((s2[len - 1] == 'z' && exp_result == -1)
|
||||||
|
|| (s2[len - 1] == 'a' && exp_result == 1))
|
||||||
|
s1[len - 1] += exp_result;
|
||||||
|
else
|
||||||
|
s2[len - 1] -= exp_result;
|
||||||
|
|
||||||
|
if (HP_TIMING_AVAIL)
|
||||||
|
printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
|
||||||
|
|
||||||
|
FOR_EACH_IMPL (impl, 0)
|
||||||
|
do_one_test (impl, s1, s2, exp_result);
|
||||||
|
|
||||||
|
if (HP_TIMING_AVAIL)
|
||||||
|
putchar ('\n');
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
do_random_tests (void)
|
||||||
|
{
|
||||||
|
size_t i, j, n, align1, align2, pos, len1, len2;
|
||||||
|
int result;
|
||||||
|
long r;
|
||||||
|
unsigned char *p1 = buf1 + page_size - 512;
|
||||||
|
unsigned char *p2 = buf2 + page_size - 512;
|
||||||
|
|
||||||
|
for (n = 0; n < ITERATIONS; n++)
|
||||||
|
{
|
||||||
|
align1 = random () & 31;
|
||||||
|
if (random () & 1)
|
||||||
|
align2 = random () & 31;
|
||||||
|
else
|
||||||
|
align2 = align1 + (random () & 24);
|
||||||
|
pos = random () & 511;
|
||||||
|
j = align1 > align2 ? align1 : align2;
|
||||||
|
if (pos + j >= 511)
|
||||||
|
pos = 510 - j - (random () & 7);
|
||||||
|
len1 = random () & 511;
|
||||||
|
if (pos >= len1 && (random () & 1))
|
||||||
|
len1 = pos + (random () & 7);
|
||||||
|
if (len1 + j >= 512)
|
||||||
|
len1 = 511 - j - (random () & 7);
|
||||||
|
if (pos >= len1)
|
||||||
|
len2 = len1;
|
||||||
|
else
|
||||||
|
len2 = len1 + (len1 != 511 - j ? random () % (511 - j - len1) : 0);
|
||||||
|
j = (pos > len2 ? pos : len2) + align1 + 64;
|
||||||
|
if (j > 512)
|
||||||
|
j = 512;
|
||||||
|
for (i = 0; i < j; ++i)
|
||||||
|
{
|
||||||
|
p1[i] = tolower (random () & 255);
|
||||||
|
if (i < len1 + align1 && !p1[i])
|
||||||
|
{
|
||||||
|
p1[i] = tolower (random () & 255);
|
||||||
|
if (!p1[i])
|
||||||
|
p1[i] = tolower (1 + (random () & 127));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (i = 0; i < j; ++i)
|
||||||
|
{
|
||||||
|
p2[i] = toupper (random () & 255);
|
||||||
|
if (i < len2 + align2 && !p2[i])
|
||||||
|
{
|
||||||
|
p2[i] = toupper (random () & 255);
|
||||||
|
if (!p2[i])
|
||||||
|
toupper (p2[i] = 1 + (random () & 127));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result = 0;
|
||||||
|
memcpy (p2 + align2, p1 + align1, pos);
|
||||||
|
if (pos < len1)
|
||||||
|
{
|
||||||
|
if (tolower (p2[align2 + pos]) == p1[align1 + pos])
|
||||||
|
{
|
||||||
|
p2[align2 + pos] = toupper (random () & 255);
|
||||||
|
if (tolower (p2[align2 + pos]) == p1[align1 + pos])
|
||||||
|
p2[align2 + pos] = toupper (p1[align1 + pos]
|
||||||
|
+ 3 + (random () & 127));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (p1[align1 + pos] < tolower (p2[align2 + pos]))
|
||||||
|
result = -1;
|
||||||
|
else
|
||||||
|
result = 1;
|
||||||
|
}
|
||||||
|
p1[len1 + align1] = 0;
|
||||||
|
p2[len2 + align2] = 0;
|
||||||
|
|
||||||
|
FOR_EACH_IMPL (impl, 1)
|
||||||
|
{
|
||||||
|
r = CALL (impl, (char *) (p1 + align1), (char *) (p2 + align2));
|
||||||
|
/* Test whether on 64-bit architectures where ABI requires
|
||||||
|
callee to promote has the promotion been done. */
|
||||||
|
asm ("" : "=g" (r) : "0" (r));
|
||||||
|
if ((r == 0 && result)
|
||||||
|
|| (r < 0 && result >= 0)
|
||||||
|
|| (r > 0 && result <= 0))
|
||||||
|
{
|
||||||
|
error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd, %zd, %zd) %ld != %d, p1 %p p2 %p",
|
||||||
|
n, impl->name, align1, align2, len1, len2, pos, r, result, p1, p2);
|
||||||
|
ret = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
test_main (void)
|
||||||
|
{
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
test_init ();
|
||||||
|
|
||||||
|
printf ("%23s", "");
|
||||||
|
FOR_EACH_IMPL (impl, 0)
|
||||||
|
printf ("\t%s", impl->name);
|
||||||
|
putchar ('\n');
|
||||||
|
|
||||||
|
for (i = 1; i < 16; ++i)
|
||||||
|
{
|
||||||
|
do_test (i, i, i, 127, 0);
|
||||||
|
do_test (i, i, i, 127, 1);
|
||||||
|
do_test (i, i, i, 127, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 1; i < 10; ++i)
|
||||||
|
{
|
||||||
|
do_test (0, 0, 2 << i, 127, 0);
|
||||||
|
do_test (0, 0, 2 << i, 254, 0);
|
||||||
|
do_test (0, 0, 2 << i, 127, 1);
|
||||||
|
do_test (0, 0, 2 << i, 254, 1);
|
||||||
|
do_test (0, 0, 2 << i, 127, -1);
|
||||||
|
do_test (0, 0, 2 << i, 254, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 1; i < 8; ++i)
|
||||||
|
{
|
||||||
|
do_test (i, 2 * i, 8 << i, 127, 0);
|
||||||
|
do_test (2 * i, i, 8 << i, 254, 0);
|
||||||
|
do_test (i, 2 * i, 8 << i, 127, 1);
|
||||||
|
do_test (2 * i, i, 8 << i, 254, 1);
|
||||||
|
do_test (i, 2 * i, 8 << i, 127, -1);
|
||||||
|
do_test (2 * i, i, 8 << i, 254, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
do_random_tests ();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
#include "../test-skeleton.c"
|
@ -12,7 +12,8 @@ sysdep_routines += _mcount
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(subdir),string)
|
ifeq ($(subdir),string)
|
||||||
sysdep_routines += cacheinfo
|
sysdep_routines += cacheinfo strcasecmp_l-nonascii
|
||||||
|
gen-as-const-headers += locale-defines.sym
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(subdir),elf)
|
ifeq ($(subdir),elf)
|
||||||
|
11
sysdeps/x86_64/locale-defines.sym
Normal file
11
sysdeps/x86_64/locale-defines.sym
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
#include <locale/localeinfo.h>
|
||||||
|
#include <langinfo.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
--
|
||||||
|
|
||||||
|
LOCALE_T___LOCALES offsetof (struct __locale_struct, __locales)
|
||||||
|
LC_CTYPE
|
||||||
|
_NL_CTYPE_NONASCII_CASE
|
||||||
|
LOCALE_DATA_VALUES offsetof (struct __locale_data, values)
|
||||||
|
SIZEOF_VALUES sizeof (((struct __locale_data *) 0)->values[0])
|
1
sysdeps/x86_64/strcasecmp.S
Normal file
1
sysdeps/x86_64/strcasecmp.S
Normal file
@ -0,0 +1 @@
|
|||||||
|
/* In strcasecmp_l.S. */
|
5
sysdeps/x86_64/strcasecmp_l-nonascii.c
Normal file
5
sysdeps/x86_64/strcasecmp_l-nonascii.c
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#define __strcasecmp_l __strcasecmp_l_nonascii
|
||||||
|
#define USE_IN_EXTENDED_LOCALE_MODEL 1
|
||||||
|
#include <string/strcasecmp.c>
|
6
sysdeps/x86_64/strcasecmp_l.S
Normal file
6
sysdeps/x86_64/strcasecmp_l.S
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#define STRCMP __strcasecmp_l
|
||||||
|
#define USE_AS_STRCASECMP_L
|
||||||
|
#include "strcmp.S"
|
||||||
|
|
||||||
|
weak_alias (__strcasecmp_l, strcasecmp_l)
|
||||||
|
libc_hidden_def (strcasecmp_l)
|
@ -51,6 +51,15 @@
|
|||||||
je LABEL(strcmp_exitz); \
|
je LABEL(strcmp_exitz); \
|
||||||
mov %r9, %r11
|
mov %r9, %r11
|
||||||
|
|
||||||
|
#elif defined USE_AS_STRCASECMP_L
|
||||||
|
# include "locale-defines.h"
|
||||||
|
|
||||||
|
/* No support for strcasecmp outside libc so far since it is not needed.
   The macro tested must be NOT_IN_libc (the one this file tests before
   its simple non-SSE version); a misspelled name would make this
   guard a silent no-op.  */
# ifdef NOT_IN_libc
#  error "strcasecmp_l not implemented so far"
# endif
|
||||||
|
|
||||||
|
# define UPDATE_STRNCMP_COUNTER
|
||||||
#else
|
#else
|
||||||
# define UPDATE_STRNCMP_COUNTER
|
# define UPDATE_STRNCMP_COUNTER
|
||||||
# ifndef STRCMP
|
# ifndef STRCMP
|
||||||
@ -64,6 +73,19 @@
|
|||||||
.section .text.ssse3,"ax",@progbits
|
.section .text.ssse3,"ax",@progbits
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef USE_AS_STRCASECMP_L
|
||||||
|
ENTRY (__strcasecmp)
|
||||||
|
movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
|
||||||
|
movq %fs:(%rax),%rdx
|
||||||
|
|
||||||
|
/* 5-byte NOP. */
|
||||||
|
.byte 0x0f,0x1f,0x44,0x00,0x00
|
||||||
|
END (__strcasecmp)
|
||||||
|
weak_alias (__strcasecmp, strcasecmp)
|
||||||
|
libc_hidden_def (__strcasecmp)
|
||||||
|
/* FALLTHROUGH to strcasecmp_l. */
|
||||||
|
#endif
|
||||||
|
|
||||||
ENTRY (BP_SYM (STRCMP))
|
ENTRY (BP_SYM (STRCMP))
|
||||||
#ifdef NOT_IN_libc
|
#ifdef NOT_IN_libc
|
||||||
/* Simple version since we can't use SSE registers in ld.so. */
|
/* Simple version since we can't use SSE registers in ld.so. */
|
||||||
@ -84,6 +106,18 @@ L(neq): movl $1, %eax
|
|||||||
ret
|
ret
|
||||||
END (BP_SYM (STRCMP))
|
END (BP_SYM (STRCMP))
|
||||||
#else /* NOT_IN_libc */
|
#else /* NOT_IN_libc */
|
||||||
|
# ifdef USE_AS_STRCASECMP_L
	/* We have to fall back on the C implementation for locales
	   with encodings not matching ASCII for single bytes.
	   %rdx holds the locale pointer; load its LC_CTYPE data.  */
#  if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
	movq	LOCALE_T___LOCALES+LC_CTYPE*8(%rdx), %rax
#  else
	movq	(%rdx), %rax
#  endif
	/* Test the _NL_CTYPE_NONASCII_CASE value with mask 1.  A mask
	   of 0 would always leave ZF set, so the branch to the C
	   fallback could never be taken.  */
	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
	jne	__strcasecmp_l_nonascii
# endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This implementation uses SSE to compare up to 16 bytes at a time.
|
* This implementation uses SSE to compare up to 16 bytes at a time.
|
||||||
*/
|
*/
|
||||||
@ -99,6 +133,26 @@ END (BP_SYM (STRCMP))
|
|||||||
/* Use 64bit AND here to avoid long NOP padding. */
|
/* Use 64bit AND here to avoid long NOP padding. */
|
||||||
and $0x3f, %rcx /* rsi alignment in cache line */
|
and $0x3f, %rcx /* rsi alignment in cache line */
|
||||||
and $0x3f, %rax /* rdi alignment in cache line */
|
and $0x3f, %rax /* rdi alignment in cache line */
|
||||||
|
# ifdef USE_AS_STRCASECMP_L
|
||||||
|
.section .rodata.cst16,"aM",@progbits,16
|
||||||
|
.align 16
|
||||||
|
.Lbelowupper:
|
||||||
|
.quad 0x4040404040404040
|
||||||
|
.quad 0x4040404040404040
|
||||||
|
.Ltopupper:
|
||||||
|
.quad 0x5b5b5b5b5b5b5b5b
|
||||||
|
.quad 0x5b5b5b5b5b5b5b5b
|
||||||
|
.Ltouppermask:
|
||||||
|
.quad 0x2020202020202020
|
||||||
|
.quad 0x2020202020202020
|
||||||
|
.previous
|
||||||
|
movdqa .Lbelowupper(%rip), %xmm5
|
||||||
|
# define UCLOW_reg %xmm5
|
||||||
|
movdqa .Ltopupper(%rip), %xmm6
|
||||||
|
# define UCHIGH_reg %xmm6
|
||||||
|
movdqa .Ltouppermask(%rip), %xmm7
|
||||||
|
# define LCQWORD_reg %xmm7
|
||||||
|
# endif
|
||||||
cmp $0x30, %ecx
|
cmp $0x30, %ecx
|
||||||
ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */
|
ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */
|
||||||
cmp $0x30, %eax
|
cmp $0x30, %eax
|
||||||
@ -107,6 +161,26 @@ END (BP_SYM (STRCMP))
|
|||||||
movlpd (%rsi), %xmm2
|
movlpd (%rsi), %xmm2
|
||||||
movhpd 8(%rdi), %xmm1
|
movhpd 8(%rdi), %xmm1
|
||||||
movhpd 8(%rsi), %xmm2
|
movhpd 8(%rsi), %xmm2
|
||||||
|
# ifdef USE_AS_STRCASECMP_L
|
||||||
|
# define TOLOWER(reg1, reg2) \
|
||||||
|
movdqa reg1, %xmm8; \
|
||||||
|
movdqa UCHIGH_reg, %xmm9; \
|
||||||
|
movdqa reg2, %xmm10; \
|
||||||
|
movdqa UCHIGH_reg, %xmm11; \
|
||||||
|
pcmpgtb UCLOW_reg, %xmm8; \
|
||||||
|
pcmpgtb reg1, %xmm9; \
|
||||||
|
pcmpgtb UCLOW_reg, %xmm10; \
|
||||||
|
pcmpgtb reg2, %xmm11; \
|
||||||
|
pand %xmm9, %xmm8; \
|
||||||
|
pand %xmm11, %xmm10; \
|
||||||
|
pand LCQWORD_reg, %xmm8; \
|
||||||
|
pand LCQWORD_reg, %xmm10; \
|
||||||
|
por %xmm8, reg1; \
|
||||||
|
por %xmm10, reg2
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
# else
|
||||||
|
# define TOLOWER(reg1, reg2)
|
||||||
|
# endif
|
||||||
pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
|
pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
|
||||||
pcmpeqb %xmm1, %xmm0 /* Any null chars? */
|
pcmpeqb %xmm1, %xmm0 /* Any null chars? */
|
||||||
pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */
|
pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */
|
||||||
@ -159,7 +233,13 @@ LABEL(ashr_0):
|
|||||||
movdqa (%rsi), %xmm1
|
movdqa (%rsi), %xmm1
|
||||||
pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
|
pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
|
||||||
pcmpeqb %xmm1, %xmm0 /* Any null chars? */
|
pcmpeqb %xmm1, %xmm0 /* Any null chars? */
|
||||||
|
# ifndef USE_AS_STRCASECMP_L
|
||||||
pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
|
pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
|
||||||
|
# else
|
||||||
|
movdqa (%rdi), %xmm2
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */
|
||||||
|
# endif
|
||||||
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
|
psubb %xmm0, %xmm1 /* packed sub of comparison results*/
|
||||||
pmovmskb %xmm1, %r9d
|
pmovmskb %xmm1, %r9d
|
||||||
shr %cl, %edx /* adjust 0xffff for offset */
|
shr %cl, %edx /* adjust 0xffff for offset */
|
||||||
@ -183,6 +263,7 @@ LABEL(ashr_0):
|
|||||||
LABEL(loop_ashr_0):
|
LABEL(loop_ashr_0):
|
||||||
movdqa (%rsi, %rcx), %xmm1
|
movdqa (%rsi, %rcx), %xmm1
|
||||||
movdqa (%rdi, %rcx), %xmm2
|
movdqa (%rdi, %rcx), %xmm2
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -198,6 +279,7 @@ LABEL(loop_ashr_0):
|
|||||||
add $16, %rcx
|
add $16, %rcx
|
||||||
movdqa (%rsi, %rcx), %xmm1
|
movdqa (%rsi, %rcx), %xmm1
|
||||||
movdqa (%rdi, %rcx), %xmm2
|
movdqa (%rdi, %rcx), %xmm2
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -214,7 +296,7 @@ LABEL(loop_ashr_0):
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* The following cases will be handled by ashr_1
|
* The following cases will be handled by ashr_1
|
||||||
* rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
|
* rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
|
||||||
* n(15) n -15 0(15 +(n-15) - n) ashr_1
|
* n(15) n -15 0(15 +(n-15) - n) ashr_1
|
||||||
*/
|
*/
|
||||||
.p2align 4
|
.p2align 4
|
||||||
@ -224,6 +306,7 @@ LABEL(ashr_1):
|
|||||||
movdqa (%rsi), %xmm1
|
movdqa (%rsi), %xmm1
|
||||||
pcmpeqb %xmm1, %xmm0 /* Any null chars? */
|
pcmpeqb %xmm1, %xmm0 /* Any null chars? */
|
||||||
pslldq $15, %xmm2 /* shift first string to align with second */
|
pslldq $15, %xmm2 /* shift first string to align with second */
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
|
pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
|
||||||
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
|
psubb %xmm0, %xmm2 /* packed sub of comparison results*/
|
||||||
pmovmskb %xmm2, %r9d
|
pmovmskb %xmm2, %r9d
|
||||||
@ -263,6 +346,7 @@ LABEL(gobble_ashr_1):
|
|||||||
# else
|
# else
|
||||||
palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -292,6 +376,7 @@ LABEL(gobble_ashr_1):
|
|||||||
# else
|
# else
|
||||||
palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -351,6 +436,7 @@ LABEL(ashr_2):
|
|||||||
movdqa (%rsi), %xmm1
|
movdqa (%rsi), %xmm1
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pslldq $14, %xmm2
|
pslldq $14, %xmm2
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
pcmpeqb %xmm1, %xmm2
|
pcmpeqb %xmm1, %xmm2
|
||||||
psubb %xmm0, %xmm2
|
psubb %xmm0, %xmm2
|
||||||
pmovmskb %xmm2, %r9d
|
pmovmskb %xmm2, %r9d
|
||||||
@ -390,6 +476,7 @@ LABEL(gobble_ashr_2):
|
|||||||
# else
|
# else
|
||||||
palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -420,6 +507,7 @@ LABEL(gobble_ashr_2):
|
|||||||
# else
|
# else
|
||||||
palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -472,6 +560,7 @@ LABEL(ashr_3):
|
|||||||
movdqa (%rsi), %xmm1
|
movdqa (%rsi), %xmm1
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pslldq $13, %xmm2
|
pslldq $13, %xmm2
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
pcmpeqb %xmm1, %xmm2
|
pcmpeqb %xmm1, %xmm2
|
||||||
psubb %xmm0, %xmm2
|
psubb %xmm0, %xmm2
|
||||||
pmovmskb %xmm2, %r9d
|
pmovmskb %xmm2, %r9d
|
||||||
@ -512,6 +601,7 @@ LABEL(gobble_ashr_3):
|
|||||||
# else
|
# else
|
||||||
palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -542,6 +632,7 @@ LABEL(gobble_ashr_3):
|
|||||||
# else
|
# else
|
||||||
palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -594,6 +685,7 @@ LABEL(ashr_4):
|
|||||||
movdqa (%rsi), %xmm1
|
movdqa (%rsi), %xmm1
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pslldq $12, %xmm2
|
pslldq $12, %xmm2
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
pcmpeqb %xmm1, %xmm2
|
pcmpeqb %xmm1, %xmm2
|
||||||
psubb %xmm0, %xmm2
|
psubb %xmm0, %xmm2
|
||||||
pmovmskb %xmm2, %r9d
|
pmovmskb %xmm2, %r9d
|
||||||
@ -634,6 +726,7 @@ LABEL(gobble_ashr_4):
|
|||||||
# else
|
# else
|
||||||
palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -664,6 +757,7 @@ LABEL(gobble_ashr_4):
|
|||||||
# else
|
# else
|
||||||
palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -716,6 +810,7 @@ LABEL(ashr_5):
|
|||||||
movdqa (%rsi), %xmm1
|
movdqa (%rsi), %xmm1
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pslldq $11, %xmm2
|
pslldq $11, %xmm2
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
pcmpeqb %xmm1, %xmm2
|
pcmpeqb %xmm1, %xmm2
|
||||||
psubb %xmm0, %xmm2
|
psubb %xmm0, %xmm2
|
||||||
pmovmskb %xmm2, %r9d
|
pmovmskb %xmm2, %r9d
|
||||||
@ -756,6 +851,7 @@ LABEL(gobble_ashr_5):
|
|||||||
# else
|
# else
|
||||||
palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -786,6 +882,7 @@ LABEL(gobble_ashr_5):
|
|||||||
# else
|
# else
|
||||||
palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -838,6 +935,7 @@ LABEL(ashr_6):
|
|||||||
movdqa (%rsi), %xmm1
|
movdqa (%rsi), %xmm1
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pslldq $10, %xmm2
|
pslldq $10, %xmm2
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
pcmpeqb %xmm1, %xmm2
|
pcmpeqb %xmm1, %xmm2
|
||||||
psubb %xmm0, %xmm2
|
psubb %xmm0, %xmm2
|
||||||
pmovmskb %xmm2, %r9d
|
pmovmskb %xmm2, %r9d
|
||||||
@ -878,6 +976,7 @@ LABEL(gobble_ashr_6):
|
|||||||
# else
|
# else
|
||||||
palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -908,6 +1007,7 @@ LABEL(gobble_ashr_6):
|
|||||||
# else
|
# else
|
||||||
palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -960,6 +1060,7 @@ LABEL(ashr_7):
|
|||||||
movdqa (%rsi), %xmm1
|
movdqa (%rsi), %xmm1
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pslldq $9, %xmm2
|
pslldq $9, %xmm2
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
pcmpeqb %xmm1, %xmm2
|
pcmpeqb %xmm1, %xmm2
|
||||||
psubb %xmm0, %xmm2
|
psubb %xmm0, %xmm2
|
||||||
pmovmskb %xmm2, %r9d
|
pmovmskb %xmm2, %r9d
|
||||||
@ -1000,6 +1101,7 @@ LABEL(gobble_ashr_7):
|
|||||||
# else
|
# else
|
||||||
palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -1030,6 +1132,7 @@ LABEL(gobble_ashr_7):
|
|||||||
# else
|
# else
|
||||||
palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -1082,6 +1185,7 @@ LABEL(ashr_8):
|
|||||||
movdqa (%rsi), %xmm1
|
movdqa (%rsi), %xmm1
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pslldq $8, %xmm2
|
pslldq $8, %xmm2
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
pcmpeqb %xmm1, %xmm2
|
pcmpeqb %xmm1, %xmm2
|
||||||
psubb %xmm0, %xmm2
|
psubb %xmm0, %xmm2
|
||||||
pmovmskb %xmm2, %r9d
|
pmovmskb %xmm2, %r9d
|
||||||
@ -1122,6 +1226,7 @@ LABEL(gobble_ashr_8):
|
|||||||
# else
|
# else
|
||||||
palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -1152,6 +1257,7 @@ LABEL(gobble_ashr_8):
|
|||||||
# else
|
# else
|
||||||
palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -1204,6 +1310,7 @@ LABEL(ashr_9):
|
|||||||
movdqa (%rsi), %xmm1
|
movdqa (%rsi), %xmm1
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pslldq $7, %xmm2
|
pslldq $7, %xmm2
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
pcmpeqb %xmm1, %xmm2
|
pcmpeqb %xmm1, %xmm2
|
||||||
psubb %xmm0, %xmm2
|
psubb %xmm0, %xmm2
|
||||||
pmovmskb %xmm2, %r9d
|
pmovmskb %xmm2, %r9d
|
||||||
@ -1244,6 +1351,7 @@ LABEL(gobble_ashr_9):
|
|||||||
# else
|
# else
|
||||||
palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -1274,6 +1382,7 @@ LABEL(gobble_ashr_9):
|
|||||||
# else
|
# else
|
||||||
palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -1326,6 +1435,7 @@ LABEL(ashr_10):
|
|||||||
movdqa (%rsi), %xmm1
|
movdqa (%rsi), %xmm1
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pslldq $6, %xmm2
|
pslldq $6, %xmm2
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
pcmpeqb %xmm1, %xmm2
|
pcmpeqb %xmm1, %xmm2
|
||||||
psubb %xmm0, %xmm2
|
psubb %xmm0, %xmm2
|
||||||
pmovmskb %xmm2, %r9d
|
pmovmskb %xmm2, %r9d
|
||||||
@ -1366,6 +1476,7 @@ LABEL(gobble_ashr_10):
|
|||||||
# else
|
# else
|
||||||
palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -1396,6 +1507,7 @@ LABEL(gobble_ashr_10):
|
|||||||
# else
|
# else
|
||||||
palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -1448,6 +1560,7 @@ LABEL(ashr_11):
|
|||||||
movdqa (%rsi), %xmm1
|
movdqa (%rsi), %xmm1
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pslldq $5, %xmm2
|
pslldq $5, %xmm2
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
pcmpeqb %xmm1, %xmm2
|
pcmpeqb %xmm1, %xmm2
|
||||||
psubb %xmm0, %xmm2
|
psubb %xmm0, %xmm2
|
||||||
pmovmskb %xmm2, %r9d
|
pmovmskb %xmm2, %r9d
|
||||||
@ -1488,6 +1601,7 @@ LABEL(gobble_ashr_11):
|
|||||||
# else
|
# else
|
||||||
palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -1518,6 +1632,7 @@ LABEL(gobble_ashr_11):
|
|||||||
# else
|
# else
|
||||||
palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -1570,6 +1685,7 @@ LABEL(ashr_12):
|
|||||||
movdqa (%rsi), %xmm1
|
movdqa (%rsi), %xmm1
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pslldq $4, %xmm2
|
pslldq $4, %xmm2
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
pcmpeqb %xmm1, %xmm2
|
pcmpeqb %xmm1, %xmm2
|
||||||
psubb %xmm0, %xmm2
|
psubb %xmm0, %xmm2
|
||||||
pmovmskb %xmm2, %r9d
|
pmovmskb %xmm2, %r9d
|
||||||
@ -1610,6 +1726,7 @@ LABEL(gobble_ashr_12):
|
|||||||
# else
|
# else
|
||||||
palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -1640,6 +1757,7 @@ LABEL(gobble_ashr_12):
|
|||||||
# else
|
# else
|
||||||
palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -1692,6 +1810,7 @@ LABEL(ashr_13):
|
|||||||
movdqa (%rsi), %xmm1
|
movdqa (%rsi), %xmm1
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pslldq $3, %xmm2
|
pslldq $3, %xmm2
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
pcmpeqb %xmm1, %xmm2
|
pcmpeqb %xmm1, %xmm2
|
||||||
psubb %xmm0, %xmm2
|
psubb %xmm0, %xmm2
|
||||||
pmovmskb %xmm2, %r9d
|
pmovmskb %xmm2, %r9d
|
||||||
@ -1732,6 +1851,7 @@ LABEL(gobble_ashr_13):
|
|||||||
# else
|
# else
|
||||||
palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -1762,6 +1882,7 @@ LABEL(gobble_ashr_13):
|
|||||||
# else
|
# else
|
||||||
palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -1814,6 +1935,7 @@ LABEL(ashr_14):
|
|||||||
movdqa (%rsi), %xmm1
|
movdqa (%rsi), %xmm1
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pslldq $2, %xmm2
|
pslldq $2, %xmm2
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
pcmpeqb %xmm1, %xmm2
|
pcmpeqb %xmm1, %xmm2
|
||||||
psubb %xmm0, %xmm2
|
psubb %xmm0, %xmm2
|
||||||
pmovmskb %xmm2, %r9d
|
pmovmskb %xmm2, %r9d
|
||||||
@ -1854,6 +1976,7 @@ LABEL(gobble_ashr_14):
|
|||||||
# else
|
# else
|
||||||
palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -1884,6 +2007,7 @@ LABEL(gobble_ashr_14):
|
|||||||
# else
|
# else
|
||||||
palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -1936,6 +2060,7 @@ LABEL(ashr_15):
|
|||||||
movdqa (%rsi), %xmm1
|
movdqa (%rsi), %xmm1
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pslldq $1, %xmm2
|
pslldq $1, %xmm2
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
pcmpeqb %xmm1, %xmm2
|
pcmpeqb %xmm1, %xmm2
|
||||||
psubb %xmm0, %xmm2
|
psubb %xmm0, %xmm2
|
||||||
pmovmskb %xmm2, %r9d
|
pmovmskb %xmm2, %r9d
|
||||||
@ -1978,6 +2103,7 @@ LABEL(gobble_ashr_15):
|
|||||||
# else
|
# else
|
||||||
palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -2008,6 +2134,7 @@ LABEL(gobble_ashr_15):
|
|||||||
# else
|
# else
|
||||||
palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
|
palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
|
||||||
# endif
|
# endif
|
||||||
|
TOLOWER (%xmm1, %xmm2)
|
||||||
|
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
pcmpeqb %xmm2, %xmm1
|
pcmpeqb %xmm2, %xmm1
|
||||||
@ -2049,6 +2176,7 @@ LABEL(ashr_15_exittail):
|
|||||||
|
|
||||||
.p2align 4
|
.p2align 4
|
||||||
LABEL(aftertail):
|
LABEL(aftertail):
|
||||||
|
TOLOWER (%xmm1, %xmm3)
|
||||||
pcmpeqb %xmm3, %xmm1
|
pcmpeqb %xmm3, %xmm1
|
||||||
psubb %xmm0, %xmm1
|
psubb %xmm0, %xmm1
|
||||||
pmovmskb %xmm1, %edx
|
pmovmskb %xmm1, %edx
|
||||||
@ -2076,6 +2204,12 @@ LABEL(less16bytes):
|
|||||||
movzbl (%rsi, %rdx), %ecx
|
movzbl (%rsi, %rdx), %ecx
|
||||||
movzbl (%rdi, %rdx), %eax
|
movzbl (%rdi, %rdx), %eax
|
||||||
|
|
||||||
|
# ifdef USE_AS_STRCASECMP_L
|
||||||
|
leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
|
||||||
|
movl (%rdx,%rcx,4), %ecx
|
||||||
|
movl (%rdx,%rax,4), %eax
|
||||||
|
# endif
|
||||||
|
|
||||||
sub %ecx, %eax
|
sub %ecx, %eax
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user