mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-21 12:30:06 +00:00
Optimized strcasecmp for Power7
This commit is contained in:
parent
36b1a74da5
commit
f0b264f174
13
ChangeLog
13
ChangeLog
@ -1,3 +1,16 @@
|
||||
2011-11-22 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
|
||||
|
||||
* sysdeps/powerpc/Makefile: Added locale-defines.sym generation.
|
||||
* sysdeps/powerpc/locale-defines.sym: Locale definitions for strcasecmp
|
||||
optimized code.
|
||||
* sysdeps/powerpc/powerpc32/power7/Makefile: New file.
|
||||
* sysdeps/powerpc/powerpc32/power7/strcasecmp.S: New file.
|
||||
* sysdeps/powerpc/powerpc32/power7/strcasecmp_l.S: New file.
|
||||
* sysdeps/powerpc/powerpc64/power7/Makefile: Added unroll-loop option
|
||||
for strncasecmp/strncasecmp_l compilation.
|
||||
* sysdeps/powerpc/powerpc64/power7/strcasecmp.S: New file.
|
||||
* sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S: New file.
|
||||
|
||||
2011-12-08 Marek Polacek <mpolacek@redhat.com>
|
||||
|
||||
[BZ #13484]
|
||||
|
3
NEWS
3
NEWS
@ -62,7 +62,8 @@ Version 2.15
|
||||
* Optimized strcasecmp and strncasecmp for SSSE3 and SSE4.2 on x86-32.
|
||||
Implemented by Ulrich Drepper.
|
||||
|
||||
* Optimized nearbyint for PPC. Implemented by Adhemerval Zanella.
|
||||
* Optimized nearbyint and strcasecmp for PPC.
|
||||
Implemented by Adhemerval Zanella.
|
||||
|
||||
Version 2.14
|
||||
|
||||
|
@ -23,4 +23,6 @@ endif
|
||||
ifeq ($(subdir),csu)
|
||||
# get offset to rtld_global._dl_hwcap
|
||||
gen-as-const-headers += rtld-global-offsets.sym
|
||||
# get offset to __locale_struct.__ctype_tolower
|
||||
gen-as-const-headers += locale-defines.sym
|
||||
endif
|
||||
|
5
sysdeps/powerpc/locale-defines.sym
Normal file
5
sysdeps/powerpc/locale-defines.sym
Normal file
@ -0,0 +1,5 @@
|
||||
#include <locale/localeinfo.h>
|
||||
|
||||
--
|
||||
|
||||
LOCALE_CTYPE_TOLOWER offsetof (struct __locale_struct, __ctype_tolower)
|
4
sysdeps/powerpc/powerpc32/power7/Makefile
Normal file
4
sysdeps/powerpc/powerpc32/power7/Makefile
Normal file
@ -0,0 +1,4 @@
|
||||
ifeq ($(subdir),string)
|
||||
CFLAGS-strncase.c += -funroll-loops
|
||||
CFLAGS-strncase_l.c += -funroll-loops
|
||||
endif
|
132
sysdeps/powerpc/powerpc32/power7/strcasecmp.S
Normal file
132
sysdeps/powerpc/powerpc32/power7/strcasecmp.S
Normal file
@ -0,0 +1,132 @@
|
||||
/* Optimized strcasecmp implementation for PowerPC32.
|
||||
Copyright (C) 2011 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <bp-sym.h>
|
||||
#include <bp-asm.h>
|
||||
#include <locale-defines.h>
|
||||
|
||||
/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] )
|
||||
|
||||
or if defined USE_IN_EXTENDED_LOCALE_MODEL:
|
||||
|
||||
int [r3] strcasecmp_l (const char *s1 [r3], const char *s2 [r4],
|
||||
__locale_t loc [r5]) */
|
||||
|
||||
#ifndef STRCMP
|
||||
# define __STRCMP __strcasecmp
|
||||
# define STRCMP strcasecmp
|
||||
#endif
|
||||
|
||||
/* Case-insensitive comparison of two NUL-terminated strings for 32-bit
   PowerPC (POWER7).

   In:   r3 = s1, r4 = s2; r5 = locale when USE_IN_EXTENDED_LOCALE_MODEL
	 is defined.
   Out:  r3 = (tolower-table word for the s1 byte) minus (tolower-table
	 word for the s2 byte) at the first position where the two words
	 differ, or 0 when s1 ends before any difference is found or when
	 s1 and s2 are the same pointer.
   Uses: r5-r8, r11, r12, cr0, cr7; additionally r0 and r9 while the
	 default locale is being located.  LR is saved in r0 and restored
	 around the GOT address computation in the SHARED case.  No stack
	 is used.  */
ENTRY (BP_SYM (__STRCMP))

#define rRTN	r3	/* Return value */
#define rSTR1	r5	/* 1st string */
#define rSTR2	r4	/* 2nd string */
#define rLOCARG	r5	/* 3rd argument: locale_t */
#define rCHAR1	r6	/* Byte read from 1st string */
#define rCHAR2	r7	/* Byte read from 2nd string */
#define rADDR1	r8	/* Table offset of tolower(rCHAR1) */
#define rADDR2	r12	/* Table offset of tolower(rCHAR2) */
#define rLWR1	r8	/* Word tolower(rCHAR1) */
#define rLWR2	r12	/* Word tolower(rCHAR2) */
#define rTMP	r0
#define rGOT	r9	/* Address of the Global Offset Table */
#define rLOC	r11	/* Locale address, then its tolower table */

	cmpw	cr7, r3, r4		/* Same pointer?  Tested by beqlr below.  */
#ifndef USE_IN_EXTENDED_LOCALE_MODEL
# ifdef SHARED
	mflr	rTMP			/* bcl overwrites LR; keep the caller's.  */
	bcl	20,31,.L1
.L1:	mflr	rGOT			/* rGOT = address of .L1 */
	addis	rGOT, rGOT, _GLOBAL_OFFSET_TABLE_-.L1@ha
	addi	rGOT, rGOT, _GLOBAL_OFFSET_TABLE_-.L1@l
	lwz	rLOC, __libc_tsd_LOCALE@got@tprel(rGOT)
	add	rLOC, rLOC, __libc_tsd_LOCALE@tls
	lwz	rLOC, 0(rLOC)		/* rLOC = value of __libc_tsd_LOCALE */
	mtlr	rTMP
# else
	/* Build the GOT address in rGOT itself.  r0 must not be used as the
	   base register of 'la' (it reads as the constant 0 there), and
	   the @got@tprel load below needs rGOT to be valid.  */
	lis	rGOT, _GLOBAL_OFFSET_TABLE_@ha
	la	rGOT, _GLOBAL_OFFSET_TABLE_@l(rGOT)
	lwz	rLOC, __libc_tsd_LOCALE@got@tprel(rGOT)
	add	rLOC, rLOC, __libc_tsd_LOCALE@tls
	lwz	rLOC, 0(rLOC)		/* rLOC = value of __libc_tsd_LOCALE */
# endif /* SHARED */
#else
	/* Must precede the write to rSTR1: rLOCARG and rSTR1 are both r5.  */
	mr	rLOC, rLOCARG
#endif
	mr	rSTR1, rRTN		/* Free r3 for the result.  */
	lwz	rLOC, LOCALE_CTYPE_TOLOWER(rLOC)	/* rLOC = tolower table */
	li	rRTN, 0
	beqlr	cr7			/* s1 == s2: return 0.  */

	/* Unrolling loop for POWER: loads are done with 'lbz' plus
	   offset and string descriptors are only updated in the end
	   of loop unrolling.  Table entries are 4 bytes wide, hence the
	   shift by 2; 'slwi' is the 32-bit form of that shift.  */

L(loop):
	lbz	rCHAR1, 0(rSTR1)	/* Load char from s1 */
	lbz	rCHAR2, 0(rSTR2)	/* Load char from s2 */
	slwi	rADDR1, rCHAR1, 2	/* Calculate address for tolower(*s1) */
	slwi	rADDR2, rCHAR2, 2	/* Calculate address for tolower(*s2) */
	lwzx	rLWR1, rLOC, rADDR1	/* Load tolower(*s1) */
	lwzx	rLWR2, rLOC, rADDR2	/* Load tolower(*s2) */
	cmpwi	cr7, rCHAR1, 0		/* *s1 == '\0' ? */
	subf.	r3, rLWR2, rLWR1
	bnelr				/* Lowered values differ: return diff.  */
	beqlr	cr7			/* End of s1, no difference: return 0.  */
	lbz	rCHAR1, 1(rSTR1)
	lbz	rCHAR2, 1(rSTR2)
	slwi	rADDR1, rCHAR1, 2
	slwi	rADDR2, rCHAR2, 2
	lwzx	rLWR1, rLOC, rADDR1
	lwzx	rLWR2, rLOC, rADDR2
	cmpwi	cr7, rCHAR1, 0
	subf.	r3, rLWR2, rLWR1
	bnelr
	beqlr	cr7
	lbz	rCHAR1, 2(rSTR1)
	lbz	rCHAR2, 2(rSTR2)
	slwi	rADDR1, rCHAR1, 2
	slwi	rADDR2, rCHAR2, 2
	lwzx	rLWR1, rLOC, rADDR1
	lwzx	rLWR2, rLOC, rADDR2
	cmpwi	cr7, rCHAR1, 0
	subf.	r3, rLWR2, rLWR1
	bnelr
	beqlr	cr7
	lbz	rCHAR1, 3(rSTR1)
	lbz	rCHAR2, 3(rSTR2)
	/* Increment both string descriptors */
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
	slwi	rADDR1, rCHAR1, 2
	slwi	rADDR2, rCHAR2, 2
	lwzx	rLWR1, rLOC, rADDR1
	lwzx	rLWR2, rLOC, rADDR2
	cmpwi	cr7, rCHAR1, 0
	subf.	r3, rLWR2, rLWR1
	bnelr
	bne	cr7,L(loop)		/* Not at end of s1: next four bytes.  */
	blr
END (BP_SYM (__STRCMP))
|
||||
|
||||
weak_alias (BP_SYM (__STRCMP), BP_SYM (STRCMP))
|
||||
libc_hidden_builtin_def (__STRCMP)
|
5
sysdeps/powerpc/powerpc32/power7/strcasecmp_l.S
Normal file
5
sysdeps/powerpc/powerpc32/power7/strcasecmp_l.S
Normal file
@ -0,0 +1,5 @@
|
||||
#define USE_IN_EXTENDED_LOCALE_MODEL
|
||||
#define STRCMP strcasecmp_l
|
||||
#define __STRCMP __strcasecmp_l
|
||||
|
||||
#include "strcasecmp.S"
|
@ -3,3 +3,8 @@ ifeq ($(subdir),elf)
|
||||
# optimization may require a TOC reference before relocations are resolved.
|
||||
CFLAGS-rtld.c += -mno-vsx
|
||||
endif
|
||||
|
||||
ifeq ($(subdir),string)
|
||||
CFLAGS-strncase.c += -funroll-loops
|
||||
CFLAGS-strncase_l.c += -funroll-loops
|
||||
endif
|
||||
|
125
sysdeps/powerpc/powerpc64/power7/strcasecmp.S
Normal file
125
sysdeps/powerpc/powerpc64/power7/strcasecmp.S
Normal file
@ -0,0 +1,125 @@
|
||||
/* Optimized strcasecmp implementation for PowerPC64.
|
||||
Copyright (C) 2011 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <bp-sym.h>
|
||||
#include <bp-asm.h>
|
||||
#include <locale-defines.h>
|
||||
|
||||
/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] )
|
||||
|
||||
or if defined USE_IN_EXTENDED_LOCALE_MODEL:
|
||||
|
||||
int [r3] strcasecmp_l (const char *s1 [r3], const char *s2 [r4],
|
||||
__locale_t loc [r5]) */
|
||||
|
||||
#ifndef STRCMP
|
||||
# define __STRCMP __strcasecmp
|
||||
# define STRCMP strcasecmp
|
||||
#endif
|
||||
|
||||
/* Case-insensitive comparison of two NUL-terminated strings for PowerPC64
   (POWER7).

   In:   r3 = s1, r4 = s2; r5 = locale when USE_IN_EXTENDED_LOCALE_MODEL
	 is defined.
   Out:  r3 = sign-extended 32-bit difference between the tolower-table
	 words of the last byte pair examined (s1 entry minus s2 entry),
	 or 0 when s1 and s2 are the same pointer.
   Uses: r0, r5-r8, r11, r12, cr0, cr1, cr7; additionally r9 while the
	 default locale is being located.  */
ENTRY (BP_SYM (__STRCMP))
	CALL_MCOUNT 2

#define rRES	r3	/* Function result */
#define rS1	r5	/* Cursor into the 1st string */
#define rS2	r4	/* Cursor into the 2nd string */
#define rLOCIN	r5	/* Incoming locale_t (3rd argument) */
#define rCH1	r6	/* Current byte of the 1st string */
#define rCH2	r7	/* Current byte of the 2nd string */
#define rOFF1	r8	/* rCH1 scaled to a table offset */
#define rOFF2	r12	/* rCH2 scaled to a table offset */
#define rLOW1	r8	/* Table word for rCH1 */
#define rLOW2	r12	/* Table word for rCH2 */
#define rSCR	r9	/* Scratch for the TLS lookup */
#define rTAB	r11	/* Locale pointer, then its tolower table */

/* One comparison step.  Expects cr0 to hold the result of comparing rCH1
   with 0.  Scales both bytes by 4, fetches their table words, and leaves
   through L(result) when rCH1 was 0 or the two words differ.  */
#define LOOKUP_AND_TEST						\
	sldi	rOFF1, rCH1, 2;					\
	sldi	rOFF2, rCH2, 2;					\
	lwzx	rLOW1, rTAB, rOFF1;				\
	lwzx	rLOW2, rTAB, rOFF2;				\
	cmpw	cr1, rLOW1, rLOW2;				\
	crorc	4*cr1+eq, 4*cr0+eq, 4*cr1+eq;			\
	beq	cr1, L(result)

	cmpd	cr7, r3, r4		/* Identical pointers?  Used by beqlr.  */
#ifndef USE_IN_EXTENDED_LOCALE_MODEL
	ld	rSCR, __libc_tsd_LOCALE@got@tprel(r2)
	add	rTAB, rSCR, __libc_tsd_LOCALE@tls
	ld	rTAB, 0(rTAB)		/* rTAB = value of __libc_tsd_LOCALE */
#else
	/* Copy before rS1 is written: rLOCIN and rS1 are the same register.  */
	mr	rTAB, rLOCIN
#endif
	ld	rTAB, LOCALE_CTYPE_TOLOWER(rTAB)
	mr	rS1, rRES		/* Release r3 for the result.  */
	li	rRES, 0
	beqlr	cr7			/* s1 == s2: answer is 0.  */

	/* Four byte pairs are handled per iteration.  Each pair is read
	   with 'lbz' at a fixed displacement; the two cursors advance
	   once, by 4, in the last step.  The first pair of every iteration
	   is loaded ahead of L(unrolled).  */

	lbz	rCH1, 0(rS1)
	lbz	rCH2, 0(rS2)
L(unrolled):
	cmpdi	cr0, rCH1, 0		/* End of s1?  */
	LOOKUP_AND_TEST

	lbz	rCH1, 1(rS1)
	lbz	rCH2, 1(rS2)
	cmpdi	cr0, rCH1, 0
	LOOKUP_AND_TEST

	lbz	rCH1, 2(rS1)
	lbz	rCH2, 2(rS2)
	cmpdi	cr0, rCH1, 0
	LOOKUP_AND_TEST

	lbz	rCH1, 3(rS1)
	lbz	rCH2, 3(rS2)
	cmpdi	cr0, rCH1, 0
	/* Advance both cursors past the four bytes just read.  */
	addi	rS1, rS1, 4
	addi	rS2, rS2, 4
	LOOKUP_AND_TEST

	lbz	rCH1, 0(rS1)		/* First pair of the next iteration.  */
	lbz	rCH2, 0(rS2)
	b	L(unrolled)

L(result):
	subf	r0, rLOW2, rLOW1	/* Table word for s1 minus word for s2.  */
	extsw	rRES, r0		/* Return it as a signed 32-bit value.  */
	blr
END (BP_SYM (__STRCMP))
|
||||
|
||||
weak_alias (BP_SYM (__STRCMP), BP_SYM (STRCMP))
|
||||
libc_hidden_builtin_def (__STRCMP)
|
5
sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S
Normal file
5
sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S
Normal file
@ -0,0 +1,5 @@
|
||||
#define USE_IN_EXTENDED_LOCALE_MODEL
|
||||
#define STRCMP strcasecmp_l
|
||||
#define __STRCMP __strcasecmp_l
|
||||
|
||||
#include "strcasecmp.S"
|
Loading…
Reference in New Issue
Block a user