mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-22 13:00:06 +00:00
powerpc: Various P7-optimized string functions
This commit is contained in:
parent
022f6b8920
commit
fe2f79db99
17
ChangeLog
17
ChangeLog
@ -1,3 +1,20 @@
|
||||
2010-08-19 Luis Machado <luisgpm@br.ibm.com>
|
||||
|
||||
* sysdeps/powerpc/powerpc32/power7/memchr.S: New file.
|
||||
* sysdeps/powerpc/powerpc32/power7/memrchr.S: New file.
|
||||
* sysdeps/powerpc/powerpc32/power7/rawmemchr.S: New file.
|
||||
* sysdeps/powerpc/powerpc32/power7/strchr.S: New file.
|
||||
* sysdeps/powerpc/powerpc32/power7/strchrnul.S: New file.
|
||||
* sysdeps/powerpc/powerpc32/power7/strlen.S: New file.
|
||||
* sysdeps/powerpc/powerpc32/power7/strnlen.S: New file.
|
||||
* sysdeps/powerpc/powerpc64/power7/memchr.S: New file.
|
||||
* sysdeps/powerpc/powerpc64/power7/memrchr.S: New file.
|
||||
* sysdeps/powerpc/powerpc64/power7/rawmemchr.S: New file.
|
||||
* sysdeps/powerpc/powerpc64/power7/strchr.S: New file.
|
||||
* sysdeps/powerpc/powerpc64/power7/strchrnul.S: New file.
|
||||
* sysdeps/powerpc/powerpc64/power7/strlen.S: New file.
|
||||
* sysdeps/powerpc/powerpc64/power7/strnlen.S: New file.
|
||||
|
||||
2010-07-26 Anton Blanchard <anton@samba.org>
|
||||
|
||||
* malloc/malloc.c (sYSTRIm): Replace divide and multiply with mask.
|
||||
|
170
sysdeps/powerpc/powerpc32/power7/memchr.S
Normal file
170
sysdeps/powerpc/powerpc32/power7/memchr.S
Normal file
@ -0,0 +1,170 @@
|
||||
/* Optimized memchr implementation for PowerPC32/POWER7 using cmpb insn.
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Luis Machado <luisgpm@br.ibm.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
|
||||
02110-1301 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <bp-sym.h>
|
||||
#include <bp-asm.h>
|
||||
|
||||
/* void * [r3] memchr (const void *s [r3], int byte [r4], size_t size [r5])
|
||||
Returns in r3 the address of the first byte equal to BYTE among the
|
||||
SIZE bytes starting at S, or 0 if there is none. Uses r0, r4-r12,
|
||||
cr0, cr6, cr7 and ctr as scratch. */
|
||||
.machine power7
|
||||
ENTRY (BP_SYM (__memchr))
|
||||
CALL_MCOUNT
|
||||
dcbt 0,r3 /* Touch the cache block holding the start of S. */
|
||||
clrrwi r8,r3,2 /* Align the address to word boundary. */
|
||||
rlwimi r4,r4,8,16,23 /* Replicate BYTE to halfword... */
|
||||
rlwimi r4,r4,16,0,15 /* ...and then to the whole word. */
|
||||
add r7,r3,r5 /* r7 = s + size, one past the last acceptable address. */
|
||||
cmplwi r5,16
|
||||
ble L(small_range)
|
||||
|
||||
cmplw cr7,r3,r7 /* Did s + size wrap around the address space? If
|
||||
r3 <= r7 it did not, so r7 is a usable end of
|
||||
|
||||
range. */
|
||||
ble cr7,L(proceed)
|
||||
|
||||
li r7,-1 /* It wrapped: clamp the end of range to the highest address. */
|
||||
L(proceed):
|
||||
rlwinm r6,r3,3,27,28 /* Calculate padding: r6 = (s & 3) * 8 bits. */
|
||||
lwz r12,0(r8) /* Load word from memory. */
|
||||
cmpb r10,r12,r4 /* Check for BYTE's in WORD1. */
|
||||
slw r10,r10,r6 /* Shift out matches in the bytes before s... */
|
||||
srw r10,r10,r6 /* ...and bring the remaining bits back in place. */
|
||||
cmplwi cr7,r10,0 /* If r10 == 0, no BYTE's have been found. */
|
||||
bne cr7,L(done)
|
||||
|
||||
/* Are we done already? */
|
||||
addi r9,r8,4
|
||||
cmplw cr6,r9,r7
|
||||
bge cr6,L(null)
|
||||
|
||||
mtcrf 0x01,r8 /* Copy the low four bits of r8 into cr7. */
|
||||
/* Are we now aligned to a doubleword boundary? If so, skip to
|
||||
the main loop. Otherwise, go through the alignment code. */
|
||||
|
||||
bt 29,L(loop_setup)
|
||||
|
||||
/* Handle WORD2 of pair. */
|
||||
lwzu r12,4(r8)
|
||||
cmpb r10,r12,r4
|
||||
cmplwi cr7,r10,0
|
||||
bne cr7,L(done)
|
||||
|
||||
/* Are we done already? */
|
||||
addi r9,r8,4
|
||||
cmplw cr6,r9,r7
|
||||
bge cr6,L(null)
|
||||
|
||||
L(loop_setup):
|
||||
sub r5,r7,r9 /* Bytes between the next word (r9) and the end of range. */
|
||||
srwi r6,r5,3 /* Number of loop iterations. */
|
||||
mtctr r6 /* Setup the counter. */
|
||||
b L(loop)
|
||||
/* Main loop to look for BYTE in the string. Since
|
||||
it's a small loop (8 instructions), align it to 32-bytes. */
|
||||
.p2align 5
|
||||
L(loop):
|
||||
/* Load two words, compare and merge in a
|
||||
single register for speed. This is an attempt
|
||||
to speed up the byte-checking process for bigger strings. */
|
||||
|
||||
lwz r12,4(r8)
|
||||
lwzu r11,8(r8)
|
||||
cmpb r10,r12,r4
|
||||
cmpb r9,r11,r4
|
||||
or r5,r9,r10 /* Merge everything in one word. */
|
||||
cmplwi cr7,r5,0
|
||||
bne cr7,L(found)
|
||||
bdnz L(loop)
|
||||
/* We're here because the counter reached 0, and that means we
|
||||
didn't have any matches for BYTE in the doublewords scanned. Check
|
||||
the remaining words, if any, one at a time. */
|
||||
addi r9,r8,4
|
||||
cmplw cr6,r9,r7
|
||||
blt cr6,L(loop_small)
|
||||
b L(null)
|
||||
|
||||
/* OK, one (or both) of the words contains BYTE. Check
|
||||
the first word and decrement the address in case the first
|
||||
word really contains BYTE. */
|
||||
.align 4
|
||||
L(found):
|
||||
cmplwi cr6,r10,0
|
||||
addi r8,r8,-4 /* Point r8 at the first word of the pair. */
|
||||
bne cr6,L(done)
|
||||
|
||||
/* BYTE must be in the second word. Adjust the address
|
||||
again and move the result of cmpb to r10 so we can calculate the
|
||||
pointer. */
|
||||
|
||||
mr r10,r9
|
||||
addi r8,r8,4 /* Point r8 back at the second word. */
|
||||
|
||||
/* r10 has the output of the cmpb instruction, that is, it contains
|
||||
0xff in the same position as BYTE in the original
|
||||
word from the string. Use that to calculate the pointer.
|
||||
We need to make sure BYTE is *before* the end of the
|
||||
range. */
|
||||
L(done):
|
||||
cntlzw r0,r10 /* Count leading zeroes before the match. */
|
||||
srwi r0,r0,3 /* Convert leading zeroes to bytes. */
|
||||
add r3,r8,r0 /* Address of the matching byte. */
|
||||
cmplw r3,r7 /* A match at or past the end of range does not count. */
|
||||
bge L(null)
|
||||
blr
|
||||
|
||||
.align 4
|
||||
L(null):
|
||||
li r3,0 /* BYTE was not found: return 0. */
|
||||
blr
|
||||
|
||||
/* Deals with size <= 16. */
|
||||
.align 4
|
||||
L(small_range):
|
||||
cmplwi r5,0
|
||||
beq L(null)
|
||||
|
||||
rlwinm r6,r3,3,27,28 /* Calculate padding: r6 = (s & 3) * 8 bits. */
|
||||
lwz r12,0(r8) /* Load word from memory. */
|
||||
cmpb r10,r12,r4 /* Check for BYTE in WORD1. */
|
||||
slw r10,r10,r6 /* Shift out matches in the bytes before s... */
|
||||
srw r10,r10,r6 /* ...and bring the remaining bits back in place. */
|
||||
cmplwi cr7,r10,0
|
||||
bne cr7,L(done)
|
||||
|
||||
/* Are we done already? */
|
||||
addi r9,r8,4
|
||||
cmplw r9,r7
|
||||
bge L(null)
|
||||
b L(loop_small)
|
||||
|
||||
.p2align 5
|
||||
L(loop_small):
|
||||
lwzu r12,4(r8) /* Advance to the next word and load it. */
|
||||
cmpb r10,r12,r4
|
||||
addi r9,r8,4 /* r9 = address of the word after this one. */
|
||||
cmplwi cr6,r10,0
|
||||
bne cr6,L(done)
|
||||
cmplw r9,r7
|
||||
bge L(null)
|
||||
b L(loop_small)
|
||||
|
||||
END (BP_SYM (__memchr))
|
||||
weak_alias (BP_SYM (__memchr), BP_SYM(memchr))
|
||||
libc_hidden_builtin_def (memchr)
|
178
sysdeps/powerpc/powerpc32/power7/memrchr.S
Normal file
178
sysdeps/powerpc/powerpc32/power7/memrchr.S
Normal file
@ -0,0 +1,178 @@
|
||||
/* Optimized memrchr implementation for PowerPC32/POWER7 using cmpb insn.
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Luis Machado <luisgpm@br.ibm.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
|
||||
02110-1301 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <bp-sym.h>
|
||||
#include <bp-asm.h>
|
||||
|
||||
/* void * [r3] memrchr (const void *s [r3], int byte [r4], size_t size [r5])
|
||||
Returns in r3 the address of the last byte equal to BYTE among the
|
||||
SIZE bytes starting at S, or 0 if there is none. Words are loaded
|
||||
byte-reversed (lwbrx) so that the highest-addressed byte of each word
|
||||
is the most significant one and cntlzw finds the last match. Uses
|
||||
r0, r4-r12, cr0, cr6, cr7 and ctr as scratch. */
|
||||
.machine power7
|
||||
ENTRY (BP_SYM (__memrchr))
|
||||
CALL_MCOUNT
|
||||
dcbt 0,r3
|
||||
mr r7,r3 /* r7 = start of the range. */
|
||||
add r3,r7,r5 /* r3 = s + size, one past the last acceptable address. */
|
||||
cmplw cr7,r3,r7 /* Did s + size wrap around the address space? */
|
||||
|
||||
/* Replicate BYTE to word. */
|
||||
rlwimi r4,r4,8,16,23
|
||||
rlwimi r4,r4,16,0,15
|
||||
bge cr7,L(proceed)
|
||||
|
||||
li r3,-1 /* It wrapped: clamp the end of range to the highest address. */
|
||||
L(proceed):
|
||||
li r6,-4 /* Index for lwbrx: load the word just below r8. */
|
||||
addi r9,r3,-1 /* Address of the last byte in the range. */
|
||||
clrrwi r8,r9,2
|
||||
addi r8,r8,4 /* r8 = end of the aligned word holding the last byte. */
|
||||
neg r0,r3
|
||||
rlwinm r0,r0,3,27,28 /* Calculate padding: r0 = (-end & 3) * 8 bits. */
|
||||
|
||||
cmplwi r5,16
|
||||
ble L(small_range)
|
||||
|
||||
lwbrx r12,r8,r6 /* Load reversed word from memory. */
|
||||
cmpb r10,r12,r4 /* Check for BYTE in WORD1. */
|
||||
slw r10,r10,r0 /* Shift out matches in the bytes past the end... */
|
||||
srw r10,r10,r0 /* ...and bring the remaining bits back in place. */
|
||||
cmplwi cr7,r10,0 /* If r10 == 0, no BYTE's have been found. */
|
||||
bne cr7,L(done)
|
||||
|
||||
/* Are we done already? */
|
||||
addi r9,r8,-4
|
||||
cmplw cr6,r9,r7
|
||||
ble cr6,L(null)
|
||||
|
||||
mtcrf 0x01,r8 /* Copy the low four bits of r8 into cr7. */
|
||||
/* Are we now aligned to a doubleword boundary? If so, skip to
|
||||
the main loop. Otherwise, go through the alignment code. */
|
||||
mr r8,r9
|
||||
bt 29,L(loop_setup)
|
||||
|
||||
/* Handle WORD2 of pair. */
|
||||
lwbrx r12,r8,r6
|
||||
cmpb r10,r12,r4
|
||||
cmplwi cr7,r10,0
|
||||
bne cr7,L(done)
|
||||
|
||||
/* Are we done already? */
|
||||
addi r8,r8,-4
|
||||
cmplw cr6,r8,r7
|
||||
ble cr6,L(null)
|
||||
|
||||
L(loop_setup):
|
||||
li r0,-8 /* Index for lwbrx: second word of each pair. */
|
||||
sub r5,r8,r7 /* Bytes between the start of range and r8. */
|
||||
srwi r9,r5,3 /* Number of loop iterations. */
|
||||
mtctr r9 /* Setup the counter. */
|
||||
b L(loop)
|
||||
/* Main loop to look for BYTE backwards in the string. It's a
|
||||
small loop, so align it to 32-bytes. */
|
||||
.p2align 5
|
||||
L(loop):
|
||||
/* Load two words, compare and merge in a
|
||||
single register for speed. This is an attempt
|
||||
to speed up the byte-checking process for bigger strings. */
|
||||
|
||||
lwbrx r12,r8,r6
|
||||
lwbrx r11,r8,r0
|
||||
addi r8,r8,-4
|
||||
cmpb r10,r12,r4
|
||||
cmpb r9,r11,r4
|
||||
or r5,r9,r10 /* Merge everything in one word. */
|
||||
cmplwi cr7,r5,0
|
||||
bne cr7,L(found)
|
||||
addi r8,r8,-4
|
||||
bdnz L(loop)
|
||||
/* We're here because the counter reached 0, and that means we
|
||||
didn't have any matches for BYTE in the doublewords scanned. Only
|
||||
go on if part of the range is still below r8: L(loop_small) loads
|
||||
the word just below r8, and when r8 == r7 that word lies entirely
|
||||
before the start of the buffer (possibly on another page). */
|
||||
cmplw cr6,r8,r7
|
||||
bgt cr6,L(loop_small)
|
||||
b L(null)
|
||||
|
||||
/* OK, one (or both) of the words contains BYTE. Check
|
||||
the first word and restore the address in case the first
|
||||
word really contains BYTE. */
|
||||
.align 4
|
||||
L(found):
|
||||
cmplwi cr6,r10,0
|
||||
addi r8,r8,4 /* r8 = end of the first word of the pair again. */
|
||||
bne cr6,L(done)
|
||||
|
||||
/* BYTE must be in the second word. Adjust the address
|
||||
again and move the result of cmpb to r10 so we can calculate the
|
||||
pointer. */
|
||||
|
||||
mr r10,r9
|
||||
addi r8,r8,-4 /* r8 = end of the second word of the pair. */
|
||||
|
||||
/* r10 has the output of the cmpb instruction, that is, it contains
|
||||
0xff in the same position as BYTE in the byte-reversed
|
||||
word from the string. Use that to calculate the pointer.
|
||||
We need to make sure BYTE is not *before* the start of the
|
||||
range. */
|
||||
L(done):
|
||||
cntlzw r0,r10 /* Count leading zeroes before the match. */
|
||||
srwi r6,r0,3 /* Convert leading zeroes to bytes. */
|
||||
addi r0,r6,1 /* The match is r6 + 1 bytes below the end of the word. */
|
||||
sub r3,r8,r0 /* Address of the matching byte. */
|
||||
cmplw r3,r7 /* A match below the start of range does not count. */
|
||||
blt L(null)
|
||||
blr
|
||||
|
||||
.align 4
|
||||
L(null):
|
||||
li r3,0 /* BYTE was not found: return 0. */
|
||||
blr
|
||||
|
||||
/* Deals with size <= 16. */
|
||||
.align 4
|
||||
L(small_range):
|
||||
cmplwi r5,0
|
||||
beq L(null)
|
||||
|
||||
lwbrx r12,r8,r6 /* Load reversed word from memory. */
|
||||
cmpb r10,r12,r4 /* Check for BYTE in WORD1. */
|
||||
slw r10,r10,r0 /* Shift out matches in the bytes past the end... */
|
||||
srw r10,r10,r0 /* ...and bring the remaining bits back in place. */
|
||||
cmplwi cr7,r10,0
|
||||
bne cr7,L(done)
|
||||
|
||||
addi r8,r8,-4
|
||||
cmplw r8,r7
|
||||
ble L(null)
|
||||
b L(loop_small)
|
||||
|
||||
.p2align 5
|
||||
L(loop_small):
|
||||
lwbrx r12,r8,r6 /* Load the word just below r8, byte-reversed. */
|
||||
cmpb r10,r12,r4
|
||||
cmplwi cr6,r10,0
|
||||
bne cr6,L(done)
|
||||
addi r8,r8,-4
|
||||
cmplw r8,r7 /* Stop once r8 has reached the start of range. */
|
||||
ble L(null)
|
||||
b L(loop_small)
|
||||
|
||||
END (BP_SYM (__memrchr))
|
||||
weak_alias (BP_SYM (__memrchr), BP_SYM(memrchr))
|
||||
libc_hidden_builtin_def (memrchr)
|
102
sysdeps/powerpc/powerpc32/power7/rawmemchr.S
Normal file
102
sysdeps/powerpc/powerpc32/power7/rawmemchr.S
Normal file
@ -0,0 +1,102 @@
|
||||
/* Optimized rawmemchr implementation for PowerPC32/POWER7 using cmpb insn.
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Luis Machado <luisgpm@br.ibm.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
|
||||
02110-1301 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <bp-sym.h>
|
||||
#include <bp-asm.h>
|
||||
|
||||
/* void * [r3] rawmemchr (const void *s [r3], int c [r4])
|
||||
Returns in r3 the address of the first byte equal to C at or after S.
|
||||
There is no length limit: the scan only stops at a match. Uses r0,
|
||||
r4-r8, r11, r12, cr6 and cr7 as scratch. */
|
||||
.machine power7
|
||||
ENTRY (BP_SYM(__rawmemchr))
|
||||
CALL_MCOUNT
|
||||
dcbt 0,r3
|
||||
clrrwi r8,r3,2 /* Align the address to word boundary. */
|
||||
|
||||
/* Replicate byte to word. */
|
||||
rlwimi r4,r4,8,16,23
|
||||
rlwimi r4,r4,16,0,15
|
||||
|
||||
/* Now r4 has a word of c bytes. */
|
||||
|
||||
rlwinm r6,r3,3,27,28 /* Calculate padding: r6 = (s & 3) * 8 bits. */
|
||||
lwz r12,0(r8) /* Load word from memory. */
|
||||
cmpb r5,r12,r4 /* Compare each byte against c byte. */
|
||||
slw r5,r5,r6 /* Move left to discard ignored bits. */
|
||||
srw r5,r5,r6 /* Bring the bits back as zeros. */
|
||||
cmpwi cr7,r5,0 /* If r5 == 0, no c bytes have been found. */
|
||||
bne cr7,L(done)
|
||||
|
||||
mtcrf 0x01,r8 /* Copy the low four bits of r8 into cr7. */
|
||||
|
||||
/* Are we now aligned to a doubleword boundary? If so, skip to
|
||||
the main loop. Otherwise, go through the alignment code. */
|
||||
|
||||
bt 29,L(loop)
|
||||
|
||||
/* Handle WORD2 of pair. */
|
||||
lwzu r12,4(r8)
|
||||
cmpb r5,r12,r4
|
||||
cmpwi cr7,r5,0
|
||||
bne cr7,L(done)
|
||||
b L(loop) /* We branch here (rather than falling through)
|
||||
to skip the nops due to heavy alignment
|
||||
of the loop below. */
|
||||
|
||||
/* Main loop to look for the c byte. Since it's a
|
||||
small loop (< 8 instructions), align it to 32-bytes. */
|
||||
.p2align 5
|
||||
L(loop):
|
||||
/* Load two words, compare and merge in a
|
||||
single register for speed. This is an attempt
|
||||
to speed up the byte-checking process for bigger strings. */
|
||||
lwz r12,4(r8)
|
||||
lwzu r11,8(r8)
|
||||
cmpb r5,r12,r4
|
||||
cmpb r6,r11,r4
|
||||
or r7,r5,r6
|
||||
cmpwi cr7,r7,0
|
||||
beq cr7,L(loop)
|
||||
|
||||
/* OK, one (or both) of the words contains a 'c' byte. Check
|
||||
the first word and decrement the address in case the first
|
||||
word really contains a c byte. */
|
||||
|
||||
cmpwi cr6,r5,0
|
||||
addi r8,r8,-4 /* Point r8 at the first word of the pair. */
|
||||
bne cr6,L(done)
|
||||
|
||||
/* The 'c' byte must be in the second word. Adjust the address
|
||||
again and move the result of cmpb to r5 so we can calculate the
|
||||
pointer. */
|
||||
mr r5,r6
|
||||
addi r8,r8,4 /* Point r8 back at the second word. */
|
||||
|
||||
/* r5 has the output of the cmpb instruction, that is, it contains
|
||||
0xff in the same position as the 'c' byte in the original
|
||||
word from the string. Use that fact to find out what is
|
||||
the position of the byte inside the string. */
|
||||
L(done):
|
||||
cntlzw r0,r5 /* Count leading zeros before the match. */
|
||||
srwi r0,r0,3 /* Convert leading zeroes to bytes. */
|
||||
add r3,r8,r0 /* Return address of the matching char. */
|
||||
blr
|
||||
END (BP_SYM (__rawmemchr))
|
||||
weak_alias (__rawmemchr,rawmemchr)
|
||||
libc_hidden_builtin_def (__rawmemchr)
|
203
sysdeps/powerpc/powerpc32/power7/strchr.S
Normal file
203
sysdeps/powerpc/powerpc32/power7/strchr.S
Normal file
@ -0,0 +1,203 @@
|
||||
/* Optimized strchr implementation for PowerPC32/POWER7 using cmpb insn.
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Luis Machado <luisgpm@br.ibm.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
|
||||
02110-1301 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <bp-sym.h>
|
||||
#include <bp-asm.h>
|
||||
|
||||
/* char * [r3] strchr (const char *s [r3], int c [r4])
|
||||
Returns in r3 the address of the first byte equal to C in the string
|
||||
S, or 0 if the terminating null byte comes first. When C is 0 the
|
||||
address of the terminating null byte is returned. Uses r0, r4-r12,
|
||||
cr6 and cr7 as scratch. */
|
||||
.machine power7
|
||||
ENTRY (BP_SYM(strchr))
|
||||
CALL_MCOUNT
|
||||
dcbt 0,r3
|
||||
clrrwi r8,r3,2 /* Align the address to word boundary. */
|
||||
cmpwi cr7,r4,0 /* Is c zero? Tested by the beq below. */
|
||||
lwz r12,0(r8) /* Load word from memory. */
|
||||
li r0,0 /* Word with null chars to use
|
||||
with cmpb. */
|
||||
|
||||
rlwinm r6,r3,3,27,28 /* Calculate padding: r6 = (s & 3) * 8 bits. */
|
||||
|
||||
beq cr7,L(null_match)
|
||||
|
||||
/* Replicate byte to word. */
|
||||
rlwimi r4,r4,8,16,23
|
||||
rlwimi r4,r4,16,0,15
|
||||
|
||||
/* Now r4 has a word of c bytes and r0 has
|
||||
a word of null bytes. */
|
||||
|
||||
cmpb r10,r12,r4 /* Compare each byte against c byte. */
|
||||
cmpb r11,r12,r0 /* Compare each byte against null byte. */
|
||||
|
||||
/* Move the words left and right to discard the bits that are
|
||||
not part of the string and to bring them back as zeros. */
|
||||
|
||||
slw r10,r10,r6
|
||||
slw r11,r11,r6
|
||||
srw r10,r10,r6
|
||||
srw r11,r11,r6
|
||||
or r5,r10,r11 /* OR the results to speed things up. */
|
||||
cmpwi cr7,r5,0 /* If r5 == 0, no c or null bytes
|
||||
have been found. */
|
||||
bne cr7,L(done)
|
||||
|
||||
mtcrf 0x01,r8 /* Copy the low four bits of r8 into cr7. */
|
||||
|
||||
/* Are we now aligned to a doubleword boundary? If so, skip to
|
||||
the main loop. Otherwise, go through the alignment code. */
|
||||
|
||||
bt 29,L(loop)
|
||||
|
||||
/* Handle WORD2 of pair. */
|
||||
lwzu r12,4(r8)
|
||||
cmpb r10,r12,r4
|
||||
cmpb r11,r12,r0
|
||||
or r5,r10,r11
|
||||
cmpwi cr7,r5,0
|
||||
bne cr7,L(done)
|
||||
b L(loop) /* We branch here (rather than falling through)
|
||||
to skip the nops due to heavy alignment
|
||||
of the loop below. */
|
||||
|
||||
.p2align 5
|
||||
L(loop):
|
||||
/* Load two words, compare and merge in a
|
||||
single register for speed. This is an attempt
|
||||
to speed up the null-checking process for bigger strings. */
|
||||
lwz r12,4(r8)
|
||||
lwzu r9,8(r8)
|
||||
cmpb r10,r12,r4
|
||||
cmpb r11,r12,r0
|
||||
cmpb r6,r9,r4
|
||||
cmpb r7,r9,r0
|
||||
or r12,r10,r11 /* c/null matches in the first word. */
|
||||
or r9,r6,r7 /* c/null matches in the second word. */
|
||||
or r5,r12,r9
|
||||
cmpwi cr7,r5,0
|
||||
beq cr7,L(loop)
|
||||
|
||||
/* OK, one (or both) of the words contains a c/null byte. Check
|
||||
the first word and decrement the address in case the first
|
||||
word really contains a c/null byte. */
|
||||
|
||||
cmpwi cr6,r12,0
|
||||
addi r8,r8,-4 /* Point r8 at the first word of the pair. */
|
||||
bne cr6,L(done)
|
||||
|
||||
/* The c/null byte must be in the second word. Adjust the address
|
||||
again and move the results of cmpb to r10 and r11 so we can
|
||||
calculate the pointer. */
|
||||
|
||||
mr r10,r6
|
||||
mr r11,r7
|
||||
addi r8,r8,4 /* Point r8 back at the second word. */
|
||||
|
||||
/* r10 and r11 have the outputs of the cmpb instructions, that is,
|
||||
they contain 0xff in the same position as the c and null bytes in
|
||||
the original word from the string. Use that to calculate the pointer. */
|
||||
L(done):
|
||||
cntlzw r4,r10 /* Count leading zeroes before c matches. */
|
||||
cntlzw r0,r11 /* Count leading zeroes before null matches. */
|
||||
cmplw cr7,r4,r0
|
||||
bgt cr7,L(no_match) /* The null byte comes before any c byte. */
|
||||
srwi r0,r4,3 /* Convert leading zeroes to bytes. */
|
||||
add r3,r8,r0 /* Return address of the matching c byte
|
||||
or null in case c was not found. */
|
||||
blr
|
||||
|
||||
.align 4
|
||||
L(no_match):
|
||||
li r3,0 /* c was not found: return 0. */
|
||||
blr
|
||||
|
||||
/* We are here because strchr was called with a null byte. */
|
||||
.align 4
|
||||
L(null_match):
|
||||
/* r0 has a word of null bytes. */
|
||||
|
||||
cmpb r5,r12,r0 /* Compare each byte against null bytes. */
|
||||
|
||||
/* Move the words left and right to discard the bits that are
|
||||
not part of the string and to bring them back as zeros. */
|
||||
|
||||
slw r5,r5,r6
|
||||
srw r5,r5,r6
|
||||
cmpwi cr7,r5,0 /* If r5 == 0, no null bytes
|
||||
have been found. */
|
||||
bne cr7,L(done_null)
|
||||
|
||||
mtcrf 0x01,r8 /* Copy the low four bits of r8 into cr7. */
|
||||
|
||||
/* Are we now aligned to a doubleword boundary? If so, skip to
|
||||
the main loop. Otherwise, go through the alignment code. */
|
||||
|
||||
bt 29,L(loop_null)
|
||||
|
||||
/* Handle WORD2 of pair. */
|
||||
lwzu r12,4(r8)
|
||||
cmpb r5,r12,r0
|
||||
cmpwi cr7,r5,0
|
||||
bne cr7,L(done_null)
|
||||
b L(loop_null) /* We branch here (rather than falling through)
|
||||
to skip the nops due to heavy alignment
|
||||
of the loop below. */
|
||||
|
||||
/* Main loop to look for the end of the string. Since it's a
|
||||
small loop (< 8 instructions), align it to 32-bytes. */
|
||||
.p2align 5
|
||||
L(loop_null):
|
||||
/* Load two words, compare and merge in a
|
||||
single register for speed. This is an attempt
|
||||
to speed up the null-checking process for bigger strings. */
|
||||
lwz r12,4(r8)
|
||||
lwzu r11,8(r8)
|
||||
cmpb r5,r12,r0
|
||||
cmpb r10,r11,r0
|
||||
or r6,r5,r10
|
||||
cmpwi cr7,r6,0
|
||||
beq cr7,L(loop_null)
|
||||
|
||||
/* OK, one (or both) of the words contains a null byte. Check
|
||||
the first word and decrement the address in case the first
|
||||
word really contains a null byte. */
|
||||
|
||||
cmpwi cr6,r5,0
|
||||
addi r8,r8,-4 /* Point r8 at the first word of the pair. */
|
||||
bne cr6,L(done_null)
|
||||
|
||||
/* The null byte must be in the second word. Adjust the address
|
||||
again and move the result of cmpb to r5 so we can calculate the
|
||||
pointer. */
|
||||
|
||||
mr r5,r10
|
||||
addi r8,r8,4 /* Point r8 back at the second word. */
|
||||
|
||||
/* r5 has the output of the cmpb instruction, that is, it contains
|
||||
0xff in the same position as the null byte in the original
|
||||
word from the string. Use that to calculate the pointer. */
|
||||
L(done_null):
|
||||
cntlzw r0,r5 /* Count leading zeros before the match. */
|
||||
srwi r0,r0,3 /* Convert leading zeros to bytes. */
|
||||
add r3,r8,r0 /* Return address of the matching null byte. */
|
||||
blr
|
||||
END (BP_SYM (strchr))
|
||||
weak_alias (BP_SYM (strchr), BP_SYM (index))
|
||||
libc_hidden_builtin_def (strchr)
|
117
sysdeps/powerpc/powerpc32/power7/strchrnul.S
Normal file
117
sysdeps/powerpc/powerpc32/power7/strchrnul.S
Normal file
@ -0,0 +1,117 @@
|
||||
/* Optimized strchrnul implementation for PowerPC32/POWER7 using cmpb insn.
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Luis Machado <luisgpm@br.ibm.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
|
||||
02110-1301 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <bp-sym.h>
|
||||
#include <bp-asm.h>
|
||||
|
||||
/* char * [r3] strchrnul (const char *s [r3], int c [r4])
|
||||
Returns in r3 the address of the first byte in the string S that is
|
||||
either equal to C or the terminating null byte, whichever comes
|
||||
first. Uses r0, r4-r12, cr6 and cr7 as scratch. */
|
||||
.machine power7
|
||||
ENTRY (BP_SYM(__strchrnul))
|
||||
CALL_MCOUNT
|
||||
dcbt 0,r3
|
||||
clrrwi r8,r3,2 /* Align the address to word boundary. */
|
||||
|
||||
/* Replicate byte to word. */
|
||||
rlwimi r4,r4,8,16,23
|
||||
rlwimi r4,r4,16,0,15
|
||||
|
||||
rlwinm r6,r3,3,27,28 /* Calculate padding: r6 = (s & 3) * 8 bits. */
|
||||
lwz r12,0(r8) /* Load word from memory. */
|
||||
li r0,0 /* Word with null chars to use
|
||||
with cmpb. */
|
||||
|
||||
/* Now r4 has a word of c bytes and r0 has
|
||||
a word of null bytes. */
|
||||
|
||||
cmpb r10,r12,r0 /* Compare each byte against null byte. */
|
||||
cmpb r9,r12,r4 /* Compare each byte against c byte. */
|
||||
|
||||
/* Move the words left and right to discard the bits that are
|
||||
not part of the string and bring them back as zeros. */
|
||||
slw r10,r10,r6
|
||||
slw r9,r9,r6
|
||||
srw r10,r10,r6
|
||||
srw r9,r9,r6
|
||||
or r5,r9,r10 /* OR the results to speed things up. */
|
||||
cmpwi cr7,r5,0 /* If r5 == 0, no c or null bytes
|
||||
have been found. */
|
||||
bne cr7,L(done)
|
||||
|
||||
mtcrf 0x01,r8 /* Copy the low four bits of r8 into cr7. */
|
||||
|
||||
/* Are we now aligned to a doubleword boundary? If so, skip to
|
||||
the main loop. Otherwise, go through the alignment code. */
|
||||
|
||||
bt 29,L(loop)
|
||||
|
||||
/* Handle WORD2 of pair. */
|
||||
lwzu r12,4(r8)
|
||||
cmpb r10,r12,r0
|
||||
cmpb r9,r12,r4
|
||||
or r5,r9,r10
|
||||
cmpwi cr7,r5,0
|
||||
bne cr7,L(done)
|
||||
b L(loop) /* We branch here (rather than falling through)
|
||||
to skip the nops due to heavy alignment
|
||||
of the loop below. */
|
||||
|
||||
.p2align 5
|
||||
L(loop):
|
||||
/* Load two words, compare and merge in a
|
||||
single register for speed. This is an attempt
|
||||
to speed up the null-checking process for bigger strings. */
|
||||
lwz r12,4(r8)
|
||||
lwzu r11,8(r8)
|
||||
cmpb r10,r12,r0
|
||||
cmpb r9,r12,r4
|
||||
cmpb r6,r11,r0
|
||||
cmpb r7,r11,r4
|
||||
or r5,r9,r10 /* c/null matches in the first word. */
|
||||
or r10,r6,r7 /* c/null matches in the second word. */
|
||||
or r11,r5,r10
|
||||
cmpwi cr7,r11,0
|
||||
beq cr7,L(loop)
|
||||
|
||||
/* OK, one (or both) of the words contains a c/null byte. Check
|
||||
the first word and decrement the address in case the first
|
||||
word really contains a c/null byte. */
|
||||
|
||||
cmpwi cr6,r5,0
|
||||
addi r8,r8,-4 /* Point r8 at the first word of the pair. */
|
||||
bne cr6,L(done)
|
||||
|
||||
/* The c/null byte must be in the second word. Adjust the
|
||||
address again and move the result of cmpb to r5 so we can calculate
|
||||
the pointer. */
|
||||
mr r5,r10
|
||||
addi r8,r8,4 /* Point r8 back at the second word. */
|
||||
|
||||
/* r5 has the output of the cmpb instruction, that is, it contains
|
||||
0xff in the same position as the c/null byte in the original
|
||||
word from the string. Use that to calculate the pointer. */
|
||||
L(done):
|
||||
cntlzw r0,r5 /* Count leading zeros before the match. */
|
||||
srwi r0,r0,3 /* Convert leading zeros to bytes. */
|
||||
add r3,r8,r0 /* Return address of matching c/null byte. */
|
||||
blr
|
||||
END (BP_SYM (__strchrnul))
|
||||
weak_alias (__strchrnul,strchrnul)
|
||||
libc_hidden_builtin_def (__strchrnul)
|
98
sysdeps/powerpc/powerpc32/power7/strlen.S
Normal file
98
sysdeps/powerpc/powerpc32/power7/strlen.S
Normal file
@ -0,0 +1,98 @@
|
||||
/* Optimized strlen implementation for PowerPC32/POWER7 using cmpb insn.
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Luis Machado <luisgpm@br.ibm.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
|
||||
02110-1301 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <bp-sym.h>
|
||||
#include <bp-asm.h>
|
||||
|
||||
/* size_t [r3] strlen (const char *s [r3])
|
||||
Returns in r3 the number of bytes in the string S before its first
|
||||
null byte. Uses r0, r4-r6, r8-r12, cr6 and cr7 as scratch. */
|
||||
.machine power7
|
||||
ENTRY (BP_SYM (strlen))
|
||||
CALL_MCOUNT
|
||||
dcbt 0,r3
|
||||
clrrwi r4,r3,2 /* Align the address to word boundary. */
|
||||
rlwinm r6,r3,3,27,28 /* Calculate padding: r6 = (s & 3) * 8 bits. */
|
||||
li r0,0 /* Word with null chars to use with cmpb. */
|
||||
li r5,-1 /* MASK = 0xffffffff. */
|
||||
lwz r12,0(r4) /* Load word from memory. */
|
||||
srw r5,r5,r6 /* MASK = MASK >> padding. */
|
||||
orc r9,r12,r5 /* Set the bits before s to ones so they cannot look like nulls. */
|
||||
cmpb r10,r9,r0 /* Check for null bytes in WORD1. */
|
||||
cmpwi cr7,r10,0 /* If r10 == 0, no null's have been found. */
|
||||
bne cr7,L(done)
|
||||
|
||||
mtcrf 0x01,r4 /* Copy the low four bits of r4 into cr7. */
|
||||
|
||||
/* Are we now aligned to a doubleword boundary? If so, skip to
|
||||
the main loop. Otherwise, go through the alignment code. */
|
||||
|
||||
bt 29,L(loop)
|
||||
|
||||
/* Handle WORD2 of pair. */
|
||||
lwzu r12,4(r4)
|
||||
cmpb r10,r12,r0
|
||||
cmpwi cr7,r10,0
|
||||
bne cr7,L(done)
|
||||
b L(loop) /* We branch here (rather than falling through)
|
||||
to skip the nops due to heavy alignment
|
||||
of the loop below. */
|
||||
|
||||
/* Main loop to look for the end of the string. Since it's a
|
||||
small loop (< 8 instructions), align it to 32-bytes. */
|
||||
.p2align 5
|
||||
L(loop):
|
||||
/* Load two words, compare and merge in a
|
||||
single register for speed. This is an attempt
|
||||
to speed up the null-checking process for bigger strings. */
|
||||
|
||||
lwz r12, 4(r4)
|
||||
lwzu r11, 8(r4)
|
||||
cmpb r10,r12,r0
|
||||
cmpb r9,r11,r0
|
||||
or r8,r9,r10 /* Merge everything in one word. */
|
||||
cmpwi cr7,r8,0
|
||||
beq cr7,L(loop)
|
||||
|
||||
/* OK, one (or both) of the words contains a null byte. Check
|
||||
the first word and decrement the address in case the first
|
||||
word really contains a null byte. */
|
||||
|
||||
cmpwi cr6,r10,0
|
||||
addi r4,r4,-4 /* Point r4 at the first word of the pair. */
|
||||
bne cr6,L(done)
|
||||
|
||||
/* The null byte must be in the second word. Adjust the address
|
||||
again and move the result of cmpb to r10 so we can calculate the
|
||||
length. */
|
||||
|
||||
mr r10,r9
|
||||
addi r4,r4,4 /* Point r4 back at the second word. */
|
||||
|
||||
/* r10 has the output of the cmpb instruction, that is, it contains
|
||||
0xff in the same position as the null byte in the original
|
||||
word from the string. Use that to calculate the length. */
|
||||
L(done):
|
||||
cntlzw r0,r10 /* Count leading zeroes before the match. */
|
||||
subf r5,r3,r4 /* r5 = address of the current word minus s. */
|
||||
srwi r0,r0,3 /* Convert leading zeroes to bytes. */
|
||||
add r3,r5,r0 /* Compute final length. */
|
||||
blr
|
||||
END (BP_SYM (strlen))
|
||||
libc_hidden_builtin_def (strlen)
|
172
sysdeps/powerpc/powerpc32/power7/strnlen.S
Normal file
172
sysdeps/powerpc/powerpc32/power7/strnlen.S
Normal file
@ -0,0 +1,172 @@
|
||||
/* Optimized strnlen implementation for PowerPC32/POWER7 using cmpb insn.
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Luis Machado <luisgpm@br.ibm.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
|
||||
02110-1301 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <bp-sym.h>
|
||||
#include <bp-asm.h>
|
||||
|
||||
/* int [r3] strnlen (char *s [r3], int size [r4]) */
|
||||
.machine power7
|
||||
/* strnlen for 32-bit POWER7.  In: r3 = string, r4 = maximum length.
   Out: r3 = offset of the first null byte, or end address minus r3 when
   no null byte lies inside the range.
   r8 is the word-aligned scan pointer, r7 the end address (r3 + r4) and
   r0 a word of zeros that cmpb compares each loaded word against. */
ENTRY (BP_SYM (__strnlen))
|
||||
CALL_MCOUNT
|
||||
dcbt 0,r3
|
||||
clrrwi r8,r3,2 /* Align the address to word boundary. */
|
||||
add r7,r3,r4 /* Calculate the last acceptable address. */
|
||||
cmplwi r4,16
|
||||
li r0,0 /* Word with null chars. */
|
||||
ble L(small_range)
|
||||
|
||||
cmplw cr7,r3,r7 /* Is the start address less than or equal to the
|
||||
end address? If not, r3 + r4 wrapped around and
|
||||
the end address gets clamped below. */
|
||||
ble cr7,L(proceed)
|
||||
|
||||
li r7,-1 /* Make r7 the biggest address if r3 + r4 wrapped. */
|
||||
L(proceed):
|
||||
rlwinm r6,r3,3,27,28 /* Calculate padding: (r3 & 3) * 8 bits. */
|
||||
lwz r12,0(r8) /* Load word from memory. */
|
||||
cmpb r10,r12,r0 /* Check for null bytes in WORD1. */
|
||||
slw r10,r10,r6 /* Move left to discard the padding bits. */
|
||||
|
||||
srw r10,r10,r6 /* Bring the bits back as zeros. */
|
||||
cmplwi cr7,r10,0 /* If r10 == 0, no null's have been found. */
|
||||
bne cr7,L(done)
|
||||
|
||||
/* Are we done already? */
|
||||
addi r9,r8,4
|
||||
cmplw cr6,r9,r7
|
||||
bge cr6,L(end_max)
|
||||
|
||||
mtcrf 0x01,r8
|
||||
/* Is the next word (r8 + 4) aligned to a doubleword boundary? If so, skip to
|
||||
the main loop. Otherwise, go through the alignment code. */
|
||||
|
||||
bt 29,L(loop_setup)
|
||||
|
||||
/* Handle WORD2 of pair. */
|
||||
lwzu r12,4(r8)
|
||||
cmpb r10,r12,r0
|
||||
cmplwi cr7,r10,0
|
||||
bne cr7,L(done)
|
||||
|
||||
/* Are we done already? */
|
||||
addi r9,r8,4
|
||||
cmplw cr6,r9,r7
|
||||
bge cr6,L(end_max)
|
||||
|
||||
L(loop_setup):
|
||||
sub r5,r7,r9
|
||||
srwi r6,r5,3 /* Number of loop iterations (8 bytes each). */
|
||||
mtctr r6 /* Setup the counter. */
|
||||
b L(loop)
|
||||
/* Main loop to look for the null byte in the string. Since
|
||||
it's a small loop (< 8 instructions), align it to 32-bytes. */
|
||||
.p2align 5
|
||||
L(loop):
|
||||
/* Load two words, compare and merge in a
|
||||
single register for speed. This is an attempt
|
||||
to speed up the null-checking process for bigger strings. */
|
||||
|
||||
lwz r12,4(r8)
|
||||
lwzu r11,8(r8)
|
||||
cmpb r10,r12,r0
|
||||
cmpb r9,r11,r0
|
||||
or r5,r9,r10 /* Merge everything in one word. */
|
||||
cmplwi cr7,r5,0
|
||||
bne cr7,L(found)
|
||||
bdnz L(loop)
|
||||
/* We're here because the counter reached 0, and that means we
|
||||
didn't have any matches for null in the word pairs. If words remain
|
||||
before the end address, scan them one by one; otherwise return the size. */
|
||||
addi r9,r8,4
|
||||
cmplw cr6,r9,r7
|
||||
blt cr6,L(loop_small)
|
||||
|
||||
L(end_max):
|
||||
sub r3,r7,r3
|
||||
blr
|
||||
|
||||
/* OK, one (or both) of the words contains a null byte. Check
|
||||
the first word and decrement the address in case the first
|
||||
word really contains a null byte. */
|
||||
.align 4
|
||||
L(found):
|
||||
cmplwi cr6,r10,0
|
||||
addi r8,r8,-4
|
||||
bne cr6,L(done)
|
||||
|
||||
/* The null byte must be in the second word. Adjust the address
|
||||
again and move the result of cmpb to r10 so we can calculate the
|
||||
length. */
|
||||
|
||||
mr r10,r9
|
||||
addi r8,r8,4
|
||||
|
||||
/* r10 has the output of the cmpb instruction, that is, it contains
|
||||
0xff in the same position as the null byte in the original
|
||||
word from the string. Use that to calculate the length.
|
||||
We need to make sure the null char is *before* the end of the
|
||||
range. */
|
||||
L(done):
|
||||
cntlzw r0,r10 /* Count leading zeroes before the match. */
|
||||
srwi r0,r0,3 /* Convert leading zeroes to bytes. */
|
||||
add r9,r8,r0
|
||||
sub r6,r9,r3 /* Length until the match. */
|
||||
cmplw r9,r7
|
||||
bgt L(end_max)
|
||||
mr r3,r6
|
||||
blr
|
||||
|
||||
.align 4
|
||||
L(zero):
|
||||
li r3,0
|
||||
blr
|
||||
|
||||
/* Deals with size <= 16. */
|
||||
.align 4
|
||||
L(small_range):
|
||||
cmplwi r4,0
|
||||
beq L(zero)
|
||||
|
||||
rlwinm r6,r3,3,27,28 /* Calculate padding: (r3 & 3) * 8 bits. */
|
||||
lwz r12,0(r8) /* Load word from memory. */
|
||||
cmpb r10,r12,r0 /* Check for null bytes in WORD1. */
|
||||
slw r10,r10,r6
|
||||
srw r10,r10,r6
|
||||
cmplwi cr7,r10,0
|
||||
bne cr7,L(done)
|
||||
|
||||
addi r9,r8,4
|
||||
cmplw r9,r7
|
||||
bge L(end_max)
|
||||
b L(loop_small)
|
||||
|
||||
.p2align 5
|
||||
/* Scan one word per iteration until a null byte or the end address. */
L(loop_small):
|
||||
lwzu r12,4(r8)
|
||||
cmpb r10,r12,r0
|
||||
addi r9,r8,4
|
||||
cmplwi cr6,r10,0
|
||||
bne cr6,L(done)
|
||||
cmplw r9,r7
|
||||
bge L(end_max)
|
||||
b L(loop_small)
|
||||
END (BP_SYM (__strnlen))
|
||||
weak_alias (BP_SYM (__strnlen), BP_SYM(strnlen))
|
||||
libc_hidden_builtin_def (strnlen)
|
171
sysdeps/powerpc/powerpc64/power7/memchr.S
Normal file
171
sysdeps/powerpc/powerpc64/power7/memchr.S
Normal file
@ -0,0 +1,171 @@
|
||||
/* Optimized memchr implementation for PowerPC64/POWER7 using cmpb insn.
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Luis Machado <luisgpm@br.ibm.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
|
||||
02110-1301 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <bp-sym.h>
|
||||
#include <bp-asm.h>
|
||||
|
||||
/* void * [r3] memchr (const void *s [r3], int byte [r4], size_t size [r5]) */
|
||||
.machine power7
|
||||
/* memchr for 64-bit POWER7.  In: r3 = buffer, r4 = BYTE, r5 = size.
   Out: r3 = address of the first BYTE inside the range, or 0 if none.
   r8 is the doubleword-aligned scan pointer, r7 the end address (r3 + r5)
   and r4 is turned into a doubleword of BYTE copies for cmpb. */
ENTRY (BP_SYM (__memchr))
|
||||
/* memchr takes three arguments (r3, r4 and r5). */
CALL_MCOUNT 3
|
||||
dcbt 0,r3
|
||||
clrrdi r8,r3,3 /* Align the address to doubleword boundary. */
|
||||
/* Replicate BYTE to doubleword. */
rlwimi r4,r4,8,16,23
|
||||
rlwimi r4,r4,16,0,15
|
||||
insrdi r4,r4,32,0
|
||||
add r7,r3,r5 /* Calculate the last acceptable address. */
|
||||
cmpldi r5,32
|
||||
ble L(small_range)
|
||||
|
||||
cmpld cr7,r3,r7 /* Is the start address less than or equal to the
|
||||
end address? If not, r3 + r5 wrapped around and
|
||||
the end address gets clamped below. */
|
||||
ble cr7,L(proceed)
|
||||
|
||||
li r7,-1 /* Make r7 the biggest address if r3 + r5 wrapped. */
|
||||
L(proceed):
|
||||
rlwinm r6,r3,3,26,28 /* Calculate padding: (r3 & 7) * 8 bits. */
|
||||
ld r12,0(r8) /* Load doubleword from memory. */
|
||||
cmpb r10,r12,r4 /* Check for BYTE's in DWORD1. */
|
||||
sld r10,r10,r6 /* Move left to discard the padding bits. */
|
||||
srd r10,r10,r6 /* Bring the bits back as zeros. */
|
||||
cmpldi cr7,r10,0 /* If r10 == 0, no BYTEs have been found. */
|
||||
bne cr7,L(done)
|
||||
|
||||
/* Are we done already? */
|
||||
addi r9,r8,8
|
||||
cmpld cr6,r9,r7
|
||||
bge cr6,L(null)
|
||||
|
||||
mtcrf 0x01,r8
|
||||
/* Is the next doubleword (r8 + 8) aligned to a quadword boundary? If so, skip to
|
||||
the main loop. Otherwise, go through the alignment code. */
|
||||
|
||||
bt 28,L(loop_setup)
|
||||
|
||||
/* Handle DWORD2 of pair. */
|
||||
ldu r12,8(r8)
|
||||
cmpb r10,r12,r4
|
||||
cmpldi cr7,r10,0
|
||||
bne cr7,L(done)
|
||||
|
||||
/* Are we done already? */
|
||||
addi r9,r8,8
|
||||
cmpld cr6,r9,r7
|
||||
bge cr6,L(null)
|
||||
|
||||
L(loop_setup):
|
||||
sub r5,r7,r9
|
||||
srdi r6,r5,4 /* Number of loop iterations (16 bytes each). */
|
||||
mtctr r6 /* Setup the counter. */
|
||||
b L(loop)
|
||||
/* Main loop to look for BYTE in the string. Since
|
||||
it's a small loop (< 8 instructions), align it to 32-bytes. */
|
||||
.p2align 5
|
||||
L(loop):
|
||||
/* Load two doublewords, compare and merge in a
|
||||
single register for speed. This is an attempt
|
||||
to speed up the byte-checking process for bigger strings. */
|
||||
|
||||
ld r12,8(r8)
|
||||
ldu r11,16(r8)
|
||||
cmpb r10,r12,r4
|
||||
cmpb r9,r11,r4
|
||||
or r5,r9,r10 /* Merge everything in one doubleword. */
|
||||
cmpldi cr7,r5,0
|
||||
bne cr7,L(found)
|
||||
bdnz L(loop)
|
||||
/* We're here because the counter reached 0, and that means we
|
||||
didn't have any matches for BYTE in the doubleword pairs. If doublewords
|
||||
remain before the end address, scan them one by one; otherwise return null. */
|
||||
addi r9,r8,8
|
||||
cmpld cr6,r9,r7
|
||||
blt cr6,L(loop_small)
|
||||
b L(null)
|
||||
|
||||
/* OK, one (or both) of the doublewords contains BYTE. Check
|
||||
the first doubleword and decrement the address in case the first
|
||||
doubleword really contains BYTE. */
|
||||
.align 4
|
||||
L(found):
|
||||
cmpldi cr6,r10,0
|
||||
addi r8,r8,-8
|
||||
bne cr6,L(done)
|
||||
|
||||
/* BYTE must be in the second doubleword. Adjust the address
|
||||
again and move the result of cmpb to r10 so we can calculate the
|
||||
pointer. */
|
||||
|
||||
mr r10,r9
|
||||
addi r8,r8,8
|
||||
|
||||
/* r10 has the output of the cmpb instruction, that is, it contains
|
||||
0xff in the same position as BYTE in the original
|
||||
doubleword from the string. Use that to calculate the pointer.
|
||||
We need to make sure BYTE is *before* the end of the
|
||||
range. */
|
||||
L(done):
|
||||
cntlzd r0,r10 /* Count leading zeroes before the match. */
|
||||
srdi r0,r0,3 /* Convert leading zeroes to bytes. */
|
||||
add r3,r8,r0
|
||||
cmpld r3,r7
|
||||
bge L(null)
|
||||
blr
|
||||
|
||||
.align 4
|
||||
L(null):
|
||||
li r3,0
|
||||
blr
|
||||
|
||||
/* Deals with size <= 32. */
|
||||
.align 4
|
||||
L(small_range):
|
||||
cmpldi r5,0
|
||||
beq L(null)
|
||||
|
||||
rlwinm r6,r3,3,26,28 /* Calculate padding: (r3 & 7) * 8 bits. */
|
||||
ld r12,0(r8) /* Load doubleword from memory. */
|
||||
cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */
|
||||
sld r10,r10,r6
|
||||
srd r10,r10,r6
|
||||
cmpldi cr7,r10,0
|
||||
bne cr7,L(done)
|
||||
|
||||
/* Are we done already? */
|
||||
addi r9,r8,8
|
||||
cmpld r9,r7
|
||||
bge L(null)
|
||||
b L(loop_small)
|
||||
|
||||
.p2align 5
|
||||
/* Scan one doubleword per iteration until BYTE or the end address. */
L(loop_small):
|
||||
ldu r12,8(r8)
|
||||
cmpb r10,r12,r4
|
||||
addi r9,r8,8
|
||||
cmpldi cr6,r10,0
|
||||
bne cr6,L(done)
|
||||
cmpld r9,r7
|
||||
bge L(null)
|
||||
b L(loop_small)
|
||||
|
||||
END (BP_SYM (__memchr))
|
||||
weak_alias (BP_SYM (__memchr), BP_SYM(memchr))
|
||||
libc_hidden_builtin_def (memchr)
|
180
sysdeps/powerpc/powerpc64/power7/memrchr.S
Normal file
180
sysdeps/powerpc/powerpc64/power7/memrchr.S
Normal file
@ -0,0 +1,180 @@
|
||||
/* Optimized memrchr implementation for PowerPC64/POWER7 using cmpb insn.
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Luis Machado <luisgpm@br.ibm.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
|
||||
02110-1301 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <bp-sym.h>
|
||||
#include <bp-asm.h>
|
||||
|
||||
/* void * [r3] memrchr (const void *s [r3], int byte [r4], size_t size [r5]) */
|
||||
.machine power7
|
||||
/* memrchr for 64-bit POWER7.  In: r3 = buffer, r4 = BYTE, r5 = size.
   Out: r3 = address of the last BYTE inside the range, or 0 if none.
   r7 keeps the start address, r3 becomes the end address (start + size),
   r8 is the doubleword-aligned scan pointer that moves towards r7 and
   r4 is turned into a doubleword of BYTE copies for cmpb.  Doublewords are
   loaded byte-reversed (ldbrx) so the highest address is the leading byte. */
ENTRY (BP_SYM (__memrchr))
|
||||
/* memrchr takes three arguments (r3, r4 and r5). */
CALL_MCOUNT 3
|
||||
dcbt 0,r3
|
||||
mr r7,r3
|
||||
add r3,r7,r5 /* Calculate the last acceptable address. */
|
||||
cmpld cr7,r3,r7 /* Is the end address below the start (wrapped)? */
|
||||
|
||||
/* Replicate BYTE to doubleword. */
|
||||
rlwimi r4,r4,8,16,23
|
||||
rlwimi r4,r4,16,0,15
|
||||
insrdi r4,r4,32,0
|
||||
bge cr7,L(proceed)
|
||||
|
||||
li r3,-1 /* Make r3 the biggest address if r7 + r5 wrapped. */
|
||||
L(proceed):
|
||||
li r6,-8 /* Offset of the doubleword just below r8. */
|
||||
addi r9,r3,-1
|
||||
clrrdi r8,r9,3
|
||||
addi r8,r8,8 /* r8 = end address rounded up to a doubleword. */
|
||||
neg r0,r3
|
||||
rlwinm r0,r0,3,26,28 /* Calculate padding: (-r3 & 7) * 8 bits. */
|
||||
|
||||
cmpldi r5,32
|
||||
ble L(small_range)
|
||||
|
||||
ldbrx r12,r8,r6 /* Load reversed doubleword from memory. */
|
||||
cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */
|
||||
sld r10,r10,r0 /* Move left to discard the padding bits. */
|
||||
srd r10,r10,r0 /* Bring the bits back as zeros. */
|
||||
cmpldi cr7,r10,0 /* If r10 == 0, no BYTE's have been found. */
|
||||
bne cr7,L(done)
|
||||
|
||||
/* Are we done already? */
|
||||
addi r9,r8,-8
|
||||
cmpld cr6,r9,r7
|
||||
ble cr6,L(null)
|
||||
|
||||
mtcrf 0x01,r8
|
||||
/* Are we now aligned to a quadword boundary? If so, skip to
|
||||
the main loop. Otherwise, go through the alignment code. */
|
||||
mr r8,r9
|
||||
bt 28,L(loop_setup)
|
||||
|
||||
/* Handle DWORD2 of pair. */
|
||||
ldbrx r12,r8,r6
|
||||
cmpb r10,r12,r4
|
||||
cmpldi cr7,r10,0
|
||||
bne cr7,L(done)
|
||||
|
||||
/* Are we done already? */
|
||||
addi r8,r8,-8
|
||||
cmpld cr6,r8,r7
|
||||
ble cr6,L(null)
|
||||
|
||||
L(loop_setup):
|
||||
li r0,-16 /* Offset of the second doubleword below r8. */
|
||||
sub r5,r8,r7
|
||||
srdi r9,r5,4 /* Number of loop iterations (16 bytes each). */
|
||||
mtctr r9 /* Setup the counter. */
|
||||
b L(loop)
|
||||
/* Main loop to look for BYTE backwards in the string. Since it's a
|
||||
small loop (< 8 instructions), align it to 32-bytes. */
|
||||
.p2align 5
|
||||
L(loop):
|
||||
/* Load two doublewords, compare and merge in a
|
||||
single register for speed. This is an attempt
|
||||
to speed up the byte-checking process for bigger strings. */
|
||||
|
||||
ldbrx r12,r8,r6
|
||||
ldbrx r11,r8,r0
|
||||
addi r8,r8,-8
|
||||
cmpb r10,r12,r4
|
||||
cmpb r9,r11,r4
|
||||
or r5,r9,r10 /* Merge everything in one doubleword. */
|
||||
cmpldi cr7,r5,0
|
||||
bne cr7,L(found)
|
||||
addi r8,r8,-8
|
||||
bdnz L(loop)
|
||||
/* We're here because the counter reached 0, and that means we
|
||||
didn't have any matches for BYTE in the doubleword pairs. If doublewords
|
||||
remain above the start address, scan them one by one; otherwise return null. */
|
||||
addi r9,r8,8
|
||||
cmpld cr6,r9,r7
|
||||
bgt cr6,L(loop_small)
|
||||
b L(null)
|
||||
|
||||
/* OK, one (or both) of the doublewords contains BYTE. Check
|
||||
the first doubleword and restore the address in case the first
|
||||
doubleword really contains BYTE. */
|
||||
.align 4
|
||||
L(found):
|
||||
cmpldi cr6,r10,0
|
||||
addi r8,r8,8
|
||||
bne cr6,L(done)
|
||||
|
||||
/* BYTE must be in the second doubleword. Adjust the address
|
||||
again and move the result of cmpb to r10 so we can calculate the
|
||||
pointer. */
|
||||
|
||||
mr r10,r9
|
||||
addi r8,r8,-8
|
||||
|
||||
/* r10 has the output of the cmpb instruction, that is, it contains
|
||||
0xff in the same position as the BYTE in the byte-reversed
|
||||
doubleword from the string. Use that to calculate the pointer.
|
||||
We need to make sure BYTE is not *before* the start of the
|
||||
range. */
|
||||
L(done):
|
||||
cntlzd r0,r10 /* Count leading zeroes before the match. */
|
||||
srdi r6,r0,3 /* Convert leading zeroes to bytes. */
|
||||
addi r0,r6,1
|
||||
sub r3,r8,r0 /* Match address = r8 - 1 - byte index. */
|
||||
cmpld r3,r7
|
||||
blt L(null)
|
||||
blr
|
||||
|
||||
.align 4
|
||||
L(null):
|
||||
li r3,0
|
||||
blr
|
||||
|
||||
/* Deals with size <= 32. */
|
||||
.align 4
|
||||
L(small_range):
|
||||
cmpldi r5,0
|
||||
beq L(null)
|
||||
|
||||
ldbrx r12,r8,r6 /* Load reversed doubleword from memory. */
|
||||
cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */
|
||||
sld r10,r10,r0
|
||||
srd r10,r10,r0
|
||||
cmpldi cr7,r10,0
|
||||
bne cr7,L(done)
|
||||
|
||||
/* Are we done already? */
|
||||
addi r8,r8,-8
|
||||
cmpld r8,r7
|
||||
ble L(null)
|
||||
b L(loop_small)
|
||||
|
||||
.p2align 5
|
||||
/* Scan one doubleword per iteration until BYTE or the start address. */
L(loop_small):
|
||||
ldbrx r12,r8,r6
|
||||
cmpb r10,r12,r4
|
||||
cmpldi cr6,r10,0
|
||||
bne cr6,L(done)
|
||||
addi r8,r8,-8
|
||||
cmpld r8,r7
|
||||
ble L(null)
|
||||
b L(loop_small)
|
||||
|
||||
END (BP_SYM (__memrchr))
|
||||
weak_alias (BP_SYM (__memrchr), BP_SYM(memrchr))
|
||||
libc_hidden_builtin_def (memrchr)
|
103
sysdeps/powerpc/powerpc64/power7/rawmemchr.S
Normal file
103
sysdeps/powerpc/powerpc64/power7/rawmemchr.S
Normal file
@ -0,0 +1,103 @@
|
||||
/* Optimized rawmemchr implementation for PowerPC64/POWER7 using cmpb insn.
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Luis Machado <luisgpm@br.ibm.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
|
||||
02110-1301 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <bp-sym.h>
|
||||
#include <bp-asm.h>
|
||||
|
||||
/* void * [r3] rawmemchr (const void *s [r3], int c [r4]) */
|
||||
.machine power7
|
||||
/* rawmemchr for 64-bit POWER7.  In: r3 = buffer, r4 = c.
   Out: r3 = address of the first c byte; the scan has no length limit.
   r8 is the doubleword-aligned scan pointer and r4 is turned into a
   doubleword of c copies for cmpb. */
ENTRY (BP_SYM(__rawmemchr))
|
||||
CALL_MCOUNT 2
|
||||
dcbt 0,r3
|
||||
clrrdi r8,r3,3 /* Align the address to doubleword boundary. */
|
||||
|
||||
/* Replicate byte to doubleword. */
|
||||
rlwimi r4,r4,8,16,23
|
||||
rlwimi r4,r4,16,0,15
|
||||
insrdi r4,r4,32,0
|
||||
|
||||
/* Now r4 has a doubleword of c bytes. */
|
||||
|
||||
rlwinm r6,r3,3,26,28 /* Calculate padding: (r3 & 7) * 8 bits. */
|
||||
ld r12,0(r8) /* Load doubleword from memory. */
|
||||
cmpb r5,r12,r4 /* Compare each byte against c byte. */
|
||||
sld r5,r5,r6 /* Move left to discard ignored bits. */
|
||||
srd r5,r5,r6 /* Bring the bits back as zeros. */
|
||||
cmpdi cr7,r5,0 /* If r5 == 0, no c bytes have been found. */
|
||||
bne cr7,L(done)
|
||||
|
||||
mtcrf 0x01,r8
|
||||
|
||||
/* Is the next doubleword (r8 + 8) aligned to a quadword boundary? If so, skip to
|
||||
the main loop. Otherwise, go through the alignment code. */
|
||||
|
||||
bt 28,L(loop)
|
||||
|
||||
/* Handle DWORD2 of pair. */
|
||||
ldu r12,8(r8)
|
||||
cmpb r5,r12,r4
|
||||
cmpdi cr7,r5,0
|
||||
bne cr7,L(done)
|
||||
b L(loop) /* We branch here (rather than falling through)
|
||||
to skip the nops due to heavy alignment
|
||||
of the loop below. */
|
||||
|
||||
/* Main loop to look for the c byte. Since it's a
|
||||
small loop (< 8 instructions), align it to 32-bytes. */
|
||||
.p2align 5
|
||||
L(loop):
|
||||
/* Load two doublewords, compare and merge in a
|
||||
single register for speed. This is an attempt
|
||||
to speed up the byte-checking process for bigger strings. */
|
||||
ld r12,8(r8)
|
||||
ldu r11,16(r8)
|
||||
cmpb r5,r12,r4
|
||||
cmpb r6,r11,r4
|
||||
or r7,r5,r6
|
||||
cmpdi cr7,r7,0
|
||||
beq cr7,L(loop)
|
||||
|
||||
/* OK, one (or both) of the doublewords contains a 'c' byte. Check
|
||||
the first doubleword and decrement the address in case the first
|
||||
doubleword really contains a c byte. */
|
||||
|
||||
cmpdi cr6,r5,0
|
||||
addi r8,r8,-8
|
||||
bne cr6,L(done)
|
||||
|
||||
/* The 'c' byte must be in the second doubleword. Adjust the address
|
||||
again and move the result of cmpb to r5 so we can calculate the
|
||||
pointer. */
|
||||
mr r5,r6
|
||||
addi r8,r8,8
|
||||
|
||||
/* r5 has the output of the cmpb instruction, that is, it contains
|
||||
0xff in the same position as the 'c' byte in the original
|
||||
doubleword from the string. Use that fact to find out what is
|
||||
the position of the byte inside the string. */
|
||||
L(done):
|
||||
cntlzd r0,r5 /* Count leading zeros before the match. */
|
||||
srdi r0,r0,3 /* Convert leading zeroes to bytes. */
|
||||
add r3,r8,r0 /* Return address of the matching char. */
|
||||
blr
|
||||
END (BP_SYM (__rawmemchr))
|
||||
weak_alias (__rawmemchr,rawmemchr)
|
||||
libc_hidden_builtin_def (__rawmemchr)
|
204
sysdeps/powerpc/powerpc64/power7/strchr.S
Normal file
204
sysdeps/powerpc/powerpc64/power7/strchr.S
Normal file
@ -0,0 +1,204 @@
|
||||
/* Optimized strchr implementation for PowerPC64/POWER7 using cmpb insn.
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Luis Machado <luisgpm@br.ibm.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
|
||||
02110-1301 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <bp-sym.h>
|
||||
#include <bp-asm.h>
|
||||
|
||||
/* char * [r3] strchr (const char *s [r3], int c [r4]) */
|
||||
.machine power7
|
||||
/* strchr for 64-bit POWER7.  In: r3 = string, r4 = c.
   Out: r3 = address of the first c byte, or 0 if a null byte comes first.
   r8 is the doubleword-aligned scan pointer, r0 a doubleword of zeros and
   r4 is turned into a doubleword of c copies; both are used with cmpb.
   A c value of zero is handled by the separate L(null_match) path. */
ENTRY (BP_SYM(strchr))
|
||||
CALL_MCOUNT 2
|
||||
dcbt 0,r3
|
||||
clrrdi r8,r3,3 /* Align the address to doubleword boundary. */
|
||||
cmpdi cr7,r4,0
|
||||
ld r12,0(r8) /* Load doubleword from memory. */
|
||||
li r0,0 /* Doubleword with null chars to use
|
||||
with cmpb. */
|
||||
|
||||
rlwinm r6,r3,3,26,28 /* Calculate padding: (r3 & 7) * 8 bits. */
|
||||
|
||||
beq cr7,L(null_match)
|
||||
|
||||
/* Replicate byte to doubleword. */
|
||||
rlwimi r4,r4,8,16,23
|
||||
rlwimi r4,r4,16,0,15
|
||||
insrdi r4,r4,32,0
|
||||
|
||||
/* Now r4 has a doubleword of c bytes and r0 has
|
||||
a doubleword of null bytes. */
|
||||
|
||||
cmpb r10,r12,r4 /* Compare each byte against c byte. */
|
||||
cmpb r11,r12,r0 /* Compare each byte against null byte. */
|
||||
|
||||
/* Move the doublewords left and right to discard the bits that are
|
||||
not part of the string and bring them back as zeros. */
|
||||
|
||||
sld r10,r10,r6
|
||||
sld r11,r11,r6
|
||||
srd r10,r10,r6
|
||||
srd r11,r11,r6
|
||||
or r5,r10,r11 /* OR the results to speed things up. */
|
||||
cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes
|
||||
have been found. */
|
||||
bne cr7,L(done)
|
||||
|
||||
mtcrf 0x01,r8
|
||||
|
||||
/* Is the next doubleword (r8 + 8) aligned to a quadword boundary? If so, skip to
|
||||
the main loop. Otherwise, go through the alignment code. */
|
||||
|
||||
bt 28,L(loop)
|
||||
|
||||
/* Handle DWORD2 of pair. */
|
||||
ldu r12,8(r8)
|
||||
cmpb r10,r12,r4
|
||||
cmpb r11,r12,r0
|
||||
or r5,r10,r11
|
||||
cmpdi cr7,r5,0
|
||||
bne cr7,L(done)
|
||||
b L(loop) /* We branch here (rather than falling through)
|
||||
to skip the nops due to heavy alignment
|
||||
of the loop below. */
|
||||
|
||||
.p2align 5
|
||||
L(loop):
|
||||
/* Load two doublewords, compare and merge in a
|
||||
single register for speed. This is an attempt
|
||||
to speed up the null-checking process for bigger strings. */
|
||||
ld r12,8(r8)
|
||||
ldu r9,16(r8)
|
||||
cmpb r10,r12,r4
|
||||
cmpb r11,r12,r0
|
||||
cmpb r6,r9,r4
|
||||
cmpb r7,r9,r0
|
||||
or r12,r10,r11
|
||||
or r9,r6,r7
|
||||
or r5,r12,r9
|
||||
cmpdi cr7,r5,0
|
||||
beq cr7,L(loop)
|
||||
|
||||
/* OK, one (or both) of the doublewords contains a c/null byte. Check
|
||||
the first doubleword and decrement the address in case the first
|
||||
doubleword really contains a c/null byte. */
|
||||
|
||||
cmpdi cr6,r12,0
|
||||
addi r8,r8,-8
|
||||
bne cr6,L(done)
|
||||
|
||||
/* The c/null byte must be in the second doubleword. Adjust the
|
||||
address again and move the results of cmpb to r10 and r11 so we can
|
||||
calculate the pointer. */
|
||||
|
||||
mr r10,r6
|
||||
mr r11,r7
|
||||
addi r8,r8,8
|
||||
|
||||
/* r10 and r11 have the outputs of the cmpb instructions, that is, they
|
||||
contain 0xff in the same position as the c (r10) or null (r11) byte in the
|
||||
original doubleword from the string. Use that to calculate the pointer. */
|
||||
L(done):
|
||||
cntlzd r4,r10 /* Count leading zeroes before c matches. */
|
||||
cntlzd r0,r11 /* Count leading zeroes before null matches. */
|
||||
cmpld cr7,r4,r0
|
||||
/* More leading zeroes for c than for null means the null byte comes first. */
bgt cr7,L(no_match)
|
||||
srdi r0,r4,3 /* Convert leading zeroes to bytes. */
|
||||
add r3,r8,r0 /* Return address of the matching c byte
|
||||
or null in case c was not found. */
|
||||
blr
|
||||
|
||||
.align 4
|
||||
L(no_match):
|
||||
li r3,0
|
||||
blr
|
||||
|
||||
/* We are here because strchr was called with a null byte. */
|
||||
.align 4
|
||||
L(null_match):
|
||||
/* r0 has a doubleword of null bytes. */
|
||||
|
||||
cmpb r5,r12,r0 /* Compare each byte against null bytes. */
|
||||
|
||||
/* Move the doublewords left and right to discard the bits that are
|
||||
not part of the string and bring them back as zeros. */
|
||||
|
||||
sld r5,r5,r6
|
||||
srd r5,r5,r6
|
||||
cmpdi cr7,r5,0 /* If r5 == 0, no null bytes
|
||||
have been found. */
|
||||
bne cr7,L(done_null)
|
||||
|
||||
mtcrf 0x01,r8
|
||||
|
||||
/* Is the next doubleword (r8 + 8) aligned to a quadword boundary? If so, skip to
|
||||
the main loop. Otherwise, go through the alignment code. */
|
||||
|
||||
bt 28,L(loop_null)
|
||||
|
||||
/* Handle DWORD2 of pair. */
|
||||
ldu r12,8(r8)
|
||||
cmpb r5,r12,r0
|
||||
cmpdi cr7,r5,0
|
||||
bne cr7,L(done_null)
|
||||
b L(loop_null) /* We branch here (rather than falling through)
|
||||
to skip the nops due to heavy alignment
|
||||
of the loop below. */
|
||||
|
||||
/* Main loop to look for the end of the string. Since it's a
|
||||
small loop (< 8 instructions), align it to 32-bytes. */
|
||||
.p2align 5
|
||||
L(loop_null):
|
||||
/* Load two doublewords, compare and merge in a
|
||||
single register for speed. This is an attempt
|
||||
to speed up the null-checking process for bigger strings. */
|
||||
ld r12,8(r8)
|
||||
ldu r11,16(r8)
|
||||
cmpb r5,r12,r0
|
||||
cmpb r10,r11,r0
|
||||
or r6,r5,r10
|
||||
cmpdi cr7,r6,0
|
||||
beq cr7,L(loop_null)
|
||||
|
||||
/* OK, one (or both) of the doublewords contains a null byte. Check
|
||||
the first doubleword and decrement the address in case the first
|
||||
doubleword really contains a null byte. */
|
||||
|
||||
cmpdi cr6,r5,0
|
||||
addi r8,r8,-8
|
||||
bne cr6,L(done_null)
|
||||
|
||||
/* The null byte must be in the second doubleword. Adjust the address
|
||||
again and move the result of cmpb to r5 so we can calculate the
|
||||
pointer. */
|
||||
|
||||
mr r5,r10
|
||||
addi r8,r8,8
|
||||
|
||||
/* r5 has the output of the cmpb instruction, that is, it contains
|
||||
0xff in the same position as the null byte in the original
|
||||
doubleword from the string. Use that to calculate the pointer. */
|
||||
L(done_null):
|
||||
cntlzd r0,r5 /* Count leading zeros before the match. */
|
||||
srdi r0,r0,3 /* Convert leading zeros to bytes. */
|
||||
add r3,r8,r0 /* Return address of the matching null byte. */
|
||||
blr
|
||||
END (BP_SYM (strchr))
|
||||
weak_alias (BP_SYM (strchr), BP_SYM (index))
|
||||
libc_hidden_builtin_def (strchr)
|
118
sysdeps/powerpc/powerpc64/power7/strchrnul.S
Normal file
118
sysdeps/powerpc/powerpc64/power7/strchrnul.S
Normal file
@ -0,0 +1,118 @@
|
||||
/* Optimized strchrnul implementation for PowerPC64/POWER7 using cmpb insn.
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Luis Machado <luisgpm@br.ibm.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
|
||||
02110-1301 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <bp-sym.h>
|
||||
#include <bp-asm.h>
|
||||
|
||||
/* char * [r3] strchrnul (const char *s [r3], int c [r4]) */
|
||||
.machine power7
|
||||
/* strchrnul for 64-bit POWER7.  In: r3 = string, r4 = c.
   Out: r3 = address of the first c byte or of the terminating null byte,
   whichever comes first.
   r8 is the doubleword-aligned scan pointer, r0 a doubleword of zeros and
   r4 is turned into a doubleword of c copies; both are used with cmpb. */
ENTRY (BP_SYM(__strchrnul))
|
||||
CALL_MCOUNT 2
|
||||
dcbt 0,r3
|
||||
clrrdi r8,r3,3 /* Align the address to doubleword boundary. */
|
||||
|
||||
/* Replicate byte to doubleword. */
|
||||
rlwimi r4,r4,8,16,23
|
||||
rlwimi r4,r4,16,0,15
|
||||
insrdi r4,r4,32,0
|
||||
|
||||
rlwinm r6,r3,3,26,28 /* Calculate padding: (r3 & 7) * 8 bits. */
|
||||
ld r12,0(r8) /* Load doubleword from memory. */
|
||||
li r0,0 /* Doubleword with null chars to use
|
||||
with cmpb. */
|
||||
|
||||
/* Now r4 has a doubleword of c bytes and r0 has
|
||||
a doubleword of null bytes. */
|
||||
|
||||
cmpb r10,r12,r0 /* Compare each byte against null byte. */
|
||||
cmpb r9,r12,r4 /* Compare each byte against c byte. */
|
||||
|
||||
/* Move the doublewords left and right to discard the bits that are
|
||||
not part of the string and to bring them back as zeros. */
|
||||
sld r10,r10,r6
|
||||
sld r9,r9,r6
|
||||
srd r10,r10,r6
|
||||
srd r9,r9,r6
|
||||
or r5,r9,r10 /* OR the results to speed things up. */
|
||||
cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes
|
||||
have been found. */
|
||||
bne cr7,L(done)
|
||||
|
||||
mtcrf 0x01,r8
|
||||
|
||||
/* Is the next doubleword (r8 + 8) aligned to a quadword boundary? If so, skip to
|
||||
the main loop. Otherwise, go through the alignment code. */
|
||||
|
||||
bt 28,L(loop)
|
||||
|
||||
/* Handle DWORD2 of pair. */
|
||||
ldu r12,8(r8)
|
||||
cmpb r10,r12,r0
|
||||
cmpb r9,r12,r4
|
||||
or r5,r9,r10
|
||||
cmpdi cr7,r5,0
|
||||
bne cr7,L(done)
|
||||
b L(loop) /* We branch here (rather than falling through)
|
||||
to skip the nops due to heavy alignment
|
||||
of the loop below. */
|
||||
|
||||
.p2align 5
|
||||
L(loop):
|
||||
/* Load two doublewords, compare and merge in a
|
||||
single register for speed. This is an attempt
|
||||
to speed up the null-checking process for bigger strings. */
|
||||
ld r12,8(r8)
|
||||
ldu r11,16(r8)
|
||||
cmpb r10,r12,r0
|
||||
cmpb r9,r12,r4
|
||||
cmpb r6,r11,r0
|
||||
cmpb r7,r11,r4
|
||||
or r5,r9,r10 /* c/null matches in the first doubleword. */
|
||||
or r10,r6,r7 /* c/null matches in the second doubleword. */
|
||||
or r11,r5,r10
|
||||
cmpdi cr7,r11,0
|
||||
beq cr7,L(loop)
|
||||
|
||||
/* OK, one (or both) of the doublewords contains a c/null byte. Check
|
||||
the first doubleword and decrement the address in case the first
|
||||
doubleword really contains a c/null byte. */
|
||||
|
||||
cmpdi cr6,r5,0
|
||||
addi r8,r8,-8
|
||||
bne cr6,L(done)
|
||||
|
||||
/* The c/null byte must be in the second doubleword. Adjust the
|
||||
address again and move the merged result of cmpb to r5 so we can calculate
|
||||
the pointer. */
|
||||
mr r5,r10
|
||||
addi r8,r8,8
|
||||
|
||||
/* r5 has the output of the cmpb instruction, that is, it contains
|
||||
0xff in the same position as the c/null byte in the original
|
||||
doubleword from the string. Use that to calculate the pointer. */
|
||||
L(done):
|
||||
cntlzd r0,r5 /* Count leading zeros before the match. */
|
||||
srdi r0,r0,3 /* Convert leading zeros to bytes. */
|
||||
add r3,r8,r0 /* Return address of matching c/null byte. */
|
||||
blr
|
||||
END (BP_SYM (__strchrnul))
|
||||
weak_alias (__strchrnul,strchrnul)
|
||||
libc_hidden_builtin_def (__strchrnul)
|
99
sysdeps/powerpc/powerpc64/power7/strlen.S
Normal file
99
sysdeps/powerpc/powerpc64/power7/strlen.S
Normal file
@ -0,0 +1,99 @@
|
||||
/* Optimized strlen implementation for PowerPC64/POWER7 using cmpb insn.
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Luis Machado <luisgpm@br.ibm.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
|
||||
02110-1301 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <bp-sym.h>
|
||||
#include <bp-asm.h>
|
||||
|
||||
/* int [r3] strlen (char *s [r3])
   Returns in r3 the number of bytes that precede the first null byte
   of the string at r3.  The string is scanned one aligned doubleword
   at a time (two per iteration in the main loop), using cmpb against
   a zero doubleword to flag null bytes.
   Register use: r3 = s, r4 = address of the doubleword being checked,
   r0 = zero doubleword (reused as scratch in L(done)), r10 = cmpb
   result for the doubleword r4 points to when L(done) is reached.
   NOTE(review): turning a leading-zero count into a byte offset, as
   L(done) does, presumes big-endian doubleword loads -- confirm for
   the target configuration.  */
|
||||
.machine power7
|
||||
ENTRY (BP_SYM (strlen))
|
||||
CALL_MCOUNT 1
|
||||
dcbt 0,r3 /* Cache-touch hint for the start of the string. */
|
||||
clrrdi r4,r3,3 /* Align the address to doubleword boundary. */
|
||||
rlwinm r6,r3,3,26,28 /* Calculate padding: (s & 7) * 8 bits precede the string in DWORD1. */
|
||||
li r0,0 /* Doubleword with null chars to use
|
||||
with cmpb. */
|
||||
li r5,-1 /* MASK = 0xffffffffffffffff. */
|
||||
ld r12,0(r4) /* Load doubleword from memory. */
|
||||
srd r5,r5,r6 /* MASK = MASK >> padding. */
|
||||
orc r9,r12,r5 /* Mask bits that are not part of the string: r9 = r12 | ~MASK sets the padding bits to 1. */
|
||||
cmpb r10,r9,r0 /* Check for null bytes in DWORD1. */
|
||||
cmpdi cr7,r10,0 /* If r10 == 0, no nulls have been found. */
|
||||
bne cr7,L(done)
|
||||
|
||||
mtcrf 0x01,r4 /* Copy the low 4 bits of the aligned address into cr7. */
|
||||
|
||||
/* Is the next doubleword (r4 + 8) aligned to a quadword boundary? If so,
|
||||
skip to the main loop. Otherwise, go through the alignment code. */
|
||||
|
||||
bt 28,L(loop) /* CR bit 28 now holds address bit 3 (value 8) of r4. */
|
||||
|
||||
/* Handle DWORD2 of pair. */
|
||||
ldu r12,8(r4) /* r4 += 8, then load the doubleword at the new r4. */
|
||||
cmpb r10,r12,r0
|
||||
cmpdi cr7,r10,0
|
||||
bne cr7,L(done)
|
||||
b L(loop) /* We branch here (rather than falling through)
|
||||
to skip the nops due to heavy alignment
|
||||
of the loop below. */
|
||||
|
||||
/* Main loop to look for the end of the string. Since it's a
|
||||
small loop (< 8 instructions), align it to 32-bytes. */
|
||||
.p2align 5
|
||||
L(loop):
|
||||
/* Load two doublewords, compare and merge in a
|
||||
single register for speed. This is an attempt
|
||||
to speed up the null-checking process for bigger strings. */
|
||||
|
||||
ld r12, 8(r4) /* First doubleword of the pair. */
|
||||
ldu r11, 16(r4) /* Second doubleword; r4 += 16, so r4 now addresses it. */
|
||||
cmpb r10,r12,r0 /* r10 = null flags for the first doubleword. */
|
||||
cmpb r9,r11,r0 /* r9 = null flags for the second doubleword. */
|
||||
or r8,r9,r10 /* Merge everything in one doubleword. */
|
||||
cmpdi cr7,r8,0
|
||||
beq cr7,L(loop)
|
||||
|
||||
/* OK, one (or both) of the doublewords contains a null byte. Check
|
||||
the first doubleword and decrement the address in case the first
|
||||
doubleword really contains a null byte. */
|
||||
|
||||
cmpdi cr6,r10,0
|
||||
addi r4,r4,-8 /* Point r4 at the first doubleword of the pair. */
|
||||
bne cr6,L(done)
|
||||
|
||||
/* The null byte must be in the second doubleword. Adjust the address
|
||||
again and move the result of cmpb to r10 so we can calculate the
|
||||
length. */
|
||||
|
||||
mr r10,r9
|
||||
addi r4,r4,8 /* Point r4 back at the second doubleword. */
|
||||
|
||||
/* r10 has the output of the cmpb instruction, that is, it contains
|
||||
0xff in the same position as the null byte in the original
|
||||
doubleword from the string. Use that to calculate the length. */
|
||||
L(done):
|
||||
cntlzd r0,r10 /* Count leading zeroes before the match. */
|
||||
subf r5,r3,r4 /* r5 = address of the matching doubleword - s. */
|
||||
srdi r0,r0,3 /* Convert leading zeroes to bytes. */
|
||||
add r3,r5,r0 /* Compute final length. */
|
||||
blr
|
||||
END (BP_SYM (strlen))
|
||||
libc_hidden_builtin_def (strlen)
|
173
sysdeps/powerpc/powerpc64/power7/strnlen.S
Normal file
173
sysdeps/powerpc/powerpc64/power7/strnlen.S
Normal file
@ -0,0 +1,173 @@
|
||||
/* Optimized strnlen implementation for PowerPC64/POWER7 using cmpb insn.
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Luis Machado <luisgpm@br.ibm.com>.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
|
||||
02110-1301 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <bp-sym.h>
|
||||
#include <bp-asm.h>
|
||||
|
||||
/* int [r3] strnlen (char *s [r3], int size [r4])
   Returns in r3 the offset of the first null byte found in the range
   [s, s + size), or r7 - s (the size, when s + size did not wrap) if
   none is found there.  Sizes up to 32 use the one-doubleword-at-a-time
   code at L(small_range); larger sizes use a CTR-counted loop that
   checks two doublewords per iteration.
   Register use: r3 = s, r7 = end of the range (s + size, or -1 if that
   addition wrapped), r8 = address of the doubleword being checked,
   r0 = zero doubleword (reused as scratch in L(done)), r10 = cmpb
   result for the doubleword r8 points to when L(done) is reached.
   NOTE(review): turning a leading-zero count into a byte offset, as
   L(done) does, presumes big-endian doubleword loads -- confirm for
   the target configuration.  */
|
||||
.machine power7
|
||||
ENTRY (BP_SYM (__strnlen))
|
||||
CALL_MCOUNT 2
|
||||
dcbt 0,r3 /* Cache-touch hint for the start of the string. */
|
||||
clrrdi r8,r3,3 /* Align the address to doubleword boundary. */
|
||||
add r7,r3,r4 /* Calculate the end of the range (s + size). */
|
||||
cmpldi r4,32 /* cr0 is consumed by the ble below. */
|
||||
li r0,0 /* Doubleword with null chars. */
|
||||
/* If we have 32 bytes or fewer to search, skip to the small-range code. */
|
||||
ble L(small_range)
|
||||
|
||||
cmpld cr7,r3,r7 /* Did s + size wrap around? If r3 <= r7 the
|
||||
end address is usable as is; otherwise the
|
||||
addition overflowed and r7 must be clamped. */
|
||||
ble cr7,L(proceed)
|
||||
|
||||
li r7,-1 /* Make r7 the biggest address, since s + size wrapped around. */
|
||||
L(proceed):
|
||||
rlwinm r6,r3,3,26,28 /* Calculate padding: (s & 7) * 8 bits precede the string in DWORD1. */
|
||||
ld r12,0(r8) /* Load doubleword from memory. */
|
||||
cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */
|
||||
sld r10,r10,r6 /* Shift out the match flags of the bytes before s... */
|
||||
srd r10,r10,r6 /* ...and shift the remaining flags back into place. */
|
||||
cmpldi cr7,r10,0 /* If r10 == 0, no nulls have been found. */
|
||||
bne cr7,L(done)
|
||||
|
||||
/* Are we done already? (Does the next doubleword start at or past the end?) */
|
||||
addi r9,r8,8
|
||||
cmpld cr6,r9,r7
|
||||
bge cr6,L(end_max)
|
||||
|
||||
mtcrf 0x01,r8 /* Copy the low 4 bits of the aligned address into cr7. */
|
||||
/* Is the next doubleword (r8 + 8) aligned to a quadword boundary? If so,
|
||||
skip to the main loop. Otherwise, go through the alignment code. */
|
||||
|
||||
bt 28,L(loop_setup) /* CR bit 28 now holds address bit 3 (value 8) of r8. */
|
||||
|
||||
/* Handle DWORD2 of pair. */
|
||||
ldu r12,8(r8) /* r8 += 8, then load the doubleword at the new r8. */
|
||||
cmpb r10,r12,r0
|
||||
cmpldi cr7,r10,0
|
||||
bne cr7,L(done)
|
||||
|
||||
/* Are we done already? (Does the next doubleword start at or past the end?) */
|
||||
addi r9,r8,8
|
||||
cmpld cr6,r9,r7
|
||||
bge cr6,L(end_max)
|
||||
|
||||
L(loop_setup):
|
||||
sub r5,r7,r9 /* Bytes from the next doubleword (r9) to the end of the range. */
|
||||
srdi r6,r5,4 /* Number of loop iterations (16 bytes each). */
|
||||
mtctr r6 /* Setup the counter. */
|
||||
b L(loop)
|
||||
/* Main loop to look for the null byte in the string. Since
|
||||
it's a small loop (8 instructions), align it to 32-bytes. */
|
||||
.p2align 5
|
||||
L(loop):
|
||||
/* Load two doublewords, compare and merge in a
|
||||
single register for speed. This is an attempt
|
||||
to speed up the null-checking process for bigger strings. */
|
||||
|
||||
ld r12,8(r8) /* First doubleword of the pair. */
|
||||
ldu r11,16(r8) /* Second doubleword; r8 += 16, so r8 now addresses it. */
|
||||
cmpb r10,r12,r0 /* r10 = null flags for the first doubleword. */
|
||||
cmpb r9,r11,r0 /* r9 = null flags for the second doubleword. */
|
||||
or r5,r9,r10 /* Merge everything in one doubleword. */
|
||||
cmpldi cr7,r5,0
|
||||
bne cr7,L(found)
|
||||
bdnz L(loop)
|
||||
/* We're here because the counter reached 0, and that means the main
|
||||
loop found no null byte. If part of the range is still unchecked,
|
||||
finish it in L(loop_small); otherwise return the original size. */
|
||||
addi r9,r8,8
|
||||
cmpld cr6,r9,r7
|
||||
blt cr6,L(loop_small)
|
||||
|
||||
L(end_max):
|
||||
sub r3,r7,r3 /* Return r7 - s. */
|
||||
blr
|
||||
|
||||
/* OK, one (or both) of the doublewords contains a null byte. Check
|
||||
the first doubleword and decrement the address in case the first
|
||||
doubleword really contains a null byte. */
|
||||
.align 4
|
||||
L(found):
|
||||
cmpldi cr6,r10,0
|
||||
addi r8,r8,-8 /* Point r8 at the first doubleword of the pair. */
|
||||
bne cr6,L(done)
|
||||
|
||||
/* The null byte must be in the second doubleword. Adjust the address
|
||||
again and move the result of cmpb to r10 so we can calculate the
|
||||
length. */
|
||||
|
||||
mr r10,r9
|
||||
addi r8,r8,8 /* Point r8 back at the second doubleword. */
|
||||
|
||||
/* r10 has the output of the cmpb instruction, that is, it contains
|
||||
0xff in the same position as the null byte in the original
|
||||
doubleword from the string. Use that to calculate the length.
|
||||
We need to make sure the null char is not past the end of the
|
||||
range; if it is, the result is capped at L(end_max). */
|
||||
L(done):
|
||||
cntlzd r0,r10 /* Count leading zeroes before the match. */
|
||||
srdi r0,r0,3 /* Convert leading zeroes to bytes. */
|
||||
add r9,r8,r0 /* r9 = address of the null byte. */
|
||||
sub r6,r9,r3 /* Length until the match. */
|
||||
cmpld r9,r7
|
||||
bgt L(end_max) /* The match lies past the end of the range. */
|
||||
mr r3,r6
|
||||
blr
|
||||
|
||||
.align 4
|
||||
L(zero):
|
||||
li r3,0
|
||||
blr
|
||||
|
||||
/* Deals with size <= 32. */
|
||||
.align 4
|
||||
L(small_range):
|
||||
cmpldi r4,0
|
||||
beq L(zero) /* size == 0: return 0 without loading anything. */
|
||||
|
||||
rlwinm r6,r3,3,26,28 /* Calculate padding: (s & 7) * 8 bits precede the string in DWORD1. */
|
||||
ld r12,0(r8) /* Load doubleword from memory. */
|
||||
cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */
|
||||
sld r10,r10,r6 /* Shift out the match flags of the bytes before s... */
|
||||
srd r10,r10,r6 /* ...and shift the remaining flags back into place. */
|
||||
cmpldi cr7,r10,0
|
||||
bne cr7,L(done)
|
||||
|
||||
addi r9,r8,8
|
||||
cmpld r9,r7
|
||||
bge L(end_max) /* The next doubleword starts at or past the end. */
|
||||
b L(loop_small)
|
||||
|
||||
.p2align 5
|
||||
L(loop_small):
|
||||
ldu r12,8(r8) /* r8 += 8, then load the doubleword at the new r8. */
|
||||
cmpb r10,r12,r0
|
||||
addi r9,r8,8 /* Address of the following doubleword. */
|
||||
cmpldi cr6,r10,0
|
||||
bne cr6,L(done)
|
||||
cmpld r9,r7
|
||||
bge L(end_max) /* The following doubleword starts at or past the end. */
|
||||
b L(loop_small)
|
||||
END (BP_SYM (__strnlen))
|
||||
weak_alias (BP_SYM (__strnlen), BP_SYM(strnlen))
|
||||
libc_hidden_builtin_def (strnlen)
|
Loading…
Reference in New Issue
Block a user