Enable optimized string routines for several PowerPC 4XX family processors.

This patch provides optimized string routines for the 4XX family of PowerPC
and Power Architecture processors.  For the 476 processor, it can be
configured for either hard-fp with -mcpu=476 --with-fp, or for soft-fp with
-mcpu=476 (since the default is soft-fp).
This commit is contained in:
Luis Machado 2011-01-12 12:59:09 -06:00 committed by Ryan Arnold
parent 9524dbd1f3
commit a72cc2b29d
18 changed files with 1181 additions and 0 deletions

View File

@ -1,3 +1,23 @@
2010-09-03 Luis Machado <luisgpm@br.ibm.com>
* sysdeps/powerpc/dl-procinfo.c: New file.
* sysdeps/powerpc/dl-procinfo.h: New file.
* sysdeps/powerpc/powerpc32/405/memcmp.S: New file.
* sysdeps/powerpc/powerpc32/405/memcpy.S: New file.
* sysdeps/powerpc/powerpc32/405/memset.S: New file.
* sysdeps/powerpc/powerpc32/405/strcmp.S: New file.
* sysdeps/powerpc/powerpc32/405/strcpy.S: New file.
* sysdeps/powerpc/powerpc32/405/strlen.S: New file.
* sysdeps/powerpc/powerpc32/405/strncmp.S: New file.
* sysdeps/powerpc/powerpc32/440/Implies: New file.
* sysdeps/powerpc/powerpc32/464/Implies: New file.
* sysdeps/powerpc/powerpc32/476/Implies: New file.
* sysdeps/powerpc/powerpc32/Makefile: New file.
* sysdeps/unix/sysv/linux/powerpc/powerpc32/405/Implies: New file.
* sysdeps/unix/sysv/linux/powerpc/powerpc32/440/Implies: New file.
* sysdeps/unix/sysv/linux/powerpc/powerpc32/464/Implies: New file.
* sysdeps/unix/sysv/linux/powerpc/powerpc32/476/Implies: New file.
2010-04-14 Joseph Myers <joseph@codesourcery.com>
* libc-abis: Remove.

View File

@ -0,0 +1,96 @@
/* Data for processor capability information. PowerPC version.
Copyright (C) 2005, 2006, 2008 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
/* This information must be kept in sync with the _DL_HWCAP_COUNT and
_DL_PLATFORMS_COUNT definitions in dl-procinfo.h.
If anything should be added here check whether the size of each string
is still ok with the given array size.
All the #ifdefs in the definitions are quite irritating but
necessary if we want to avoid duplicating the information. There
are three different modes:
- PROCINFO_DECL is defined. This means we are only interested in
declarations.
- PROCINFO_DECL is not defined:
+ if SHARED is defined the file is included in an array
initializer. The .element = { ... } syntax is needed.
+ if SHARED is not defined a normal array initialization is
needed.
*/
/* PROCINFO_CLASS is the storage-class prefix for the declarations below;
   it defaults to nothing when the including file did not set it.  */
#ifndef PROCINFO_CLASS
# define PROCINFO_CLASS
#endif
/* Names of the hardware capability bits.  dl-procinfo.h indexes this
   table with (bit number - _DL_HWCAP_FIRST), and _DL_HWCAP_FIRST is 7,
   so entry 0 ("vsx") names bit 7 and entry 24 ("ppc32") names bit 31.
   Each row is 10 bytes: the longest names listed here have 9 characters
   plus the terminating NUL.  */
#if !defined PROCINFO_DECL && defined SHARED
/* Included inside a structure initializer: emit a designated member.  */
._dl_powerpc_cap_flags
#else
PROCINFO_CLASS const char _dl_powerpc_cap_flags[25][10]
#endif
#ifndef PROCINFO_DECL
= {
"vsx",
"arch_2_06", "power6x", "dfp", "pa6t",
"arch_2_05", "ic_snoop", "smt", "booke",
"cellbe", "power5+", "power5", "power4",
"notb", "efpdouble", "efpsingle", "spe",
"ucache", "4xxmac", "mmu", "fpu",
"altivec", "ppc601", "ppc64", "ppc32",
}
#endif
/* Terminate as a stand-alone declaration/definition (';') or as one
   member of the enclosing initializer list (',').  */
#if !defined SHARED || defined PROCINFO_DECL
;
#else
,
#endif
/* Platform names, indexed by the PPC_PLATFORM_* values.  Each row is
   12 bytes: the longest name listed here, "ppc-cell-be", has 11
   characters plus the terminating NUL.  The initializer names 12
   entries, matching the first array dimension.  */
#if !defined PROCINFO_DECL && defined SHARED
/* Included inside a structure initializer: emit a designated member.  */
._dl_powerpc_platforms
#else
PROCINFO_CLASS const char _dl_powerpc_platforms[12][12]
#endif
#ifndef PROCINFO_DECL
= {
[PPC_PLATFORM_POWER4] = "power4",
[PPC_PLATFORM_PPC970] = "ppc970",
[PPC_PLATFORM_POWER5] = "power5",
[PPC_PLATFORM_POWER5_PLUS] = "power5+",
[PPC_PLATFORM_POWER6] = "power6",
[PPC_PLATFORM_CELL_BE] = "ppc-cell-be",
[PPC_PLATFORM_POWER6X] = "power6x",
[PPC_PLATFORM_POWER7] = "power7",
[PPC_PLATFORM_PPC405] = "ppc405",
[PPC_PLATFORM_PPC440] = "ppc440",
[PPC_PLATFORM_PPC464] = "ppc464",
[PPC_PLATFORM_PPC476] = "ppc476"
}
#endif
/* Terminate as a stand-alone declaration/definition (';') or as one
   member of the enclosing initializer list (',').  */
#if !defined SHARED || defined PROCINFO_DECL
;
#else
,
#endif
/* Reset the control macros so the file can be included again with a
   different configuration.  */
#undef PROCINFO_DECL
#undef PROCINFO_CLASS

View File

@ -0,0 +1,168 @@
/* Processor capability information handling macros. PowerPC version.
Copyright (C) 2005, 2006, 2008 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#ifndef _DL_PROCINFO_H
#define _DL_PROCINFO_H 1
#include <ldsodefs.h>
#include <sysdep.h> /* This defines the PPC_FEATURE_* macros. */
/* There are 25 bits used, but they are bits 7..31.  _DL_HWCAP_FIRST is
   the lowest bit number that has a name and _DL_HWCAP_COUNT is one past
   the highest; the loops below run over [_DL_HWCAP_FIRST,
   _DL_HWCAP_COUNT).  */
#define _DL_HWCAP_FIRST 7
#define _DL_HWCAP_COUNT 32
/* These bits influence library search. */
#define HWCAP_IMPORTANT (PPC_FEATURE_HAS_ALTIVEC \
+ PPC_FEATURE_HAS_DFP)
/* Number of known platforms, and the bit position of the first platform
   bit.  Platform bits therefore occupy bits 32..43 of a 64-bit mask.  */
#define _DL_PLATFORMS_COUNT 12
#define _DL_FIRST_PLATFORM 32
/* Mask to filter out platforms: _DL_PLATFORMS_COUNT one-bits shifted up
   to _DL_FIRST_PLATFORM.  */
#define _DL_HWCAP_PLATFORM (((1ULL << _DL_PLATFORMS_COUNT) - 1) \
<< _DL_FIRST_PLATFORM)
/* Platform bits (relative to _DL_FIRST_PLATFORM).  These values are also
   used as indices into the dl_powerpc_platforms name table.  */
#define PPC_PLATFORM_POWER4 0
#define PPC_PLATFORM_PPC970 1
#define PPC_PLATFORM_POWER5 2
#define PPC_PLATFORM_POWER5_PLUS 3
#define PPC_PLATFORM_POWER6 4
#define PPC_PLATFORM_CELL_BE 5
#define PPC_PLATFORM_POWER6X 6
#define PPC_PLATFORM_POWER7 7
#define PPC_PLATFORM_PPC405 8
#define PPC_PLATFORM_PPC440 9
#define PPC_PLATFORM_PPC464 10
#define PPC_PLATFORM_PPC476 11
/* Return the name of hardware capability bit IDX.  IDX is an absolute
   bit number; the name table starts at bit _DL_HWCAP_FIRST, so the bit
   number is rebased before indexing.  No range check is performed.  */
static inline const char *
__attribute__ ((unused))
_dl_hwcap_string (int idx)
{
  const int slot = idx - _DL_HWCAP_FIRST;

  return GLRO(dl_powerpc_cap_flags)[slot];
}
/* Return the name of platform bit IDX.  IDX is an absolute bit number;
   the name table starts at bit _DL_FIRST_PLATFORM, so the bit number is
   rebased before indexing.  No range check is performed.  */
static inline const char *
__attribute__ ((unused))
_dl_platform_string (int idx)
{
  const int slot = idx - _DL_FIRST_PLATFORM;

  return GLRO(dl_powerpc_platforms)[slot];
}
/* Map the capability name STR to its hwcap bit number.  Every named bit
   from _DL_HWCAP_FIRST up to (but not including) _DL_HWCAP_COUNT is
   tried in ascending order; the first exact match wins.  Returns -1 if
   STR names no known capability.  */
static inline int
__attribute__ ((unused))
_dl_string_hwcap (const char *str)
{
  int bit = _DL_HWCAP_FIRST;

  while (bit < _DL_HWCAP_COUNT)
    {
      if (strcmp (str, _dl_hwcap_string (bit)) == 0)
	return bit;
      ++bit;
    }

  return -1;
}
/* Map the platform name STR to its platform bit number
   (_DL_FIRST_PLATFORM + PPC_PLATFORM_*).  Returns -1 when STR is NULL or
   does not name a known platform.

   Two name families are recognised:
   - "power" followed by 4, 5, 5+, 6, 6x or 7;
   - "ppc" followed by the tail of one of the ppc* table entries.  */
static inline int
__attribute__ ((unused, always_inline))
_dl_string_platform (const char *str)
{
  if (str == NULL)
    return -1;

  if (strncmp (str, GLRO(dl_powerpc_platforms)[PPC_PLATFORM_POWER4], 5) == 0)
    {
      /* STR begins with "power"; MODEL points at the model character.  */
      const char *model = str + 5;
      int platform;

      if (*model == '4')
	platform = PPC_PLATFORM_POWER4;
      else if (*model == '5')
	{
	  platform = PPC_PLATFORM_POWER5;
	  if (model[1] == '+')
	    {
	      /* Step over the '+' so the terminator check below looks
		 at the character after it.  */
	      platform = PPC_PLATFORM_POWER5_PLUS;
	      ++model;
	    }
	}
      else if (*model == '6')
	{
	  platform = PPC_PLATFORM_POWER6;
	  if (model[1] == 'x')
	    {
	      /* Likewise for the 'x' of "power6x".  */
	      platform = PPC_PLATFORM_POWER6X;
	      ++model;
	    }
	}
      else if (*model == '7')
	platform = PPC_PLATFORM_POWER7;
      else
	return -1;

      /* The name is accepted only if nothing follows it.  */
      if (model[1] == '\0')
	return _DL_FIRST_PLATFORM + platform;

      return -1;
    }

  if (strncmp (str, GLRO(dl_powerpc_platforms)[PPC_PLATFORM_PPC970], 3) == 0)
    {
      /* STR begins with "ppc"; compare what follows against the part of
	 each candidate table entry after its own first three characters.  */
      const char *suffix = str + 3;

      if (strcmp (suffix,
		  GLRO(dl_powerpc_platforms)[PPC_PLATFORM_PPC970] + 3) == 0)
	return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPC970;
      if (strcmp (suffix,
		  GLRO(dl_powerpc_platforms)[PPC_PLATFORM_CELL_BE] + 3) == 0)
	return _DL_FIRST_PLATFORM + PPC_PLATFORM_CELL_BE;
      if (strcmp (suffix,
		  GLRO(dl_powerpc_platforms)[PPC_PLATFORM_PPC405] + 3) == 0)
	return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPC405;
      if (strcmp (suffix,
		  GLRO(dl_powerpc_platforms)[PPC_PLATFORM_PPC440] + 3) == 0)
	return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPC440;
      if (strcmp (suffix,
		  GLRO(dl_powerpc_platforms)[PPC_PLATFORM_PPC464] + 3) == 0)
	return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPC464;
      if (strcmp (suffix,
		  GLRO(dl_powerpc_platforms)[PPC_PLATFORM_PPC476] + 3) == 0)
	return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPC476;
    }

  return -1;
}
#ifdef IS_IN_rtld
/* Print the names of all hardware capability bits set in WORD on one
   line prefixed with "AT_HWCAP: ".  Only bits _DL_HWCAP_FIRST up to
   _DL_HWCAP_COUNT - 1 have names and are examined.  Always returns 0.  */
static inline int
__attribute__ ((unused))
_dl_procinfo (int word)
{
  _dl_printf ("AT_HWCAP: ");
  for (int i = _DL_HWCAP_FIRST; i < _DL_HWCAP_COUNT; ++i)
    /* The mask is built as unsigned: i reaches 31 and shifting a signed
       1 into the sign bit is undefined behaviour in ISO C.  */
    if ((unsigned int) word & (1U << i))
      _dl_printf (" %s", _dl_hwcap_string (i));
  _dl_printf ("\n");
  return 0;
}
#endif
#endif /* dl-procinfo.h */

View File

@ -0,0 +1,131 @@
/* Optimized memcmp implementation for PowerPC476.
Copyright (C) 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
02110-1301 USA. */
#include <sysdep.h>
#include <bp-sym.h>
#include <bp-asm.h>
/* memcmp

   Register use
   r3: source1 address on entry; result on return (0, 1 or -1)
   r4: source2 address
   r5: byte count
   r6, r10: data loaded from source1 (r6 is also a scratch counter)
   r8, r9: data loaded from source2
   ctr: iteration counter of the loop currently running

   Implementation description
   Compare 32 bytes per iteration (four pairs of words) while at least
   32 bytes remain, then 8 bytes per iteration, then one byte at a time.
   Every compare is unsigned, source2 data against source1 data.  The
   first mismatch branches to L(st2) (compare held in cr5) or L(st1)
   (compare held in cr1), which return 1 when the source1 value is the
   larger one and -1 otherwise.  If no mismatch is found return 0.
   A zero byte does not terminate the comparison.
   NOTE(review): ordering by whole-word compares matches byte-wise
   ordering only when the lowest-addressed byte is the most significant
   one (big-endian loads) -- confirm no little-endian configuration
   selects this file.  */
EALIGN (BP_SYM (memcmp), 5, 0)
/* r6 = count / 32; skip the 32-byte loop when that is zero.  */
srwi. r6,r5,5
beq L(preword2_count_loop)
mtctr r6
/* r5 = count % 32, the bytes left for the smaller loops.  */
clrlwi r5,r5,27
L(word8_compare_loop):
/* Bytes 0-7.  cr5 gets the compare of the first word pair and cr1 the
   second; cr5 is tested first so the lowest-addressed difference
   decides the result.  */
lwz r10,0(r3)
lwz r6,4(r3)
lwz r8,0(r4)
lwz r9,4(r4)
cmplw cr5,r8,r10
cmplw cr1,r9,r6
bne cr5,L(st2)
bne cr1,L(st1)
/* Bytes 8-15.  */
lwz r10,8(r3)
lwz r6,12(r3)
lwz r8,8(r4)
lwz r9,12(r4)
cmplw cr5,r8,r10
cmplw cr1,r9,r6
bne cr5,L(st2)
bne cr1,L(st1)
/* Bytes 16-23.  */
lwz r10,16(r3)
lwz r6,20(r3)
lwz r8,16(r4)
lwz r9,20(r4)
cmplw cr5,r8,r10
cmplw cr1,r9,r6
bne cr5,L(st2)
bne cr1,L(st1)
/* Bytes 24-31.  Both pointers are advanced by 32 between the loads and
   the compares.  */
lwz r10,24(r3)
lwz r6,28(r3)
addi r3,r3,0x20
lwz r8,24(r4)
lwz r9,28(r4)
addi r4,r4,0x20
cmplw cr5,r8,r10
cmplw cr1,r9,r6
bne cr5,L(st2)
bne cr1,L(st1)
bdnz L(word8_compare_loop)
L(preword2_count_loop):
/* r6 = remaining / 8; skip the 8-byte loop when that is zero.  */
srwi. r6,r5,3
beq L(prebyte_count_loop)
mtctr r6
/* r5 = remaining % 8, the bytes left for the byte loop.  */
clrlwi r5,r5,29
L(word2_count_loop):
lwz r10,0(r3)
lwz r6,4(r3)
addi r3,r3,0x08
lwz r8,0(r4)
lwz r9,4(r4)
addi r4,r4,0x08
cmplw cr5,r8,r10
cmplw cr1,r9,r6
bne cr5,L(st2)
bne cr1,L(st1)
bdnz L(word2_count_loop)
L(prebyte_count_loop):
/* ctr = remaining + 1 and one decrement up front: the bdz leaves
   immediately when no bytes remain, otherwise the loop body runs
   exactly `remaining' times.  */
addi r5,r5,1
mtctr r5
bdz L(end_memcmp)
L(byte_count_loop):
lbz r6,0(r3)
addi r3,r3,0x01
lbz r8,0(r4)
addi r4,r4,0x01
cmplw cr5,r8,r6
bne cr5,L(st2)
bdnz L(byte_count_loop)
L(end_memcmp):
/* All bytes equal.  With r0 in the base-register slot addi uses the
   constant 0, so this is r3 = 0.  */
addi r3,r0,0
blr
L(l_r):
/* source2 value < source1 value: return 1.  */
addi r3,r0,1
blr
L(st1):
/* Mismatch recorded in cr1 (source2 word compared against source1).  */
blt cr1,L(l_r)
addi r3,r0,-1
blr
L(st2):
/* Mismatch recorded in cr5 (source2 data compared against source1).  */
blt cr5,L(l_r)
addi r3,r0,-1
blr
END (BP_SYM (memcmp))
libc_hidden_builtin_def (memcmp)
weak_alias (memcmp,bcmp)

View File

@ -0,0 +1,133 @@
/* Optimized memcpy implementation for PowerPC476.
Copyright (C) 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
02110-1301 USA. */
#include <sysdep.h>
#include <bp-sym.h>
#include <bp-asm.h>
/* memcpy

   Register use
   r0: copy of the destination address, moved back to r3 on return
   r3: destination address
   r4: source address
   r5: byte count
   r6-r12: data and scratch
   ctr: iteration counter of the loop currently running
   xer: byte count for the string load/store instructions

   Implementation description
   Save the destination address in r0.
   If the count is greater than 256 and neither the destination nor the
   source is word aligned, copy 1-3 single bytes (the number that brings
   the source to a word boundary), copy 32 bytes per iteration, and
   finish the remaining 0-31 bytes with one string load/store.
   In every other case copy the trailing count % 16 bytes with one
   string load/store and then copy 16-byte chunks walking from the end
   of the buffers back to the start.  */
EALIGN (BP_SYM (memcpy), 5, 0)
/* Check if bytes to copy are greater than 256 and if
source and destination are unaligned */
/* NOTE(review): cmpwi is a signed compare, so a count with the top bit
   set is treated as "not greater than 256".  */
cmpwi r5,0x0100
addi r0,r3,0
ble L(string_count_loop)
/* r6 = (-destination) & 3; zero means the destination is word aligned.  */
neg r6,r3
clrlwi. r6,r6,30
beq L(string_count_loop)
/* r6 = (-source) & 3; zero means the source is word aligned.  */
neg r6,r4
clrlwi. r6,r6,30
beq L(string_count_loop)
/* Copy r6 (1-3) bytes one at a time and take them off the count.  */
mtctr r6
subf r5,r6,r5
L(unaligned_bytecopy_loop): /* Copy the 1-3 bytes that word-align the source */
lbz r8,0x0(r4)
addi r4,r4,1
stb r8,0x0(r3)
addi r3,r3,1
bdnz L(unaligned_bytecopy_loop)
/* r7 = remaining / 32 iterations of the 32-byte loop.  */
srwi. r7,r5,5
beq L(preword2_count_loop)
mtctr r7
L(word8_count_loop_no_dcbt): /* Copy 32 bytes at a time */
lwz r6,0(r4)
lwz r7,4(r4)
lwz r8,8(r4)
lwz r9,12(r4)
subi r5,r5,0x20
stw r6,0(r3)
stw r7,4(r3)
stw r8,8(r3)
stw r9,12(r3)
lwz r6,16(r4)
lwz r7,20(r4)
lwz r8,24(r4)
lwz r9,28(r4)
addi r4,r4,0x20
stw r6,16(r3)
stw r7,20(r3)
stw r8,24(r3)
stw r9,28(r3)
addi r3,r3,0x20
bdnz L(word8_count_loop_no_dcbt)
L(preword2_count_loop): /* Copy remaining 0-31 bytes */
/* r12 = remaining % 32.  The string load fills consecutive registers
   starting at r5 with the byte count taken from xer.  */
clrlwi. r12,r5,27
beq L(end_memcpy)
mtxer r12
lswx r5,0,r4
stswx r5,0,r3
mr r3,r0
blr
L(string_count_loop): /* Copy the trailing count % 16 bytes */
/* r12 = count % 16.  The two adds move both pointers to the end of
   their buffers; they do not touch cr0, so the beq still tests the
   result of the clrlwi.  */
clrlwi. r12,r5,28
add r3,r3,r5
add r4,r4,r5
beq L(pre_string_copy)
/* Step back over the tail and copy it with one string load/store.  */
mtxer r12
subf r4,r12,r4
subf r3,r12,r3
lswx r6,0,r4
stswx r6,0,r3
L(pre_string_copy): /* Check how many 16 byte chunks to copy */
srwi. r7,r5,4
beq L(end_memcpy)
mtctr r7
L(word4_count_loop_no_dcbt): /* Copy 16 bytes per chunk, two chunks per pass */
/* The pointers sit just past the chunk to copy; the update-form
   load/store of the lowest word moves each pointer down by 16.  */
lwz r6,-4(r4)
lwz r7,-8(r4)
lwz r8,-12(r4)
lwzu r9,-16(r4)
stw r6,-4(r3)
stw r7,-8(r3)
stw r8,-12(r3)
stwu r9,-16(r3)
/* ctr counts 16-byte chunks, so leave here if that was the last one.  */
bdz L(end_memcpy)
lwz r6,-4(r4)
lwz r7,-8(r4)
lwz r8,-12(r4)
lwzu r9,-16(r4)
stw r6,-4(r3)
stw r7,-8(r3)
stw r8,-12(r3)
stwu r9,-16(r3)
bdnz L(word4_count_loop_no_dcbt)
L(end_memcpy):
/* Return the original destination address.  */
mr r3,r0
blr
END (BP_SYM (memcpy))
libc_hidden_builtin_def (memcpy)

View File

@ -0,0 +1,155 @@
/* Optimized memset implementation for PowerPC476.
Copyright (C) 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
02110-1301 USA. */
#include <sysdep.h>
#include <bp-sym.h>
#include <bp-asm.h>
/* memset

   Register use
   r3: destination address on entry; return value
   r4: fill value
   r5: byte count
   r11: low byte of the fill value replicated into all four bytes
   r12: copy of the original destination address, moved back to r3 on
	return
   r6-r10: scratch and additional copies of the fill word
   ctr: iteration counter of the loop currently running
   xer: byte count for the string store instruction

   Implementation description
   Save the destination address in r12.
   If the count is 255 or less, store 16 bytes per iteration and finish
   the remaining 0-15 bytes with one string store.
   If the count is greater than 255 and r4 is non-zero, first store 1-3
   single bytes to word-align the destination, then proceed as above.
   If the count is greater than 255 and r4 is zero, bring the
   destination to a 16-byte boundary with a string store, then to a
   128-byte boundary with word stores, clear 128 bytes per iteration
   with dcbz, and finish with 16-byte word stores and a string store.  */
EALIGN (BP_SYM (memset), 5, 0)
/* Build the fill word: low byte of r4, copied to the next byte up, then
   the low halfword copied to the high halfword.  */
rlwinm r11,r4,0,24,31
rlwimi r11,r4,8,16,23
rlwimi r11,r11,16,0,15
addi r12,r3,0
cmpwi r5,0x00FF
ble L(preword8_count_loop)
/* The dcbz path is taken only when the whole of r4 is zero.  */
cmpwi r4,0x00
beq L(use_dcbz)
/* r6 = (-destination) & 3, the bytes needed to reach word alignment.  */
neg r6,r3
clrlwi. r6,r6,30
beq L(preword8_count_loop)
/* r8 = 1 (decrement for the count); r3 is pre-decremented because stbu
   stores at r3 + 1 and updates r3.  */
addi r8,0,1
mtctr r6
subi r3,r3,1
L(unaligned_bytecopy_loop):
stbu r11,0x1(r3)
subf. r5,r8,r5
beq L(end_memset)
bdnz L(unaligned_bytecopy_loop)
/* Point r3 at the next byte to store.  */
addi r3,r3,1
L(preword8_count_loop):
/* r6 = remaining / 16 iterations of the 16-byte loop.  */
srwi. r6,r5,4
beq L(preword2_count_loop)
mtctr r6
/* stwu stores at r3 + 4 and updates r3, hence the bias of -4.  */
addi r3,r3,-4
mr r8,r11
mr r9,r11
mr r10,r11
L(word8_count_loop_no_dcbt):
stwu r8,4(r3)
stwu r9,4(r3)
subi r5,r5,0x10
stwu r10,4(r3)
stwu r11,4(r3)
bdnz L(word8_count_loop_no_dcbt)
/* Undo the bias: r3 points at the next byte to store.  */
addi r3,r3,4
L(preword2_count_loop):
/* r7 = remaining % 16; store that many bytes from r8-r11 at once.  */
clrlwi. r7,r5,28
beq L(end_memset)
mr r8,r11
mr r9,r11
mr r10,r11
mtxer r7
stswx r8,0,r3
L(end_memset):
/* Return the original destination address.  */
addi r3,r12,0
blr
L(use_dcbz):
/* r6 = -destination; r7 = bytes needed to reach a 16-byte boundary.  */
neg r6,r3
clrlwi. r7,r6,28
beq L(skip_string_loop)
mr r8,r11
mr r9,r11
mr r10,r11
subf r5,r7,r5
mtxer r7
stswx r8,0,r3
add r3,r3,r7
L(skip_string_loop):
/* r8 = ((-original destination) & 127) / 16: the number of 16-byte
   stores that bring r3 from the 16-byte boundary to a 128-byte one.  */
clrlwi r8,r6,25
srwi. r8,r8,4
beq L(dcbz_pre_loop)
mtctr r8
L(word_loop):
stw r11,0(r3)
subi r5,r5,0x10
stw r11,4(r3)
stw r11,8(r3)
stw r11,12(r3)
addi r3,r3,0x10
bdnz L(word_loop)
L(dcbz_pre_loop):
/* ctr = remaining / 128.  This is not checked for zero: the path is
   entered with a count above 255 and the two alignment steps consume
   at most 15 + 112 bytes, so at least one iteration remains.
   r7 = 0 is the index operand of dcbz.  */
srwi r6,r5,7
mtctr r6
addi r7,0,0
L(dcbz_loop):
/* NOTE(review): one dcbz per 0x80 bytes assumes the instruction clears
   a 128-byte block -- confirm this holds for every core whose Implies
   file selects this directory.  */
dcbz r3,r7
addi r3,r3,0x80
subi r5,r5,0x80
bdnz L(dcbz_loop)
/* r6 = remaining / 16 iterations of the trailing 16-byte loop.  */
srwi. r6,r5,4
beq L(postword2_count_loop)
mtctr r6
L(postword8_count_loop):
stw r11,0(r3)
subi r5,r5,0x10
stw r11,4(r3)
stw r11,8(r3)
stw r11,12(r3)
addi r3,r3,0x10
bdnz L(postword8_count_loop)
L(postword2_count_loop):
/* r7 = remaining % 16; store that many bytes from r8-r11 at once.  */
clrlwi. r7,r5,28
beq L(end_memset)
mr r8,r11
mr r9,r11
mr r10,r11
mtxer r7
stswx r8,0,r3
b L(end_memset)
END (BP_SYM (memset))
libc_hidden_builtin_def (memset)

View File

@ -0,0 +1,137 @@
/* Optimized strcmp implementation for PowerPC476.
Copyright (C) 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
02110-1301 USA. */
#include <sysdep.h>
#include <bp-sym.h>
#include <bp-asm.h>
/* strcmp

   Register use
   r3: source1 address on entry; result on return
   r4: source2 address
   r5, r6: data loaded from source1 (in the byte loop r6 holds the
	   source2 byte)
   r8, r9: words loaded from source2
   r7, r8: during setup, the number of whole 32-byte blocks between each
	   pointer and the next 4096-byte boundary
   r12: dlmzb result, then a shift amount
   ctr: number of 32-byte blocks compared by the word loop

   Implementation description
   Compare 32 bytes per iteration (four pairs of words) for as many
   whole 32-byte blocks as both pointers have before their next
   4096-byte boundary -- presumably so that the word loads never run
   into the following page.  In each 8-byte step dlmzb. looks for a zero
   byte in the source1 data.  With no zero byte the words are compared
   directly.  With a zero byte, L(end_check) compares only the bytes up
   to and including it.  Whatever is left after the word loop is
   compared one byte at a time until a mismatch or the terminating zero.

   Return value: 0 when the strings are equal.  On a mismatch the
   condition register is returned as-is (mfcr) after an unsigned compare
   of source1 data against source2 data.
   NOTE(review): this relies on cr0 occupying the most significant bits
   of the mfcr result, so that "less than" yields a negative value and
   "greater than" a positive one -- confirm.  Callers can only rely on
   the sign.  */
EALIGN (BP_SYM(strcmp),5,0)
/* r7 = (-source1) & 0xfff and r8 = (-source2) & 0xfff: bytes up to the
   next 4096-byte boundary.  A pointer already on such a boundary
   yields 0.  */
neg r7,r3
clrlwi r7,r7,20
neg r8,r4
clrlwi r8,r8,20
/* Convert to whole 32-byte blocks; use the byte loop if either pointer
   has none.  */
srwi. r7,r7,5
beq L(byte_loop)
srwi. r8,r8,5
beq L(byte_loop)
/* ctr = min (r7, r8).  */
cmplw r7,r8
mtctr r7
ble L(big_loop)
mtctr r8
L(big_loop):
/* Bytes 0-7.  The code relies on dlmzb. leaving cr0 "equal" when r5:r6
   contain no zero byte (confirm against the instruction description).  */
lwz r5,0(r3)
lwz r6,4(r3)
lwz r8,0(r4)
lwz r9,4(r4)
dlmzb. r12,r5,r6
bne L(end_check)
cmplw r5,r8
bne L(st1)
cmplw r6,r9
bne L(st1)
/* Bytes 8-15.  */
lwz r5,8(r3)
lwz r6,12(r3)
lwz r8,8(r4)
lwz r9,12(r4)
dlmzb. r12,r5,r6
bne L(end_check)
cmplw r5,r8
bne L(st1)
cmplw r6,r9
bne L(st1)
/* Bytes 16-23.  */
lwz r5,16(r3)
lwz r6,20(r3)
lwz r8,16(r4)
lwz r9,20(r4)
dlmzb. r12,r5,r6
bne L(end_check)
cmplw r5,r8
bne L(st1)
cmplw r6,r9
bne L(st1)
/* Bytes 24-31.  Both pointers are advanced by 32 here.  */
lwz r5,24(r3)
lwz r6,28(r3)
addi r3,r3,0x20
lwz r8,24(r4)
lwz r9,28(r4)
addi r4,r4,0x20
dlmzb. r12,r5,r6
bne L(end_check)
cmplw r5,r8
bne L(st1)
cmplw r6,r9
bne L(st1)
bdnz L(big_loop)
b L(byte_loop)
L(end_check):
/* A zero byte was found in r5:r6.  The code treats r12 as the 1-based
   position of that byte (1-8) and cr0 "less than" as "the zero byte is
   in the second word" (confirm against the dlmzb description).
   subfic does not write cr0, so the blt still tests the dlmzb. result.
   Zero byte in the first word: shift both first words right by
   (4 - r12) * 8 bits to drop the bytes after it, then compare.  */
subfic r12,r12,4
blt L(end_check2)
rlwinm r12,r12,3,0,31
srw r5,r5,r12
srw r8,r8,r12
cmplw r5,r8
bne L(st1)
b L(end_strcmp)
L(end_check2):
/* Zero byte in the second word: the first words must match in full,
   then the second words are compared after dropping the bytes that
   follow the zero byte ((8 - position) * 8 bits).  */
addi r12,r12,4
cmplw r5,r8
rlwinm r12,r12,3,0,31
bne L(st1)
srw r6,r6,r12
srw r9,r9,r12
cmplw r6,r9
bne L(st1)
L(end_strcmp):
/* Strings are equal.  With r0 in the base-register slot addi uses the
   constant 0, so this is r3 = 0.  */
addi r3,r0,0
blr
L(st1):
/* Mismatch: return the condition register holding the last compare.  */
mfcr r3
blr
L(byte_loop):
/* Compare one byte per iteration; stop on the first mismatch or when
   the (equal) bytes are zero.  */
lbz r5,0(r3)
addi r3,r3,1
lbz r6,0(r4)
addi r4,r4,1
cmplw r5,r6
bne L(st1)
cmpwi r5,0
beq L(end_strcmp)
b L(byte_loop)
END (BP_SYM (strcmp))
libc_hidden_builtin_def (strcmp)

View File

@ -0,0 +1,110 @@
/* Optimized strcpy implementation for PowerPC476.
Copyright (C) 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
02110-1301 USA. */
#include <sysdep.h>
#include <bp-sym.h>
#include <bp-asm.h>
/* strcpy

   Register use
   r3: destination address; left untouched and returned
   r4: source pointer (biased for the update-form loads)
   r5, r6: data being copied
   r7, r8: setup scratch (r8 = bytes until the source is 8-byte aligned)
   r10: destination pointer (biased for the update-form stores)
   r11: dlmzb result, then a byte counter
   ctr: iteration counter of the byte loops

   Implementation description
   Copy single bytes until the source address is a multiple of 8 (or the
   terminating zero has been copied).  Then loop loading two words at a
   time and checking them for a zero byte with dlmzb.  While there is
   none, store both words.  When there is one, copy the bytes up to and
   including it: if it lies in the second word the first word is stored
   whole and the rest is copied byte by byte, otherwise everything is
   copied byte by byte.  */
EALIGN (BP_SYM (strcpy), 5, 0)
/* r8 = (-source) & 7.  Both working pointers are biased by -1 because
   lbzu/stbu access pointer + 1 and update the pointer.  */
neg r7,r4
subi r4,r4,1
clrlwi. r8,r7,29
subi r10,r3,1
beq L(pre_word8_loop)
mtctr r8
L(loop):
/* Copy one byte; finish if it was the terminating zero.  */
lbzu r5,0x01(r4)
cmpi cr5,r5,0x0
stbu r5,0x01(r10)
beq cr5,L(end_strcpy)
bdnz L(loop)
L(pre_word8_loop):
/* Change the bias from -1 to -4 for the lwzu/stwu accesses at
   pointer + 4.  */
subi r4,r4,3
subi r10,r10,3
L(word8_loop):
/* Four identical steps per pass.  Each loads 8 bytes, leaves through
   L(byte_copy) if dlmzb. reports a zero byte in them, and otherwise
   stores them.  After the two lwzu, r4 addresses the second word.  */
lwzu r5,0x04(r4)
lwzu r6,0x04(r4)
dlmzb. r11,r5,r6
bne L(byte_copy)
stwu r5,0x04(r10)
stwu r6,0x04(r10)
lwzu r5,0x04(r4)
lwzu r6,0x04(r4)
dlmzb. r11,r5,r6
bne L(byte_copy)
stwu r5,0x04(r10)
stwu r6,0x04(r10)
lwzu r5,0x04(r4)
lwzu r6,0x04(r4)
dlmzb. r11,r5,r6
bne L(byte_copy)
stwu r5,0x04(r10)
stwu r6,0x04(r10)
lwzu r5,0x04(r4)
lwzu r6,0x04(r4)
dlmzb. r11,r5,r6
bne L(byte_copy)
stwu r5,0x04(r10)
stwu r6,0x04(r10)
b L(word8_loop)
L(last_bytes_copy):
/* Zero byte in the second word: store the first word whole, then copy
   r11 - 4 bytes of the second word.  r10 + 3 and r4 - 1 re-establish
   the -1 bias in front of the second word for lbzu/stbu.  */
stwu r5,0x04(r10)
subi r11,r11,4
mtctr r11
addi r10,r10,3
subi r4,r4,1
L(last_bytes_copy_loop):
lbzu r5,0x01(r4)
stbu r5,0x01(r10)
bdnz L(last_bytes_copy_loop)
blr
L(byte_copy):
/* The code treats cr0 "less than" from dlmzb. as "zero byte in the
   second word" and r11 as the number of bytes up to and including the
   zero byte (confirm against the dlmzb description).
   Zero byte in the first word: copy r11 bytes.  r4 - 5 steps back from
   the second word to one byte before the first word.  */
blt L(last_bytes_copy)
mtctr r11
addi r10,r10,3
subi r4,r4,5
L(last_bytes_copy_loop2):
lbzu r5,0x01(r4)
stbu r5,0x01(r10)
bdnz L(last_bytes_copy_loop2)
L(end_strcpy):
/* r3 still holds the destination address passed in.  */
blr
END (BP_SYM (strcpy))
libc_hidden_builtin_def (strcpy)

View File

@ -0,0 +1,78 @@
/* Optimized strlen implementation for PowerPC476.
Copyright (C) 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
02110-1301 USA. */
#include <sysdep.h>
#include <bp-sym.h>
#include <bp-asm.h>
/* strlen

   Register use
   r3: source address on entry; string length on return
   r4: byte counter (includes the terminating zero once it is found)
   r5, r6: data loaded from the string
   r7, r8: setup scratch (r8 = bytes until the source is 8-byte aligned)
   r12: dlmzb result
   ctr: iteration counter of the alignment loop

   Implementation description
   Count single bytes until the source address is a multiple of 8, then
   load two words at a time and let dlmzb. examine them.  The dlmzb
   result is added to the counter in every step.  Once a zero byte has
   been seen the counter includes it, so the length returned is the
   counter minus one.  */
EALIGN (BP_SYM (strlen),5,0)
/* r8 = (-source) & 7; r4 = 0.  The addi does not touch cr0, so the beq
   still tests the clrlwi. result.  */
neg r7,r3
clrlwi. r8,r7,29
addi r4,0,0
beq L(byte_count_loop)
mtctr r8
L(loop):
/* Count one byte; leave if it was the terminating zero.  */
lbz r5,0(r3)
cmpi cr5,r5,0x0
addi r3,r3,0x1
addi r4,r4,0x1
beq cr5,L(end_strlen)
bdnz L(loop)
L(byte_count_loop):
/* Four 8-byte steps per pass.  r12 is added to the counter whether or
   not a zero byte was found: the code relies on dlmzb. returning 8 when
   there is none and the 1-based position of the first zero byte
   otherwise, with cr0 "not equal" in the latter case (confirm against
   the dlmzb description).  */
lwz r5,0(r3)
lwz r6,4(r3)
dlmzb. r12,r5,r6
add r4,r4,r12
bne L(end_strlen)
lwz r5,8(r3)
lwz r6,12(r3)
dlmzb. r12,r5,r6
add r4,r4,r12
bne L(end_strlen)
lwz r5,16(r3)
lwz r6,20(r3)
dlmzb. r12,r5,r6
add r4,r4,r12
bne L(end_strlen)
lwz r5,24(r3)
lwz r6,28(r3)
addi r3,r3,0x20
dlmzb. r12,r5,r6
add r4,r4,r12
bne L(end_strlen)
b L(byte_count_loop)
L(end_strlen):
/* Do not count the terminating zero.  */
addi r3,r4,-1
blr
END (BP_SYM (strlen))
libc_hidden_builtin_def (strlen)

View File

@ -0,0 +1,131 @@
/* Optimized strncmp implementation for PowerPC476.
Copyright (C) 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
02110-1301 USA. */
#include <sysdep.h>
#include <bp-sym.h>
#include <bp-asm.h>
/* strncmp

   Register use
   r3: source1 address on entry; result on return
   r4: source2 address
   r5: byte count; after setup, the bytes left for the byte loop
   r6, r10: data loaded from source1 (r6 is also scratch during setup)
   r7: setup scratch; source2 byte in the byte loop
   r8, r9: words loaded from source2 (r8 is also scratch during setup)
   r12: dlmzb result, then a shift amount
   ctr: iteration counter of the loop currently running

   Implementation description
   Work out how many whole 8-byte blocks each pointer has before its
   next 4096-byte boundary -- presumably so that the word loads never
   run into the following page -- and take the smaller number.  Compare
   that many blocks, or count / 8 blocks if that is fewer, two words at
   a time.  In each block dlmzb. looks for a zero byte in the source1
   data.  With a zero byte, only the bytes up to and including it are
   compared.  With none, the words are compared directly.  The bytes
   that remain are compared one at a time until a mismatch, a zero byte,
   or the count is used up.

   Return value: 0 when the strings are equal within the count.  On a
   mismatch the condition register is returned as-is (mfcr) after an
   unsigned compare of source1 data against source2 data.
   NOTE(review): this relies on cr0 occupying the most significant bits
   of the mfcr result, so that "less than" yields a negative value and
   "greater than" a positive one -- confirm.  Callers can only rely on
   the sign.  */
EALIGN (BP_SYM(strncmp),5,0)
/* r7 = (-source1) & 0xfff and r8 = (-source2) & 0xfff: bytes up to the
   next 4096-byte boundary.  */
neg r7,r3
clrlwi r7,r7,20
neg r8,r4
clrlwi r8,r8,20
/* Convert to whole 8-byte blocks; use the byte loop for the whole
   count if either pointer has none.  */
srwi. r7,r7,3
beq L(prebyte_count_loop)
srwi. r8,r8,3
beq L(prebyte_count_loop)
/* ctr = min (r7, r8).  */
cmplw r7,r8
mtctr r7
ble L(preword2_count_loop)
mtctr r8
L(preword2_count_loop):
/* r6 = count / 8; fewer than 8 bytes go straight to the byte loop.  */
srwi. r6,r5,3
beq L(prebyte_count_loop)
/* r7 = block limit chosen above.  If the count covers more blocks than
   that, keep ctr and fix up r5 in L(set_count_loop); otherwise compare
   count / 8 blocks and leave count % 8 for the byte loop.  */
mfctr r7
cmplw r6,r7
bgt L(set_count_loop)
mtctr r6
clrlwi r5,r5,29
L(word2_count_loop):
lwz r10,0(r3)
lwz r6,4(r3)
addi r3,r3,0x08
lwz r8,0(r4)
lwz r9,4(r4)
addi r4,r4,0x08
/* The code relies on dlmzb. leaving cr0 "equal" when r10:r6 contain no
   zero byte (confirm against the instruction description).  */
dlmzb. r12,r10,r6
bne L(end_check)
cmplw r10,r8
bne L(st1)
cmplw r6,r9
bne L(st1)
bdnz L(word2_count_loop)
L(prebyte_count_loop):
/* ctr = remaining + 1 and one decrement up front: the bdz leaves
   immediately when no bytes remain, otherwise the loop body runs at
   most `remaining' times.  */
addi r5,r5,1
mtctr r5
bdz L(end_strncmp)
L(byte_count_loop):
lbz r6,0(r3)
addi r3,r3,1
lbz r7,0(r4)
addi r4,r4,1
cmplw r6,r7
bne L(st1)
cmpwi r6,0
beq L(end_strncmp)
bdnz L(byte_count_loop)
b L(end_strncmp)
L(set_count_loop):
/* The word loop will cover r7 blocks (ctr is unchanged), so
   r5 = count - r7 * 8 bytes are left for the byte loop.  */
slwi r7,r7,3
subf r5,r7,r5
b L(word2_count_loop)
L(end_check):
/* A zero byte was found in r10:r6.  The code treats r12 as the 1-based
   position of that byte (1-8) and cr0 "less than" as "the zero byte is
   in the second word" (confirm against the dlmzb description).
   subfic does not write cr0, so the blt still tests the dlmzb. result.
   Zero byte in the first word: shift both first words right by
   (4 - r12) * 8 bits to drop the bytes after it, then compare.  */
subfic r12,r12,4
blt L(end_check2)
rlwinm r12,r12,3,0,31
srw r10,r10,r12
srw r8,r8,r12
cmplw r10,r8
bne L(st1)
b L(end_strncmp)
L(end_check2):
/* Zero byte in the second word: the first words must match in full,
   then the second words are compared after dropping the bytes that
   follow the zero byte ((8 - position) * 8 bits).  */
addi r12,r12,4
cmplw r10,r8
rlwinm r12,r12,3,0,31
bne L(st1)
srw r6,r6,r12
srw r9,r9,r12
cmplw r6,r9
bne L(st1)
L(end_strncmp):
/* Equal within the count.  With r0 in the base-register slot addi uses
   the constant 0, so this is r3 = 0.  */
addi r3,r0,0
blr
L(st1):
/* Mismatch: return the condition register holding the last compare.  */
mfcr r3
blr
END (BP_SYM (strncmp))
libc_hidden_builtin_def (strncmp)

View File

@ -0,0 +1,2 @@
powerpc/powerpc32/405/fpu
powerpc/powerpc32/405

View File

@ -0,0 +1,2 @@
powerpc/powerpc32/440/fpu
powerpc/powerpc32/440

View File

@ -0,0 +1,2 @@
powerpc/powerpc32/464/fpu
powerpc/powerpc32/464

View File

@ -0,0 +1,8 @@
# Some Powerpc32 variants assume soft-fp is the default even though there is
# an fp variant so provide -mhard-float if --with-fp is explicitly passed.
# The same flag is appended to three variables; judging by their names they
# feed the C compile, assembler and link steps respectively (confirm against
# the top-level build rules).
ifeq ($(with-fp),yes)
+cflags += -mhard-float
ASFLAGS += -mhard-float
sysdep-LDFLAGS += -mhard-float
endif

View File

@ -0,0 +1,2 @@
powerpc/powerpc32/405/fpu
powerpc/powerpc32/405

View File

@ -0,0 +1,2 @@
powerpc/powerpc32/440/fpu
powerpc/powerpc32/440

View File

@ -0,0 +1,2 @@
powerpc/powerpc32/464/fpu
powerpc/powerpc32/464

View File

@ -0,0 +1,2 @@
powerpc/powerpc32/476/fpu
powerpc/powerpc32/476