Fix spelling errors in sysdeps/powerpc files.

This commit is contained in:
Anton Blanchard 2013-01-07 11:20:53 -06:00 committed by Ryan S. Arnold
parent 375607b9cc
commit 2ccdea26f2
43 changed files with 167 additions and 121 deletions

View File

@ -1,3 +1,49 @@
2013-01-07 Anton Blanchard <anton@samba.org>
* sysdeps/powerpc/fpu/feholdexcpt.c: Fixed spelling errors.
* sysdeps/powerpc/fpu/feupdateenv.c: Likewise.
* sysdeps/powerpc/fpu/math_ldbl.h: Likewise.
* sysdeps/powerpc/powerpc32/bits/atomic.h: Likewise.
* sysdeps/powerpc/powerpc32/cell/memcpy.S: Likewise.
* sysdeps/powerpc/powerpc32/dl-machine.c: Likewise.
* sysdeps/powerpc/powerpc32/dl-start.S: Likewise.
* sysdeps/powerpc/powerpc32/memset.S: Likewise.
* sysdeps/powerpc/powerpc32/power4/fpu/mpa.c: Likewise.
* sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c: Likewise.
* sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S: Likewise.
* sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S: Likewise.
* sysdeps/powerpc/powerpc32/power4/hp-timing.h: Likewise.
* sysdeps/powerpc/powerpc32/power4/memcmp.S: Likewise.
* sysdeps/powerpc/powerpc32/power4/strncmp.S: Likewise.
* sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S: Likewise.
* sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S: Likewise.
* sysdeps/powerpc/powerpc32/power6/memcpy.S: Likewise.
* sysdeps/powerpc/powerpc32/power7/memchr.S: Likewise.
* sysdeps/powerpc/powerpc32/power7/memcmp.S: Likewise.
* sysdeps/powerpc/powerpc32/power7/memrchr.S: Likewise.
* sysdeps/powerpc/powerpc32/power7/strcasecmp.S: Likewise.
* sysdeps/powerpc/powerpc32/power7/strncmp.S: Likewise.
* sysdeps/powerpc/powerpc32/strncmp.S: Likewise.
* sysdeps/powerpc/powerpc64/bits/atomic.h: Likewise.
* sysdeps/powerpc/powerpc64/cell/memcpy.S: Likewise.
* sysdeps/powerpc/powerpc64/dl-machine.h: Likewise.
* sysdeps/powerpc/powerpc64/fpu/s_ceill.S: Likewise.
* sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S: Likewise.
* sysdeps/powerpc/powerpc64/hp-timing.h: Likewise.
* sysdeps/powerpc/powerpc64/memcpy.S: Likewise.
* sysdeps/powerpc/powerpc64/power4/fpu/mpa.c: Likewise.
* sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c: Likewise.
* sysdeps/powerpc/powerpc64/power4/memcmp.S: Likewise.
* sysdeps/powerpc/powerpc64/power4/memcpy.S: Likewise.
* sysdeps/powerpc/powerpc64/power4/strncmp.S: Likewise.
* sysdeps/powerpc/powerpc64/power6/memcpy.S: Likewise.
* sysdeps/powerpc/powerpc64/power7/memchr.S: Likewise.
* sysdeps/powerpc/powerpc64/power7/memcmp.S: Likewise.
* sysdeps/powerpc/powerpc64/power7/memrchr.S: Likewise.
* sysdeps/powerpc/powerpc64/power7/strcasecmp.S: Likewise.
* sysdeps/powerpc/powerpc64/power7/strncmp.S: Likewise.
* sysdeps/powerpc/powerpc64/strncmp.S: Likewise.
2013-01-07 Joseph Myers <joseph@codesourcery.com>
* malloc/malloc.h (__MALLOC_P): Remove all definitions.

View File

@ -33,7 +33,7 @@ feholdexcept (fenv_t *envp)
new.l[1] = old.l[1] & 7;
new.l[0] = old.l[0];
/* If the old env had any eabled exceptions, then mask SIGFPE in the
/* If the old env had any enabled exceptions, then mask SIGFPE in the
MSR FE0/FE1 bits. This may allow the FPU to run faster because it
always takes the default action and can not generate SIGFPE. */
if ((old.l[1] & _FPU_MASK_ALL) != 0)

View File

@ -37,14 +37,14 @@ __feupdateenv (const fenv_t *envp)
unchanged. */
new.l[1] = (old.l[1] & 0x1FFFFF00) | (new.l[1] & 0x1FF80FFF);
/* If the old env has no eabled exceptions and the new env has any enabled
/* If the old env has no enabled exceptions and the new env has any enabled
exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put
the hardware into "precise mode" and may cause the FPU to run slower on
some hardware. */
if ((old.l[1] & _FPU_MASK_ALL) == 0 && (new.l[1] & _FPU_MASK_ALL) != 0)
(void)__fe_nomask_env ();
/* If the old env had any eabled exceptions and the new env has no enabled
/* If the old env had any enabled exceptions and the new env has no enabled
exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the
FPU to run faster because it always takes the default action and can not
generate SIGFPE. */

View File

@ -27,7 +27,7 @@ ldbl_extract_mantissa (int64_t *hi64, u_int64_t *lo64, int *exp, long double x)
lo |= (1ULL << 52);
lo = lo << 7; /* pre-shift lo to match ieee854. */
/* The lower double is normalized separately from the upper. We
may need to adjust the lower manitissa to reflect this. */
may need to adjust the lower mantissa to reflect this. */
ediff = eldbl.ieee.exponent - eldbl.ieee.exponent2;
if (ediff > 53)
lo = lo >> (ediff-53);

View File

@ -21,7 +21,7 @@
This is a hint to the hardware to expect additional updates adjacent
to the lock word or not. If we are acquiring a Mutex, the hint
should be true. Otherwise we are releasing a Mutex or doing a simple
atomic operation. In that case we don't expect addtional updates
atomic operation. In that case we don't expect additional updates
adjacent to the lock word after the Store Conditional and the hint
should be false. */
@ -35,7 +35,7 @@
/*
* The 32-bit exchange_bool is different on powerpc64 because the subf
* does signed 64-bit arthmatic while the lwarx is 32-bit unsigned
* does signed 64-bit arithmetic while the lwarx is 32-bit unsigned
* (a load word and zero (high 32) form). So powerpc64 has a slightly
* different version in sysdeps/powerpc/powerpc64/bits/atomic.h.
*/

View File

@ -34,7 +34,7 @@
* latency to memory is >400 clocks
* To improve copy performance we need to prefetch source data
* far ahead to hide this latency
* For best performance instructionforms ending in "." like "andi."
* For best performance instruction forms ending in "." like "andi."
* should be avoided as they are implemented in microcode on CELL.
* The below code is loop unrolled for the CELL cache line of 128 bytes
*/
@ -146,7 +146,7 @@ EALIGN (BP_SYM (memcpy), 5, 0)
lfd fp9, 0x08(r4)
dcbz r11,r6
lfd fp10, 0x10(r4) /* 4 register stride copy is optimal */
lfd fp11, 0x18(r4) /* to hide 1st level cache lantency. */
lfd fp11, 0x18(r4) /* to hide 1st level cache latency. */
lfd fp12, 0x20(r4)
stfd fp9, 0x08(r6)
stfd fp10, 0x10(r6)

View File

@ -113,7 +113,7 @@ __elf_preferred_address (struct link_map *loader, size_t maplength,
/* Otherwise, quickly look for a suitable gap between 0x3FFFF and
0x70000000. 0x3FFFF is so that references off NULL pointers will
cause a segfault, 0x70000000 is just paranoia (it should always
be superceded by the program's load address). */
be superseded by the program's load address). */
low = 0x0003FFFF;
high = 0x70000000;
for (nsid = 0; nsid < DL_NNS; ++nsid)

View File

@ -74,7 +74,7 @@ _dl_start_user:
slwi r5,r3,2
add r6,r4,r5
addi r5,r6,4
/* pass the auxilary vector in r6. This is passed to us just after _envp. */
/* pass the auxiliary vector in r6. This is passed to us just after _envp. */
2: lwzu r0,4(r6)
cmpwi r0,0
bne 2b

View File

@ -275,7 +275,7 @@ L(checklinesize):
beq cr1,L(nondcbz)
/* If the cache line size is 32 bytes then go to L(zloopstart),
which is coded specificly for 32-byte lines (and 601). */
which is coded specifically for 32-byte lines (and 601). */
cmplwi cr1,rCLS,32
beq cr1,L(zloopstart)

View File

@ -409,9 +409,9 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
if (k > p2) {i1=k-p2; i2=p2+1; }
else {i1=1; i2=k; }
#if 1
/* rearange this inner loop to allow the fmadd instructions to be
/* rearrange this inner loop to allow the fmadd instructions to be
independent and execute in parallel on processors that have
dual symetrical FP pipelines. */
dual symmetrical FP pipelines. */
if (i1 < (i2-1))
{
/* make sure we have at least 2 iterations */
@ -437,7 +437,7 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
zk += x->d[i1]*y->d[i1];
}
#else
/* The orginal code. */
/* The original code. */
for (i=i1,j=i2-1; i<i2; i++,j--) zk += X[i]*Y[j];
#endif

View File

@ -59,7 +59,7 @@ __slowpow (double x, double y, double z)
res1 = (double) (ldpp - ldeps);
if (res != res1) /* if result still not accurate enough */
{ /* use mpa for higher persision. */
{ /* use mpa for higher precision. */
mp_no mpx, mpy, mpz, mpw, mpp, mpr, mpr1;
static const mp_no eps = { -3, {1.0, 4.0} };
int p;

View File

@ -22,7 +22,7 @@
/* double [fp1] sqrt (double x [fp1])
Power4 (ISA V2.0) and above implement sqrt in hardware (not optional).
The fsqrt instruction generates the correct value for all inputs and
sets the appropriate floating point exceptions. Extented checking is
sets the appropriate floating point exceptions. Extended checking is
only needed to set errno (via __kernel_standard) if the input value
is negative.

View File

@ -22,7 +22,7 @@
/* float [fp1] sqrts (float x [fp1])
Power4 (ISA V2.0) and above implement sqrt in hardware (not optional).
The fsqrts instruction generates the correct value for all inputs and
sets the appropriate floating point exceptions. Extented checking is
sets the appropriate floating point exceptions. Extended checking is
only needed to set errno (via __kernel_standard) if the input value
is negative.

View File

@ -82,7 +82,7 @@ typedef unsigned long long int hp_timing_t;
/* That's quite simple. Use the `mftb' instruction. Note that the value
might not be 100% accurate since there might be some more instructions
running in this moment. This could be changed by using a barrier like
'lwsync' right before the `mftb' instruciton. But we are not interested
'lwsync' right before the `mftb' instruction. But we are not interested
in accurate clock cycles here so we don't do this. */
#define HP_TIMING_NOW(Var) \

View File

@ -69,7 +69,7 @@ EALIGN (BP_SYM(memcmp), 4, 0)
Otherwise we know the two strings have the same alignment (but not
yet word aligned). So we force the string addresses to the next lower
word boundary and special case this first word using shift left to
eliminate bits preceeding the first byte. Since we want to join the
eliminate bits preceding the first byte. Since we want to join the
normal (word aligned) compare loop, starting at the second word,
we need to adjust the length (rN) and special case the loop
versioning for the first word. This insures that the loop count is
@ -517,7 +517,7 @@ L(zeroLength):
Otherwise we know that rSTR1 is not already word aligned yet.
So we can force the string addresses to the next lower word
boundary and special case this first word using shift left to
eliminate bits preceeding the first byte. Since we want to join the
eliminate bits preceding the first byte. Since we want to join the
normal (Wualigned) compare loop, starting at the second word,
we need to adjust the length (rN) and special case the loop
versioning for the first W. This insures that the loop count is

View File

@ -51,7 +51,7 @@ EALIGN (BP_SYM(strncmp), 4, 0)
cmplwi cr1, rN, 0
lis rFEFE, -0x101
bne L(unaligned)
/* We are word alligned so set up for two loops. first a word
/* We are word aligned so set up for two loops. first a word
loop, then fall into the byte loop if any residual. */
srwi. rTMP, rN, 2
clrlwi rN, rN, 30

View File

@ -22,7 +22,7 @@
/* double [fp1] sqrt (double x [fp1])
Power4 (ISA V2.0) and above implement sqrt in hardware (not optional).
The fsqrt instruction generates the correct value for all inputs and
sets the appropriate floating point exceptions. Extented checking is
sets the appropriate floating point exceptions. Extended checking is
only needed to set errno (via __kernel_standard) if the input value
is negative.

View File

@ -22,7 +22,7 @@
/* float [fp1] sqrts (float x [fp1])
Power4 (ISA V2.0) and above implement sqrt in hardware (not optional).
The fsqrts instruction generates the correct value for all inputs and
sets the appropriate floating point exceptions. Extented checking is
sets the appropriate floating point exceptions. Extended checking is
only needed to set errno (via __kernel_standard) if the input value
is negative.

View File

@ -411,31 +411,31 @@ L(wdu):
not. For power4, power5 and power6 machines there is penalty for
unaligned loads (src) that cross 32-byte, cacheline, or page
boundaries. So we want to use simple (unaligned) loads where
posible but avoid them where we know the load would span a 32-byte
possible but avoid them where we know the load would span a 32-byte
boundary.
At this point we know we have at least 29 (32-3) bytes to copy,
the src is unaligned, and we may cross at least one 32-byte
boundary. Also we have the following regester values:
boundary. Also we have the following register values:
r3 == adjusted dst, word aligned
r4 == unadjusted src
r5 == unadjusted len
r9 == adjusted Word length
r10 == src alignment (1-3)
r12 == adjuested src, not aligned
r12 == adjusted src, not aligned
r31 == adjusted len
First we need to copy word upto but not crossing the next 32-byte
First we need to copy word up to but not crossing the next 32-byte
boundary. Then perform aligned loads just before and just after
the boundary and use shifts and or to gernerate the next aligned
the boundary and use shifts and or to generate the next aligned
word for dst. If more than 32 bytes remain we copy (unaligned src)
the next 7 words and repeat the loop until less than 32-bytes
remaim.
remain.
Then if more than 4 bytes remain we again use aligned loads,
shifts and or to generate the next dst word. We then process the
remaining words using unaligned loads as needed. Finally we check
if there more then 0 bytes (1-3) bytes remainting and use
if there more then 0 bytes (1-3) bytes remaining and use
halfword and or byte load/stores to complete the copy.
*/
mr 4,12 /* restore unaligned adjusted src ptr */
@ -512,7 +512,7 @@ L(wdu_h32_4):
addi 3,3,4
.align 4
L(wdu_h32_0):
/* set up for 32-byte boundry crossing word move and possibly 32-byte
/* set up for 32-byte boundary crossing word move and possibly 32-byte
move loop. */
clrrwi 12,4,2
cmplwi cr5,31,32

View File

@ -44,7 +44,7 @@ L(proceed):
rlwinm r6,r3,3,27,28 /* Calculate padding. */
cmpli cr6,r6,0 /* cr6 == Do we have padding? */
lwz r12,0(r8) /* Load word from memory. */
cmpb r10,r12,r4 /* Check for BYTE's in WORD1. */
cmpb r10,r12,r4 /* Check for BYTEs in WORD1. */
beq cr6,L(proceed_no_padding)
slw r10,r10,r6
srw r10,r10,r6

View File

@ -73,7 +73,7 @@ EALIGN (BP_SYM(memcmp),4,0)
Otherwise we know the two strings have the same alignment (but not
yet word aligned). So we force the string addresses to the next lower
word boundary and special case this first word using shift left to
eliminate bits preceeding the first byte. Since we want to join the
eliminate bits preceding the first byte. Since we want to join the
normal (word aligned) compare loop, starting at the second word,
we need to adjust the length (rN) and special case the loop
versioning for the first word. This insures that the loop count is
@ -520,7 +520,7 @@ L(zeroLength):
Otherwise we know that rSTR1 is not already word aligned yet.
So we can force the string addresses to the next lower word
boundary and special case this first word using shift left to
eliminate bits preceeding the first byte. Since we want to join the
eliminate bits preceding the first byte. Since we want to join the
normal (Wualigned) compare loop, starting at the second word,
we need to adjust the length (rN) and special case the loop
versioning for the first W. This insures that the loop count is

View File

@ -51,7 +51,7 @@ L(proceed):
cmpb r10,r12,r4 /* Check for BYTE in WORD1. */
slw r10,r10,r0
srw r10,r10,r0
cmplwi cr7,r10,0 /* If r10 == 0, no BYTE's have been found. */
cmplwi cr7,r10,0 /* If r10 == 0, no BYTEs have been found. */
bne cr7,L(done)
/* Are we done already? */

View File

@ -39,8 +39,8 @@ ENTRY (BP_SYM (__STRCMP))
#define rSTR1 r5 /* 1st string */
#define rSTR2 r4 /* 2nd string */
#define rLOCARG r5 /* 3rd argument: locale_t */
#define rCHAR1 r6 /* Byte readed from 1st string */
#define rCHAR2 r7 /* Byte readed from 2nd string */
#define rCHAR1 r6 /* Byte read from 1st string */
#define rCHAR2 r7 /* Byte read from 2nd string */
#define rADDR1 r8 /* Address of tolower(rCHAR1) */
#define rADDR2 r12 /* Address of tolower(rCHAR2) */
#define rLWR1 r8 /* Byte tolower(rCHAR1) */

View File

@ -55,7 +55,7 @@ EALIGN (BP_SYM(strncmp),5,0)
cmplwi cr1,rN,0
lis rFEFE,-0x101
bne L(unaligned)
/* We are word alligned so set up for two loops. first a word
/* We are word aligned so set up for two loops. first a word
loop, then fall into the byte loop if any residual. */
srwi. rTMP,rN,2
clrlwi rN,rN,30

View File

@ -49,7 +49,7 @@ EALIGN (BP_SYM(strncmp), 4, 0)
cmplwi cr1, rN, 0
lis rFEFE, -0x101
bne L(unaligned)
/* We are word alligned so set up for two loops. first a word
/* We are word aligned so set up for two loops. first a word
loop, then fall into the byte loop if any residual. */
srwi. rTMP, rN, 2
clrlwi rN, rN, 30

View File

@ -21,7 +21,7 @@
This is a hint to the hardware to expect additional updates adjacent
to the lock word or not. If we are acquiring a Mutex, the hint
should be true. Otherwise we are releasing a Mutex or doing a simple
atomic operation. In that case we don't expect addtional updates
atomic operation. In that case we don't expect additional updates
adjacent to the lock word after the Store Conditional and the hint
should be false. */
@ -34,7 +34,7 @@
#endif
/* The 32-bit exchange_bool is different on powerpc64 because the subf
does signed 64-bit arthmatic while the lwarx is 32-bit unsigned
does signed 64-bit arithmetic while the lwarx is 32-bit unsigned
(a load word and zero (high 32) form) load.
In powerpc64 register values are 64-bit by default, including oldval.
The value in oldval has unknown sign extension; lwarx loads the 32-bit

View File

@ -34,7 +34,7 @@
* latency to memory is >400 clocks
* To improve copy performance we need to prefetch source data
* far ahead to hide this latency
* For best performance instructionforms ending in "." like "andi."
* For best performance instruction forms ending in "." like "andi."
* should be avoided as they are implemented in microcode on CELL.
* The below code is loop unrolled for the CELL cache line of 128 bytes
*/
@ -146,7 +146,7 @@ EALIGN (BP_SYM (memcpy), 5, 0)
ld r9, 0x08(r4)
dcbz r11,r6
ld r7, 0x10(r4) /* 4 register stride copy is optimal */
ld r8, 0x18(r4) /* to hide 1st level cache lantency. */
ld r8, 0x18(r4) /* to hide 1st level cache latency. */
ld r0, 0x20(r4)
std r9, 0x08(r6)
std r7, 0x10(r6)

View File

@ -202,7 +202,7 @@ BODY_PREFIX "_dl_start_user:\n" \
" sldi 5,3,3\n" \
" add 6,4,5\n" \
" addi 5,6,8\n" \
/* Pass the auxilary vector in r6. This is passed to us just after \
/* Pass the auxiliary vector in r6. This is passed to us just after \
_envp. */ \
"2: ldu 0,8(6)\n" \
" cmpdi 0,0\n" \
@ -322,13 +322,13 @@ elf_machine_runtime_setup (struct link_map *map, int lazy, int profile)
/* Relocate the DT_PPC64_GLINK entry in the _DYNAMIC section.
elf_get_dynamic_info takes care of the standard entries but
doesn't know exactly what to do with processor specific
entires. */
entries. */
if (info[DT_PPC64(GLINK)] != NULL)
info[DT_PPC64(GLINK)]->d_un.d_ptr += l_addr;
if (lazy)
{
/* The function descriptor of the appropriate trampline
/* The function descriptor of the appropriate trampoline
routine is used to set the 1st and 2nd doubleword of the
plt_reserve. */
Elf64_FuncDesc *resolve_fd;

View File

@ -31,7 +31,7 @@
PowerPC64 long double uses the IBM extended format which is
represented by two 64-bit floating point double values. The values are
non-overlapping giving an effective precision of 106 bits. The first
double contains the high order bits of mantisa and is always ceiled
double contains the high order bits of mantissa and is always ceiled
to represent a normal ceiling of long double to double. Since the
long double value is sum of the high and low values, the low double
normally has the opposite sign to compensate for this ceiling.
@ -40,7 +40,7 @@
1) |x| < 2**52, all the integer bits are in the high double.
ceil the high double and set the low double to -0.0.
2) |x| >= 2**52, ceiling involves both doubles.
See the comment before lable .L2 for details.
See the comment before label .L2 for details.
*/
ENTRY (__ceill)

View File

@ -26,16 +26,16 @@
.section ".text"
/* long double [fp1,fp2] nearbyintl (long double x [fp1,fp2])
IEEE 1003.1 nearbyintl function. nearbyintl is simular to the rintl
IEEE 1003.1 nearbyintl function. nearbyintl is similar to the rintl
but does not raise the "inexact" exception. This implementation is
based on rintl but explicitly maskes the inexact exception on entry
based on rintl but explicitly masks the inexact exception on entry
and clears any pending inexact before restoring the exception mask
on exit.
PowerPC64 long double uses the IBM extended format which is
represented by two 64-bit floating point double values. The values are
non-overlapping giving an effective precision of 106 bits. The first
double contains the high order bits of mantisa and is always rounded
double contains the high order bits of mantissa and is always rounded
to represent a normal rounding of long double to double. Since the
long double value is sum of the high and low values, the low double
normally has the opposite sign to compensate for this rounding.
@ -44,7 +44,7 @@
1) |x| < 2**52, all the integer bits are in the high double.
floor the high double and set the low double to -0.0.
2) |x| >= 2**52, Rounding involves both doubles.
See the comment before lable .L2 for details.
See the comment before label .L2 for details.
*/
ENTRY (__nearbyintl)
mffs fp11 /* Save current FPSCR. */

View File

@ -82,7 +82,7 @@ typedef unsigned long long int hp_timing_t;
/* That's quite simple. Use the `mftb' instruction. Note that the value
might not be 100% accurate since there might be some more instructions
running in this moment. This could be changed by using a barrier like
'lwsync' right before the `mftb' instruciton. But we are not interested
'lwsync' right before the `mftb' instruction. But we are not interested
in accurate clock cycles here so we don't do this. */
#ifdef _ARCH_PWR4
#define HP_TIMING_NOW(Var) __asm__ __volatile__ ("mfspr %0,268" : "=r" (Var))

View File

@ -28,11 +28,11 @@
with the appropriate combination of byte and halfword load/stores.
There is minimal effort to optimize the alignment of short moves.
The 64-bit implementations of POWER3 and POWER4 do a reasonable job
of handling unligned load/stores that do not cross 32-byte boundries.
of handling unaligned load/stores that do not cross 32-byte boundaries.
Longer moves (>= 32-bytes) justify the effort to get at least the
destination doubleword (8-byte) aligned. Further optimization is
posible when both source and destination are doubleword aligned.
possible when both source and destination are doubleword aligned.
Each case has an optimized unrolled loop. */
EALIGN (BP_SYM (memcpy), 5, 0)
@ -43,9 +43,9 @@ EALIGN (BP_SYM (memcpy), 5, 0)
std 3,-16(1)
std 31,-8(1)
cfi_offset(31,-8)
andi. 11,3,7 /* check alignement of dst. */
andi. 11,3,7 /* check alignment of dst. */
clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */
clrldi 10,4,61 /* check alignement of src. */
clrldi 10,4,61 /* check alignment of src. */
cmpldi cr6,5,8
ble- cr1,.L2 /* If move < 32 bytes use short move code. */
cmpld cr6,10,11
@ -56,7 +56,7 @@ EALIGN (BP_SYM (memcpy), 5, 0)
beq .L0
subf 31,0,5
/* Move 0-7 bytes as needed to get the destination doubleword alligned. */
/* Move 0-7 bytes as needed to get the destination doubleword aligned. */
1: bf 31,2f
lbz 6,0(12)
addi 12,12,1
@ -73,10 +73,10 @@ EALIGN (BP_SYM (memcpy), 5, 0)
stw 6,0(3)
addi 3,3,4
0:
clrldi 10,12,61 /* check alignement of src again. */
clrldi 10,12,61 /* check alignment of src again. */
srdi 9,31,3 /* Number of full double words remaining. */
/* Copy doublewords from source to destination, assumpting the
/* Copy doublewords from source to destination, assuming the
destination is aligned on a doubleword boundary.
At this point we know there are at least 25 bytes left (32-7) to copy.
@ -152,7 +152,7 @@ EALIGN (BP_SYM (memcpy), 5, 0)
add 12,12,0
/* At this point we have a tail of 0-7 bytes and we know that the
destiniation is double word aligned. */
destination is double word aligned. */
4: bf 29,2f
lwz 6,0(12)
addi 12,12,4
@ -282,7 +282,7 @@ EALIGN (BP_SYM (memcpy), 5, 0)
bne cr6,4f
/* Would have liked to use ld/std here but the 630 processors are
slow for load/store doubles that are not at least word aligned.
Unaligned Load/Store word execute with only a 1 cycle penaltity. */
Unaligned Load/Store word execute with only a 1 cycle penalty. */
lwz 6,0(4)
lwz 7,4(4)
stw 6,0(3)

View File

@ -409,9 +409,9 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
if (k > p2) {i1=k-p2; i2=p2+1; }
else {i1=1; i2=k; }
#if 1
/* rearange this inner loop to allow the fmadd instructions to be
/* rearrange this inner loop to allow the fmadd instructions to be
independent and execute in parallel on processors that have
dual symetrical FP pipelines. */
dual symmetrical FP pipelines. */
if (i1 < (i2-1))
{
/* make sure we have at least 2 iterations */
@ -437,7 +437,7 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
zk += x->d[i1]*y->d[i1];
}
#else
/* The orginal code. */
/* The original code. */
for (i=i1,j=i2-1; i<i2; i++,j--) zk += X[i]*Y[j];
#endif

View File

@ -59,7 +59,7 @@ __slowpow (double x, double y, double z)
res1 = (double) (ldpp - ldeps);
if (res != res1) /* if result still not accurate enough */
{ /* use mpa for higher persision. */
{ /* use mpa for higher precision. */
mp_no mpx, mpy, mpz, mpw, mpp, mpr, mpr1;
static const mp_no eps = { -3, {1.0, 4.0} };
int p;

View File

@ -53,7 +53,7 @@ EALIGN (BP_SYM(memcmp), 4, 0)
beq- cr6, L(zeroLength)
dcbt 0,rSTR1
dcbt 0,rSTR2
/* If less than 8 bytes or not aligned, use the unalligned
/* If less than 8 bytes or not aligned, use the unaligned
byte loop. */
blt cr1, L(bytealigned)
std rWORD8,-8(r1)
@ -62,7 +62,7 @@ EALIGN (BP_SYM(memcmp), 4, 0)
cfi_offset(rWORD7,-16)
bne L(unaligned)
/* At this point we know both strings have the same alignment and the
compare length is at least 8 bytes. rBITDIF containes the low order
compare length is at least 8 bytes. rBITDIF contains the low order
3 bits of rSTR1 and cr5 contains the result of the logical compare
of rBITDIF to 0. If rBITDIF == 0 then we are already double word
aligned and can perform the DWaligned loop.
@ -70,7 +70,7 @@ EALIGN (BP_SYM(memcmp), 4, 0)
Otherwise we know the two strings have the same alignment (but not
yet DW). So we can force the string addresses to the next lower DW
boundary and special case this first DW word using shift left to
ellimiate bits preceeding the first byte. Since we want to join the
eliminate bits preceding the first byte. Since we want to join the
normal (DWaligned) compare loop, starting at the second double word,
we need to adjust the length (rN) and special case the loop
versioning for the first DW. This insures that the loop count is
@ -152,8 +152,8 @@ L(DWaligned):
L(dP1):
mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
(8-15 byte compare), we want to use only volitile registers. This
means we can avoid restoring non-volitile registers since we did not
(8-15 byte compare), we want to use only volatile registers. This
means we can avoid restoring non-volatile registers since we did not
change any on the early exit path. The key here is the non-early
exit path only cares about the condition code (cr5), not about which
register pair was used. */
@ -215,7 +215,7 @@ L(dP2e):
bne cr5, L(dLcr5)
b L(dLoop2)
/* Again we are on an early exit path (16-23 byte compare), we want to
only use volitile registers and avoid restoring non-volitile
only use volatile registers and avoid restoring non-volatile
registers. */
.align 4
L(dP2x):
@ -256,7 +256,7 @@ L(dP3e):
bne cr6, L(dLcr6)
b L(dLoop1)
/* Again we are on an early exit path (24-31 byte compare), we want to
only use volitile registers and avoid restoring non-volitile
only use volatile registers and avoid restoring non-volatile
registers. */
.align 4
L(dP3x):
@ -340,7 +340,7 @@ L(d04):
beq L(zeroLength)
/* At this point we have a remainder of 1 to 7 bytes to compare. Since
we are aligned it is safe to load the whole double word, and use
shift right double to elliminate bits beyond the compare length. */
shift right double to eliminate bits beyond the compare length. */
L(d00):
ld rWORD1, 8(rSTR1)
ld rWORD2, 8(rSTR2)
@ -496,15 +496,15 @@ L(zeroLength):
.align 4
/* At this point we know the strings have different alignment and the
compare length is at least 8 bytes. rBITDIF containes the low order
compare length is at least 8 bytes. rBITDIF contains the low order
3 bits of rSTR1 and cr5 contains the result of the logical compare
of rBITDIF to 0. If rBITDIF == 0 then rStr1 is double word
aligned and can perform the DWunaligned loop.
Otherwise we know that rSTR1 is not aready DW aligned yet.
Otherwise we know that rSTR1 is not already DW aligned yet.
So we can force the string addresses to the next lower DW
boundary and special case this first DW word using shift left to
ellimiate bits preceeding the first byte. Since we want to join the
eliminate bits preceding the first byte. Since we want to join the
normal (DWaligned) compare loop, starting at the second double word,
we need to adjust the length (rN) and special case the loop
versioning for the first DW. This insures that the loop count is
@ -537,7 +537,7 @@ L(unaligned):
clrrdi rSTR2, rSTR2, 3
std r26,-48(r1)
cfi_offset(r26,-48)
/* Compute the leaft/right shift counts for the unalign rSTR2,
/* Compute the left/right shift counts for the unalign rSTR2,
compensating for the logical (DW aligned) start of rSTR1. */
clrldi rSHL, r27, 61
clrrdi rSTR1, rSTR1, 3
@ -876,7 +876,7 @@ L(du14):
sldi. rN, rN, 3
bne cr5, L(duLcr5)
/* At this point we have a remainder of 1 to 7 bytes to compare. We use
shift right double to elliminate bits beyond the compare length.
shift right double to eliminate bits beyond the compare length.
This allows the use of double word subtract to compute the final
result.

View File

@ -28,11 +28,11 @@
with the appropriate combination of byte and halfword load/stores.
There is minimal effort to optimize the alignment of short moves.
The 64-bit implementations of POWER3 and POWER4 do a reasonable job
of handling unligned load/stores that do not cross 32-byte boundries.
of handling unaligned load/stores that do not cross 32-byte boundaries.
Longer moves (>= 32-bytes) justify the effort to get at least the
destination doubleword (8-byte) aligned. Further optimization is
posible when both source and destination are doubleword aligned.
possible when both source and destination are doubleword aligned.
Each case has an optimized unrolled loop. */
.machine power4
@ -44,9 +44,9 @@ EALIGN (BP_SYM (memcpy), 5, 0)
std 3,-16(1)
std 31,-8(1)
cfi_offset(31,-8)
andi. 11,3,7 /* check alignement of dst. */
andi. 11,3,7 /* check alignment of dst. */
clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */
clrldi 10,4,61 /* check alignement of src. */
clrldi 10,4,61 /* check alignment of src. */
cmpldi cr6,5,8
ble- cr1,.L2 /* If move < 32 bytes use short move code. */
cmpld cr6,10,11
@ -57,7 +57,7 @@ EALIGN (BP_SYM (memcpy), 5, 0)
beq .L0
subf 31,0,5
/* Move 0-7 bytes as needed to get the destination doubleword alligned. */
/* Move 0-7 bytes as needed to get the destination doubleword aligned. */
1: bf 31,2f
lbz 6,0(12)
addi 12,12,1
@ -74,10 +74,10 @@ EALIGN (BP_SYM (memcpy), 5, 0)
stw 6,0(3)
addi 3,3,4
0:
clrldi 10,12,61 /* check alignement of src again. */
clrldi 10,12,61 /* check alignment of src again. */
srdi 9,31,3 /* Number of full double words remaining. */
/* Copy doublewords from source to destination, assumpting the
/* Copy doublewords from source to destination, assuming the
destination is aligned on a doubleword boundary.
At this point we know there are at least 25 bytes left (32-7) to copy.
@ -154,7 +154,7 @@ EALIGN (BP_SYM (memcpy), 5, 0)
add 12,12,0
/* At this point we have a tail of 0-7 bytes and we know that the
destiniation is double word aligned. */
destination is double word aligned. */
4: bf 29,2f
lwz 6,0(12)
addi 12,12,4
@ -284,7 +284,7 @@ EALIGN (BP_SYM (memcpy), 5, 0)
bne cr6,4f
/* Would have liked to use ld/std here but the 630 processors are
slow for load/store doubles that are not at least word aligned.
Unaligned Load/Store word execute with only a 1 cycle penaltity. */
Unaligned Load/Store word execute with only a 1 cycle penalty. */
lwz 6,0(4)
lwz 7,4(4)
stw 6,0(3)

View File

@ -52,7 +52,7 @@ EALIGN (BP_SYM(strncmp), 4, 0)
cmpldi cr1, rN, 0
lis rFEFE, -0x101
bne L(unaligned)
/* We are doubleword alligned so set up for two loops. first a double word
/* We are doubleword aligned so set up for two loops. first a double word
loop, then fall into the byte loop if any residual. */
srdi. rTMP, rN, 3
clrldi rN, rN, 61

View File

@ -28,16 +28,16 @@
with the appropriate combination of byte and halfword load/stores.
There is minimal effort to optimize the alignment of short moves.
The 64-bit implementations of POWER3 and POWER4 do a reasonable job
of handling unligned load/stores that do not cross 32-byte boundries.
of handling unaligned load/stores that do not cross 32-byte boundaries.
Longer moves (>= 32-bytes) justify the effort to get at least the
destination doubleword (8-byte) aligned. Further optimization is
posible when both source and destination are doubleword aligned.
possible when both source and destination are doubleword aligned.
Each case has an optimized unrolled loop.
For POWER6 unaligned loads will take a 20+ cycle hicup for any
For POWER6 unaligned loads will take a 20+ cycle hiccup for any
L1 cache miss that crosses a 32- or 128-byte boundary. Store
is more forgiving and does not take a hicup until page or
is more forgiving and does not take a hiccup until page or
segment boundaries. So we require doubleword alignment for
the source but may take a risk and only require word alignment
for the destination. */
@ -50,9 +50,9 @@ EALIGN (BP_SYM (memcpy), 7, 0)
neg 0,3
std 3,-16(1)
std 31,-8(1)
andi. 11,3,7 /* check alignement of dst. */
andi. 11,3,7 /* check alignment of dst. */
clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */
clrldi 10,4,61 /* check alignement of src. */
clrldi 10,4,61 /* check alignment of src. */
cmpldi cr6,5,8
ble- cr1,.L2 /* If move < 32 bytes use short move code. */
mtcrf 0x01,0
@ -61,8 +61,8 @@ EALIGN (BP_SYM (memcpy), 7, 0)
beq .L0
subf 5,0,5
/* Move 0-7 bytes as needed to get the destination doubleword alligned.
Duplicate some code to maximize fall-throught and minimize agen delays. */
/* Move 0-7 bytes as needed to get the destination doubleword aligned.
Duplicate some code to maximize fall-through and minimize agen delays. */
1: bf 31,2f
lbz 6,0(4)
stb 6,0(3)
@ -95,10 +95,10 @@ EALIGN (BP_SYM (memcpy), 7, 0)
add 4,4,0
add 3,3,0
clrldi 10,4,61 /* check alignement of src again. */
clrldi 10,4,61 /* check alignment of src again. */
srdi 9,5,3 /* Number of full double words remaining. */
/* Copy doublewords from source to destination, assumpting the
/* Copy doublewords from source to destination, assuming the
destination is aligned on a doubleword boundary.
At this point we know there are at least 25 bytes left (32-7) to copy.
@ -130,7 +130,7 @@ EALIGN (BP_SYM (memcpy), 7, 0)
load, load, store, store every 2 cycles.
The following code is sensitive to cache line alignment. Do not
make any change with out first making sure thay don't result in
make any change without first making sure they don't result in
splitting ld/std pairs across a cache line. */
mtcrf 0x02,5
@ -329,7 +329,7 @@ L(das_tail):
L(das_tail2):
/* At this point we have a tail of 0-7 bytes and we know that the
destiniation is double word aligned. */
destination is double word aligned. */
4: bf 29,2f
lwz 6,0(4)
stw 6,0(3)
@ -537,7 +537,7 @@ L(dus_tailX):
.LE8:
mr 12,4
bne cr6,L(dus_4)
/* Exactly 8 bytes. We may cross a 32-/128-byte boundry and take a ~20
/* Exactly 8 bytes. We may cross a 32-/128-byte boundary and take a ~20
cycle delay. This case should be rare and any attempt to avoid this
would take most of 20 cycles anyway. */
ld 6,0(4)
@ -1146,7 +1146,7 @@ L(du_done):
add 3,3,0
add 12,12,0
/* At this point we have a tail of 0-7 bytes and we know that the
destiniation is double word aligned. */
destination is double word aligned. */
4: bf 29,2f
lwz 6,0(12)
addi 12,12,4

View File

@ -55,7 +55,7 @@ EALIGN (BP_SYM(memcmp),4,0)
beq- cr6,L(zeroLength)
dcbt 0,rSTR1
dcbt 0,rSTR2
/* If less than 8 bytes or not aligned, use the unalligned
/* If less than 8 bytes or not aligned, use the unaligned
byte loop. */
blt cr1,L(bytealigned)
std rWORD8,-8(r1)
@ -64,7 +64,7 @@ EALIGN (BP_SYM(memcmp),4,0)
cfi_offset(rWORD7,-16)
bne L(unaligned)
/* At this point we know both strings have the same alignment and the
compare length is at least 8 bytes. rBITDIF containes the low order
compare length is at least 8 bytes. rBITDIF contains the low order
3 bits of rSTR1 and cr5 contains the result of the logical compare
of rBITDIF to 0. If rBITDIF == 0 then we are already double word
aligned and can perform the DWaligned loop.
@ -72,7 +72,7 @@ EALIGN (BP_SYM(memcmp),4,0)
Otherwise we know the two strings have the same alignment (but not
yet DW). So we can force the string addresses to the next lower DW
boundary and special case this first DW word using shift left to
ellimiate bits preceeding the first byte. Since we want to join the
eliminate bits preceding the first byte. Since we want to join the
normal (DWaligned) compare loop, starting at the second double word,
we need to adjust the length (rN) and special case the loop
versioning for the first DW. This ensures that the loop count is
@ -154,8 +154,8 @@ L(DWaligned):
L(dP1):
mtctr rTMP
/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
(8-15 byte compare), we want to use only volitile registers. This
means we can avoid restoring non-volitile registers since we did not
(8-15 byte compare), we want to use only volatile registers. This
means we can avoid restoring non-volatile registers since we did not
change any on the early exit path. The key here is the non-early
exit path only cares about the condition code (cr5), not about which
register pair was used. */
@ -217,7 +217,7 @@ L(dP2e):
bne cr5,L(dLcr5)
b L(dLoop2)
/* Again we are on an early exit path (16-23 byte compare), we want to
only use volitile registers and avoid restoring non-volitile
only use volatile registers and avoid restoring non-volatile
registers. */
.align 4
L(dP2x):
@ -258,7 +258,7 @@ L(dP3e):
bne cr6,L(dLcr6)
b L(dLoop1)
/* Again we are on an early exit path (24-31 byte compare), we want to
only use volitile registers and avoid restoring non-volitile
only use volatile registers and avoid restoring non-volatile
registers. */
.align 4
L(dP3x):
@ -342,7 +342,7 @@ L(d04):
beq L(zeroLength)
/* At this point we have a remainder of 1 to 7 bytes to compare. Since
we are aligned it is safe to load the whole double word, and use
shift right double to elliminate bits beyond the compare length. */
shift right double to eliminate bits beyond the compare length. */
L(d00):
ld rWORD1,8(rSTR1)
ld rWORD2,8(rSTR2)
@ -498,15 +498,15 @@ L(zeroLength):
.align 4
/* At this point we know the strings have different alignment and the
compare length is at least 8 bytes. rBITDIF containes the low order
compare length is at least 8 bytes. rBITDIF contains the low order
3 bits of rSTR1 and cr5 contains the result of the logical compare
of rBITDIF to 0. If rBITDIF == 0 then rSTR1 is double word
aligned and can perform the DWunaligned loop.
Otherwise we know that rSTR1 is not aready DW aligned yet.
Otherwise we know that rSTR1 is not already DW aligned yet.
So we can force the string addresses to the next lower DW
boundary and special case this first DW word using shift left to
ellimiate bits preceeding the first byte. Since we want to join the
eliminate bits preceding the first byte. Since we want to join the
normal (DWaligned) compare loop, starting at the second double word,
we need to adjust the length (rN) and special case the loop
versioning for the first DW. This ensures that the loop count is
@ -539,7 +539,7 @@ L(unaligned):
clrrdi rSTR2,rSTR2,3
std r26,-48(r1)
cfi_offset(r26,-48)
/* Compute the leaft/right shift counts for the unalign rSTR2,
/* Compute the left/right shift counts for the unaligned rSTR2,
compensating for the logical (DW aligned) start of rSTR1. */
clrldi rSHL,r27,61
clrrdi rSTR1,rSTR1,3
@ -878,7 +878,7 @@ L(du14):
sldi. rN,rN,3
bne cr5,L(duLcr5)
/* At this point we have a remainder of 1 to 7 bytes to compare. We use
shift right double to elliminate bits beyond the compare length.
shift right double to eliminate bits beyond the compare length.
This allows the use of double word subtract to compute the final
result.

View File

@ -52,7 +52,7 @@ L(proceed):
cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */
sld r10,r10,r0
srd r10,r10,r0
cmpldi cr7,r10,0 /* If r10 == 0, no BYTE's have been found. */
cmpldi cr7,r10,0 /* If r10 == 0, no BYTEs have been found. */
bne cr7,L(done)
/* Are we done already? */

View File

@ -40,8 +40,8 @@ ENTRY (BP_SYM (__STRCMP))
#define rSTR1 r5 /* 1st string */
#define rSTR2 r4 /* 2nd string */
#define rLOCARG r5 /* 3rd argument: locale_t */
#define rCHAR1 r6 /* Byte readed from 1st string */
#define rCHAR2 r7 /* Byte readed from 2nd string */
#define rCHAR1 r6 /* Byte read from 1st string */
#define rCHAR2 r7 /* Byte read from 2nd string */
#define rADDR1 r8 /* Address of tolower(rCHAR1) */
#define rADDR2 r12 /* Address of tolower(rCHAR2) */
#define rLWR1 r8 /* Word tolower(rCHAR1) */

View File

@ -56,7 +56,7 @@ EALIGN (BP_SYM(strncmp),5,0)
cmpldi cr1,rN,0
lis rFEFE,-0x101
bne L(unaligned)
/* We are doubleword alligned so set up for two loops. first a double word
/* We are doubleword aligned so set up for two loops. first a double word
loop, then fall into the byte loop if any residual. */
srdi. rTMP,rN,3
clrldi rN,rN,61

View File

@ -50,7 +50,7 @@ EALIGN (BP_SYM(strncmp), 4, 0)
cmpldi cr1, rN, 0
lis rFEFE, -0x101
bne L(unaligned)
/* We are doubleword alligned so set up for two loops. first a double word
/* We are doubleword aligned so set up for two loops. first a double word
loop, then fall into the byte loop if any residual. */
srdi. rTMP, rN, 3
clrldi rN, rN, 61