mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-23 03:10:05 +00:00
Optimize f_mod
Branch prediction for the 32-bit implementation and a new optimized 64-bit implementation.
This commit is contained in:
parent
10d005f77f
commit
0275fff8a1
@ -1,5 +1,9 @@
|
|||||||
2011-10-23 Ulrich Drepper <drepper@gmail.com>
|
2011-10-23 Ulrich Drepper <drepper@gmail.com>
|
||||||
|
|
||||||
|
* sysdeps/ieee754/dbl-64/e_fmod.c (__ieee754_fmod): Add some branch
|
||||||
|
prediction.
|
||||||
|
* sysdeps/ieee754/dbl-64/wordsize-64/e_fmod.c: New file.
|
||||||
|
|
||||||
* string/strnlen.c: Don't define STRNLEN, reverse logic.
|
* string/strnlen.c: Don't define STRNLEN, reverse logic.
|
||||||
Remove unused variable magic_bits.
|
Remove unused variable magic_bits.
|
||||||
* sysdeps/i386/i686/multiarch/rtld-strnlen.c: New file.
|
* sysdeps/i386/i686/multiarch/rtld-strnlen.c: New file.
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
/* @(#)e_fmod.c 5.1 93/09/24 */
|
|
||||||
/*
|
/*
|
||||||
* ====================================================
|
* ====================================================
|
||||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||||
@ -44,7 +43,7 @@ __ieee754_fmod (double x, double y)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* determine ix = ilogb(x) */
|
/* determine ix = ilogb(x) */
|
||||||
if(hx<0x00100000) { /* subnormal x */
|
if(__builtin_expect(hx<0x00100000, 0)) { /* subnormal x */
|
||||||
if(hx==0) {
|
if(hx==0) {
|
||||||
for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
|
for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
|
||||||
} else {
|
} else {
|
||||||
@ -53,7 +52,7 @@ __ieee754_fmod (double x, double y)
|
|||||||
} else ix = (hx>>20)-1023;
|
} else ix = (hx>>20)-1023;
|
||||||
|
|
||||||
/* determine iy = ilogb(y) */
|
/* determine iy = ilogb(y) */
|
||||||
if(hy<0x00100000) { /* subnormal y */
|
if(__builtin_expect(hy<0x00100000, 0)) { /* subnormal y */
|
||||||
if(hy==0) {
|
if(hy==0) {
|
||||||
for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
|
for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
|
||||||
} else {
|
} else {
|
||||||
@ -62,7 +61,7 @@ __ieee754_fmod (double x, double y)
|
|||||||
} else iy = (hy>>20)-1023;
|
} else iy = (hy>>20)-1023;
|
||||||
|
|
||||||
/* set up {hx,lx}, {hy,ly} and align y to x */
|
/* set up {hx,lx}, {hy,ly} and align y to x */
|
||||||
if(ix >= -1022)
|
if(__builtin_expect(ix >= -1022, 1))
|
||||||
hx = 0x00100000|(0x000fffff&hx);
|
hx = 0x00100000|(0x000fffff&hx);
|
||||||
else { /* subnormal x, shift x to normal */
|
else { /* subnormal x, shift x to normal */
|
||||||
n = -1022-ix;
|
n = -1022-ix;
|
||||||
@ -74,7 +73,7 @@ __ieee754_fmod (double x, double y)
|
|||||||
lx = 0;
|
lx = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(iy >= -1022)
|
if(__builtin_expect(iy >= -1022, 1))
|
||||||
hy = 0x00100000|(0x000fffff&hy);
|
hy = 0x00100000|(0x000fffff&hy);
|
||||||
else { /* subnormal y, shift y to normal */
|
else { /* subnormal y, shift y to normal */
|
||||||
n = -1022-iy;
|
n = -1022-iy;
|
||||||
@ -108,7 +107,7 @@ __ieee754_fmod (double x, double y)
|
|||||||
hx = hx+hx+(lx>>31); lx = lx+lx;
|
hx = hx+hx+(lx>>31); lx = lx+lx;
|
||||||
iy -= 1;
|
iy -= 1;
|
||||||
}
|
}
|
||||||
if(iy>= -1022) { /* normalize output */
|
if(__builtin_expect(iy>= -1022, 1)) { /* normalize output */
|
||||||
hx = ((hx-0x00100000)|((iy+1023)<<20));
|
hx = ((hx-0x00100000)|((iy+1023)<<20));
|
||||||
INSERT_WORDS(x,hx|sx,lx);
|
INSERT_WORDS(x,hx|sx,lx);
|
||||||
} else { /* subnormal output */
|
} else { /* subnormal output */
|
||||||
|
104
sysdeps/ieee754/dbl-64/wordsize-64/e_fmod.c
Normal file
104
sysdeps/ieee754/dbl-64/wordsize-64/e_fmod.c
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
/* Rewritten for 64-bit machines by Ulrich Drepper <drepper@gmail.com>. */
|
||||||
|
/*
|
||||||
|
* ====================================================
|
||||||
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||||
|
* Permission to use, copy, modify, and distribute this
|
||||||
|
* software is freely granted, provided that this notice
|
||||||
|
* is preserved.
|
||||||
|
* ====================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* __ieee754_fmod(x,y)
|
||||||
|
* Return x mod y in exact arithmetic
|
||||||
|
* Method: shift and subtract
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "math.h"
|
||||||
|
#include "math_private.h"
|
||||||
|
|
||||||
|
static const double one = 1.0, Zero[] = {0.0, -0.0,};
|
||||||
|
|
||||||
|
double
|
||||||
|
__ieee754_fmod (double x, double y)
|
||||||
|
{
|
||||||
|
int32_t n,i,ix,iy;
|
||||||
|
int64_t hx,hy,hz,sx;
|
||||||
|
|
||||||
|
EXTRACT_WORDS64(hx,x);
|
||||||
|
EXTRACT_WORDS64(hy,y);
|
||||||
|
sx = hx&UINT64_C(0x8000000000000000); /* sign of x */
|
||||||
|
hx ^=sx; /* |x| */
|
||||||
|
hy &= UINT64_C(0x7fffffffffffffff); /* |y| */
|
||||||
|
|
||||||
|
/* purge off exception values */
|
||||||
|
if(__builtin_expect(hy==0
|
||||||
|
|| hx >= UINT64_C(0x7ff0000000000000)
|
||||||
|
|| hy > UINT64_C(0x7ff0000000000000), 0))
|
||||||
|
/* y=0,or x not finite or y is NaN */
|
||||||
|
return (x*y)/(x*y);
|
||||||
|
if(__builtin_expect(hx<=hy, 0)) {
|
||||||
|
if(hx<hy) return x; /* |x|<|y| return x */
|
||||||
|
return Zero[(uint64_t)sx>>63]; /* |x|=|y| return x*0*/
|
||||||
|
}
|
||||||
|
|
||||||
|
/* determine ix = ilogb(x) */
|
||||||
|
if(__builtin_expect(hx<UINT64_C(0x0010000000000000), 0)) {
|
||||||
|
/* subnormal x */
|
||||||
|
for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1;
|
||||||
|
} else ix = (hx>>52)-1023;
|
||||||
|
|
||||||
|
/* determine iy = ilogb(y) */
|
||||||
|
if(__builtin_expect(hy<UINT64_C(0x0010000000000000), 0)) { /* subnormal y */
|
||||||
|
for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1;
|
||||||
|
} else iy = (hy>>52)-1023;
|
||||||
|
|
||||||
|
/* set up hx, hy and align y to x */
|
||||||
|
if(__builtin_expect(ix >= -1022, 1))
|
||||||
|
hx = UINT64_C(0x0010000000000000)|(UINT64_C(0x000fffffffffffff)&hx);
|
||||||
|
else { /* subnormal x, shift x to normal */
|
||||||
|
n = -1022-ix;
|
||||||
|
hx<<=n;
|
||||||
|
}
|
||||||
|
if(__builtin_expect(iy >= -1022, 1))
|
||||||
|
hy = UINT64_C(0x0010000000000000)|(UINT64_C(0x000fffffffffffff)&hy);
|
||||||
|
else { /* subnormal y, shift y to normal */
|
||||||
|
n = -1022-iy;
|
||||||
|
hy<<=n;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* fix point fmod */
|
||||||
|
n = ix - iy;
|
||||||
|
while(n--) {
|
||||||
|
hz=hx-hy;
|
||||||
|
if(hz<0){hx = hx+hx;}
|
||||||
|
else {
|
||||||
|
if(hz==0) /* return sign(x)*0 */
|
||||||
|
return Zero[(uint64_t)sx>>63];
|
||||||
|
hx = hz+hz;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
hz=hx-hy;
|
||||||
|
if(hz>=0) {hx=hz;}
|
||||||
|
|
||||||
|
/* convert back to floating value and restore the sign */
|
||||||
|
if(hx==0) /* return sign(x)*0 */
|
||||||
|
return Zero[(uint64_t)sx>>63];
|
||||||
|
while(hx<UINT64_C(0x0010000000000000)) { /* normalize x */
|
||||||
|
hx = hx+hx;
|
||||||
|
iy -= 1;
|
||||||
|
}
|
||||||
|
if(__builtin_expect(iy>= -1022, 1)) { /* normalize output */
|
||||||
|
hx = ((hx-UINT64_C(0x0010000000000000))|((uint64_t)(iy+1023)<<52));
|
||||||
|
INSERT_WORDS64(x,hx|sx);
|
||||||
|
} else { /* subnormal output */
|
||||||
|
n = -1022 - iy;
|
||||||
|
hx>>=n;
|
||||||
|
INSERT_WORDS64(x,hx|sx);
|
||||||
|
x *= one; /* create necessary signal */
|
||||||
|
}
|
||||||
|
return x; /* exact output */
|
||||||
|
}
|
||||||
|
strong_alias (__ieee754_fmod, __fmod_finite)
|
Loading…
Reference in New Issue
Block a user