Optimize f_mod

Branch prediction for the 32-bit implementation and a new optimized 64-bit implementation.
2024-12-23 11:20:07 +00:00 · 2011-10-23 20:55:26 -04:00 · 2011-10-23 20:55:26 -04:00 · 0275fff8a1
commit 0275fff8a1
parent 10d005f77f
3 changed files with 113 additions and 6 deletions
--- a/4
+++ b/4
@ -1,5 +1,9 @@
 2011-10-23  Ulrich Drepper  <drepper@gmail.com>
 	* sysdeps/ieee754/dbl-64/e_fmod.c (__ieee754_fmod): Add some branch
 	prediction.
 	* sysdeps/ieee754/dbl-64/wordsize-64/e_fmod.c: New file.
 	* string/strnlen.c: Don't define STRNLEN, reverse logic.
 	Remove unused variable magic_bits.
 	* sysdeps/i386/i686/multiarch/rtld-strnlen.c: New file.
--- a/sysdeps/ieee754/dbl-64/e_fmod.c
+++ b/sysdeps/ieee754/dbl-64/e_fmod.c
@ -1,4 +1,3 @@
 /* @(#)e_fmod.c 5.1 93/09/24 */
 /*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
@ -44,7 +43,7 @@ __ieee754_fmod (double x, double y)
 	}
    /* determine ix = ilogb(x) */
-	if(hx<0x00100000) {	/* subnormal x */
+	if(__builtin_expect(hx<0x00100000, 0)) {	/* subnormal x */
 	    if(hx==0) {
 		for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
 	    } else {
@ -53,7 +52,7 @@ __ieee754_fmod (double x, double y)
 	} else ix = (hx>>20)-1023;
    /* determine iy = ilogb(y) */
-	if(hy<0x00100000) {	/* subnormal y */
+	if(__builtin_expect(hy<0x00100000, 0)) {	/* subnormal y */
 	    if(hy==0) {
 		for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
 	    } else {
@ -62,7 +61,7 @@ __ieee754_fmod (double x, double y)
 	} else iy = (hy>>20)-1023;
    /* set up {hx,lx}, {hy,ly} and align y to x */
-	if(ix >= -1022)
+	if(__builtin_expect(ix >= -1022, 1))
 	    hx = 0x00100000|(0x000fffff&hx);
 	else {		/* subnormal x, shift x to normal */
 	    n = -1022-ix;
@ -74,7 +73,7 @@ __ieee754_fmod (double x, double y)
 		lx = 0;
 	    }
 	}
-	if(iy >= -1022)
+	if(__builtin_expect(iy >= -1022, 1))
 	    hy = 0x00100000|(0x000fffff&hy);
 	else {		/* subnormal y, shift y to normal */
 	    n = -1022-iy;
@ -108,7 +107,7 @@ __ieee754_fmod (double x, double y)
 	    hx = hx+hx+(lx>>31); lx = lx+lx;
 	    iy -= 1;
 	}
-	if(iy>= -1022) {	/* normalize output */
+	if(__builtin_expect(iy>= -1022, 1)) {	/* normalize output */
 	    hx = ((hx-0x00100000)|((iy+1023)<<20));
 	    INSERT_WORDS(x,hx|sx,lx);
 	} else {		/* subnormal output */
--- a/sysdeps/ieee754/dbl-64/wordsize-64/e_fmod.c
+++ b/sysdeps/ieee754/dbl-64/wordsize-64/e_fmod.c
@ -0,0 +1,104 @@
 /* Rewritten for 64-bit machines by Ulrich Drepper <drepper@gmail.com>.  */
 /*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
 /*
 * __ieee754_fmod(x,y)
 * Return x mod y in exact arithmetic
 * Method: shift and subtract
 */
 #include "math.h"
 #include "math_private.h"
 static const double one = 1.0, Zero[] = {0.0, -0.0,};
 double
 __ieee754_fmod (double x, double y)
 {
 	int32_t n,i,ix,iy;
 	int64_t hx,hy,hz,sx;
 	EXTRACT_WORDS64(hx,x);
 	EXTRACT_WORDS64(hy,y);
 	sx = hx&UINT64_C(0x8000000000000000);	/* sign of x */
 	hx ^=sx;				/* |x| */
 	hy &= UINT64_C(0x7fffffffffffffff);	/* |y| */
    /* purge off exception values */
 	if(__builtin_expect(hy==0
 			    || hx >= UINT64_C(0x7ff0000000000000)
 			    || hy > UINT64_C(0x7ff0000000000000), 0))
 	  /* y=0,or x not finite or y is NaN */
 	    return (x*y)/(x*y);
 	if(__builtin_expect(hx<=hy, 0)) {
 	    if(hx<hy) return x;	/* |x|<|y| return x */
 	    return Zero[(uint64_t)sx>>63];	/* |x|=|y| return x*0*/
 	}
    /* determine ix = ilogb(x) */
 	if(__builtin_expect(hx<UINT64_C(0x0010000000000000), 0)) {
 	  /* subnormal x */
 	  for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1;
 	} else ix = (hx>>52)-1023;
    /* determine iy = ilogb(y) */
 	if(__builtin_expect(hy<UINT64_C(0x0010000000000000), 0)) {	/* subnormal y */
 	  for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1;
 	} else iy = (hy>>52)-1023;
    /* set up hx, hy and align y to x */
 	if(__builtin_expect(ix >= -1022, 1))
 	    hx = UINT64_C(0x0010000000000000)|(UINT64_C(0x000fffffffffffff)&hx);
 	else {		/* subnormal x, shift x to normal */
 	    n = -1022-ix;
 	    hx<<=n;
 	}
 	if(__builtin_expect(iy >= -1022, 1))
 	    hy = UINT64_C(0x0010000000000000)|(UINT64_C(0x000fffffffffffff)&hy);
 	else {		/* subnormal y, shift y to normal */
 	    n = -1022-iy;
 	    hy<<=n;
 	}
    /* fix point fmod */
 	n = ix - iy;
 	while(n--) {
 	    hz=hx-hy;
 	    if(hz<0){hx = hx+hx;}
 	    else {
 		if(hz==0)		/* return sign(x)*0 */
 		    return Zero[(uint64_t)sx>>63];
 		hx = hz+hz;
 	    }
 	}
 	hz=hx-hy;
 	if(hz>=0) {hx=hz;}
    /* convert back to floating value and restore the sign */
 	if(hx==0)			/* return sign(x)*0 */
 	    return Zero[(uint64_t)sx>>63];
 	while(hx<UINT64_C(0x0010000000000000)) {	/* normalize x */
 	    hx = hx+hx;
 	    iy -= 1;
 	}
 	if(__builtin_expect(iy>= -1022, 1)) {	/* normalize output */
 	  hx = ((hx-UINT64_C(0x0010000000000000))|((uint64_t)(iy+1023)<<52));
 	    INSERT_WORDS64(x,hx|sx);
 	} else {		/* subnormal output */
 	    n = -1022 - iy;
 	    hx>>=n;
 	    INSERT_WORDS64(x,hx|sx);
 	    x *= one;		/* create necessary signal */
 	}
 	return x;		/* exact output */
 }
 strong_alias (__ieee754_fmod, __fmod_finite)