Start optimizing the use of the fenv interfaces in libm itself

This commit is contained in:
Ulrich Drepper 2011-10-18 09:00:46 -04:00
parent 83c7615c2d
commit d38f1dba00
5 changed files with 95 additions and 31 deletions

View File

@ -1,3 +1,18 @@
2011-10-18 Ulrich Drepper <drepper@gmail.com>
* math/math_private.h: Define defaults for libc_fegetround,
libc_fegetroundf, libc_fegetroundl, libc_fesetround, libc_fesetroundf,
libc_fesetroundl, libc_feholdexcept, libc_feholdexceptf,
libc_feholdexceptl, libc_fesetenv, libc_fesetenvf, libc_fesetenvl.
* sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c: Use
libc_feholdexcept, libc_fesetround, libc_fesetenv instead of the
standard functions.
* sysdeps/ieee754/dbl-64/e_exp2.c: Likewise.
Remove comments and hacks for old compiler versions.
* sysdeps/x86_64/fpu/math_private.h: Define special versions of
libc_fegetround, libc_fesetround, libc_feholdexcept, and
libc_fesetenv.
2011-10-18 Andreas Schwab <schwab@redhat.com>
* sysdeps/x86_64/fpu/bits/fenv.h: Add C linkage markers.

View File

@ -358,4 +358,26 @@ extern void __docos (double __x, double __dx, double __v[]);
#define math_force_eval(x) __asm __volatile ("" : : "m" (x))
#endif
/* The standardized <fenv.h> interfaces come in a single variant only.
   On some architectures libm can be more efficient when it is known
   which operations are going to be performed, so internal code goes
   through the libc_* wrappers below, each available in three spellings
   (no suffix, 'f', 'l').  These are the defaults: every wrapper simply
   forwards to the normal interface.  */

/* Query the current rounding mode.  */
#define libc_fegetround()	(fegetround ())
#define libc_fegetroundf()	(fegetround ())
#define libc_fegetroundl()	(fegetround ())

/* Select rounding mode R; the result of fesetround is discarded.  */
#define libc_fesetround(r)	((void) fesetround (r))
#define libc_fesetroundf(r)	((void) fesetround (r))
#define libc_fesetroundl(r)	((void) fesetround (r))

/* Call feholdexcept on environment pointer E; the result is discarded.  */
#define libc_feholdexcept(e)	((void) feholdexcept (e))
#define libc_feholdexceptf(e)	((void) feholdexcept (e))
#define libc_feholdexceptl(e)	((void) feholdexcept (e))

/* Call fesetenv on environment pointer E; the result is discarded.  */
#define libc_fesetenv(e)	((void) fesetenv (e))
#define libc_fesetenvf(e)	((void) fesetenv (e))
#define libc_fesetenvl(e)	((void) fesetenv (e))
#endif /* _MATH_PRIVATE_H_ */

View File

@ -25,9 +25,6 @@
17 (1), March 1991, pp. 26-45.
It has been slightly modified to compute 2^x instead of e^x.
*/
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <float.h>
#include <ieee754.h>
@ -38,13 +35,8 @@
#include "t_exp2.h"
/* XXX I know the assembler generates a warning about incorrect section
attributes. But without the attribute here the compiler places the
constants in the .data section. Ideally the constant is placed in
.rodata.cst8 so that it can be merged, but gcc sucks, it ICEs when
we try to force this section on it. --drepper */
static const volatile double TWO1023 = 8.988465674311579539e+307;
static const volatile double TWOM1000 = 9.3326361850321887899e-302;
static const double TWO1023 = 8.988465674311579539e+307;
static const double TWOM1000 = 9.3326361850321887899e-302;
double
__ieee754_exp2 (double x)
@ -72,10 +64,10 @@ __ieee754_exp2 (double x)
union ieee754_double ex2_u, scale_u;
fenv_t oldenv;
feholdexcept (&oldenv);
libc_feholdexcept (&oldenv);
#ifdef FE_TONEAREST
/* If we don't have this, it's too bad. */
fesetround (FE_TONEAREST);
libc_fesetround (FE_TONEAREST);
#endif
/* 1. Argument reduction.
@ -120,9 +112,10 @@ __ieee754_exp2 (double x)
* x + .055504110254308625)
* x + .240226506959100583)
* x + .69314718055994495) * ex2_u.d;
math_opt_barrier (x22);
/* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex). */
fesetenv (&oldenv);
libc_fesetenv (&oldenv);
result = x22 * x + ex2_u.d;

View File

@ -24,22 +24,14 @@
#include "math.h"
#include "math_private.h"
#ifdef __STDC__
static const double
#else
static double
#endif
TWO52[2]={
4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */
-4.50359962737049600000e+15, /* 0xC3300000, 0x00000000 */
};
#ifdef __STDC__
double __nearbyint(double x)
#else
double __nearbyint(x)
double x;
#endif
double
__nearbyint(double x)
{
fenv_t env;
int64_t i0,sx;
@ -47,20 +39,19 @@ TWO52[2]={
EXTRACT_WORDS64(i0,x);
sx = (i0>>63)&1;
j0 = ((i0>>52)&0x7ff)-0x3ff;
if(j0<52) {
if(__builtin_expect(j0<52, 1)) {
if(j0<0) {
if((i0&UINT64_C(0x7fffffffffffffff))==0) return x;
uint64_t i = i0 & UINT64_C(0xfffffffffffff);
i0 &= UINT64_C(0xfffe000000000000);
i0 |= (((i|-i) >> 12) & UINT64_C(0x8000000000000));
INSERT_WORDS64(x,i0);
feholdexcept (&env);
libc_feholdexcept (&env);
double w = TWO52[sx]+x;
double t = w-TWO52[sx];
fesetenv (&env);
EXTRACT_WORDS64(i0,t);
INSERT_WORDS64(t,(i0&UINT64_C(0x7fffffffffffffff))|(sx<<63));
return t;
math_opt_barrier(t);
libc_fesetenv (&env);
return copysign(t, x);
} else {
uint64_t i = UINT64_C(0x000fffffffffffff)>>j0;
if((i0&i)==0) return x; /* x is integral */
@ -73,10 +64,11 @@ TWO52[2]={
else return x; /* x is integral */
}
INSERT_WORDS64(x,i0);
feholdexcept (&env);
libc_feholdexcept (&env);
double w = TWO52[sx]+x;
double t = w-TWO52[sx];
fesetenv (&env);
math_opt_barrier (t);
libc_fesetenv (&env);
return t;
}
weak_alias (__nearbyint, nearbyint)

View File

@ -118,3 +118,45 @@ do { \
__res; })
# endif
#endif
/* Specialized variants of the <fenv.h> interfaces which only handle
either the FPU or the SSE unit. */
/* Return the current rounding mode as an int, looking only at the SSE
   control/status register (MXCSR).  The mode occupies the two bits
   selected by 0x6000; shifting right by 3 is the inverse of the shift
   libc_fesetround applies to an FE_* rounding constant.  The read is
   volatile so it is never merged with or moved across MXCSR writes.
   The __asm__/__volatile__ spellings keep the macro usable in strict
   ISO C modes, where plain `asm' is not a keyword.  The result is cast
   to int to match the type returned by fegetround.
   libc_fegetroundf and libc_fegetroundl are not overridden here.  */
#undef libc_fegetround
#define libc_fegetround() \
  (__extension__ \
   ({ \
      unsigned int __libc_csr; \
      __asm__ __volatile__ ("stmxcsr %0" : "=m" (*&__libc_csr)); \
      (int) ((__libc_csr & 0x6000) >> 3); \
   }))
/* Set the rounding mode to R (an FE_* rounding constant) in the SSE
   control/status register (MXCSR) only.  R is shifted left by 3 into
   the two bits selected by 0x6000; all other MXCSR bits are written
   back unchanged.  Both the read and the write are volatile: the read
   has no inputs, so without the qualifier the optimizer would be free
   to hoist it out of loops, merge it with another read, or move it
   across an MXCSR write, and stale bits would then be written back.
   No validation of R is performed and no status is returned.
   libc_fesetroundf and libc_fesetroundl are not overridden here.  */
#undef libc_fesetround
#define libc_fesetround(r) \
  do { \
     unsigned int __libc_csr; \
     __asm__ __volatile__ ("stmxcsr %0" : "=m" (*&__libc_csr)); \
     __libc_csr = (__libc_csr & ~0x6000) | ((r) << 3); \
     __asm__ __volatile__ ("ldmxcsr %0" : : "m" (*&__libc_csr)); \
  } while (0)
/* SSE-only counterpart of feholdexcept: store the current MXCSR value
   in E->__mxcsr, then load a value with the bits in 0x1f80 set and the
   bits in 0x3f cleared (in the MXCSR layout these are the exception
   mask bits and the sticky exception flags respectively); the rounding
   mode bits are left as they were.  No other member of *E is written.
   The read is volatile so that it cannot be merged with another MXCSR
   read or moved across an MXCSR write, which would save a stale
   environment.
   libc_feholdexceptf and libc_feholdexceptl are not overridden here.  */
#undef libc_feholdexcept
#define libc_feholdexcept(e) \
  do { \
     unsigned int __libc_csr; \
     __asm__ __volatile__ ("stmxcsr %0" : "=m" (*&__libc_csr)); \
     (e)->__mxcsr = __libc_csr; \
     __libc_csr = (__libc_csr | 0x1f80) & ~0x3f; \
     __asm__ __volatile__ ("ldmxcsr %0" : : "m" (*&__libc_csr)); \
  } while (0)
/* SSE-only counterpart of fesetenv: load MXCSR from E->__mxcsr.  E is
   dereferenced unconditionally, so it must point to a real fenv_t
   object (for example one filled in by libc_feholdexcept).  Uses the
   __asm__/__volatile__ spellings so the macro also compiles in strict
   ISO C modes, where plain `asm' is not a keyword.
   libc_fesetenvf and libc_fesetenvl are not overridden here.  */
#undef libc_fesetenv
#define libc_fesetenv(e) \
  __asm__ __volatile__ ("ldmxcsr %0" : : "m" ((e)->__mxcsr))