mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-26 06:50:07 +00:00
Create and use libc_feupdateenv_test.
We can reduce the number of STMXCSR, and often we can avoid the call to __feraiseexcept.
This commit is contained in:
parent
eb92c487b3
commit
0fe0f1f86f
@ -1,5 +1,14 @@
|
||||
2012-03-19 Richard Henderson <rth@twiddle.net>
|
||||
|
||||
* sysdeps/generic/math_private.h (default_libc_feupdateenv_test): New.
|
||||
(libc_feupdateenv_test, libc_feupdateenv_testf): New.
|
||||
(libc_feupdateenv_testl): New.
|
||||
* sysdeps/x86_64/fpu/math_private.h (libc_feupdateenv_test): New.
|
||||
(libc_feupdateenv_testf): New.
|
||||
(libc_feupdateenv): Use libc_feupdateenv_test.
|
||||
* sysdeps/ieee754/dbl-64/s_fma.c (__fma): Use libc_feupdateenv_test.
|
||||
* sysdeps/ieee754/dbl-64/s_fmaf.c (__fmaf): Likewise.
|
||||
|
||||
* sysdeps/generic/math_private.h (libc_feholdsetround): New.
|
||||
(libc_feholdsetroundf, libc_feholdsetroundl): New.
|
||||
(libc_feresetround, libc_feresetroundf, libc_feresetroundl): New.
|
||||
|
@ -457,6 +457,24 @@ default_libc_feupdateenv (fenv_t *e)
|
||||
# define libc_feupdateenv_53bit libc_feupdateenv
|
||||
#endif
|
||||
|
||||
static __always_inline int
|
||||
default_libc_feupdateenv_test (fenv_t *e, int ex)
|
||||
{
|
||||
int ret = fetestexcept (ex);
|
||||
feupdateenv (e);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifndef libc_feupdateenv_test
|
||||
# define libc_feupdateenv_test default_libc_feupdateenv_test
|
||||
#endif
|
||||
#ifndef libc_feupdateenv_testf
|
||||
# define libc_feupdateenv_testf default_libc_feupdateenv_test
|
||||
#endif
|
||||
#ifndef libc_feupdateenv_testl
|
||||
# define libc_feupdateenv_testl default_libc_feupdateenv_test
|
||||
#endif
|
||||
|
||||
/* Save and set the rounding mode. The use of fenv_t to store the old mode
|
||||
allows a target-specific version of this function to avoid converting the
|
||||
rounding mode from the fpu format. By default we have no choice but to
|
||||
|
@ -149,35 +149,36 @@ __fma (double x, double y, double z)
|
||||
|
||||
fenv_t env;
|
||||
libc_feholdexcept_setround (&env, FE_TOWARDZERO);
|
||||
|
||||
/* Perform m2 + a2 addition with round to odd. */
|
||||
u.d = a2 + m2;
|
||||
|
||||
if (__builtin_expect (adjust < 0, 0))
|
||||
{
|
||||
if ((u.ieee.mantissa1 & 1) == 0)
|
||||
u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
|
||||
v.d = a1 + u.d;
|
||||
}
|
||||
|
||||
/* Reset rounding mode and test for inexact simultaneously. */
|
||||
int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0;
|
||||
|
||||
if (__builtin_expect (adjust == 0, 1))
|
||||
{
|
||||
if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
|
||||
u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
|
||||
libc_feupdateenv (&env);
|
||||
u.ieee.mantissa1 |= j;
|
||||
/* Result is a1 + u.d. */
|
||||
return a1 + u.d;
|
||||
}
|
||||
else if (__builtin_expect (adjust > 0, 1))
|
||||
{
|
||||
if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
|
||||
u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
|
||||
libc_feupdateenv (&env);
|
||||
u.ieee.mantissa1 |= j;
|
||||
/* Result is a1 + u.d, scaled up. */
|
||||
return (a1 + u.d) * 0x1p53;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((u.ieee.mantissa1 & 1) == 0)
|
||||
u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
|
||||
v.d = a1 + u.d;
|
||||
int j = libc_fetestexcept (FE_INEXACT) != 0;
|
||||
libc_feupdateenv (&env);
|
||||
/* Ensure the following computations are performed in default rounding
|
||||
mode instead of just reusing the round to zero computation. */
|
||||
asm volatile ("" : "=m" (u) : "m" (u));
|
||||
/* If a1 + u.d is exact, the only rounding happens during
|
||||
scaling down. */
|
||||
if (j == 0)
|
||||
|
@ -35,12 +35,18 @@ __fmaf (float x, float y, float z)
|
||||
/* Multiplication is always exact. */
|
||||
double temp = (double) x * (double) y;
|
||||
union ieee754_double u;
|
||||
libc_feholdexcept_setroundf (&env, FE_TOWARDZERO);
|
||||
|
||||
libc_feholdexcept_setround (&env, FE_TOWARDZERO);
|
||||
|
||||
/* Perform addition with round to odd. */
|
||||
u.d = temp + (double) z;
|
||||
|
||||
/* Reset rounding mode and test for inexact simultaneously. */
|
||||
int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0;
|
||||
|
||||
if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
|
||||
u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
|
||||
libc_feupdateenv (&env);
|
||||
u.ieee.mantissa1 |= j;
|
||||
|
||||
/* And finally truncation with round to nearest. */
|
||||
return (float) u.d;
|
||||
}
|
||||
|
@ -108,13 +108,33 @@ libc_fesetenv (fenv_t *e)
|
||||
#define libc_fesetenv libc_fesetenv
|
||||
#define libc_fesetenvf libc_fesetenv
|
||||
|
||||
static __always_inline int
|
||||
libc_feupdateenv_test (fenv_t *e, int ex)
|
||||
{
|
||||
unsigned int mxcsr, old_mxcsr, cur_ex;
|
||||
asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));
|
||||
cur_ex = mxcsr & FE_ALL_EXCEPT;
|
||||
|
||||
/* Merge current exceptions with the old environment. */
|
||||
old_mxcsr = e->__mxcsr;
|
||||
mxcsr = old_mxcsr | cur_ex;
|
||||
asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
|
||||
|
||||
/* Raise SIGFPE for any new exceptions since the hold. Expect that
|
||||
the normal environment has all exceptions masked. */
|
||||
if (__builtin_expect ((old_mxcsr >> 7) & cur_ex, 0))
|
||||
__feraiseexcept (cur_ex);
|
||||
|
||||
/* Test for exceptions raised since the hold. */
|
||||
return cur_ex & ex;
|
||||
}
|
||||
#define libc_feupdateenv_test libc_feupdateenv_test
|
||||
#define libc_feupdateenv_testf libc_feupdateenv_test
|
||||
|
||||
static __always_inline void
|
||||
libc_feupdateenv (fenv_t *e)
|
||||
{
|
||||
unsigned int mxcsr;
|
||||
asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));
|
||||
asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr));
|
||||
__feraiseexcept (mxcsr & FE_ALL_EXCEPT);
|
||||
libc_feupdateenv_test (e, 0);
|
||||
}
|
||||
#define libc_feupdateenv libc_feupdateenv
|
||||
#define libc_feupdateenvf libc_feupdateenv
|
||||
|
Loading…
Reference in New Issue
Block a user