Implement accurate fma.

This commit is contained in:
Jakub Jelinek 2010-10-13 22:27:03 -04:00 committed by Ulrich Drepper
parent f90681487d
commit 5e908464b9
17 changed files with 467 additions and 10 deletions

View File

@ -1,3 +1,35 @@
2010-10-13 Jakub Jelinek <jakub@redhat.com>
[BZ #3268]
* math/libm-test.inc (fma_test): Some more fmaf and fma tests.
* sysdeps/i386/i686/multiarch/s_fma.c: Include ldbl-96 version
instead of dbl-64.
* sysdeps/i386/fpu/bits/mathinline.h (fma, fmaf, fmal): Remove
inlines.
* sysdeps/ieee754/ldbl-96/s_fma.c: New file.
* sysdeps/ieee754/dbl-64/s_fma.c (__fma): Fix exponent adjustment
if one of x and y is very large and the other is subnormal.
* sysdeps/s390/fpu/s_fmaf.c: New file.
* sysdeps/s390/fpu/s_fma.c: New file.
* sysdeps/powerpc/fpu/s_fmaf.S: New file.
* sysdeps/powerpc/fpu/s_fma.S: New file.
* sysdeps/powerpc/powerpc32/fpu/s_fma.S: New file.
* sysdeps/powerpc/powerpc64/fpu/s_fma.S: New file.
* sysdeps/unix/sysv/linux/s390/fpu/s_fma.c: New file.
2010-10-12 Jakub Jelinek <jakub@redhat.com>
[BZ #3268]
* math/libm-test.inc (fma_test): Add some more fmaf tests, add
fma tests.
* sysdeps/ieee754/dbl-64/s_fmaf.c (__fmaf): Fix Inf/Nan check.
* sysdeps/ieee754/dbl-64/s_fma.c: New file.
* sysdeps/i386/i686/multiarch/s_fma.c: Include
sysdeps/ieee754/dbl-64/s_fma.c instead of math/s_fma.c.
* sysdeps/x86_64/multiarch/s_fma.c: Likewise.
* sysdeps/ieee754/ldbl-opt/s_fma.c: Likewise.
* sysdeps/ieee754/ldbl-128/s_fma.c: New file.
2010-10-12 Ulrich Drepper <drepper@redhat.com> 2010-10-12 Ulrich Drepper <drepper@redhat.com>
[BZ #12078] [BZ #12078]

View File

@ -2789,9 +2789,25 @@ fma_test (void)
TEST_fff_f (fma, minus_infty, minus_infty, minus_infty, nan_value, INVALID_EXCEPTION); TEST_fff_f (fma, minus_infty, minus_infty, minus_infty, nan_value, INVALID_EXCEPTION);
TEST_fff_f (fma, 1.25L, 0.75L, 0.0625L, 1.0L); TEST_fff_f (fma, 1.25L, 0.75L, 0.0625L, 1.0L);
#ifdef TEST_FLOAT #if defined (TEST_FLOAT) && FLT_MANT_DIG == 24
TEST_fff_f (fma, 0x1.7ff8p+13, 0x1.000002p+0, 0x1.ffffp-24, 0x1.7ff802p+13); TEST_fff_f (fma, 0x1.7ff8p+13, 0x1.000002p+0, 0x1.ffffp-24, 0x1.7ff802p+13);
TEST_fff_f (fma, 0x1.fffp+0, 0x1.00001p+0, -0x1.fffp+0, 0x1.fffp-20); TEST_fff_f (fma, 0x1.fffp+0, 0x1.00001p+0, -0x1.fffp+0, 0x1.fffp-20);
TEST_fff_f (fma, 0x1.9abcdep+127, 0x0.9abcdep-126, -0x1.f08948p+0, 0x1.bb421p-25);
TEST_fff_f (fma, 0x1.9abcdep+100, 0x0.9abcdep-126, -0x1.f08948p-27, 0x1.bb421p-52);
TEST_fff_f (fma, 0x1.fffffep+127, 0x1.001p+0, -0x1.fffffep+127, 0x1.fffffep+115);
TEST_fff_f (fma, -0x1.fffffep+127, 0x1.fffffep+0, 0x1.fffffep+127, -0x1.fffffap+127);
TEST_fff_f (fma, 0x1.fffffep+127, 2.0, -0x1.fffffep+127, 0x1.fffffep+127);
#endif
#if defined (TEST_DOUBLE) && DBL_MANT_DIG == 53
TEST_fff_f (fma, 0x1.7fp+13, 0x1.0000000000001p+0, 0x1.ffep-48, 0x1.7f00000000001p+13);
TEST_fff_f (fma, 0x1.fffp+0, 0x1.0000000000001p+0, -0x1.fffp+0, 0x1.fffp-52);
TEST_fff_f (fma, 0x1.0000002p+0, 0x1.ffffffcp-1, 0x1p-300, 1.0);
TEST_fff_f (fma, 0x1.0000002p+0, 0x1.ffffffcp-1, -0x1p-300, 0x1.fffffffffffffp-1);
TEST_fff_f (fma, 0x1.deadbeef2feedp+1023, 0x0.deadbeef2feedp-1022, -0x1.a05f8c01a4bfbp+1, 0x1.0989687bc9da4p-53);
TEST_fff_f (fma, 0x1.deadbeef2feedp+900, 0x0.deadbeef2feedp-1022, -0x1.a05f8c01a4bfbp-122, 0x1.0989687bc9da4p-176);
TEST_fff_f (fma, 0x1.fffffffffffffp+1023, 0x1.001p+0, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1011);
TEST_fff_f (fma, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+0, 0x1.fffffffffffffp+1023, -0x1.ffffffffffffdp+1023);
TEST_fff_f (fma, 0x1.fffffffffffffp+1023, 2.0, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023);
#endif #endif
END (fma); END (fma);

View File

@ -1,6 +1,6 @@
/* Inline math functions for i387. /* Inline math functions for i387.
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2003,2004,2006,2007,2009 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2003,2004,2006,2007,2009,
Free Software Foundation, Inc. 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
Contributed by John C. Bowman <bowman@math.ualberta.ca>, 1995. Contributed by John C. Bowman <bowman@math.ualberta.ca>, 1995.
@ -657,8 +657,6 @@ __NTH (ldexpl (long double __x, int __y))
__ldexp_code; __ldexp_code;
} }
__inline_mathcodeNP3 (fma, __x, __y, __z, return (__x * __y) + __z)
__inline_mathopNP (rint, "frndint") __inline_mathopNP (rint, "frndint")
# endif /* __FAST_MATH__ */ # endif /* __FAST_MATH__ */

View File

@ -33,4 +33,4 @@ weak_alias (__fma, fma)
# define __fma __fma_ia32 # define __fma __fma_ia32
#endif #endif
#include <math/s_fma.c> #include <sysdeps/ieee754/ldbl-96/s_fma.c>

View File

@ -0,0 +1,146 @@
/* Compute x * y + z as ternary operation.
Copyright (C) 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <float.h>
#include <math.h>
#include <fenv.h>
#include <ieee754.h>
/* This implementation uses rounding to odd to avoid problems with
double rounding. See a paper by Boldo and Melquiond:
http://www.lri.fr/~melquion/doc/08-tc.pdf */
double
__fma (double x, double y, double z)
{
union ieee754_double u, v, w;
int adjust = 0;
u.d = x;
v.d = y;
w.d = z;
if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
>= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG, 0)
|| __builtin_expect (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
|| __builtin_expect (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
|| __builtin_expect (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0))
{
/* If x or y or z is Inf/NaN or if fma will certainly overflow,
compute as x * y + z. */
if (u.ieee.exponent == 0x7ff
|| v.ieee.exponent == 0x7ff
|| w.ieee.exponent == 0x7ff
|| u.ieee.exponent + v.ieee.exponent
> 0x7ff + IEEE754_DOUBLE_BIAS)
return x * y + z;
if (u.ieee.exponent + v.ieee.exponent
>= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
{
/* Compute 1p-53 times smaller result and multiply
at the end. */
if (u.ieee.exponent > v.ieee.exponent)
u.ieee.exponent -= DBL_MANT_DIG;
else
v.ieee.exponent -= DBL_MANT_DIG;
/* If x + y exponent is very large and z exponent is very small,
it doesn't matter if we don't adjust it. */
if (w.ieee.exponent > DBL_MANT_DIG)
w.ieee.exponent -= DBL_MANT_DIG;
adjust = 1;
}
else if (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
{
/* Similarly.
If z exponent is very large and x and y exponents are
very small, it doesn't matter if we don't adjust it. */
if (u.ieee.exponent > v.ieee.exponent)
{
if (u.ieee.exponent > DBL_MANT_DIG)
u.ieee.exponent -= DBL_MANT_DIG;
}
else if (v.ieee.exponent > DBL_MANT_DIG)
v.ieee.exponent -= DBL_MANT_DIG;
w.ieee.exponent -= DBL_MANT_DIG;
adjust = 1;
}
else if (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
{
u.ieee.exponent -= DBL_MANT_DIG;
if (v.ieee.exponent)
v.ieee.exponent += DBL_MANT_DIG;
else
v.d *= 0x1p53;
}
else
{
v.ieee.exponent -= DBL_MANT_DIG;
if (u.ieee.exponent)
u.ieee.exponent += DBL_MANT_DIG;
else
u.d *= 0x1p53;
}
x = u.d;
y = v.d;
z = w.d;
}
/* Multiplication m1 + m2 = x * y using Dekker's algorithm. */
#define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
double x1 = x * C;
double y1 = y * C;
double m1 = x * y;
x1 = (x - x1) + x1;
y1 = (y - y1) + y1;
double x2 = x - x1;
double y2 = y - y1;
double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
/* Addition a1 + a2 = z + m1 using Knuth's algorithm. */
double a1 = z + m1;
double t1 = a1 - z;
double t2 = a1 - t1;
t1 = m1 - t1;
t2 = z - t2;
double a2 = t1 + t2;
fenv_t env;
feholdexcept (&env);
fesetround (FE_TOWARDZERO);
/* Perform m2 + a2 addition with round to odd. */
u.d = a2 + m2;
if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
feupdateenv (&env);
/* Add that to a1. */
a1 = a1 + u.d;
/* And adjust exponent if needed. */
if (__builtin_expect (adjust, 0))
a1 *= 0x1p53;
return a1;
}
#ifndef __fma
weak_alias (__fma, fma)
#endif
#ifdef NO_LONG_DOUBLE
strong_alias (__fma, __fmal)
weak_alias (__fmal, fmal)
#endif

View File

@ -39,7 +39,7 @@ __fmaf (float x, float y, float z)
fesetround (FE_TOWARDZERO); fesetround (FE_TOWARDZERO);
/* Perform addition with round to odd. */ /* Perform addition with round to odd. */
u.d = temp + (double) z; u.d = temp + (double) z;
if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0xff) if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0; u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
feupdateenv (&env); feupdateenv (&env);
/* And finally truncation with round to nearest. */ /* And finally truncation with round to nearest. */

View File

@ -0,0 +1,50 @@
/* Compute x * y + z as ternary operation.
Copyright (C) 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <math.h>
#include <fenv.h>
#include <ieee754.h>
/* This implementation relies on long double being more than twice as
precise as double and uses rounding to odd in order to avoid problems
with double rounding.
See a paper by Boldo and Melquiond:
http://www.lri.fr/~melquion/doc/08-tc.pdf */
double
__fma (double x, double y, double z)
{
fenv_t env;
/* Multiplication is always exact. */
long double temp = (long double) x * (long double) y;
union ieee854_long_double u;
feholdexcept (&env);
fesetround (FE_TOWARDZERO);
/* Perform addition with round to odd. */
u.d = temp + (long double) z;
if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
feupdateenv (&env);
/* And finally truncation with round to nearest. */
return (double) u.d;
}
#ifndef __fma
weak_alias (__fma, fma)
#endif

View File

@ -0,0 +1,70 @@
/* Compute x * y + z as ternary operation.
Copyright (C) 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <float.h>
#include <math.h>
#include <fenv.h>
#include <ieee754.h>
/* This implementation uses rounding to odd to avoid problems with
double rounding. See a paper by Boldo and Melquiond:
http://www.lri.fr/~melquion/doc/08-tc.pdf */
double
__fma (double x, double y, double z)
{
/* Multiplication m1 + m2 = x * y using Dekker's algorithm. */
#define C ((1ULL << (LDBL_MANT_DIG + 1) / 2) + 1)
long double x1 = x * C;
long double y1 = y * C;
long double m1 = x * y;
x1 = (x - x1) + x1;
y1 = (y - y1) + y1;
long double x2 = x - x1;
long double y2 = y - y1;
long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
/* Addition a1 + a2 = z + m1 using Knuth's algorithm. */
long double a1 = z + m1;
long double t1 = a1 - z;
long double t2 = a1 - t1;
t1 = m1 - t1;
t2 = z - t2;
long double a2 = t1 + t2;
fenv_t env;
feholdexcept (&env);
fesetround (FE_TOWARDZERO);
/* Perform m2 + a2 addition with round to odd. */
a2 = a2 + m2;
/* Add that to a1 again using rounding to odd. */
union ieee854_long_double u;
u.d = a1 + a2;
if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)
u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
feupdateenv (&env);
/* Add finally round to double precision. */
return u.d;
}
#ifndef __fma
weak_alias (__fma, fma)
#endif

View File

@ -1,5 +1,5 @@
#include <math_ldbl_opt.h> #include <math_ldbl_opt.h>
#include <math/s_fma.c> #include <sysdeps/ieee754/dbl-64/s_fma.c>
#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) #if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
compat_symbol (libm, __fma, fmal, GLIBC_2_1); compat_symbol (libm, __fma, fmal, GLIBC_2_1);
#endif #endif

View File

@ -0,0 +1,33 @@
/* Compute x * y + z as ternary operation. PowerPC version.
Copyright (C) 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <sysdep.h>
ENTRY(__fma)
/* double [f1] fma (double [f1] x, double [f2] y, double [f3] z); */
fmadd fp1,fp1,fp2,fp3
blr
END(__fma)
weak_alias (__fma,fma)
#ifdef NO_LONG_DOUBLE
weak_alias (__fma,__fmal)
weak_alias (__fma,fmal)
#endif

View File

@ -0,0 +1,28 @@
/* Compute x * y + z as ternary operation. PowerPC version.
Copyright (C) 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <sysdep.h>
ENTRY(__fmaf)
/* float [f1] fmaf (float [f1] x, float [f2] y, float [f3] z); */
fmadds fp1,fp1,fp2,fp3
blr
END(__fmaf)
weak_alias (__fmaf,fmaf)

View File

@ -0,0 +1,5 @@
#include <math_ldbl_opt.h>
#include <sysdeps/powerpc/fpu/s_fma.S>
#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
compat_symbol (libm, __fma, fmal, GLIBC_2_1)
#endif

View File

@ -0,0 +1,5 @@
#include <math_ldbl_opt.h>
#include <sysdeps/powerpc/fpu/s_fma.S>
#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
compat_symbol (libm, __fma, fmal, GLIBC_2_1)
#endif

37
sysdeps/s390/fpu/s_fma.c Normal file
View File

@ -0,0 +1,37 @@
/* Compute x * y + z as ternary operation. S/390 version.
Copyright (C) 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <math.h>
double
__fma (double x, double y, double z)
{
double r;
asm ("madbr %0,%1,%2" : "=f" (r) : "%f" (x), "fR" (y), "0" (z));
return r;
}
#ifndef __fma
weak_alias (__fma, fma)
#endif
#ifdef NO_LONG_DOUBLE
strong_alias (__fma, __fmal)
weak_alias (__fmal, fmal)
#endif

32
sysdeps/s390/fpu/s_fmaf.c Normal file
View File

@ -0,0 +1,32 @@
/* Compute x * y + z as ternary operation. S/390 version.
Copyright (C) 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <math.h>
float
__fmaf (float x, float y, float z)
{
float r;
asm ("maebr %0,%1,%2" : "=f" (r) : "%f" (x), "fR" (y), "0" (z));
return r;
}
#ifndef __fmaf
weak_alias (__fmaf, fmaf)
#endif

View File

@ -0,0 +1,5 @@
#include <math_ldbl_opt.h>
#include <sysdeps/s390/fpu/s_fma.c>
#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
compat_symbol (libm, __fma, fmal, GLIBC_2_1);
#endif

View File

@ -1,5 +1,5 @@
/* FMA version of fma. /* FMA version of fma.
Copyright (C) 2009 Free Software Foundation, Inc. Copyright (C) 2009, 2010 Free Software Foundation, Inc.
Contributed by Intel Corporation. Contributed by Intel Corporation.
This file is part of the GNU C Library. This file is part of the GNU C Library.
@ -40,4 +40,4 @@ weak_alias (__fma, fma)
# define __fma __fma_sse2 # define __fma __fma_sse2
#endif #endif
#include <math/s_fma.c> #include <sysdeps/ieee754/dbl-64/s_fma.c>