glibc/sysdeps/ieee754/ldbl-96/s_fma.c

/* Compute x * y + z as ternary operation.
   Copyright (C) 2010-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <float.h>
#include <math.h>
#include <fenv.h>
#include <ieee754.h>
#include <math-barriers.h>
#include <libm-alias-double.h>

/* This implementation uses rounding to odd to avoid problems with
   double rounding.  See a paper by Boldo and Melquiond:
   http://www.lri.fr/~melquion/doc/08-tc.pdf  */

double
__fma (double x, double y, double z)
{
  if (__glibc_unlikely (!isfinite (x) || !isfinite (y)))
    return x * y + z;
  else if (__glibc_unlikely (!isfinite (z)))
    /* If z is Inf, but x and y are finite, the result should be z
       rather than NaN.  */
    return (z + x) + y;

  /* Ensure correct sign of exact 0 + 0.  */
  if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
    {
      x = math_opt_barrier (x);
      return x * y + z;
    }

  fenv_t env;
  feholdexcept (&env);
  fesetround (FE_TONEAREST);

  /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
#define C ((1ULL << (LDBL_MANT_DIG + 1) / 2) + 1)
  long double x1 = (long double) x * C;
  long double y1 = (long double) y * C;
  long double m1 = (long double) x * y;
  x1 = (x - x1) + x1;
  y1 = (y - y1) + y1;
  long double x2 = x - x1;
  long double y2 = y - y1;
  long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;

  /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
  long double a1 = z + m1;
  long double t1 = a1 - z;
  long double t2 = a1 - t1;
  t1 = m1 - t1;
  t2 = z - t2;
  long double a2 = t1 + t2;
  /* Ensure the arithmetic is not scheduled after feclearexcept call.  */
  math_force_eval (m2);
  math_force_eval (a2);
  feclearexcept (FE_INEXACT);

  /* If the result is an exact zero, ensure it has the correct sign.  */
  if (a1 == 0 && m2 == 0)
    {
      feupdateenv (&env);
      /* Ensure that round-to-nearest value of z + m1 is not reused.  */
      z = math_opt_barrier (z);
      return z + m1;
    }

  fesetround (FE_TOWARDZERO);
  /* Perform m2 + a2 addition with round to odd.  */
  a2 = a2 + m2;

  /* Add that to a1 again using rounding to odd.  */
  union ieee854_long_double u;
  u.d = a1 + a2;
  if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)
    u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
  feupdateenv (&env);

  /* Add finally round to double precision.  */
  return u.d;
}
#ifndef __fma
libm_alias_double (__fma, fma)
#endif
Implement accurate fma. 2010-10-14 02:27:03 +00:00			`/* Compute x * y + z as ternary operation.`
Update copyright dates with scripts/update-copyrights I used these shell commands: ../glibc/scripts/update-copyrights $PWD/../gnulib/build-aux/update-copyright (cd ../glibc && git commit -am"[this commit message]") and then ignored the output, which consisted lines saying "FOO: warning: copyright statement not found" for each of 6694 files FOO. I then removed trailing white space from benchtests/bench-pthread-locks.c and iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c, to work around this diagnostic from Savannah: remote: * pre-commit check failed ... remote: * error: lines with trailing whitespace found remote: error: hook declined to update refs/heads/master 2021-01-02 19:32:25 +00:00			`Copyright (C) 2010-2021 Free Software Foundation, Inc.`
Implement accurate fma. 2010-10-14 02:27:03 +00:00			`This file is part of the GNU C Library.`
			`Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.`

			`The GNU C Library is free software; you can redistribute it and/or`
			`modify it under the terms of the GNU Lesser General Public`
			`License as published by the Free Software Foundation; either`
			`version 2.1 of the License, or (at your option) any later version.`

			`The GNU C Library is distributed in the hope that it will be useful,`
			`but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`Lesser General Public License for more details.`

			`You should have received a copy of the GNU Lesser General Public`
Replace FSF snail mail address with URLs. 2012-02-09 23:18:22 +00:00			`License along with the GNU C Library; if not, see`
Prefer https to http for gnu.org and fsf.org URLs Also, change sources.redhat.com to sourceware.org. This patch was automatically generated by running the following shell script, which uses GNU sed, and which avoids modifying files imported from upstream: sed -ri ' s,(http\|ftp)(://(.\.)?(gnu\|fsf\|sourceware)\.org($\|[^.]\|\.[^a-z])),https\2,g s,(http\|ftp)(://(.\.)?)sources\.redhat\.com($\|[^.]\|\.[^a-z]),https\2sourceware.org\4,g ' \ $(find $(git ls-files) -prune -type f \ ! -name '.po' \ ! -name 'ChangeLog' \ ! -path COPYING ! -path COPYING.LIB \ ! -path manual/fdl-1.3.texi ! -path manual/lgpl-2.1.texi \ ! -path manual/texinfo.tex ! -path scripts/config.guess \ ! -path scripts/config.sub ! -path scripts/install-sh \ ! -path scripts/mkinstalldirs ! -path scripts/move-if-change \ ! -path INSTALL ! -path locale/programs/charmap-kw.h \ ! -path po/libc.pot ! -path sysdeps/gnu/errlist.c \ ! '(' -name configure \ -execdir test -f configure.ac -o -f configure.in ';' ')' \ ! '(' -name preconfigure \ -execdir test -f preconfigure.ac ';' ')' \ -print) and then by running 'make dist-prepare' to regenerate files built from the altered files, and then executing the following to cleanup: chmod a+x sysdeps/unix/sysv/linux/riscv/configure # Omit irrelevant whitespace and comment-only changes, # perhaps from a slightly-different Autoconf version. git checkout -f \ sysdeps/csky/configure \ sysdeps/hppa/configure \ sysdeps/riscv/configure \ sysdeps/unix/sysv/linux/csky/configure # Omit changes that caused a pre-commit check to fail like this: # remote: * error: sysdeps/powerpc/powerpc64/ppc-mcount.S: trailing lines git checkout -f \ sysdeps/powerpc/powerpc64/ppc-mcount.S \ sysdeps/unix/sysv/linux/s390/s390-64/syscall.S # Omit change that caused a pre-commit check to fail like this: # remote: * error: sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S: last line does not end in newline git checkout -f sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S 2019-09-07 05:40:42 +00:00			`<https://www.gnu.org/licenses/>. */`
Implement accurate fma. 2010-10-14 02:27:03 +00:00
			`#include <float.h>`
			`#include <math.h>`
			`#include <fenv.h>`
			`#include <ieee754.h>`
Do not include math-barriers.h in math_private.h. This patch continues the math_private.h cleanup by stopping math_private.h from including math-barriers.h and making the users of the barrier macros include the latter header directly. No attempt is made to remove any math_private.h includes that are now unused, except in strtod_l.c where that is done to avoid line number changes in assertions, so that installed stripped shared libraries can be compared before and after the patch. (I think the floating-point environment support in math_private.h should also move out - some architectures already have fenv_private.h as an architecture-internal header included from their math_private.h - and after moving that out might be a better time to identify unused math_private.h includes.) Tested for x86_64 and x86, and tested with build-many-glibcs.py that installed stripped shared libraries are unchanged by the patch. * sysdeps/generic/math_private.h: Do not include <math-barriers.h>. * stdlib/strtod_l.c: Include <math-barriers.h> instead of <math_private.h>. * math/fromfp.h: Include <math-barriers.h>. * math/math-narrow.h: Likewise. * math/s_nextafter.c: Likewise. * math/s_nexttowardf.c: Likewise. * sysdeps/aarch64/fpu/s_llrint.c: Likewise. * sysdeps/aarch64/fpu/s_llrintf.c: Likewise. * sysdeps/aarch64/fpu/s_lrint.c: Likewise. * sysdeps/aarch64/fpu/s_lrintf.c: Likewise. * sysdeps/i386/fpu/s_nextafterl.c: Likewise. * sysdeps/i386/fpu/s_nexttoward.c: Likewise. * sysdeps/i386/fpu/s_nexttowardf.c: Likewise. * sysdeps/ieee754/dbl-64/e_atan2.c: Likewise. * sysdeps/ieee754/dbl-64/e_atanh.c: Likewise. * sysdeps/ieee754/dbl-64/e_exp.c: Likewise. * sysdeps/ieee754/dbl-64/e_exp2.c: Likewise. * sysdeps/ieee754/dbl-64/e_j0.c: Likewise. * sysdeps/ieee754/dbl-64/e_sqrt.c: Likewise. * sysdeps/ieee754/dbl-64/s_expm1.c: Likewise. * sysdeps/ieee754/dbl-64/s_fma.c: Likewise. * sysdeps/ieee754/dbl-64/s_fmaf.c: Likewise. * sysdeps/ieee754/dbl-64/s_log1p.c: Likewise. * sysdeps/ieee754/dbl-64/s_nearbyint.c: Likewise. * sysdeps/ieee754/dbl-64/wordsize-64/s_nearbyint.c: Likewise. * sysdeps/ieee754/flt-32/e_atanhf.c: Likewise. * sysdeps/ieee754/flt-32/e_j0f.c: Likewise. * sysdeps/ieee754/flt-32/s_expm1f.c: Likewise. * sysdeps/ieee754/flt-32/s_log1pf.c: Likewise. * sysdeps/ieee754/flt-32/s_nearbyintf.c: Likewise. * sysdeps/ieee754/flt-32/s_nextafterf.c: Likewise. * sysdeps/ieee754/k_standardl.c: Likewise. * sysdeps/ieee754/ldbl-128/e_asinl.c: Likewise. * sysdeps/ieee754/ldbl-128/e_expl.c: Likewise. * sysdeps/ieee754/ldbl-128/e_powl.c: Likewise. * sysdeps/ieee754/ldbl-128/s_fmal.c: Likewise. * sysdeps/ieee754/ldbl-128/s_nearbyintl.c: Likewise. * sysdeps/ieee754/ldbl-128/s_nextafterl.c: Likewise. * sysdeps/ieee754/ldbl-128/s_nexttoward.c: Likewise. * sysdeps/ieee754/ldbl-128/s_nexttowardf.c: Likewise. * sysdeps/ieee754/ldbl-128ibm/e_asinl.c: Likewise. * sysdeps/ieee754/ldbl-128ibm/s_fmal.c: Likewise. * sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c: Likewise. * sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c: Likewise. * sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c: Likewise. * sysdeps/ieee754/ldbl-128ibm/s_rintl.c: Likewise. * sysdeps/ieee754/ldbl-96/e_atanhl.c: Likewise. * sysdeps/ieee754/ldbl-96/e_j0l.c: Likewise. * sysdeps/ieee754/ldbl-96/s_fma.c: Likewise. * sysdeps/ieee754/ldbl-96/s_fmal.c: Likewise. * sysdeps/ieee754/ldbl-96/s_nexttoward.c: Likewise. * sysdeps/ieee754/ldbl-96/s_nexttowardf.c: Likewise. * sysdeps/ieee754/ldbl-opt/s_nexttowardfd.c: Likewise. * sysdeps/m68k/m680x0/fpu/s_nextafterl.c: Likewise. 2018-05-11 15:11:38 +00:00			`#include <math-barriers.h>`
Use libm_alias_double in ldbl-128, ldbl-96 fma. This patch makes the ldbl-128 and ldbl-96 implementations of fma use libm_alias_double. Tested for x86_64, and tested with build-many-glibcs.py that installed stripped shared libraries are unchanged by the patch. * sysdeps/ieee754/ldbl-128/s_fma.c: Include <libm-alias-double.h>. [!__fma] (fma): Define using libm_alias_double. * sysdeps/ieee754/ldbl-96/s_fma.c: Include <libm-alias-double.h>. [!__fma] (fma): Define using libm_alias_double. 2017-10-06 20:23:58 +00:00			`#include <libm-alias-double.h>`
Implement accurate fma. 2010-10-14 02:27:03 +00:00
			`/* This implementation uses rounding to odd to avoid problems with`
			`double rounding. See a paper by Boldo and Melquiond:`
			`http://www.lri.fr/~melquion/doc/08-tc.pdf */`

			`double`
			`__fma (double x, double y, double z)`
			`{`
Fix ldbl-96 fma (Inf, Inf, finite) (bug 23272). As reported in bug 23272, the ldbl-96 implementation of fma (fma for double, in terms of ldbl-96 as the internal arithmetic type, as used on 32-bit x86) is missing some of the special-case handling for non-finite arguments, resulting in incorrect NaN results when the first two arguments are infinities, the third is finite and so the infinities go through the logic for finite arguments. This patch fixes it by handling all cases of non-finite arguments up front, with additional fma tests for the problem cases being added to the testsuite. Tested for x86_64 and x86. [BZ #23272] * sysdeps/ieee754/ldbl-96/s_fma.c (__fma): Start by handling all cases of non-finite arguments. * math/libm-test-fma.inc (fma_test_data): Add more tests. 2018-06-11 16:33:42 +00:00			`if (__glibc_unlikely (!isfinite (x) \|\| !isfinite (y)))`
			`return x * y + z;`
			`else if (__glibc_unlikely (!isfinite (z)))`
			`/* If z is Inf, but x and y are finite, the result should be z`
			`rather than NaN. */`
			`return (z + x) + y;`
Implement fmal, some fma bugfixes 2010-10-15 19:26:06 +00:00
Fix sign of exact zero return from fma (bug 14638). 2012-09-29 18:31:54 +00:00			`/* Ensure correct sign of exact 0 + 0. */`
Use glibc_likely instead __builtin_expect. 2014-02-10 13:45:42 +00:00			`if (__glibc_unlikely ((x == 0 \|\| y == 0) && z == 0))`
Call math_opt_barrier inside if Since floating-point operation may trigger floating-point exceptions, we call math_opt_barrier inside if to prevent code motion. [BZ #19465] * sysdeps/ieee754/dbl-64/s_fma.c (__fma): Call math_opt_barrier inside if. * sysdeps/ieee754/ldbl-128/s_fmal.c (__fmal): Likewise. * sysdeps/ieee754/ldbl-96/s_fma.c (__fma): Likewise. * sysdeps/ieee754/ldbl-96/s_fmal.c (__fmal): Likewise. 2016-01-15 13:22:59 +00:00			`{`
			`x = math_opt_barrier (x);`
			`return x * y + z;`
			`}`
Fix sign of exact zero return from fma (bug 14638). 2012-09-29 18:31:54 +00:00
Make fma use of Dekker and Knuth algorithms use round-to-nearest (bug 14796). 2012-11-03 19:48:53 +00:00			`fenv_t env;`
			`feholdexcept (&env);`
			`fesetround (FE_TONEAREST);`

Implement accurate fma. 2010-10-14 02:27:03 +00:00			`/* Multiplication m1 + m2 = x * y using Dekker's algorithm. */`
			`#define C ((1ULL << (LDBL_MANT_DIG + 1) / 2) + 1)`
Implement fmal, some fma bugfixes 2010-10-15 19:26:06 +00:00			`long double x1 = (long double) x * C;`
			`long double y1 = (long double) y * C;`
			`long double m1 = (long double) x * y;`
Implement accurate fma. 2010-10-14 02:27:03 +00:00			`x1 = (x - x1) + x1;`
			`y1 = (y - y1) + y1;`
			`long double x2 = x - x1;`
			`long double y2 = y - y1;`
			`long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;`

			`/* Addition a1 + a2 = z + m1 using Knuth's algorithm. */`
			`long double a1 = z + m1;`
			`long double t1 = a1 - z;`
			`long double t2 = a1 - t1;`
			`t1 = m1 - t1;`
			`t2 = z - t2;`
			`long double a2 = t1 + t2;`
Force eval for fma implementations 2014-08-01 22:13:50 +00:00			`/* Ensure the arithmetic is not scheduled after feclearexcept call. */`
			`math_force_eval (m2);`
			`math_force_eval (a2);`
Make fma use of Dekker and Knuth algorithms use round-to-nearest (bug 14796). 2012-11-03 19:48:53 +00:00			`feclearexcept (FE_INEXACT);`

Force eval for fma implementations 2014-08-01 22:13:50 +00:00			`/* If the result is an exact zero, ensure it has the correct sign. */`
Make fma use of Dekker and Knuth algorithms use round-to-nearest (bug 14796). 2012-11-03 19:48:53 +00:00			`if (a1 == 0 && m2 == 0)`
			`{`
			`feupdateenv (&env);`
Force eval for fma implementations 2014-08-01 22:13:50 +00:00			`/* Ensure that round-to-nearest value of z + m1 is not reused. */`
			`z = math_opt_barrier (z);`
Make fma use of Dekker and Knuth algorithms use round-to-nearest (bug 14796). 2012-11-03 19:48:53 +00:00			`return z + m1;`
			`}`
Implement accurate fma. 2010-10-14 02:27:03 +00:00
			`fesetround (FE_TOWARDZERO);`
			`/* Perform m2 + a2 addition with round to odd. */`
			`a2 = a2 + m2;`

			`/* Add that to a1 again using rounding to odd. */`
			`union ieee854_long_double u;`
			`u.d = a1 + a2;`
			`if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)`
			`u.ieee.mantissa1 \|= fetestexcept (FE_INEXACT) != 0;`
			`feupdateenv (&env);`

			`/* Add finally round to double precision. */`
			`return u.d;`
			`}`
			`#ifndef __fma`
Use libm_alias_double in ldbl-128, ldbl-96 fma. This patch makes the ldbl-128 and ldbl-96 implementations of fma use libm_alias_double. Tested for x86_64, and tested with build-many-glibcs.py that installed stripped shared libraries are unchanged by the patch. * sysdeps/ieee754/ldbl-128/s_fma.c: Include <libm-alias-double.h>. [!__fma] (fma): Define using libm_alias_double. * sysdeps/ieee754/ldbl-96/s_fma.c: Include <libm-alias-double.h>. [!__fma] (fma): Define using libm_alias_double. 2017-10-06 20:23:58 +00:00			`libm_alias_double (__fma, fma)`
Implement accurate fma. 2010-10-14 02:27:03 +00:00			`#endif`