2004-05-26 04:47:00 +00:00
|
|
|
/* Double-precision floating point square root.
   Copyright (C) 1997-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
|
2004-05-26 04:47:00 +00:00
|
|
|
|
|
|
|
#include <math.h>
|
|
|
|
#include <math_private.h>
|
|
|
|
#include <fenv_libc.h>
|
|
|
|
#include <inttypes.h>
|
2013-05-01 15:46:34 +00:00
|
|
|
#include <stdint.h>
|
2004-05-26 04:47:00 +00:00
|
|
|
#include <sysdep.h>
|
|
|
|
#include <ldsodefs.h>
|
|
|
|
|
2015-01-27 18:16:39 +00:00
|
|
|
#ifndef _ARCH_PPCSQ
|
2004-05-26 04:47:00 +00:00
|
|
|
static const double almost_half = 0.5000000000000001; /* 0.5 + 2^-53 */
|
|
|
|
static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
|
|
|
|
static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
|
|
|
|
static const float two108 = 3.245185536584267269e+32;
|
|
|
|
static const float twom54 = 5.551115123125782702e-17;
|
|
|
|
extern const float __t_sqrt[1024];
|
|
|
|
|
|
|
|
/* The method is based on a description in
   Computation of elementary functions on the IBM RISC System/6000 processor,
   P. W. Markstein, IBM J. Res. Develop, 34(1) 1990.
   Basically, it consists of two interleaved Newton-Raphson approximations,
   one to find the actual square root, and one to find its reciprocal
   without the expense of a division operation.   The tricky bit here
   is the use of the POWER/PowerPC multiply-add operation to get the
   required accuracy with high speed.

   The argument reduction works by a combination of table lookup to
   obtain the initial guesses, and some careful modification of the
   generated guesses (which mostly runs on the integer unit, while the
   Newton-Raphson is running on the FPU).  */
|
2004-05-26 04:47:00 +00:00
|
|
|
|
|
|
|
double
|
|
|
|
__slow_ieee754_sqrt (double x)
|
|
|
|
{
|
|
|
|
const float inf = a_inf.value;
|
|
|
|
|
|
|
|
if (x > 0)
|
|
|
|
{
|
|
|
|
/* schedule the EXTRACT_WORDS to get separation between the store
|
2011-10-12 15:27:51 +00:00
|
|
|
and the load. */
|
2004-05-26 04:47:00 +00:00
|
|
|
ieee_double_shape_type ew_u;
|
|
|
|
ieee_double_shape_type iw_u;
|
|
|
|
ew_u.value = (x);
|
|
|
|
if (x != inf)
|
|
|
|
{
|
|
|
|
/* Variables named starting with 's' exist in the
|
|
|
|
argument-reduced space, so that 2 > sx >= 0.5,
|
|
|
|
1.41... > sg >= 0.70.., 0.70.. >= sy > 0.35... .
|
|
|
|
Variables named ending with 'i' are integer versions of
|
|
|
|
floating-point values. */
|
|
|
|
double sx; /* The value of which we're trying to find the
|
|
|
|
square root. */
|
|
|
|
double sg, g; /* Guess of the square root of x. */
|
|
|
|
double sd, d; /* Difference between the square of the guess and x. */
|
|
|
|
double sy; /* Estimate of 1/2g (overestimated by 1ulp). */
|
|
|
|
double sy2; /* 2*sy */
|
|
|
|
double e; /* Difference between y*g and 1/2 (se = e * fsy). */
|
|
|
|
double shx; /* == sx * fsg */
|
|
|
|
double fsg; /* sg*fsg == g. */
|
|
|
|
fenv_t fe; /* Saved floating-point environment (stores rounding
|
|
|
|
mode and whether the inexact exception is
|
|
|
|
enabled). */
|
|
|
|
uint32_t xi0, xi1, sxi, fsgi;
|
|
|
|
const float *t_sqrt;
|
|
|
|
|
|
|
|
fe = fegetenv_register ();
|
|
|
|
/* complete the EXTRACT_WORDS (xi0,xi1,x) operation. */
|
|
|
|
xi0 = ew_u.parts.msw;
|
|
|
|
xi1 = ew_u.parts.lsw;
|
|
|
|
relax_fenv_state ();
|
|
|
|
sxi = (xi0 & 0x3fffffff) | 0x3fe00000;
|
|
|
|
/* schedule the INSERT_WORDS (sx, sxi, xi1) to get separation
|
|
|
|
between the store and the load. */
|
|
|
|
iw_u.parts.msw = sxi;
|
|
|
|
iw_u.parts.lsw = xi1;
|
|
|
|
t_sqrt = __t_sqrt + (xi0 >> (52 - 32 - 8 - 1) & 0x3fe);
|
|
|
|
sg = t_sqrt[0];
|
|
|
|
sy = t_sqrt[1];
|
|
|
|
/* complete the INSERT_WORDS (sx, sxi, xi1) operation. */
|
|
|
|
sx = iw_u.value;
|
|
|
|
|
2009-11-06 17:33:27 +00:00
|
|
|
/* Here we have three Newton-Raphson iterations each of a
|
2004-05-26 04:47:00 +00:00
|
|
|
division and a square root and the remainder of the
|
|
|
|
argument reduction, all interleaved. */
|
2015-02-12 23:05:37 +00:00
|
|
|
sd = -__builtin_fma (sg, sg, -sx);
|
2004-05-26 04:47:00 +00:00
|
|
|
fsgi = (xi0 + 0x40000000) >> 1 & 0x7ff00000;
|
|
|
|
sy2 = sy + sy;
|
2015-02-12 23:05:37 +00:00
|
|
|
sg = __builtin_fma (sy, sd, sg); /* 16-bit approximation to
|
|
|
|
sqrt(sx). */
|
2004-05-26 04:47:00 +00:00
|
|
|
|
|
|
|
/* schedule the INSERT_WORDS (fsg, fsgi, 0) to get separation
|
|
|
|
between the store and the load. */
|
|
|
|
INSERT_WORDS (fsg, fsgi, 0);
|
|
|
|
iw_u.parts.msw = fsgi;
|
|
|
|
iw_u.parts.lsw = (0);
|
2015-02-12 23:05:37 +00:00
|
|
|
e = -__builtin_fma (sy, sg, -almost_half);
|
|
|
|
sd = -__builtin_fma (sg, sg, -sx);
|
2004-05-26 04:47:00 +00:00
|
|
|
if ((xi0 & 0x7ff00000) == 0)
|
|
|
|
goto denorm;
|
2015-02-12 23:05:37 +00:00
|
|
|
sy = __builtin_fma (e, sy2, sy);
|
|
|
|
sg = __builtin_fma (sy, sd, sg); /* 32-bit approximation to
|
|
|
|
sqrt(sx). */
|
2004-05-26 04:47:00 +00:00
|
|
|
sy2 = sy + sy;
|
|
|
|
/* complete the INSERT_WORDS (fsg, fsgi, 0) operation. */
|
|
|
|
fsg = iw_u.value;
|
2015-02-12 23:05:37 +00:00
|
|
|
e = -__builtin_fma (sy, sg, -almost_half);
|
|
|
|
sd = -__builtin_fma (sg, sg, -sx);
|
|
|
|
sy = __builtin_fma (e, sy2, sy);
|
2004-05-26 04:47:00 +00:00
|
|
|
shx = sx * fsg;
|
2015-02-12 23:05:37 +00:00
|
|
|
sg = __builtin_fma (sy, sd, sg); /* 64-bit approximation to
|
|
|
|
sqrt(sx), but perhaps
|
|
|
|
rounded incorrectly. */
|
2004-05-26 04:47:00 +00:00
|
|
|
sy2 = sy + sy;
|
|
|
|
g = sg * fsg;
|
2015-02-12 23:05:37 +00:00
|
|
|
e = -__builtin_fma (sy, sg, -almost_half);
|
|
|
|
d = -__builtin_fma (g, sg, -shx);
|
|
|
|
sy = __builtin_fma (e, sy2, sy);
|
2004-05-26 04:47:00 +00:00
|
|
|
fesetenv_register (fe);
|
2015-02-12 23:05:37 +00:00
|
|
|
return __builtin_fma (sy, d, g);
|
2004-05-26 04:47:00 +00:00
|
|
|
denorm:
|
|
|
|
/* For denormalised numbers, we normalise, calculate the
|
|
|
|
square root, and return an adjusted result. */
|
|
|
|
fesetenv_register (fe);
|
|
|
|
return __slow_ieee754_sqrt (x * two108) * twom54;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (x < 0)
|
|
|
|
{
|
|
|
|
/* For some reason, some PowerPC32 processors don't implement
|
2011-10-12 15:27:51 +00:00
|
|
|
FE_INVALID_SQRT. */
|
2004-05-26 04:47:00 +00:00
|
|
|
#ifdef FE_INVALID_SQRT
|
Fix libm feraiseexcept namespace (bug 17723).
Various C90 and UNIX98 libm functions call feraiseexcept, which is not
in those standards. This causes linknamespace test failures - except
on x86 / x86_64, where feraiseexcept is inline (for the relevant
constant arguments) in bits/fenv.h.
This patch fixes this by making those functions call __feraiseexcept
instead. All changes are applied to all architectures rather than
considering the possibility that some might not be needed in some
cases (e.g. x86) as it seems most maintainable to keep architectures
consistent.
Where __feraiseexcept does not exist, it is added, with feraiseexcept
made a weak alias; where it is a strong alias, it is made weak.
libm_hidden_def / libm_hidden_proto are used with __feraiseexcept
(this might in some cases improve code generation for existing calls
to __feraiseexcept in some code on some architectures). Where there
are dummy feraiseexcept macros (on architectures without
floating-point exceptions support, to avoid compile errors from
references to undefined FE_* macros), corresponding dummy
__feraiseexcept macros are added. And on x86, to ensure
__feraiseexcept calls still get inlined, the inline function in
bits/fenv.h is refactored so that most of it can be reused in an
inline __feraiseexcept in a separate include/bits/fenv.h.
Calls are changed in C90/UNIX98 functions, but generally not in
functions missing from those standards. They are also changed in
libc_fe* functions (on the basis that those might be used in any libm
function), and in feupdateenv (on the same basis - may be used, via
default libc_*, in any libm function - of course feupdateenv will need
changing to __feupdateenv in a subsequent patch to make that fully
namespace-clean).
No __feraiseexcept is added corresponding to the feraiseexcept in
powerpc bits/fenvinline.h, because that macro definition is
conditional on !defined __NO_MATH_INLINES, and glibc libm is built
with -D__NO_MATH_INLINES, so changing internal calls to use
__feraiseexcept should make no difference.
Tested for x86_64 (testsuite; the only change in disassembly of
installed shared libraries is a slight code reordering in clog10, of
no apparent significance). Also tested for MIPS, where (in the
configuration tested) it eliminates math.h linknamespace failures for
n32 and n64 (some for o32 remain because of other issues).
[BZ #17723]
* include/fenv.h (__feraiseexcept): Use libm_hidden_proto.
* math/fraiseexcpt.c (__feraiseexcept): Use libm_hidden_def.
* sysdeps/aarch64/fpu/fraiseexcpt.c (feraiseexcept): Rename to
__feraiseexcept and define as weak alias of __feraiseexcept. Use
libm_hidden_weak.
* sysdeps/arm/fraiseexcpt.c (feraiseexcept): Likewise.
* sysdeps/hppa/fpu/fraiseexcpt.c (feraiseexcept): Likewise.
* sysdeps/i386/fpu/fraiseexcpt.c (__feraiseexcept): Use
libm_hidden_def.
* sysdeps/ia64/fpu/fraiseexcpt.c (feraiseexcept): Rename to
__feraiseexcept and define as weak alias of __feraiseexcept. Use
libm_hidden_weak.
* sysdeps/m68k/coldfire/fpu/fraiseexcpt.c (feraiseexcept):
Likewise.
* sysdeps/microblaze/math_private.h (__feraiseexcept): New macro.
* sysdeps/mips/fpu/fraiseexcpt.c (feraiseexcept): Rename to
__feraiseexcept and define as weak alias of __feraiseexcept. Use
libm_hidden_weak.
* sysdeps/powerpc/fpu/fraiseexcpt.c (__feraiseexcept): Use
libm_hidden_def.
* sysdeps/powerpc/nofpu/fraiseexcpt.c (__feraiseexcept): Likewise.
* sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcpt.c
(__feraiseexcept): Likewise.
* sysdeps/s390/fpu/fraiseexcpt.c (feraiseexcept): Rename to
__feraiseexcept and define as weak alias of __feraiseexcept. Use
libm_hidden_weak.
* sysdeps/sh/sh4/fpu/fraiseexcpt.c (feraiseexcept): Likewise.
* sysdeps/sparc/fpu/fraiseexcpt.c (__feraiseexcept): Use
libm_hidden_def.
* sysdeps/tile/math_private.h (__feraiseexcept): New macro.
* sysdeps/unix/sysv/linux/alpha/fraiseexcpt.S (__feraiseexcept):
Use libm_hidden_def.
* sysdeps/x86_64/fpu/fraiseexcpt.c (__feraiseexcept): Use
libm_hidden_def.
(feraiseexcept): Define as weak not strong alias. Use
libm_hidden_weak.
* sysdeps/x86/fpu/bits/fenv.h (__feraiseexcept_invalid_divbyzero):
New inline function. Factored out of ...
(feraiseexcept): ... here. Use __feraiseexcept_invalid_divbyzero.
* sysdeps/x86/fpu/include/bits/fenv.h: New file.
* math/e_scalb.c (invalid_fn): Call __feraiseexcept instead of
feraiseexcept.
* math/w_acos.c (__acos): Likewise.
* math/w_asin.c (__asin): Likewise.
* math/w_ilogb.c (__ilogb): Likewise.
* math/w_j0.c (y0): Likewise.
* math/w_j1.c (y1): Likewise.
* math/w_jn.c (yn): Likewise.
* math/w_log.c (__log): Likewise.
* math/w_log10.c (__log10): Likewise.
* sysdeps/aarch64/fpu/feupdateenv.c (feupdateenv): Likewise.
* sysdeps/aarch64/fpu/math_private.h
(libc_feupdateenv_test_aarch64): Likewise.
* sysdeps/alpha/fpu/feupdateenv.c (__feupdateenv): Likewise.
* sysdeps/arm/fenv_private.h (libc_feupdateenv_test_vfp): Likewise.
* sysdeps/arm/feupdateenv.c (feupdateenv): Likewise.
* sysdeps/ia64/fpu/feupdateenv.c (feupdateenv): Likewise.
* sysdeps/m68k/fpu/feupdateenv.c (__feupdateenv): Likewise.
* sysdeps/mips/fpu/feupdateenv.c (feupdateenv): Likewise.
* sysdeps/powerpc/fpu/e_sqrt.c (__slow_ieee754_sqrt): Likewise.
* sysdeps/s390/fpu/feupdateenv.c (feupdateenv): Likewise.
* sysdeps/sh/sh4/fpu/feupdateenv.c (feupdateenv): Likewise.
* sysdeps/sparc/fpu/feupdateenv.c (__feupdateenv): Likewise.
2014-12-30 17:08:09 +00:00
|
|
|
__feraiseexcept (FE_INVALID_SQRT);
|
2008-04-12 03:39:30 +00:00
|
|
|
|
|
|
|
fenv_union_t u = { .fenv = fegetenv_register () };
|
2013-08-17 08:58:55 +00:00
|
|
|
if ((u.l & FE_INVALID) == 0)
|
2004-05-26 04:47:00 +00:00
|
|
|
#endif
|
Fix libm feraiseexcept namespace (bug 17723).
Various C90 and UNIX98 libm functions call feraiseexcept, which is not
in those standards. This causes linknamespace test failures - except
on x86 / x86_64, where feraiseexcept is inline (for the relevant
constant arguments) in bits/fenv.h.
This patch fixes this by making those functions call __feraiseexcept
instead. All changes are applied to all architectures rather than
considering the possibility that some might not be needed in some
cases (e.g. x86) as it seems most maintainable to keep architectures
consistent.
Where __feraiseexcept does not exist, it is added, with feraiseexcept
made a weak alias; where it is a strong alias, it is made weak.
libm_hidden_def / libm_hidden_proto are used with __feraiseexcept
(this might in some cases improve code generation for existing calls
to __feraiseexcept in some code on some architectures). Where there
are dummy feraiseexcept macros (on architectures without
floating-point exceptions support, to avoid compile errors from
references to undefined FE_* macros), corresponding dummy
__feraiseexcept macros are added. And on x86, to ensure
__feraiseexcept calls still get inlined, the inline function in
bits/fenv.h is refactored so that most of it can be reused in an
inline __feraiseexcept in a separate include/bits/fenv.h.
Calls are changed in C90/UNIX98 functions, but generally not in
functions missing from those standards. They are also changed in
libc_fe* functions (on the basis that those might be used in any libm
function), and in feupdateenv (on the same basis - may be used, via
default libc_*, in any libm function - of course feupdateenv will need
changing to __feupdateenv in a subsequent patch to make that fully
namespace-clean).
No __feraiseexcept is added corresponding to the feraiseexcept in
powerpc bits/fenvinline.h, because that macro definition is
conditional on !defined __NO_MATH_INLINES, and glibc libm is built
with -D__NO_MATH_INLINES, so changing internal calls to use
__feraiseexcept should make no difference.
Tested for x86_64 (testsuite; the only change in disassembly of
installed shared libraries is a slight code reordering in clog10, of
no apparent significance). Also tested for MIPS, where (in the
configuration tested) it eliminates math.h linknamespace failures for
n32 and n64 (some for o32 remain because of other issues).
[BZ #17723]
* include/fenv.h (__feraiseexcept): Use libm_hidden_proto.
* math/fraiseexcpt.c (__feraiseexcept): Use libm_hidden_def.
* sysdeps/aarch64/fpu/fraiseexcpt.c (feraiseexcept): Rename to
__feraiseexcept and define as weak alias of __feraiseexcept. Use
libm_hidden_weak.
* sysdeps/arm/fraiseexcpt.c (feraiseexcept): Likewise.
* sysdeps/hppa/fpu/fraiseexcpt.c (feraiseexcept): Likewise.
* sysdeps/i386/fpu/fraiseexcpt.c (__feraiseexcept): Use
libm_hidden_def.
* sysdeps/ia64/fpu/fraiseexcpt.c (feraiseexcept): Rename to
__feraiseexcept and define as weak alias of __feraiseexcept. Use
libm_hidden_weak.
* sysdeps/m68k/coldfire/fpu/fraiseexcpt.c (feraiseexcept):
Likewise.
* sysdeps/microblaze/math_private.h (__feraiseexcept): New macro.
* sysdeps/mips/fpu/fraiseexcpt.c (feraiseexcept): Rename to
__feraiseexcept and define as weak alias of __feraiseexcept. Use
libm_hidden_weak.
* sysdeps/powerpc/fpu/fraiseexcpt.c (__feraiseexcept): Use
libm_hidden_def.
* sysdeps/powerpc/nofpu/fraiseexcpt.c (__feraiseexcept): Likewise.
* sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcpt.c
(__feraiseexcept): Likewise.
* sysdeps/s390/fpu/fraiseexcpt.c (feraiseexcept): Rename to
__feraiseexcept and define as weak alias of __feraiseexcept. Use
libm_hidden_weak.
* sysdeps/sh/sh4/fpu/fraiseexcpt.c (feraiseexcept): Likewise.
* sysdeps/sparc/fpu/fraiseexcpt.c (__feraiseexcept): Use
libm_hidden_def.
* sysdeps/tile/math_private.h (__feraiseexcept): New macro.
* sysdeps/unix/sysv/linux/alpha/fraiseexcpt.S (__feraiseexcept):
Use libm_hidden_def.
* sysdeps/x86_64/fpu/fraiseexcpt.c (__feraiseexcept): Use
libm_hidden_def.
(feraiseexcept): Define as weak not strong alias. Use
libm_hidden_weak.
* sysdeps/x86/fpu/bits/fenv.h (__feraiseexcept_invalid_divbyzero):
New inline function. Factored out of ...
(feraiseexcept): ... here. Use __feraiseexcept_invalid_divbyzero.
* sysdeps/x86/fpu/include/bits/fenv.h: New file.
* math/e_scalb.c (invalid_fn): Call __feraiseexcept instead of
feraiseexcept.
* math/w_acos.c (__acos): Likewise.
* math/w_asin.c (__asin): Likewise.
* math/w_ilogb.c (__ilogb): Likewise.
* math/w_j0.c (y0): Likewise.
* math/w_j1.c (y1): Likewise.
* math/w_jn.c (yn): Likewise.
* math/w_log.c (__log): Likewise.
* math/w_log10.c (__log10): Likewise.
* sysdeps/aarch64/fpu/feupdateenv.c (feupdateenv): Likewise.
* sysdeps/aarch64/fpu/math_private.h
(libc_feupdateenv_test_aarch64): Likewise.
* sysdeps/alpha/fpu/feupdateenv.c (__feupdateenv): Likewise.
* sysdeps/arm/fenv_private.h (libc_feupdateenv_test_vfp): Likewise.
* sysdeps/arm/feupdateenv.c (feupdateenv): Likewise.
* sysdeps/ia64/fpu/feupdateenv.c (feupdateenv): Likewise.
* sysdeps/m68k/fpu/feupdateenv.c (__feupdateenv): Likewise.
* sysdeps/mips/fpu/feupdateenv.c (feupdateenv): Likewise.
* sysdeps/powerpc/fpu/e_sqrt.c (__slow_ieee754_sqrt): Likewise.
* sysdeps/s390/fpu/feupdateenv.c (feupdateenv): Likewise.
* sysdeps/sh/sh4/fpu/feupdateenv.c (feupdateenv): Likewise.
* sysdeps/sparc/fpu/feupdateenv.c (__feupdateenv): Likewise.
2014-12-30 17:08:09 +00:00
|
|
|
__feraiseexcept (FE_INVALID);
|
2004-05-26 04:47:00 +00:00
|
|
|
x = a_nan.value;
|
|
|
|
}
|
|
|
|
return f_wash (x);
|
|
|
|
}
|
2015-01-27 18:16:39 +00:00
|
|
|
#endif /* _ARCH_PPCSQ */
|
2004-05-26 04:47:00 +00:00
|
|
|
|
2011-11-11 18:33:38 +00:00
|
|
|
#undef __ieee754_sqrt
|
2004-05-26 04:47:00 +00:00
|
|
|
/* Square root of X.  When _ARCH_PPCSQ is defined the fsqrt instruction
   is issued directly; otherwise the work is delegated to
   __slow_ieee754_sqrt.  */
double
__ieee754_sqrt (double x)
{
#ifdef _ARCH_PPCSQ
  double root;

  asm ("fsqrt %0,%1\n" : "=f" (root) : "f" (x));
  return root;
#else
  return __slow_ieee754_sqrt (x);
#endif
}
|
2011-10-12 15:27:51 +00:00
|
|
|
strong_alias (__ieee754_sqrt, __sqrt_finite)
|