mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-27 07:20:11 +00:00
d96164c330
Various floating-point functions have code to force underflow exceptions if a tiny result was computed in a way that might not have resulted in such exceptions even though the result is inexact. This typically uses math_force_eval to ensure that the underflowing expression is evaluated, but sometimes uses volatile. This patch refactors such code to use three new macros math_check_force_underflow, math_check_force_underflow_nonneg and math_check_force_underflow_complex (which in turn use math_force_eval). In the limited number of cases not suited to a simple conversion to these macros, existing uses of volatile are changed to use math_force_eval instead. The converted code does not always execute exactly the same sequence of operations as the original code, but the overall effects should be the same. Tested for x86_64, x86, mips64 and powerpc. * sysdeps/generic/math_private.h (fabs_tg): New macro. (min_of_type): Likewise. (math_check_force_underflow): Likewise. (math_check_force_underflow_nonneg): Likewise. (math_check_force_underflow_complex): Likewise. * math/e_exp2l.c (__ieee754_exp2l): Use math_check_force_underflow_nonneg. * math/k_casinh.c (__kernel_casinh): Likewise. * math/k_casinhf.c (__kernel_casinhf): Likewise. * math/k_casinhl.c (__kernel_casinhl): Likewise. * math/s_catan.c (__catan): Use math_check_force_underflow_complex. * math/s_catanf.c (__catanf): Likewise. * math/s_catanh.c (__catanh): Likewise. * math/s_catanhf.c (__catanhf): Likewise. * math/s_catanhl.c (__catanhl): Likewise. * math/s_catanl.c (__catanl): Likewise. * math/s_ccosh.c (__ccosh): Likewise. * math/s_ccoshf.c (__ccoshf): Likewise. * math/s_ccoshl.c (__ccoshl): Likewise. * math/s_cexp.c (__cexp): Likewise. * math/s_cexpf.c (__cexpf): Likewise. * math/s_cexpl.c (__cexpl): Likewise. * math/s_clog.c (__clog): Use math_check_force_underflow_nonneg. * math/s_clog10.c (__clog10): Likewise. * math/s_clog10f.c (__clog10f): Likewise. * math/s_clog10l.c (__clog10l): Likewise. 
* math/s_clogf.c (__clogf): Likewise. * math/s_clogl.c (__clogl): Likewise. * math/s_csin.c (__csin): Use math_check_force_underflow_complex. * math/s_csinf.c (__csinf): Likewise. * math/s_csinh.c (__csinh): Likewise. * math/s_csinhf.c (__csinhf): Likewise. * math/s_csinhl.c (__csinhl): Likewise. * math/s_csinl.c (__csinl): Likewise. * math/s_csqrt.c (__csqrt): Use math_check_force_underflow. * math/s_csqrtf.c (__csqrtf): Likewise. * math/s_csqrtl.c (__csqrtl): Likewise. * math/s_ctan.c (__ctan): Use math_check_force_underflow_complex. * math/s_ctanf.c (__ctanf): Likewise. * math/s_ctanh.c (__ctanh): Likewise. * math/s_ctanhf.c (__ctanhf): Likewise. * math/s_ctanhl.c (__ctanhl): Likewise. * math/s_ctanl.c (__ctanl): Likewise. * stdlib/strtod_l.c (round_and_return): Use math_force_eval instead of volatile. * sysdeps/ieee754/dbl-64/e_asin.c (__ieee754_asin): Use math_check_force_underflow. * sysdeps/ieee754/dbl-64/e_atanh.c (__ieee754_atanh): Likewise. * sysdeps/ieee754/dbl-64/e_exp.c (__ieee754_exp): Do not use volatile when forcing underflow. * sysdeps/ieee754/dbl-64/e_exp2.c (__ieee754_exp2): Use math_check_force_underflow_nonneg. * sysdeps/ieee754/dbl-64/e_gamma_r.c (__ieee754_gamma_r): Likewise. * sysdeps/ieee754/dbl-64/e_j1.c (__ieee754_j1): Use math_check_force_underflow. * sysdeps/ieee754/dbl-64/e_jn.c (__ieee754_jn): Likewise. * sysdeps/ieee754/dbl-64/e_sinh.c (__ieee754_sinh): Likewise. * sysdeps/ieee754/dbl-64/s_asinh.c (__asinh): Likewise. * sysdeps/ieee754/dbl-64/s_atan.c (atan): Use math_check_force_underflow_nonneg. * sysdeps/ieee754/dbl-64/s_erf.c (__erf): Use math_check_force_underflow. * sysdeps/ieee754/dbl-64/s_expm1.c (__expm1): Likewise. * sysdeps/ieee754/dbl-64/s_fma.c (__fma): Use math_force_eval instead of volatile. * sysdeps/ieee754/dbl-64/s_log1p.c (__log1p): Use math_check_force_underflow. * sysdeps/ieee754/dbl-64/s_sin.c (__sin): Likewise. * sysdeps/ieee754/dbl-64/s_tan.c (tan): Use math_check_force_underflow_nonneg. 
* sysdeps/ieee754/dbl-64/s_tanh.c (__tanh): Use math_check_force_underflow. * sysdeps/ieee754/flt-32/e_asinf.c (__ieee754_asinf): Likewise. * sysdeps/ieee754/flt-32/e_atanhf.c (__ieee754_atanhf): Likewise. * sysdeps/ieee754/flt-32/e_exp2f.c (__ieee754_exp2f): Use math_check_force_underflow_nonneg. * sysdeps/ieee754/flt-32/e_gammaf_r.c (__ieee754_gammaf_r): Likewise. * sysdeps/ieee754/flt-32/e_j1f.c (__ieee754_j1f): Use math_check_force_underflow. * sysdeps/ieee754/flt-32/e_jnf.c (__ieee754_jnf): Likewise. * sysdeps/ieee754/flt-32/e_sinhf.c (__ieee754_sinhf): Likewise. * sysdeps/ieee754/flt-32/k_sinf.c (__kernel_sinf): Likewise. * sysdeps/ieee754/flt-32/k_tanf.c (__kernel_tanf): Likewise. * sysdeps/ieee754/flt-32/s_asinhf.c (__asinhf): Likewise. * sysdeps/ieee754/flt-32/s_atanf.c (__atanf): Likewise. * sysdeps/ieee754/flt-32/s_erff.c (__erff): Likewise. * sysdeps/ieee754/flt-32/s_expm1f.c (__expm1f): Likewise. * sysdeps/ieee754/flt-32/s_log1pf.c (__log1pf): Likewise. * sysdeps/ieee754/flt-32/s_tanhf.c (__tanhf): Likewise. * sysdeps/ieee754/ldbl-128/e_asinl.c (__ieee754_asinl): Likewise. * sysdeps/ieee754/ldbl-128/e_atanhl.c (__ieee754_atanhl): Likewise. * sysdeps/ieee754/ldbl-128/e_expl.c (__ieee754_expl): Use math_check_force_underflow_nonneg. * sysdeps/ieee754/ldbl-128/e_gammal_r.c (__ieee754_gammal_r): Likewise. * sysdeps/ieee754/ldbl-128/e_j1l.c (__ieee754_j1l): Use math_check_force_underflow. * sysdeps/ieee754/ldbl-128/e_jnl.c (__ieee754_jnl): Likewise. * sysdeps/ieee754/ldbl-128/e_sinhl.c (__ieee754_sinhl): Likewise. * sysdeps/ieee754/ldbl-128/k_sincosl.c (__kernel_sincosl): Likewise. * sysdeps/ieee754/ldbl-128/k_sinl.c (__kernel_sinl): Likewise. * sysdeps/ieee754/ldbl-128/k_tanl.c (__kernel_tanl): Likewise. * sysdeps/ieee754/ldbl-128/s_asinhl.c (__asinhl): Likewise. * sysdeps/ieee754/ldbl-128/s_atanl.c (__atanl): Likewise. * sysdeps/ieee754/ldbl-128/s_erfl.c (__erfl): Likewise. * sysdeps/ieee754/ldbl-128/s_expm1l.c (__expm1l): Likewise. 
* sysdeps/ieee754/ldbl-128/s_fmal.c (__fmal): Use math_force_eval instead of volatile. * sysdeps/ieee754/ldbl-128/s_log1pl.c (__log1pl): Use math_check_force_underflow. * sysdeps/ieee754/ldbl-128/s_tanhl.c (__tanhl): Likewise. * sysdeps/ieee754/ldbl-128ibm/e_asinl.c (__ieee754_asinl): Use math_check_force_underflow. * sysdeps/ieee754/ldbl-128ibm/e_atanhl.c (__ieee754_atanhl): Likewise. * sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c (__ieee754_gammal_r): Use math_check_force_underflow_nonneg. * sysdeps/ieee754/ldbl-128ibm/e_jnl.c (__ieee754_jnl): Use math_check_force_underflow. * sysdeps/ieee754/ldbl-128ibm/e_sinhl.c (__ieee754_sinhl): Likewise. * sysdeps/ieee754/ldbl-128ibm/k_sincosl.c (__kernel_sincosl): Likewise. * sysdeps/ieee754/ldbl-128ibm/k_sinl.c (__kernel_sinl): Likewise. * sysdeps/ieee754/ldbl-128ibm/k_tanl.c (__kernel_tanl): Likewise. * sysdeps/ieee754/ldbl-128ibm/s_asinhl.c (__asinhl): Likewise. * sysdeps/ieee754/ldbl-128ibm/s_atanl.c (__atanl): Likewise. * sysdeps/ieee754/ldbl-128ibm/s_erfl.c (__erfl): Likewise. * sysdeps/ieee754/ldbl-128ibm/s_tanhl.c (__tanhl): Likewise. * sysdeps/ieee754/ldbl-96/e_asinl.c (__ieee754_asinl): Likewise. * sysdeps/ieee754/ldbl-96/e_atanhl.c (__ieee754_atanhl): Likewise. * sysdeps/ieee754/ldbl-96/e_gammal_r.c (__ieee754_gammal_r): Use math_check_force_underflow_nonneg. * sysdeps/ieee754/ldbl-96/e_j1l.c (__ieee754_j1l): Use math_check_force_underflow. * sysdeps/ieee754/ldbl-96/e_jnl.c (__ieee754_jnl): Likewise. * sysdeps/ieee754/ldbl-96/e_sinhl.c (__ieee754_sinhl): Likewise. * sysdeps/ieee754/ldbl-96/k_sinl.c (__kernel_sinl): Likewise. * sysdeps/ieee754/ldbl-96/k_tanl.c (__kernel_tanl): Use math_check_force_underflow_nonneg. * sysdeps/ieee754/ldbl-96/s_asinhl.c (__asinhl): Use math_check_force_underflow. * sysdeps/ieee754/ldbl-96/s_erfl.c (__erfl): Likewise. * sysdeps/ieee754/ldbl-96/s_fmal.c (__fmal): Use math_force_eval instead of volatile. * sysdeps/ieee754/ldbl-96/s_tanhl.c (__tanhl): Use math_check_force_underflow.
251 lines
7.2 KiB
C
251 lines
7.2 KiB
C
/*
|
|
* ====================================================
|
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
*
|
|
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
* Permission to use, copy, modify, and distribute this
|
|
* software is freely granted, provided that this notice
|
|
* is preserved.
|
|
* ====================================================
|
|
*/
|
|
|
|
/*
|
|
Long double expansions are
|
|
Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
|
|
and are incorporated herein by permission of the author. The author
|
|
reserves the right to distribute this material elsewhere under different
|
|
copying permissions. These modifications are distributed here under the
|
|
following terms:
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with this library; if not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
/* __ieee754_asinl(x)
|
|
* Method :
|
|
* Since asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ...
|
|
* we approximate asin(x) on [0,0.5] by
|
|
* asin(x) = x + x*x^2*R(x^2)
|
|
* Between .5 and .625 the approximation is
|
|
* asin(0.5625 + x) = asin(0.5625) + x rS(x) / sS(x)
|
|
* For x in [0.625,1]
|
|
* asin(x) = pi/2-2*asin(sqrt((1-x)/2))
|
|
* Let y = (1-x), z = y/2, s := sqrt(z), and pio2_hi+pio2_lo=pi/2;
|
|
* then for x>0.975
|
|
* asin(x) = pi/2 - 2*(s+s*z*R(z))
|
|
* = pio2_hi - (2*(s+s*z*R(z)) - pio2_lo)
|
|
* For x<=0.975, let pio4_hi = pio2_hi/2, then
|
|
* f = hi part of s;
|
|
* c = sqrt(z) - f = (z-f*f)/(s+f) ...f+c=sqrt(z)
|
|
* and
|
|
* asin(x) = pi/2 - 2*(s+s*z*R(z))
|
|
* = pio4_hi+(pio4-2s)-(2s*z*R(z)-pio2_lo)
|
|
* = pio4_hi+(pio4-2f)-(2s*z*R(z)-(pio2_lo+2c))
|
|
*
|
|
* Special cases:
|
|
* if x is NaN, return x itself;
|
|
* if |x|>1, return NaN with invalid signal.
|
|
*
|
|
*/
|
|
|
|
|
|
#include <float.h>
|
|
#include <math.h>
|
|
#include <math_private.h>
|
|
long double sqrtl (long double);
|
|
|
|
/* Constants used by __ieee754_asinl.

   pio2_hi + pio2_lo together represent pi/2 (high part plus a small
   correction); pio4_hi is the high part of pi/4.  The pS/qS and rS/sS
   groups are the numerator/denominator coefficients of the two rational
   approximations described in the comments below.  Both denominators are
   monic: their leading coefficient 1.0 is implied and therefore not
   stored (see the "1.000...E0" markers).  */
static const long double
  one = 1.0L,
  huge = 1.0e+300L,
  pio2_hi = 1.5707963267948966192313216916397514420986L,
  pio2_lo = 4.3359050650618905123985220130216759843812E-35L,
  pio4_hi = 7.8539816339744830961566084581987569936977E-1L,

  /* coefficient for R(x^2) */

  /* asin(x) = x + x^3 pS(x^2) / qS(x^2)
     0 <= x <= 0.5
     peak relative error 1.9e-35  */
  pS0 = -8.358099012470680544198472400254596543711E2L,
  pS1 = 3.674973957689619490312782828051860366493E3L,
  pS2 = -6.730729094812979665807581609853656623219E3L,
  pS3 = 6.643843795209060298375552684423454077633E3L,
  pS4 = -3.817341990928606692235481812252049415993E3L,
  pS5 = 1.284635388402653715636722822195716476156E3L,
  pS6 = -2.410736125231549204856567737329112037867E2L,
  pS7 = 2.219191969382402856557594215833622156220E1L,
  pS8 = -7.249056260830627156600112195061001036533E-1L,
  pS9 = 1.055923570937755300061509030361395604448E-3L,

  qS0 = -5.014859407482408326519083440151745519205E3L,
  qS1 = 2.430653047950480068881028451580393430537E4L,
  qS2 = -4.997904737193653607449250593976069726962E4L,
  qS3 = 5.675712336110456923807959930107347511086E4L,
  qS4 = -3.881523118339661268482937768522572588022E4L,
  qS5 = 1.634202194895541569749717032234510811216E4L,
  qS6 = -4.151452662440709301601820849901296953752E3L,
  qS7 = 5.956050864057192019085175976175695342168E2L,
  qS8 = -4.175375777334867025769346564600396877176E1L,
  /* 1.000000000000000000000000000000000000000E0 */

  /* asin(0.5625 + x) = asin(0.5625) + x rS(x) / sS(x)
     -0.0625 <= x <= 0.0625
     peak relative error 3.3e-35  */
  rS0 = -5.619049346208901520945464704848780243887E0L,
  rS1 = 4.460504162777731472539175700169871920352E1L,
  rS2 = -1.317669505315409261479577040530751477488E2L,
  rS3 = 1.626532582423661989632442410808596009227E2L,
  rS4 = -3.144806644195158614904369445440583873264E1L,
  rS5 = -9.806674443470740708765165604769099559553E1L,
  rS6 = 5.708468492052010816555762842394927806920E1L,
  rS7 = 1.396540499232262112248553357962639431922E1L,
  rS8 = -1.126243289311910363001762058295832610344E1L,
  rS9 = -4.956179821329901954211277873774472383512E-1L,
  rS10 = 3.313227657082367169241333738391762525780E-1L,

  sS0 = -4.645814742084009935700221277307007679325E0L,
  sS1 = 3.879074822457694323970438316317961918430E1L,
  sS2 = -1.221986588013474694623973554726201001066E2L,
  sS3 = 1.658821150347718105012079876756201905822E2L,
  sS4 = -4.804379630977558197953176474426239748977E1L,
  sS5 = -1.004296417397316948114344573811562952793E2L,
  sS6 = 7.530281592861320234941101403870010111138E1L,
  sS7 = 1.270735595411673647119592092304357226607E1L,
  sS8 = -1.815144839646376500705105967064792930282E1L,
  sS9 = -7.821597334910963922204235247786840828217E-2L,
  /* 1.000000000000000000000000000000000000000E0 */

  /* asin(0.5625), the expansion point of the rS/sS approximation.  */
  asinr5625 = 5.9740641664535021430381036628424864397707E-1L;
|
|
|
|
|
|
|
|
long double
|
|
__ieee754_asinl (long double x)
|
|
{
|
|
long double a, t, w, p, q, c, r, s;
|
|
int flag;
|
|
|
|
if (__glibc_unlikely (isnan (x)))
|
|
return x + x;
|
|
flag = 0;
|
|
a = __builtin_fabsl (x);
|
|
if (a == 1.0L) /* |x|>= 1 */
|
|
return x * pio2_hi + x * pio2_lo; /* asin(1)=+-pi/2 with inexact */
|
|
else if (a >= 1.0L)
|
|
return (x - x) / (x - x); /* asin(|x|>1) is NaN */
|
|
else if (a < 0.5L)
|
|
{
|
|
if (a < 6.938893903907228e-18L) /* |x| < 2**-57 */
|
|
{
|
|
math_check_force_underflow (x);
|
|
long double force_inexact = huge + x;
|
|
math_force_eval (force_inexact);
|
|
return x; /* return x with inexact if x!=0 */
|
|
}
|
|
else
|
|
{
|
|
t = x * x;
|
|
/* Mark to use pS, qS later on. */
|
|
flag = 1;
|
|
}
|
|
}
|
|
else if (a < 0.625L)
|
|
{
|
|
t = a - 0.5625;
|
|
p = ((((((((((rS10 * t
|
|
+ rS9) * t
|
|
+ rS8) * t
|
|
+ rS7) * t
|
|
+ rS6) * t
|
|
+ rS5) * t
|
|
+ rS4) * t
|
|
+ rS3) * t
|
|
+ rS2) * t
|
|
+ rS1) * t
|
|
+ rS0) * t;
|
|
|
|
q = ((((((((( t
|
|
+ sS9) * t
|
|
+ sS8) * t
|
|
+ sS7) * t
|
|
+ sS6) * t
|
|
+ sS5) * t
|
|
+ sS4) * t
|
|
+ sS3) * t
|
|
+ sS2) * t
|
|
+ sS1) * t
|
|
+ sS0;
|
|
t = asinr5625 + p / q;
|
|
if (x > 0.0L)
|
|
return t;
|
|
else
|
|
return -t;
|
|
}
|
|
else
|
|
{
|
|
/* 1 > |x| >= 0.625 */
|
|
w = one - a;
|
|
t = w * 0.5;
|
|
}
|
|
|
|
p = (((((((((pS9 * t
|
|
+ pS8) * t
|
|
+ pS7) * t
|
|
+ pS6) * t
|
|
+ pS5) * t
|
|
+ pS4) * t
|
|
+ pS3) * t
|
|
+ pS2) * t
|
|
+ pS1) * t
|
|
+ pS0) * t;
|
|
|
|
q = (((((((( t
|
|
+ qS8) * t
|
|
+ qS7) * t
|
|
+ qS6) * t
|
|
+ qS5) * t
|
|
+ qS4) * t
|
|
+ qS3) * t
|
|
+ qS2) * t
|
|
+ qS1) * t
|
|
+ qS0;
|
|
|
|
if (flag) /* 2^-57 < |x| < 0.5 */
|
|
{
|
|
w = p / q;
|
|
return x + x * w;
|
|
}
|
|
|
|
s = __ieee754_sqrtl (t);
|
|
if (a > 0.975L)
|
|
{
|
|
w = p / q;
|
|
t = pio2_hi - (2.0 * (s + s * w) - pio2_lo);
|
|
}
|
|
else
|
|
{
|
|
w = ldbl_high (s);
|
|
c = (t - w * w) / (s + w);
|
|
r = p / q;
|
|
p = 2.0 * s * r - (pio2_lo - 2.0 * c);
|
|
q = pio4_hi - 2.0 * w;
|
|
t = pio4_hi - (p - q);
|
|
}
|
|
|
|
if (x > 0.0L)
|
|
return t;
|
|
else
|
|
return -t;
|
|
}
|
|
strong_alias (__ieee754_asinl, __asinl_finite)
|