mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-25 22:40:05 +00:00
c8235dda72
Various i386 libm functions return values with excess range and precision; Wilco Dijkstra's patches to make isfinite etc. expand inline cause this pre-existing issue to result in test failures (when e.g. a result that overflows float but not long double gets counted as overflowing for some purposes but not others). This patch addresses those cases arising from functions defined in C, adding a math_narrow_eval macro that forces values to memory to eliminate excess precision if FLT_EVAL_METHOD indicates this is needed, and is a no-op otherwise. I'll convert existing uses of volatile and asm for this purpose to use the new macro later, once i386 has clean test results again (which requires fixes for .S files as well). Tested for x86_64 and x86. Committed. [BZ #18980] * sysdeps/generic/math_private.h: Include <float.h>. (math_narrow_eval): New macro. [FLT_EVAL_METHOD != 0] (excess_precision): Likewise. * sysdeps/ieee754/dbl-64/e_cosh.c (__ieee754_cosh): Use math_narrow_eval on overflowing return value. * sysdeps/ieee754/dbl-64/e_lgamma_r.c (__ieee754_lgamma_r): Likewise. * sysdeps/ieee754/dbl-64/e_sinh.c (__ieee754_sinh): Likewise. * sysdeps/ieee754/flt-32/e_coshf.c (__ieee754_coshf): Likewise. * sysdeps/ieee754/flt-32/e_lgammaf_r.c (__ieee754_lgammaf_r): Likewise. * sysdeps/ieee754/flt-32/e_sinhf.c (__ieee754_sinhf): Likewise.
315 lines
11 KiB
C
315 lines
11 KiB
C
/* @(#)er_lgamma.c 5.1 93/09/24 */
|
|
/*
|
|
* ====================================================
|
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
*
|
|
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
* Permission to use, copy, modify, and distribute this
|
|
* software is freely granted, provided that this notice
|
|
* is preserved.
|
|
* ====================================================
|
|
*/
|
|
|
|
/* __ieee754_lgamma_r(x, signgamp)
|
|
* Reentrant version of the logarithm of the Gamma function
|
|
* with user-provided pointer for the sign of Gamma(x).
|
|
*
|
|
* Method:
|
|
* 1. Argument Reduction for 0 < x <= 8
|
|
* Since gamma(1+s)=s*gamma(s), for x in [0,8], we may
|
|
* reduce x to a number in [1.5,2.5] by
|
|
* lgamma(1+s) = log(s) + lgamma(s)
|
|
* for example,
|
|
* lgamma(7.3) = log(6.3) + lgamma(6.3)
|
|
* = log(6.3*5.3) + lgamma(5.3)
|
|
* = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3)
|
|
* 2. Polynomial approximation of lgamma around its
|
|
* minimum ymin=1.461632144968362245 to maintain monotonicity.
|
|
* On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use
|
|
* Let z = x-ymin;
|
|
* lgamma(x) = -1.214862905358496078218 + z^2*poly(z)
|
|
* where
|
|
* poly(z) is a 14 degree polynomial.
|
|
* 2. Rational approximation in the primary interval [2,3]
|
|
* We use the following approximation:
|
|
* s = x-2.0;
|
|
* lgamma(x) = 0.5*s + s*P(s)/Q(s)
|
|
* with accuracy
|
|
* |P/Q - (lgamma(x)-0.5s)| < 2**-61.71
|
|
* Our algorithms are based on the following observation
|
|
*
|
|
* lgamma(2+s) = s*(1-Euler) + (zeta(2)-1)/2 * s**2 - (zeta(3)-1)/3 * s**3 + ...
|
|
*
|
|
* where Euler = 0.5771... is the Euler constant, which is very
|
|
* close to 0.5.
|
|
*
|
|
* 3. For x>=8, we have
|
|
* lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+....
|
|
* (better formula:
|
|
* lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...)
|
|
* Let z = 1/x, then we approximate
|
|
* f(z) = lgamma(x) - (x-0.5)(log(x)-1)
|
|
* by
|
|
* w = w0 + w1*z + w2*z**3 + w3*z**5 + ... + w6*z**11
|
|
* where
|
|
* |w - f(z)| < 2**-58.74
|
|
*
|
|
* 4. For negative x, since (G is gamma function)
|
|
* -x*G(-x)*G(x) = pi/sin(pi*x),
|
|
* we have
|
|
* G(x) = pi/(sin(pi*x)*(-x)*G(-x))
|
|
* since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
|
|
* Hence, for x<0, signgam = sign(sin(pi*x)) and
|
|
* lgamma(x) = log(|Gamma(x)|)
|
|
* = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
|
|
* Note: one should avoid computing pi*(-x) directly in the
|
|
* computation of sin(pi*(-x)).
|
|
*
|
|
* 5. Special Cases
|
|
* lgamma(2+s) ~ s*(1-Euler) for tiny s
|
|
* lgamma(1)=lgamma(2)=0
|
|
* lgamma(x) ~ -log(x) for tiny x
|
|
* lgamma(0) = lgamma(inf) = inf
|
|
* lgamma(-integer) = +-inf
|
|
*
|
|
*/
|
|
|
|
#include <libc-internal.h>
|
|
#include <math.h>
|
|
#include <math_private.h>
|
|
|
|
/* Coefficient tables used by sin_pi and __ieee754_lgamma_r.  Each
   literal is followed by the high and low 32-bit words of its IEEE
   double representation.  */

/* General-purpose constants.  */
static const double two52 = 4.50359962737049600000e+15; /* 0x43300000, 0x00000000 */
static const double half = 5.00000000000000000000e-01; /* 0x3FE00000, 0x00000000 */
static const double one = 1.00000000000000000000e+00; /* 0x3FF00000, 0x00000000 */
static const double pi = 3.14159265358979311600e+00; /* 0x400921FB, 0x54442D18 */
static const double zero = 0.00000000000000000000e+00;

/* a0..a11: polynomial coefficients for sub-case 0 of the x < 2 branch.  */
static const double
  a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */
  a1 = 3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */
  a2 = 6.73523010531292681824e-02, /* 0x3FB13E00, 0x1A5562A7 */
  a3 = 2.05808084325167332806e-02, /* 0x3F951322, 0xAC92547B */
  a4 = 7.38555086081402883957e-03, /* 0x3F7E404F, 0xB68FEFE8 */
  a5 = 2.89051383673415629091e-03, /* 0x3F67ADD8, 0xCCB7926B */
  a6 = 1.19270763183362067845e-03, /* 0x3F538A94, 0x116F3F5D */
  a7 = 5.10069792153511336608e-04, /* 0x3F40B6C6, 0x89B99C00 */
  a8 = 2.20862790713908385557e-04, /* 0x3F2CF2EC, 0xED10E54D */
  a9 = 1.08011567247583939954e-04, /* 0x3F1C5088, 0x987DFB07 */
  a10 = 2.52144565451257326939e-05, /* 0x3EFA7074, 0x428CFA52 */
  a11 = 4.48640949618915160150e-05; /* 0x3F07858E, 0x90A45837 */

/* tc, tf, tt and t0..t14: expansion about x = tc used by sub-case 1 of
   the x < 2 branch.  */
static const double
  tc = 1.46163214496836224576e+00, /* 0x3FF762D8, 0x6356BE3F */
  tf = -1.21486290535849611461e-01, /* 0xBFBF19B9, 0xBCC38A42 */
  /* tt = -(tail of tf) */
  tt = -3.63867699703950536541e-18, /* 0xBC50C7CA, 0xA48A971F */
  t0 = 4.83836122723810047042e-01, /* 0x3FDEF72B, 0xC8EE38A2 */
  t1 = -1.47587722994593911752e-01, /* 0xBFC2E427, 0x8DC6C509 */
  t2 = 6.46249402391333854778e-02, /* 0x3FB08B42, 0x94D5419B */
  t3 = -3.27885410759859649565e-02, /* 0xBFA0C9A8, 0xDF35B713 */
  t4 = 1.79706750811820387126e-02, /* 0x3F9266E7, 0x970AF9EC */
  t5 = -1.03142241298341437450e-02, /* 0xBF851F9F, 0xBA91EC6A */
  t6 = 6.10053870246291332635e-03, /* 0x3F78FCE0, 0xE370E344 */
  t7 = -3.68452016781138256760e-03, /* 0xBF6E2EFF, 0xB3E914D7 */
  t8 = 2.25964780900612472250e-03, /* 0x3F6282D3, 0x2E15C915 */
  t9 = -1.40346469989232843813e-03, /* 0xBF56FE8E, 0xBF2D1AF1 */
  t10 = 8.81081882437654011382e-04, /* 0x3F4CDF0C, 0xEF61A8E9 */
  t11 = -5.38595305356740546715e-04, /* 0xBF41A610, 0x9C73E0EC */
  t12 = 3.15632070903625950361e-04, /* 0x3F34AF6D, 0x6C0EBBF7 */
  t13 = -3.12754168375120860518e-04, /* 0xBF347F24, 0xECC38C38 */
  t14 = 3.35529192635519073543e-04; /* 0x3F35FD3E, 0xE8C2D3F4 */

/* u0..u5 (numerator) and v1..v5 (denominator): rational approximation
   used by sub-case 2 of the x < 2 branch.  */
static const double
  u0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
  u1 = 6.32827064025093366517e-01, /* 0x3FE4401E, 0x8B005DFF */
  u2 = 1.45492250137234768737e+00, /* 0x3FF7475C, 0xD119BD6F */
  u3 = 9.77717527963372745603e-01, /* 0x3FEF4976, 0x44EA8450 */
  u4 = 2.28963728064692451092e-01, /* 0x3FCD4EAE, 0xF6010924 */
  u5 = 1.33810918536787660377e-02; /* 0x3F8B678B, 0xBF2BAB09 */

static const double
  v1 = 2.45597793713041134822e+00, /* 0x4003A5D7, 0xC2BD619C */
  v2 = 2.12848976379893395361e+00, /* 0x40010725, 0xA42B18F5 */
  v3 = 7.69285150456672783825e-01, /* 0x3FE89DFB, 0xE45050AF */
  v4 = 1.04222645593369134254e-01, /* 0x3FBAAE55, 0xD6537C88 */
  v5 = 3.21709242282423911810e-03; /* 0x3F6A5ABB, 0x57D0CF61 */

/* s0..s6 (numerator) and r1..r6 (denominator): rational approximation
   used by the 2 <= x < 8 branch.  */
static const double
  s0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
  s1 = 2.14982415960608852501e-01, /* 0x3FCB848B, 0x36E20878 */
  s2 = 3.25778796408930981787e-01, /* 0x3FD4D98F, 0x4F139F59 */
  s3 = 1.46350472652464452805e-01, /* 0x3FC2BB9C, 0xBEE5F2F7 */
  s4 = 2.66422703033638609560e-02, /* 0x3F9B481C, 0x7E939961 */
  s5 = 1.84028451407337715652e-03, /* 0x3F5E26B6, 0x7368F239 */
  s6 = 3.19475326584100867617e-05; /* 0x3F00BFEC, 0xDD17E945 */

static const double
  r1 = 1.39200533467621045958e+00, /* 0x3FF645A7, 0x62C4AB74 */
  r2 = 7.21935547567138069525e-01, /* 0x3FE71A18, 0x93D3DCDC */
  r3 = 1.71933865632803078993e-01, /* 0x3FC601ED, 0xCCFBDF27 */
  r4 = 1.86459191715652901344e-02, /* 0x3F9317EA, 0x742ED475 */
  r5 = 7.77942496381893596434e-04, /* 0x3F497DDA, 0xCA41A95B */
  r6 = 7.32668430744625636189e-06; /* 0x3EDEBAF7, 0xA5B38140 */

/* w0..w6: coefficients of the polynomial in 1/x used by the
   8 <= x < 2**58 branch.  */
static const double
  w0 = 4.18938533204672725052e-01, /* 0x3FDACFE3, 0x90C97D69 */
  w1 = 8.33333333333329678849e-02, /* 0x3FB55555, 0x5555553B */
  w2 = -2.77777777728775536470e-03, /* 0xBF66C16C, 0x16B02E5C */
  w3 = 7.93650558643019558500e-04, /* 0x3F4A019F, 0x98CF38B6 */
  w4 = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */
  w5 = 8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */
  w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */
|
|
|
|
static double
|
|
sin_pi(double x)
|
|
{
|
|
double y,z;
|
|
int n,ix;
|
|
|
|
GET_HIGH_WORD(ix,x);
|
|
ix &= 0x7fffffff;
|
|
|
|
if(ix<0x3fd00000) return __sin(pi*x);
|
|
y = -x; /* x is assume negative */
|
|
|
|
/*
|
|
* argument reduction, make sure inexact flag not raised if input
|
|
* is an integer
|
|
*/
|
|
z = __floor(y);
|
|
if(z!=y) { /* inexact anyway */
|
|
y *= 0.5;
|
|
y = 2.0*(y - __floor(y)); /* y = |x| mod 2.0 */
|
|
n = (int) (y*4.0);
|
|
} else {
|
|
if(ix>=0x43400000) {
|
|
y = zero; n = 0; /* y must be even */
|
|
} else {
|
|
if(ix<0x43300000) z = y+two52; /* exact */
|
|
GET_LOW_WORD(n,z);
|
|
n &= 1;
|
|
y = n;
|
|
n<<= 2;
|
|
}
|
|
}
|
|
switch (n) {
|
|
case 0: y = __sin(pi*y); break;
|
|
case 1:
|
|
case 2: y = __cos(pi*(0.5-y)); break;
|
|
case 3:
|
|
case 4: y = __sin(pi*(one-y)); break;
|
|
case 5:
|
|
case 6: y = -__cos(pi*(y-1.5)); break;
|
|
default: y = __sin(pi*(y-2.0)); break;
|
|
}
|
|
return -y;
|
|
}
|
|
|
|
|
|
double
|
|
__ieee754_lgamma_r(double x, int *signgamp)
|
|
{
|
|
double t,y,z,nadj,p,p1,p2,p3,q,r,w;
|
|
int i,hx,lx,ix;
|
|
|
|
EXTRACT_WORDS(hx,lx,x);
|
|
|
|
/* purge off +-inf, NaN, +-0, and negative arguments */
|
|
*signgamp = 1;
|
|
ix = hx&0x7fffffff;
|
|
if(__builtin_expect(ix>=0x7ff00000, 0)) return x*x;
|
|
if(__builtin_expect((ix|lx)==0, 0))
|
|
{
|
|
if (hx < 0)
|
|
*signgamp = -1;
|
|
return one/fabs(x);
|
|
}
|
|
if(__builtin_expect(ix<0x3b900000, 0)) {
|
|
/* |x|<2**-70, return -log(|x|) */
|
|
if(hx<0) {
|
|
*signgamp = -1;
|
|
return -__ieee754_log(-x);
|
|
} else return -__ieee754_log(x);
|
|
}
|
|
if(hx<0) {
|
|
if(__builtin_expect(ix>=0x43300000, 0))
|
|
/* |x|>=2**52, must be -integer */
|
|
return x/zero;
|
|
if (x < -2.0 && x > -28.0)
|
|
return __lgamma_neg (x, signgamp);
|
|
t = sin_pi(x);
|
|
if(t==zero) return one/fabsf(t); /* -integer */
|
|
nadj = __ieee754_log(pi/fabs(t*x));
|
|
if(t<zero) *signgamp = -1;
|
|
x = -x;
|
|
}
|
|
|
|
/* purge off 1 and 2 */
|
|
if((((ix-0x3ff00000)|lx)==0)||(((ix-0x40000000)|lx)==0)) r = 0;
|
|
/* for x < 2.0 */
|
|
else if(ix<0x40000000) {
|
|
if(ix<=0x3feccccc) { /* lgamma(x) = lgamma(x+1)-log(x) */
|
|
r = -__ieee754_log(x);
|
|
if(ix>=0x3FE76944) {y = one-x; i= 0;}
|
|
else if(ix>=0x3FCDA661) {y= x-(tc-one); i=1;}
|
|
else {y = x; i=2;}
|
|
} else {
|
|
r = zero;
|
|
if(ix>=0x3FFBB4C3) {y=2.0-x;i=0;} /* [1.7316,2] */
|
|
else if(ix>=0x3FF3B4C4) {y=x-tc;i=1;} /* [1.23,1.73] */
|
|
else {y=x-one;i=2;}
|
|
}
|
|
switch(i) {
|
|
case 0:
|
|
z = y*y;
|
|
p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10))));
|
|
p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11)))));
|
|
p = y*p1+p2;
|
|
r += (p-0.5*y); break;
|
|
case 1:
|
|
z = y*y;
|
|
w = z*y;
|
|
p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12))); /* parallel comp */
|
|
p2 = t1+w*(t4+w*(t7+w*(t10+w*t13)));
|
|
p3 = t2+w*(t5+w*(t8+w*(t11+w*t14)));
|
|
p = z*p1-(tt-w*(p2+y*p3));
|
|
r += (tf + p); break;
|
|
case 2:
|
|
p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5)))));
|
|
p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5))));
|
|
r += (-0.5*y + p1/p2);
|
|
}
|
|
}
|
|
else if(ix<0x40200000) { /* x < 8.0 */
|
|
i = (int)x;
|
|
t = zero;
|
|
y = x-(double)i;
|
|
p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
|
|
q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))));
|
|
r = half*y+p/q;
|
|
z = one; /* lgamma(1+s) = log(s) + lgamma(s) */
|
|
switch(i) {
|
|
case 7: z *= (y+6.0); /* FALLTHRU */
|
|
case 6: z *= (y+5.0); /* FALLTHRU */
|
|
case 5: z *= (y+4.0); /* FALLTHRU */
|
|
case 4: z *= (y+3.0); /* FALLTHRU */
|
|
case 3: z *= (y+2.0); /* FALLTHRU */
|
|
r += __ieee754_log(z); break;
|
|
}
|
|
/* 8.0 <= x < 2**58 */
|
|
} else if (ix < 0x43900000) {
|
|
t = __ieee754_log(x);
|
|
z = one/x;
|
|
y = z*z;
|
|
w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6)))));
|
|
r = (x-half)*(t-one)+w;
|
|
} else
|
|
/* 2**58 <= x <= inf */
|
|
r = math_narrow_eval (x*(__ieee754_log(x)-one));
|
|
/* NADJ is set for negative arguments but not otherwise,
|
|
resulting in warnings that it may be used uninitialized
|
|
although in the cases where it is used it has always been
|
|
set. */
|
|
DIAG_PUSH_NEEDS_COMMENT;
|
|
#if __GNUC_PREREQ (4, 7)
|
|
DIAG_IGNORE_NEEDS_COMMENT (4.9, "-Wmaybe-uninitialized");
|
|
#else
|
|
DIAG_IGNORE_NEEDS_COMMENT (4.9, "-Wuninitialized");
|
|
#endif
|
|
if(hx<0) r = nadj - r;
|
|
DIAG_POP_NEEDS_COMMENT;
|
|
return r;
|
|
}
|
|
strong_alias (__ieee754_lgamma_r, __lgamma_r_finite)
|