glibc/sysdeps/ieee754/flt-32/e_j1f.c
Szabolcs Nagy 347a5b592c math: Fix float conversion regressions with gcc-12 [BZ #28713]
Converting double precision constants to float is now affected by the
runtime dynamic rounding mode instead of being evaluated at compile
time with default rounding mode (except static object initializers).

This can change the computed result and cause performance regression.
The known correctness issues (increased ulp errors) are already fixed,
this patch fixes remaining cases of unnecessary runtime conversions.

Add float M_* macros to math.h as new GNU extension API.  To avoid
conversions the new M_* macros are used and instead of casting double
literals to float, use float literals (only required if the conversion
is inexact).

The patch was tested on aarch64 where the following symbols had new
spurious conversion instructions that got fixed:

  __clog10f
  __gammaf_r_finite@GLIBC_2.17
  __j0f_finite@GLIBC_2.17
  __j1f_finite@GLIBC_2.17
  __jnf_finite@GLIBC_2.17
  __kernel_casinhf
  __lgamma_negf
  __log1pf
  __y0f_finite@GLIBC_2.17
  __y1f_finite@GLIBC_2.17
  cacosf
  cacoshf
  casinhf
  catanf
  catanhf
  clogf
  gammaf_positive

Fixes bug 28713.

Reviewed-by: Paul Zimmermann <Paul.Zimmermann@inria.fr>
2022-01-10 14:27:17 +00:00

803 lines
33 KiB
C

/* e_j1f.c -- float version of e_j1.c.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include <errno.h>
#include <float.h>
#include <math.h>
#include <math-narrow-eval.h>
#include <math_private.h>
#include <fenv_private.h>
#include <math-underflow.h>
#include <libm-alias-finite.h>
#include <reduce_aux.h>
static float ponef(float), qonef(float);
static const float
huge = 1e30,
one = 1.0,
invsqrtpi= 5.6418961287e-01, /* 0x3f106ebb */
tpi = 6.3661974669e-01, /* 0x3f22f983 */
/* R0/S0 on [0,2] */
r00 = -6.2500000000e-02, /* 0xbd800000 */
r01 = 1.4070566976e-03, /* 0x3ab86cfd */
r02 = -1.5995563444e-05, /* 0xb7862e36 */
r03 = 4.9672799207e-08, /* 0x335557d2 */
s01 = 1.9153760746e-02, /* 0x3c9ce859 */
s02 = 1.8594678841e-04, /* 0x3942fab6 */
s03 = 1.1771846857e-06, /* 0x359dffc2 */
s04 = 5.0463624390e-09, /* 0x31ad6446 */
s05 = 1.2354227016e-11; /* 0x2d59567e */
static const float zero = 0.0;
/* This is the nearest approximation of the first positive zero of j1. */
#define FIRST_ZERO_J1 0x3.d4eabp+0f
#define SMALL_SIZE 64
/* The following table contains successive zeros of j1 and degree-3
polynomial approximations of j1 around these zeros: Pj[0] for the first
positive zero (3.831705), Pj[1] for the second one (7.015586), and so on.
Each line contains:
{x0, xmid, x1, p0, p1, p2, p3}
where [x0,x1] is the interval around the zero, xmid is the binary32 number
closest to the zero, and p0+p1*x+p2*x^2+p3*x^3 is the approximation
polynomial. Each polynomial was generated using Sollya on the interval
[x0,x1] around the corresponding zero where the error exceeds 9 ulps
for the alternate code. Degree 3 is enough to get an error at most
9 ulps, except around the first zero.
*/
static const float Pj[SMALL_SIZE][7] = {
/* For index 0, we use a degree-4 polynomial generated by Sollya, with the
coefficient of degree 4 hard-coded in j1f_near_root(). */
{ 0x1.e09e5ep+1, 0x1.ea7558p+1, 0x1.ef7352p+1, -0x8.4f069p-28,
-0x6.71b3d8p-4, 0xd.744a2p-8, 0xd.acd9p-8/*, -0x1.3e51aap-8*/ }, /* 0 */
{ 0x1.bdb4c2p+2, 0x1.c0ff6p+2, 0x1.c27a8cp+2, 0xe.c2858p-28,
0x4.cd464p-4, -0x5.79b71p-8, -0xc.11124p-8 }, /* 1 */
{ 0x1.43b214p+3, 0x1.458d0ep+3, 0x1.460ccep+3, -0x1.e7acecp-24,
-0x3.feca9p-4, 0x3.2470f8p-8, 0xa.625b7p-8 }, /* 2 */
{ 0x1.a9c98p+3, 0x1.aa5bbp+3, 0x1.aaa4d8p+3, 0x1.698158p-24,
0x3.7e666cp-4, -0x2.1900ap-8, -0x9.2755p-8 }, /* 3 */
{ 0x1.073be4p+4, 0x1.0787b4p+4, 0x1.07aed8p+4, -0x1.f5f658p-24,
-0x3.24b8ep-4, 0x1.86e35cp-8, 0x8.4e4bbp-8 }, /* 4 */
{ 0x1.39ae2ap+4, 0x1.39da8ep+4, 0x1.39f3dap+4, -0x1.4e744p-24,
0x2.e18a24p-4, -0x1.2ccd16p-8, -0x7.a27ep-8 }, /* 5 */
{ 0x1.6bfa46p+4, 0x1.6c294ep+4, 0x1.6c412p+4, 0xa.3fb7fp-28,
-0x2.acc9c4p-4, 0xf.0b783p-12, 0x7.1c0d3p-8 }, /* 6 */
{ 0x1.9e42bep+4, 0x1.9e757p+4, 0x1.9e876ep+4, -0x2.29f6f4p-24,
0x2.81f21p-4, -0xc.641bp-12, -0x6.a7ea58p-8 }, /* 7 */
{ 0x1.d08a3ep+4, 0x1.d0bfdp+4, 0x1.d0cd3cp+4, -0x1.b5d196p-24,
-0x2.5e40e4p-4, 0xa.7059fp-12, 0x6.4d6bfp-8 }, /* 8 */
{ 0x1.017794p+5, 0x1.018476p+5, 0x1.018b8cp+5, -0x4.0e001p-24,
0x2.3febep-4, -0x8.f23aap-12, -0x6.0102cp-8 }, /* 9 */
{ 0x1.1a9e78p+5, 0x1.1aa89p+5, 0x1.1aaf88p+5, 0x3.b26f2p-24,
-0x2.25babp-4, 0x7.c6d948p-12, 0x5.a1d988p-8 }, /* 10 */
{ 0x1.33bddep+5, 0x1.33cc52p+5, 0x1.33d2e4p+5, -0xf.c8cdap-28,
0x2.0ed05p-4, -0x6.d97dbp-12, -0x5.8da498p-8 }, /* 11 */
{ 0x1.4ce7cp+5, 0x1.4cefdp+5, 0x1.4cf7d4p+5, -0x3.9940e4p-24,
-0x1.fa8b4p-4, 0x6.16108p-12, 0x5.4355e8p-8 }, /* 12 */
{ 0x1.6603e8p+5, 0x1.661316p+5, 0x1.66173ap+5, 0x9.da15dp-28,
0x1.e8727ep-4, -0x5.742468p-12, -0x5.117c28p-8 }, /* 13 */
{ 0x1.7f2ebcp+5, 0x1.7f3632p+5, 0x1.7f3a7ep+5, -0x3.39b218p-24,
-0x1.d8293ap-4, 0x4.ee3348p-12, 0x4.f9bep-8 }, /* 14 */
{ 0x1.9850e6p+5, 0x1.985928p+5, 0x1.985d9ep+5, -0x3.7b5108p-24,
0x1.c96702p-4, -0x4.7b0d08p-12, -0x4.c784a8p-8 }, /* 15 */
{ 0x1.b172e8p+5, 0x1.b17c04p+5, 0x1.b1805cp+5, -0x1.91e43ep-24,
-0x1.bbf246p-4, 0x4.18ad78p-12, 0x4.9bfae8p-8 }, /* 16 */
{ 0x1.ca955ap+5, 0x1.ca9ec6p+5, 0x1.caa2a4p+5, 0x1.28453cp-24,
0x1.af9cb4p-4, -0x3.c3a494p-12, -0x4.78b69p-8 }, /* 17 */
{ 0x1.e3bc94p+5, 0x1.e3c174p+5, 0x1.e3c64p+5, -0x2.e7fef4p-24,
-0x1.a4407ep-4, 0x3.79b228p-12, 0x4.874f7p-8 }, /* 18 */
{ 0x1.fcdf16p+5, 0x1.fce40ep+5, 0x1.fce71p+5, -0x3.23b2fcp-24,
0x1.99be76p-4, -0x3.39ad7cp-12, -0x4.92a55p-8 }, /* 19 */
{ 0x1.0afe34p+6, 0x1.0b034ep+6, 0x1.0b054ap+6, -0xd.19e93p-28,
-0x1.8ffc9cp-4, 0x2.fee7f8p-12, 0x4.2d33b8p-8 }, /* 20 */
{ 0x1.179344p+6, 0x1.17948ep+6, 0x1.1795bp+6, 0x1.c2ac48p-24,
0x1.86e51cp-4, -0x2.cc5abp-12, -0x4.866d08p-8 }, /* 21 */
{ 0x1.24231ep+6, 0x1.2425c8p+6, 0x1.2426e2p+6, -0xd.31027p-28,
-0x1.7e656ep-4, 0x2.9db23cp-12, 0x3.cc63c8p-8 }, /* 22 */
{ 0x1.30b5a8p+6, 0x1.30b6fep+6, 0x1.30b84ep+6, 0x5.b5e53p-24,
0x1.766dc2p-4, -0x2.754cfcp-12, -0x3.c39bb4p-8 }, /* 23 */
{ 0x1.3d46ccp+6, 0x1.3d482ep+6, 0x1.3d495ep+6, -0x1.340a8ap-24,
-0x1.6ef07ep-4, 0x2.4ff9d4p-12, 0x3.9b36e4p-8 }, /* 24 */
{ 0x1.49d688p+6, 0x1.49d95ap+6, 0x1.49dabep+6, -0x3.ba66p-24,
0x1.67e1dcp-4, -0x2.2f32b8p-12, -0x3.e2aaf4p-8 }, /* 25 */
{ 0x1.566916p+6, 0x1.566a84p+6, 0x1.566bcp+6, 0x6.47ca5p-28,
-0x1.61379ap-4, 0x2.1096acp-12, 0x4.2d0968p-8 }, /* 26 */
{ 0x1.62f8dap+6, 0x1.62fbaap+6, 0x1.62fc9cp+6, -0x2.12affp-24,
0x1.5ae8c4p-4, -0x1.f32444p-12, -0x3.9e592p-8 }, /* 27 */
{ 0x1.6f89e6p+6, 0x1.6f8ccep+6, 0x1.6f8e34p+6, -0x7.4853ap-28,
-0x1.54ed76p-4, 0x1.db004ap-12, 0x3.907034p-8 }, /* 28 */
{ 0x1.7c1c6ap+6, 0x1.7c1deep+6, 0x1.7c1f4cp+6, -0x4.f0a998p-24,
0x1.4f3ebcp-4, -0x1.c26808p-12, -0x2.da8df8p-8 }, /* 29 */
{ 0x1.88adaep+6, 0x1.88af0ep+6, 0x1.88afc4p+6, -0x1.80c246p-24,
-0x1.49d668p-4, 0x1.aebc26p-12, 0x3.af7b5cp-8 }, /* 30 */
{ 0x1.953d7p+6, 0x1.95402ap+6, 0x1.9540ep+6, -0x2.22aff8p-24,
0x1.44aefap-4, -0x1.99f25p-12, -0x3.5e9198p-8 }, /* 31 */
{ 0x1.a1d01ep+6, 0x1.a1d146p+6, 0x1.a1d20ap+6, -0x3.aad6d4p-24,
-0x1.3fc386p-4, 0x1.892858p-12, 0x3.fe0184p-8 }, /* 32 */
{ 0x1.ae60ecp+6, 0x1.ae625ep+6, 0x1.ae6326p+6, -0x2.010be4p-24,
0x1.3b0fa4p-4, -0x1.7539ap-12, -0x2.b2c9bp-8 }, /* 33 */
{ 0x1.baf234p+6, 0x1.baf376p+6, 0x1.baf442p+6, -0xd.4fd17p-32,
-0x1.368f5cp-4, 0x1.6734e4p-12, 0x3.59f514p-8 }, /* 34 */
{ 0x1.c782e6p+6, 0x1.c7848cp+6, 0x1.c78516p+6, -0xa.d662dp-28,
0x1.323f18p-4, -0x1.571c02p-12, -0x3.2be5bp-8 }, /* 35 */
{ 0x1.d4144ep+6, 0x1.d415ap+6, 0x1.d41622p+6, 0x4.9f217p-24,
-0x1.2e1b9ap-4, 0x1.4a2edap-12, 0x3.a4e96p-8 }, /* 36 */
{ 0x1.e0a5ep+6, 0x1.e0a6b4p+6, 0x1.e0a788p+6, -0x2.d167p-24,
0x1.2a21eep-4, -0x1.3c4b46p-12, -0x4.9e0978p-8 }, /* 37 */
{ 0x1.ed36eep+6, 0x1.ed37c8p+6, 0x1.ed3892p+6, -0x4.15a83p-24,
-0x1.264f66p-4, 0x1.31dea4p-12, 0x3.d125ecp-8 }, /* 38 */
{ 0x1.f9c77p+6, 0x1.f9c8d8p+6, 0x1.f9c9acp+6, -0x2.a5bbbp-24,
0x1.22a192p-4, -0x1.25e59ep-12, -0x2.ef6934p-8 }, /* 39 */
{ 0x1.032c54p+7, 0x1.032cf4p+7, 0x1.032d66p+7, 0x4.e828bp-24,
-0x1.1f1634p-4, 0x1.1c2394p-12, 0x3.6d744cp-8 }, /* 40 */
{ 0x1.09750cp+7, 0x1.09757cp+7, 0x1.0975b6p+7, -0x3.28a3bcp-24,
0x1.1bab3ep-4, -0x1.1569cep-12, -0x5.84da7p-8 }, /* 41 */
{ 0x1.0fbd9ap+7, 0x1.0fbe04p+7, 0x1.0fbe5ep+7, -0x2.2f667p-24,
-0x1.185eccp-4, 0x1.07f42cp-12, 0x2.87896cp-8 }, /* 42 */
{ 0x1.160628p+7, 0x1.16068ap+7, 0x1.1606cep+7, -0x6.9097dp-24,
0x1.152f28p-4, -0x1.0227fep-12, -0x5.da6e6p-8 }, /* 43 */
{ 0x1.1c4e9ap+7, 0x1.1c4f12p+7, 0x1.1c4f7cp+7, -0x5.1b408p-24,
-0x1.121abp-4, 0xf.6efcp-16, 0x2.c5e954p-8 }, /* 44 */
{ 0x1.2296aap+7, 0x1.229798p+7, 0x1.2297d4p+7, 0x2.70d0dp-24,
0x1.0f1ffp-4, -0xf.523f5p-16, -0x3.5c0568p-8 }, /* 45 */
{ 0x1.28dfa4p+7, 0x1.28e01ep+7, 0x1.28e054p+7, -0x2.7c176p-24,
-0x1.0c3d8ap-4, 0xe.8329ap-16, 0x3.5eb34p-8 }, /* 46 */
{ 0x1.2f282ap+7, 0x1.2f28a4p+7, 0x1.2f28dep+7, 0x4.fd6368p-24,
0x1.097236p-4, -0xe.17299p-16, -0x3.73a2e4p-8 }, /* 47 */
{ 0x1.3570bp+7, 0x1.357128p+7, 0x1.35716p+7, 0x6.b05f68p-24,
-0x1.06bccap-4, 0xd.527b8p-16, 0x2.b8bf9cp-8 }, /* 48 */
{ 0x1.3bb932p+7, 0x1.3bb9aep+7, 0x1.3bb9eap+7, 0x4.0d622p-28,
0x1.041c28p-4, -0xd.0ac11p-16, -0x1.65f2ccp-8 }, /* 49 */
{ 0x1.4201b6p+7, 0x1.420232p+7, 0x1.42027p+7, 0x7.0d98cp-24,
-0x1.018f52p-4, 0xc.c4d8ep-16, 0x2.8f250cp-8 }, /* 50 */
{ 0x1.484a78p+7, 0x1.484ab8p+7, 0x1.484af4p+7, 0x3.93d10cp-24,
0xf.f154fp-8, -0xc.7b7fep-16, -0x3.6b6e4cp-8 }, /* 51 */
{ 0x1.4e92c8p+7, 0x1.4e933cp+7, 0x1.4e9368p+7, 0xd.88185p-32,
-0xf.cad3fp-8, 0xc.1462p-16, 0x2.bd66p-8 }, /* 52 */
{ 0x1.54db84p+7, 0x1.54dbcp+7, 0x1.54dbf4p+7, -0x1.fe6b92p-24,
0xf.a564cp-8, -0xb.c4e6cp-16, -0x3.d51decp-8 }, /* 53 */
{ 0x1.5b23c4p+7, 0x1.5b2444p+7, 0x1.5b2486p+7, 0x2.6137f4p-24,
-0xf.80faep-8, 0xb.5199ep-16, 0x1.9ca85ap-8 }, /* 54 */
{ 0x1.616c62p+7, 0x1.616cc8p+7, 0x1.616d0ap+7, -0x1.55468p-24,
0xf.5d8acp-8, -0xb.21d16p-16, -0x1.b8809ap-8 }, /* 55 */
{ 0x1.67b4fp+7, 0x1.67b54cp+7, 0x1.67b588p+7, -0x1.08c6bep-24,
-0xf.3b096p-8, 0xa.e65efp-16, 0x3.642304p-8 }, /* 56 */
{ 0x1.6dfd8ep+7, 0x1.6dfddp+7, 0x1.6dfe0ap+7, 0x4.9ebfa8p-24,
0xf.196c7p-8, -0xa.ba8c8p-16, -0x5.ad6a2p-8 }, /* 57 */
{ 0x1.74461p+7, 0x1.744652p+7, 0x1.744692p+7, 0x5.a4017p-24,
-0xe.f8aa5p-8, 0xa.49748p-16, 0x2.a86498p-8 }, /* 58 */
{ 0x1.7a8e5ep+7, 0x1.7a8ed6p+7, 0x1.7a8ef8p+7, 0x3.bcb2a8p-28,
0xe.d8b9dp-8, -0x9.c43bep-16, -0x1.e7124ap-8 }, /* 59 */
{ 0x1.80d6cep+7, 0x1.80d75ap+7, 0x1.80d78ap+7, -0x7.1091fp-24,
-0xe.b9925p-8, 0x9.c43dap-16, 0x1.aba86p-8 }, /* 60 */
{ 0x1.871f58p+7, 0x1.871fdcp+7, 0x1.87201ep+7, 0x2.ca1cf4p-28,
0xe.9b2bep-8, -0x9.843b3p-16, -0x2.093e68p-8 }, /* 61 */
{ 0x1.8d67e8p+7, 0x1.8d685ep+7, 0x1.8d688ep+7, 0x5.aa8908p-24,
-0xe.7d7ecp-8, 0x9.501a8p-16, 0x2.54a754p-8 }, /* 62 */
{ 0x1.93b09cp+7, 0x1.93b0e2p+7, 0x1.93b10ep+7, 0x3.d9cd9cp-24,
0xe.6083ap-8, -0x9.45dadp-16, -0x5.112908p-8 }, /* 63 */
};
/* Formula page 5 of https://www.cl.cam.ac.uk/~jrh13/papers/bessel.pdf:
j1f(x) ~ sqrt(2/(pi*x))*beta1(x)*cos(x-3pi/4-alpha1(x))
where beta1(x) = 1 + 3/(16*x^2) - 99/(512*x^4)
and alpha1(x) = -3/(8*x) + 21/(128*x^3) - 1899/(5120*x^5). */
static float
j1f_asympt (float x)
{
float cst = 0xc.c422ap-4; /* sqrt(2/pi) rounded to nearest */
if (x < 0)
{
x = -x;
cst = -cst;
}
double y = 1.0 / (double) x;
double y2 = y * y;
double beta1 = 1.0f + y2 * (0x3p-4 - 0x3.18p-4 * y2);
double alpha1;
alpha1 = y * (-0x6p-4 + y2 * (0x2.ap-4 - 0x5.ef33333333334p-4 * y2));
double h;
int n;
h = reduce_aux (x, &n, alpha1);
n--; /* Subtract pi/2. */
/* Now x - 3pi/4 - alpha1 = h + n*pi/2 mod (2*pi). */
float xr = (float) h;
n = n & 3;
float t = cst / sqrtf (x) * (float) beta1;
if (n == 0)
return t * __cosf (xr);
else if (n == 2) /* cos(x+pi) = -cos(x) */
return -t * __cosf (xr);
else if (n == 1) /* cos(x+pi/2) = -sin(x) */
return -t * __sinf (xr);
else /* cos(x+3pi/2) = sin(x) */
return t * __sinf (xr);
}
/* Special code for x near a root of j1.
z is the value computed by the generic code.
For small x, we use a polynomial approximating j1 around its root.
For large x, we use an asymptotic formula (j1f_asympt). */
static float
j1f_near_root (float x, float z)
{
float index_f, sign = 1.0f;
int index;
if (x < 0)
{
x = -x;
sign = -1.0f;
}
index_f = roundf ((x - FIRST_ZERO_J1) / M_PIf);
if (index_f >= SMALL_SIZE)
return sign * j1f_asympt (x);
index = (int) index_f;
const float *p = Pj[index];
float x0 = p[0];
float x1 = p[2];
/* If not in the interval [x0,x1] around xmid, return the value z. */
if (! (x0 <= x && x <= x1))
return z;
float xmid = p[1];
float y = x - xmid;
float p6 = (index > 0) ? p[6] : p[6] + y * -0x1.3e51aap-8f;
return sign * (p[3] + y * (p[4] + y * (p[5] + y * p6)));
}
float
__ieee754_j1f(float x)
{
float z, s,c,ss,cc,r,u,v,y;
int32_t hx,ix;
GET_FLOAT_WORD(hx,x);
ix = hx&0x7fffffff;
if(__builtin_expect(ix>=0x7f800000, 0)) return one/x;
y = fabsf(x);
if(ix >= 0x40000000) { /* |x| >= 2.0 */
SET_RESTORE_ROUNDF (FE_TONEAREST);
__sincosf (y, &s, &c);
ss = -s-c;
cc = s-c;
if (ix >= 0x7f000000)
/* x >= 2^127: use asymptotic expansion. */
return j1f_asympt (x);
/* Now we are sure that x+x cannot overflow. */
z = __cosf(y+y);
if ((s*c)>zero) cc = z/ss;
else ss = z/cc;
/*
* j1(x) = 1/sqrt(pi) * (P(1,x)*cc - Q(1,x)*ss) / sqrt(x)
* y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x)
*/
if (ix <= 0x5c000000)
{
u = ponef(y); v = qonef(y);
cc = u*cc-v*ss;
}
z = (invsqrtpi * cc) / sqrtf(y);
/* Adjust sign of z. */
z = (hx < 0) ? -z : z;
/* The following threshold is optimal: for x=0x1.e09e5ep+1
and rounding upwards, cc=0x1.b79638p-4 and z is 10 ulps
far from the correctly rounded value. */
float threshold = 0x1.b79638p-4;
if (fabsf (cc) > threshold)
return z;
else
return j1f_near_root (x, z);
}
if(__builtin_expect(ix<0x32000000, 0)) { /* |x|<2**-27 */
if(huge+x>one) { /* inexact if x!=0 necessary */
float ret = math_narrow_eval ((float) 0.5 * x);
math_check_force_underflow (ret);
if (ret == 0 && x != 0)
__set_errno (ERANGE);
return ret;
}
}
z = x*x;
r = z*(r00+z*(r01+z*(r02+z*r03)));
s = one+z*(s01+z*(s02+z*(s03+z*(s04+z*s05))));
r *= x;
return(x*(float)0.5+r/s);
}
libm_alias_finite (__ieee754_j1f, __j1f)
static const float U0[5] = {
-1.9605709612e-01, /* 0xbe48c331 */
5.0443872809e-02, /* 0x3d4e9e3c */
-1.9125689287e-03, /* 0xbafaaf2a */
2.3525259166e-05, /* 0x37c5581c */
-9.1909917899e-08, /* 0xb3c56003 */
};
static const float V0[5] = {
1.9916731864e-02, /* 0x3ca3286a */
2.0255257550e-04, /* 0x3954644b */
1.3560879779e-06, /* 0x35b602d4 */
6.2274145840e-09, /* 0x31d5f8eb */
1.6655924903e-11, /* 0x2d9281cf */
};
/* This is the nearest approximation of the first zero of y1. */
#define FIRST_ZERO_Y1 0x2.3277dcp+0f
/* The following table contains successive zeros of y1 and degree-3
polynomial approximations of y1 around these zeros: Py[0] for the first
positive zero (2.197141), Py[1] for the second one (5.429681), and so on.
Each line contains:
{x0, xmid, x1, p0, p1, p2, p3}
where [x0,x1] is the interval around the zero, xmid is the binary32 number
closest to the zero, and p0+p1*x+p2*x^2+p3*x^3 is the approximation
polynomial. Each polynomial was generated using Sollya on the interval
[x0,x1] around the corresponding zero where the error exceeds 9 ulps
for the alternate code. Degree 3 is enough, except for the first roots.
*/
static const float Py[SMALL_SIZE][7] = {
/* For index 0, we use a degree-5 polynomial generated by Sollya, with the
coefficients of degree 4 and 5 hard-coded in y1f_near_root(). */
{ 0x1.f7f16ap+0, 0x1.193beep+1, 0x1.2105dcp+1, 0xb.96749p-28,
0x8.55241p-4, -0x1.e570bp-4, -0x8.68b61p-8
/*, -0x1.28043p-8, 0x2.50e83p-8*/ }, /* 0 */
/* For index 1, we use a degree-4 polynomial generated by Sollya, with the
coefficient of degree 4 hard-coded in y1f_near_root(). */
{ 0x1.55c6d2p+2, 0x1.5b7fe4p+2, 0x1.5cf8cap+2, 0x1.3c7822p-24,
-0x5.71f158p-4, 0x8.05cb4p-8, 0xd.0b15p-8/*, -0xf.ff6b8p-12*/ }, /* 1 */
{ 0x1.113c6p+3, 0x1.13127ap+3, 0x1.1387dcp+3, -0x1.f3ad8ep-24,
0x4.57e66p-4, -0x4.0afb58p-8, -0xb.29207p-8 }, /* 2 */
{ 0x1.76e7dep+3, 0x1.77f914p+3, 0x1.786a6ap+3, -0xd.5608fp-28,
-0x3.b829d4p-4, 0x2.8852cp-8, 0x9.b70e3p-8 }, /* 3 */
{ 0x1.dc2794p+3, 0x1.dcb7d8p+3, 0x1.dd032p+3, -0xe.a7c04p-28,
0x3.4e0458p-4, -0x1.c64b18p-8, -0x8.b0e7fp-8 }, /* 4 */
{ 0x1.20874p+4, 0x1.20b1c6p+4, 0x1.20c71p+4, 0x1.c2626p-24,
-0x3.00f03cp-4, 0x1.54f806p-8, 0x7.f9cf9p-8 }, /* 5 */
{ 0x1.52d848p+4, 0x1.530254p+4, 0x1.531962p+4, -0x1.9503ecp-24,
0x2.c5b29cp-4, -0x1.0bf28p-8, -0x7.562e58p-8 }, /* 6 */
{ 0x1.851e64p+4, 0x1.854fa4p+4, 0x1.85679p+4, -0x2.8d40fcp-24,
-0x2.96547p-4, 0xd.9c38bp-12, 0x6.dcbf8p-8 }, /* 7 */
{ 0x1.b7808ep+4, 0x1.b79acep+4, 0x1.b7b2a8p+4, -0x2.36df5cp-24,
0x2.6f55ap-4, -0xb.57f9fp-12, -0x6.82569p-8 }, /* 8 */
{ 0x1.e9c8fp+4, 0x1.e9e48p+4, 0x1.e9f24p+4, 0xd.e2eb7p-28,
-0x2.4e8104p-4, 0x9.a4be2p-12, 0x6.2541fp-8 }, /* 9 */
{ 0x1.0e0808p+5, 0x1.0e169p+5, 0x1.0e1d92p+5, -0x2.3070f4p-24,
0x2.325e4cp-4, -0x8.53604p-12, -0x5.ca03a8p-8 }, /* 10 */
{ 0x1.272e08p+5, 0x1.273a7cp+5, 0x1.2741fcp+5, -0x3.525508p-24,
-0x2.19e7dcp-4, 0x7.49d1dp-12, 0x5.9cb02p-8 }, /* 11 */
{ 0x1.404ec6p+5, 0x1.405e18p+5, 0x1.4065cep+5, -0xe.6e158p-28,
0x2.046174p-4, -0x6.71b3dp-12, -0x5.4c3c8p-8 }, /* 12 */
{ 0x1.5971dcp+5, 0x1.598178p+5, 0x1.598592p+5, 0x1.e72698p-24,
-0x1.f13fb2p-4, 0x5.c0f938p-12, 0x5.28ca78p-8 }, /* 13 */
{ 0x1.729c4ep+5, 0x1.72a4a8p+5, 0x1.72a8eap+5, -0x1.5bed9cp-24,
0x1.e018dcp-4, -0x5.2f11e8p-12, -0x5.16ce48p-8 }, /* 14 */
{ 0x1.8bbf4ep+5, 0x1.8bc7b2p+5, 0x1.8bcc1p+5, -0x3.6b654cp-24,
-0x1.d09b2p-4, 0x4.b1747p-12, 0x4.bd22fp-8 }, /* 15 */
{ 0x1.a4e272p+5, 0x1.a4ea9ap+5, 0x1.a4eef4p+5, 0x1.6f11bp-24,
0x1.c28612p-4, -0x4.47462p-12, -0x4.947c5p-8 }, /* 16 */
{ 0x1.be08bep+5, 0x1.be0d68p+5, 0x1.be1088p+5, -0x2.0bc074p-24,
-0x1.b5a622p-4, 0x3.ed52d4p-12, 0x4.b76fc8p-8 }, /* 17 */
{ 0x1.d7272ap+5, 0x1.d7301ep+5, 0x1.d734aep+5, -0x2.87dd4p-24,
0x1.a9d184p-4, -0x3.9cf494p-12, -0x4.6303ep-8 }, /* 18 */
{ 0x1.f0499ap+5, 0x1.f052c4p+5, 0x1.f05758p+5, -0x2.fb964p-24,
-0x1.9ee5eep-4, 0x3.5800dp-12, 0x4.4e9f9p-8 }, /* 19 */
{ 0x1.04b63ap+6, 0x1.04baacp+6, 0x1.04bc92p+6, 0x2.cf5adp-24,
0x1.94c6f4p-4, -0x3.1a83e4p-12, -0x4.2311fp-8 }, /* 20 */
{ 0x1.1146dp+6, 0x1.114beep+6, 0x1.114e12p+6, 0x3.6766fp-24,
-0x1.8b5cccp-4, 0x2.e4a4e4p-12, 0x4.20bf9p-8 }, /* 21 */
{ 0x1.1dda8cp+6, 0x1.1ddd2cp+6, 0x1.1dde7ap+6, 0x3.501424p-24,
0x1.829356p-4, -0x2.b47524p-12, -0x4.04bf18p-8 }, /* 22 */
{ 0x1.2a6bcp+6, 0x1.2a6e64p+6, 0x1.2a6faap+6, -0x5.c05808p-24,
-0x1.7a597ep-4, 0x2.8a0498p-12, 0x4.187258p-8 }, /* 23 */
{ 0x1.36fcd6p+6, 0x1.36ff96p+6, 0x1.3700f6p+6, 0x7.1e1478p-28,
0x1.72a09ap-4, -0x2.61a7fp-12, -0x3.c0b54p-8 }, /* 24 */
{ 0x1.438f46p+6, 0x1.4390c4p+6, 0x1.4392p+6, 0x3.e36e6cp-24,
-0x1.6b5c06p-4, 0x2.3f612p-12, 0x4.18f868p-8 }, /* 25 */
{ 0x1.501f4cp+6, 0x1.5021fp+6, 0x1.50235p+6, 0x1.3f9e5ap-24,
0x1.6480c4p-4, -0x2.1f28fcp-12, -0x3.bb4e3cp-8 }, /* 26 */
{ 0x1.5cb07cp+6, 0x1.5cb318p+6, 0x1.5cb464p+6, -0x2.39e41cp-24,
-0x1.5e0544p-4, 0x2.0189f4p-12, 0x3.8b55acp-8 }, /* 27 */
{ 0x1.694166p+6, 0x1.69443cp+6, 0x1.694594p+6, -0x2.912f84p-24,
0x1.57e12p-4, -0x1.e6fabep-12, -0x3.850174p-8 }, /* 28 */
{ 0x1.75d27cp+6, 0x1.75d55ep+6, 0x1.75d67ep+6, 0x3.d5b00cp-24,
-0x1.520ceep-4, 0x1.d0286ep-12, 0x3.8e7d1p-8 }, /* 29 */
{ 0x1.82653ep+6, 0x1.82667ep+6, 0x1.82674p+6, -0x3.1726ecp-24,
0x1.4c8222p-4, -0x1.b98206p-12, -0x3.f34978p-8 }, /* 30 */
{ 0x1.8ef4b4p+6, 0x1.8ef79cp+6, 0x1.8ef888p+6, 0x1.949e22p-24,
-0x1.473ae6p-4, 0x1.a47388p-12, 0x3.69eefcp-8 }, /* 31 */
{ 0x1.9b8728p+6, 0x1.9b88b8p+6, 0x1.9b896cp+6, -0x5.5553bp-28,
0x1.42320ap-4, -0x1.90f0b8p-12, -0x3.6565p-8 }, /* 32 */
{ 0x1.a8183cp+6, 0x1.a819d2p+6, 0x1.a81aecp+6, 0x3.2df7ecp-28,
-0x1.3d62e4p-4, 0x1.7dae28p-12, 0x2.9eb128p-8 }, /* 33 */
{ 0x1.b4aa1cp+6, 0x1.b4aaeap+6, 0x1.b4abb8p+6, -0x1.e13fcep-24,
0x1.38c948p-4, -0x1.6eb0ecp-12, -0x1.f9ddf8p-8 }, /* 34 */
{ 0x1.c13a7ap+6, 0x1.c13c02p+6, 0x1.c13cbp+6, -0x3.ad9974p-24,
-0x1.34616ep-4, 0x1.5e36ecp-12, 0x2.a9fc5p-8 }, /* 35 */
{ 0x1.cdcb76p+6, 0x1.cdcd16p+6, 0x1.cdcde4p+6, -0x3.6977e8p-24,
0x1.3027fp-4, -0x1.4f703p-12, -0x2.9817d4p-8 }, /* 36 */
{ 0x1.da5cdep+6, 0x1.da5e2ap+6, 0x1.da5efp+6, 0x4.654cbp-24,
-0x1.2c19b6p-4, 0x1.455982p-12, 0x3.f1c564p-8 }, /* 37 */
{ 0x1.e6edccp+6, 0x1.e6ef3ep+6, 0x1.e6f00ap+6, 0x8.825c8p-32,
0x1.2833eep-4, -0x1.39097p-12, -0x3.b2646p-8 }, /* 38 */
{ 0x1.f37f72p+6, 0x1.f3805p+6, 0x1.f3812ap+6, -0x2.0d11d8p-28,
-0x1.24740ap-4, 0x1.2c16p-12, 0x1.fc3804p-8 }, /* 39 */
{ 0x1.000842p+7, 0x1.0008bp+7, 0x1.000908p+7, -0x4.4e495p-24,
0x1.20d7b6p-4, -0x1.20816p-12, -0x2.d1ebe8p-8 }, /* 40 */
{ 0x1.06505cp+7, 0x1.065138p+7, 0x1.06518p+7, 0x4.81c1c8p-24,
-0x1.1d5ccap-4, 0x1.17ad5ap-12, 0x2.fda33p-8 }, /* 41 */
{ 0x1.0c98dap+7, 0x1.0c99cp+7, 0x1.0c9a28p+7, -0xe.99386p-28,
0x1.1a015p-4, -0x1.0bd50ap-12, -0x2.9dfb68p-8 }, /* 42 */
{ 0x1.12e212p+7, 0x1.12e248p+7, 0x1.12e29p+7, -0x6.16f1c8p-24,
-0x1.16c37ap-4, 0x1.0303dcp-12, 0x4.34316p-8 }, /* 43 */
{ 0x1.192a68p+7, 0x1.192acep+7, 0x1.192b02p+7, -0x1.129336p-24,
0x1.13a19ep-4, -0xf.bd247p-16, -0x3.851d18p-8 }, /* 44 */
{ 0x1.1f727p+7, 0x1.1f7354p+7, 0x1.1f73ap+7, 0x5.19c09p-24,
-0x1.109a32p-4, 0xf.09644p-16, 0x2.d78194p-8 }, /* 45 */
{ 0x1.25bb8p+7, 0x1.25bbdap+7, 0x1.25bc12p+7, -0x6.497dp-24,
0x1.0dabc8p-4, -0xe.a1d25p-16, -0x2.3378bp-8 }, /* 46 */
{ 0x1.2c04p+7, 0x1.2c046p+7, 0x1.2c04ap+7, 0x4.e4f338p-24,
-0x1.0ad512p-4, 0xe.52d84p-16, 0x4.3bfa08p-8 }, /* 47 */
{ 0x1.324cbp+7, 0x1.324ce6p+7, 0x1.324d4p+7, -0x1.287c58p-24,
0x1.0814d4p-4, -0xe.03a95p-16, 0x3.9930ap-12 }, /* 48 */
{ 0x1.3894f6p+7, 0x1.38956cp+7, 0x1.3895ap+7, -0x4.b594ep-24,
-0x1.0569fp-4, 0xd.6787ep-16, 0x4.0a5148p-8 }, /* 49 */
{ 0x1.3edd98p+7, 0x1.3eddfp+7, 0x1.3ede2ap+7, -0x3.a8f164p-24,
0x1.02d354p-4, -0xd.0309dp-16, -0x3.2ebfb4p-8 }, /* 50 */
{ 0x1.452638p+7, 0x1.452676p+7, 0x1.4526b4p+7, -0x6.12505p-24,
-0x1.005004p-4, 0xc.a0045p-16, 0x4.87c67p-8 }, /* 51 */
{ 0x1.4b6e8p+7, 0x1.4b6efap+7, 0x1.4b6f34p+7, 0x1.8acf4ep-24,
0xf.ddf16p-8, -0xc.2d207p-16, -0x1.da6c36p-8 }, /* 52 */
{ 0x1.51b742p+7, 0x1.51b77ep+7, 0x1.51b7b2p+7, 0x1.39cf86p-24,
-0xf.b7faep-8, 0xb.db598p-16, -0x8.945b1p-12 }, /* 53 */
{ 0x1.57ffc4p+7, 0x1.580002p+7, 0x1.58003cp+7, -0x2.5f8de8p-24,
0xf.930fep-8, -0xb.91889p-16, -0xa.30df9p-12 }, /* 54 */
{ 0x1.5e483p+7, 0x1.5e4886p+7, 0x1.5e48c8p+7, 0x2.073d64p-24,
-0xf.6f245p-8, 0xb.4085fp-16, 0x2.128188p-8 }, /* 55 */
{ 0x1.64908cp+7, 0x1.64910ap+7, 0x1.64912ap+7, -0x4.ed26ep-28,
0xf.4c2cep-8, -0xa.fe719p-16, -0x2.9374b8p-8 }, /* 56 */
{ 0x1.6ad91ep+7, 0x1.6ad98ep+7, 0x1.6ad9cep+7, -0x2.ae5204p-24,
-0xf.2a1efp-8, 0xa.aa585p-16, 0x2.1c0834p-8 }, /* 57 */
{ 0x1.7121cep+7, 0x1.712212p+7, 0x1.712238p+7, 0x6.d72168p-24,
0xf.08f09p-8, -0xa.7da49p-16, -0x3.4f5f1cp-8 }, /* 58 */
{ 0x1.776a0cp+7, 0x1.776a94p+7, 0x1.776accp+7, 0x2.d3f294p-24,
-0xe.e8986p-8, 0xa.23ccdp-16, 0x2.2a6678p-8 }, /* 59 */
{ 0x1.7db2e8p+7, 0x1.7db318p+7, 0x1.7db35ap+7, 0x3.88c0fp-24,
0xe.c90d7p-8, -0x9.eaeap-16, -0x2.86438cp-8 }, /* 60 */
{ 0x1.83fb56p+7, 0x1.83fb9ap+7, 0x1.83fbep+7, 0x3.d94d34p-24,
-0xe.aa478p-8, 0x9.abac7p-16, 0x1.ac2d84p-8 }, /* 61 */
{ 0x1.8a43e8p+7, 0x1.8a441ep+7, 0x1.8a446p+7, 0x4.66b7ep-24,
0xe.8c3e9p-8, -0x9.87682p-16, -0x7.9ab4a8p-12 }, /* 62 */
{ 0x1.908c6p+7, 0x1.908cap+7, 0x1.908ce6p+7, 0xf.f7ac9p-28,
-0xe.6eeb6p-8, 0x9.4423p-16, 0x4.54c4d8p-8 }, /* 63 */
};
/* Formula page 5 of https://www.cl.cam.ac.uk/~jrh13/papers/bessel.pdf:
y1f(x) ~ sqrt(2/(pi*x))*beta1(x)*sin(x-3pi/4-alpha1(x))
where beta1(x) = 1 + 3/(16*x^2) - 99/(512*x^4)
and alpha1(x) = -3/(8*x) + 21/(128*x^3) - 1899/(5120*x^5). */
static float
y1f_asympt (float x)
{
float cst = 0xc.c422ap-4; /* sqrt(2/pi) rounded to nearest */
double y = 1.0 / (double) x;
double y2 = y * y;
double beta1 = 1.0f + y2 * (0x3p-4 - 0x3.18p-4 * y2);
double alpha1;
alpha1 = y * (-0x6p-4 + y2 * (0x2.ap-4 - 0x5.ef33333333334p-4 * y2));
double h;
int n;
h = reduce_aux (x, &n, alpha1);
n--; /* Subtract pi/2. */
/* Now x - 3pi/4 - alpha1 = h + n*pi/2 mod (2*pi). */
float xr = (float) h;
n = n & 3;
float t = cst / sqrtf (x) * (float) beta1;
if (n == 0)
return t * __sinf (xr);
else if (n == 2) /* sin(x+pi) = -sin(x) */
return -t * __sinf (xr);
else if (n == 1) /* sin(x+pi/2) = cos(x) */
return t * __cosf (xr);
else /* sin(x+3pi/2) = -cos(x) */
return -t * __cosf (xr);
}
/* Special code for x near a root of y1.
z is the value computed by the generic code.
For small x, we use a polynomial approximating y1 around its root.
For large x, we use an asymptotic formula (y1f_asympt). */
static float
y1f_near_root (float x, float z)
{
float index_f;
int index;
index_f = roundf ((x - FIRST_ZERO_Y1) / M_PIf);
if (index_f >= SMALL_SIZE)
return y1f_asympt (x);
index = (int) index_f;
const float *p = Py[index];
float x0 = p[0];
float x1 = p[2];
/* If not in the interval [x0,x1] around xmid, return the value z. */
if (! (x0 <= x && x <= x1))
return z;
float xmid = p[1];
float y = x - xmid, p6;
if (index == 0)
p6 = p[6] + y * (-0x1.28043p-8 + y * 0x2.50e83p-8);
else if (index == 1)
p6 = p[6] + y * -0xf.ff6b8p-12;
else
p6 = p[6];
return p[3] + y * (p[4] + y * (p[5] + y * p6));
}
float
__ieee754_y1f(float x)
{
float z, s,c,ss,cc,u,v;
int32_t hx,ix;
GET_FLOAT_WORD(hx,x);
ix = 0x7fffffff&hx;
/* if Y1(NaN) is NaN, Y1(-inf) is NaN, Y1(inf) is 0 */
if(__builtin_expect(ix>=0x7f800000, 0)) return one/(x+x*x);
if(__builtin_expect(ix==0, 0))
return -1/zero; /* -inf and divide by zero exception. */
if(__builtin_expect(hx<0, 0)) return zero/(zero*x);
if (ix >= 0x3fe0dfbc) { /* |x| >= 0x1.c1bf78p+0 */
SET_RESTORE_ROUNDF (FE_TONEAREST);
__sincosf (x, &s, &c);
ss = -s-c;
cc = s-c;
if (ix >= 0x7f000000)
/* x >= 2^127: use asymptotic expansion. */
return y1f_asympt (x);
/* Now we are sure that x+x cannot overflow. */
z = __cosf(x+x);
if ((s*c)>zero) cc = z/ss;
else ss = z/cc;
/* y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x0)+q1(x)*cos(x0))
* where x0 = x-3pi/4
* Better formula:
* cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4)
* = 1/sqrt(2) * (sin(x) - cos(x))
* sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
* = -1/sqrt(2) * (cos(x) + sin(x))
* To avoid cancellation, use
* sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
* to compute the worse one.
*/
if (ix <= 0x5c000000)
{
u = ponef(x); v = qonef(x);
ss = u*ss+v*cc;
}
z = (invsqrtpi * ss) / sqrtf(x);
float threshold = 0x1.3e014cp-2;
/* The following threshold is optimal: for x=0x1.f7f16ap+0
and rounding upwards, |ss|=-0x1.3e014cp-2 and z is 11 ulps
far from the correctly rounded value. */
if (fabsf (ss) > threshold)
return z;
else
return y1f_near_root (x, z);
}
if(__builtin_expect(ix<=0x33000000, 0)) { /* x < 2**-25 */
z = -tpi / x;
if (isinf (z))
__set_errno (ERANGE);
return z;
}
/* Now 2**-25 <= x < 0x1.c1bf78p+0. */
z = x*x;
u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4])));
v = one+z*(V0[0]+z*(V0[1]+z*(V0[2]+z*(V0[3]+z*V0[4]))));
return(x*(u/v) + tpi*(__ieee754_j1f(x)*__ieee754_logf(x)-one/x));
}
libm_alias_finite (__ieee754_y1f, __y1f)
/* For x >= 8, the asymptotic expansion of pone is
* 1 + 15/128 s^2 - 4725/2^15 s^4 - ..., where s = 1/x.
* We approximate pone by
* pone(x) = 1 + (R/S)
* where R = pr0 + pr1*s^2 + pr2*s^4 + ... + pr5*s^10
* S = 1 + ps0*s^2 + ... + ps4*s^10
* and
* | pone(x)-1-R/S | <= 2 ** ( -60.06)
*/
static const float pr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
0.0000000000e+00, /* 0x00000000 */
1.1718750000e-01, /* 0x3df00000 */
1.3239480972e+01, /* 0x4153d4ea */
4.1205184937e+02, /* 0x43ce06a3 */
3.8747453613e+03, /* 0x45722bed */
7.9144794922e+03, /* 0x45f753d6 */
};
static const float ps8[5] = {
1.1420736694e+02, /* 0x42e46a2c */
3.6509309082e+03, /* 0x45642ee5 */
3.6956207031e+04, /* 0x47105c35 */
9.7602796875e+04, /* 0x47bea166 */
3.0804271484e+04, /* 0x46f0a88b */
};
static const float pr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
1.3199052094e-11, /* 0x2d68333f */
1.1718749255e-01, /* 0x3defffff */
6.8027510643e+00, /* 0x40d9b023 */
1.0830818176e+02, /* 0x42d89dca */
5.1763616943e+02, /* 0x440168b7 */
5.2871520996e+02, /* 0x44042dc6 */
};
static const float ps5[5] = {
5.9280597687e+01, /* 0x426d1f55 */
9.9140142822e+02, /* 0x4477d9b1 */
5.3532670898e+03, /* 0x45a74a23 */
7.8446904297e+03, /* 0x45f52586 */
1.5040468750e+03, /* 0x44bc0180 */
};
static const float pr3[6] = {
3.0250391081e-09, /* 0x314fe10d */
1.1718686670e-01, /* 0x3defffab */
3.9329774380e+00, /* 0x407bb5e7 */
3.5119403839e+01, /* 0x420c7a45 */
9.1055007935e+01, /* 0x42b61c2a */
4.8559066772e+01, /* 0x42423c7c */
};
static const float ps3[5] = {
3.4791309357e+01, /* 0x420b2a4d */
3.3676245117e+02, /* 0x43a86198 */
1.0468714600e+03, /* 0x4482dbe3 */
8.9081134033e+02, /* 0x445eb3ed */
1.0378793335e+02, /* 0x42cf936c */
};
static const float pr2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
1.0771083225e-07, /* 0x33e74ea8 */
1.1717621982e-01, /* 0x3deffa16 */
2.3685150146e+00, /* 0x401795c0 */
1.2242610931e+01, /* 0x4143e1bc */
1.7693971634e+01, /* 0x418d8d41 */
5.0735230446e+00, /* 0x40a25a4d */
};
static const float ps2[5] = {
2.1436485291e+01, /* 0x41ab7dec */
1.2529022980e+02, /* 0x42fa9499 */
2.3227647400e+02, /* 0x436846c7 */
1.1767937469e+02, /* 0x42eb5bd7 */
8.3646392822e+00, /* 0x4105d590 */
};
static float
ponef(float x)
{
const float *p,*q;
float z,r,s;
int32_t ix;
GET_FLOAT_WORD(ix,x);
ix &= 0x7fffffff;
/* ix >= 0x40000000 for all calls to this function. */
if(ix>=0x41000000) {p = pr8; q= ps8;}
else if(ix>=0x40f71c58){p = pr5; q= ps5;}
else if(ix>=0x4036db68){p = pr3; q= ps3;}
else {p = pr2; q= ps2;}
z = one/(x*x);
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))));
return one+ r/s;
}
/* For x >= 8, the asymptotic expansion of qone is
* 3/8 s - 105/1024 s^3 - ..., where s = 1/x.
* We approximate pone by
* qone(x) = s*(0.375 + (R/S))
* where R = qr1*s^2 + qr2*s^4 + ... + qr5*s^10
* S = 1 + qs1*s^2 + ... + qs6*s^12
* and
* | qone(x)/s -0.375-R/S | <= 2 ** ( -61.13)
*/
static const float qr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
0.0000000000e+00, /* 0x00000000 */
-1.0253906250e-01, /* 0xbdd20000 */
-1.6271753311e+01, /* 0xc1822c8d */
-7.5960174561e+02, /* 0xc43de683 */
-1.1849806641e+04, /* 0xc639273a */
-4.8438511719e+04, /* 0xc73d3683 */
};
static const float qs8[6] = {
1.6139537048e+02, /* 0x43216537 */
7.8253862305e+03, /* 0x45f48b17 */
1.3387534375e+05, /* 0x4802bcd6 */
7.1965775000e+05, /* 0x492fb29c */
6.6660125000e+05, /* 0x4922be94 */
-2.9449025000e+05, /* 0xc88fcb48 */
};
static const float qr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
-2.0897993405e-11, /* 0xadb7d219 */
-1.0253904760e-01, /* 0xbdd1fffe */
-8.0564479828e+00, /* 0xc100e736 */
-1.8366960144e+02, /* 0xc337ab6b */
-1.3731937256e+03, /* 0xc4aba633 */
-2.6124443359e+03, /* 0xc523471c */
};
static const float qs5[6] = {
8.1276550293e+01, /* 0x42a28d98 */
1.9917987061e+03, /* 0x44f8f98f */
1.7468484375e+04, /* 0x468878f8 */
4.9851425781e+04, /* 0x4742bb6d */
2.7948074219e+04, /* 0x46da5826 */
-4.7191835938e+03, /* 0xc5937978 */
};
static const float qr3[6] = {
-5.0783124372e-09, /* 0xb1ae7d4f */
-1.0253783315e-01, /* 0xbdd1ff5b */
-4.6101160049e+00, /* 0xc0938612 */
-5.7847221375e+01, /* 0xc267638e */
-2.2824453735e+02, /* 0xc3643e9a */
-2.1921012878e+02, /* 0xc35b35cb */
};
static const float qs3[6] = {
4.7665153503e+01, /* 0x423ea91e */
6.7386511230e+02, /* 0x4428775e */
3.3801528320e+03, /* 0x45534272 */
5.5477290039e+03, /* 0x45ad5dd5 */
1.9031191406e+03, /* 0x44ede3d0 */
-1.3520118713e+02, /* 0xc3073381 */
};
static const float qr2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
-1.7838172539e-07, /* 0xb43f8932 */
-1.0251704603e-01, /* 0xbdd1f475 */
-2.7522056103e+00, /* 0xc0302423 */
-1.9663616180e+01, /* 0xc19d4f16 */
-4.2325313568e+01, /* 0xc2294d1f */
-2.1371921539e+01, /* 0xc1aaf9b2 */
};
static const float qs2[6] = {
2.9533363342e+01, /* 0x41ec4454 */
2.5298155212e+02, /* 0x437cfb47 */
7.5750280762e+02, /* 0x443d602e */
7.3939318848e+02, /* 0x4438d92a */
1.5594900513e+02, /* 0x431bf2f2 */
-4.9594988823e+00, /* 0xc09eb437 */
};
static float
qonef(float x)
{
const float *p,*q;
float s,r,z;
int32_t ix;
GET_FLOAT_WORD(ix,x);
ix &= 0x7fffffff;
/* ix >= 0x40000000 for all calls to this function. */
if(ix>=0x41000000) {p = qr8; q= qs8;} /* x >= 8 */
else if(ix>=0x40f71c58){p = qr5; q= qs5;} /* x >= 7.722209930e+00 */
else if(ix>=0x4036db68){p = qr3; q= qs3;} /* x >= 2.857141495e+00 */
else {p = qr2; q= qs2;} /* x >= 2 */
z = one/(x*x);
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));
return ((float).375 + r/s)/x;
}