math: Remove slow paths from asin and acos [BZ #15267]

This patch series removes all remaining slow paths and related code.
First the asin/acos, tan, atan and atan2 implementations are updated, and the
final patch removes the unused mpa files, headers and probes. Passes
build-many-glibcs.py.

Remove slow paths from asin/acos. Add ULP annotations based on previous slow
path checks (which are approximate). Update AArch64 and x86_64 libm-test-ulps.

Reviewed-By: Paul Zimmermann <Paul.Zimmermann@inria.fr>
This commit is contained in:
Wilco Dijkstra 2021-03-10 12:39:56 +00:00 committed by Wilco Dijkstra
parent 94caafa040
commit db3f7bb558
3 changed files with 47 additions and 300 deletions

View File

@ -57,7 +57,7 @@ float: 1
ldouble: 1
Function: "asin_upward":
double: 1
double: 2
float: 1
ldouble: 2

View File

@ -21,8 +21,7 @@
/* */
/* FUNCTIONS: uasin */
/* uacos */
/* FILES NEEDED: dla.h endian.h mpa.h mydefs.h usncs.h */
/* doasin.c sincos32.c dosincos.c mpa.c */
/* FILES NEEDED: dla.h endian.h mydefs.h usncs.h */
/* sincos.tbl asincos.tbl powtwo.tbl root.tbl */
/* */
/******************************************************************/
@ -31,7 +30,6 @@
#include "asincos.tbl"
#include "root.tbl"
#include "powtwo.tbl"
#include "MathLib.h"
#include "uasncs.h"
#include <float.h>
#include <math.h>
@ -43,15 +41,11 @@
# define SECTION
#endif
void __doasin(double x, double dx, double w[]);
void __dubsin(double x, double dx, double v[]);
void __dubcos(double x, double dx, double v[]);
void __docos(double x, double dx, double v[]);
/* asin with max ULP of ~0.516 based on random sampling. */
double
SECTION
__ieee754_asin(double x){
double x1,x2,xx,s1,s2,res1,p,t,res,r,cor,cc,y,c,z,w[2];
double x2,xx,res1,p,t,res,r,cor,cc,y,c,z;
mynumber u,v;
int4 k,m,n;
@ -70,27 +64,8 @@ __ieee754_asin(double x){
x2 = x*x;
t = (((((f6*x2 + f5)*x2 + f4)*x2 + f3)*x2 + f2)*x2 + f1)*(x2*x);
res = x+t; /* res=arcsin(x) according to Taylor series */
cor = (x-res)+t;
if (res == res+1.025*cor) return res;
else {
x1 = x+big;
xx = x*x;
x1 -= big;
x2 = x - x1;
p = x1*x1*x1;
s1 = a1.x*p;
s2 = ((((((c7*xx + c6)*xx + c5)*xx + c4)*xx + c3)*xx + c2)*xx*xx*x +
((a1.x+a2.x)*x2*x2+ 0.5*x1*x)*x2) + a2.x*p;
res1 = x+s1;
s2 = ((x-res1)+s1)+s2;
res = res1+s2;
cor = (res1-res)+s2;
if (res == res+1.00014*cor) return res;
else {
__doasin(x,0,w);
return w[0];
}
}
/* Max ULP is 0.513. */
return res;
}
/*---------------------0.125 <= |x| < 0.5 -----------------------------*/
else if (k < 0x3fe00000) {
@ -103,26 +78,8 @@ __ieee754_asin(double x){
+xx*asncs.x[n+6]))))+asncs.x[n+7];
t+=p;
res =asncs.x[n+8] +t;
cor = (asncs.x[n+8]-res)+t;
if (res == res+1.05*cor) return (m>0)?res:-res;
else {
r=asncs.x[n+8]+xx*asncs.x[n+9];
t=((asncs.x[n+8]-r)+xx*asncs.x[n+9])+(p+xx*asncs.x[n+10]);
res = r+t;
cor = (r-res)+t;
if (res == res+1.0005*cor) return (m>0)?res:-res;
else {
res1=res+1.1*cor;
z=0.5*(res1-res);
__dubsin(res,z,w);
z=(w[0]-fabs(x))+w[1];
if (z>1.0e-27) return (m>0)?min(res,res1):-min(res,res1);
else if (z<-1.0e-27) return (m>0)?max(res,res1):-max(res,res1);
else {
/* Max ULP is 0.524. */
return (m>0)?res:-res;
}
}
}
} /* else if (k < 0x3fe00000) */
/*-------------------- 0.5 <= |x| < 0.75 -----------------------------*/
else
@ -135,26 +92,8 @@ __ieee754_asin(double x){
+xx*(asncs.x[n+6]+xx*asncs.x[n+7])))))+asncs.x[n+8];
t+=p;
res =asncs.x[n+9] +t;
cor = (asncs.x[n+9]-res)+t;
if (res == res+1.01*cor) return (m>0)?res:-res;
else {
r=asncs.x[n+9]+xx*asncs.x[n+10];
t=((asncs.x[n+9]-r)+xx*asncs.x[n+10])+(p+xx*asncs.x[n+11]);
res = r+t;
cor = (r-res)+t;
if (res == res+1.0005*cor) return (m>0)?res:-res;
else {
res1=res+1.1*cor;
z=0.5*(res1-res);
__dubsin(res,z,w);
z=(w[0]-fabs(x))+w[1];
if (z>1.0e-27) return (m>0)?min(res,res1):-min(res,res1);
else if (z<-1.0e-27) return (m>0)?max(res,res1):-max(res,res1);
else {
/* Max ULP is 0.505. */
return (m>0)?res:-res;
}
}
}
} /* else if (k < 0x3fe80000) */
/*--------------------- 0.75 <= |x|< 0.921875 ----------------------*/
else
@ -167,28 +106,8 @@ __ieee754_asin(double x){
+xx*(asncs.x[n+6]+xx*(asncs.x[n+7]+xx*asncs.x[n+8]))))))+asncs.x[n+9];
t+=p;
res =asncs.x[n+10] +t;
cor = (asncs.x[n+10]-res)+t;
if (res == res+1.01*cor) return (m>0)?res:-res;
else {
r=asncs.x[n+10]+xx*asncs.x[n+11];
t=((asncs.x[n+10]-r)+xx*asncs.x[n+11])+(p+xx*asncs.x[n+12]);
res = r+t;
cor = (r-res)+t;
if (res == res+1.0008*cor) return (m>0)?res:-res;
else {
res1=res+1.1*cor;
z=0.5*(res1-res);
y=hp0.x-res;
z=((hp0.x-y)-res)+(hp1.x-z);
__dubcos(y,z,w);
z=(w[0]-fabs(x))+w[1];
if (z>1.0e-27) return (m>0)?min(res,res1):-min(res,res1);
else if (z<-1.0e-27) return (m>0)?max(res,res1):-max(res,res1);
else {
/* Max ULP is 0.505. */
return (m>0)?res:-res;
}
}
}
} /* else if (k < 0x3fed8000) */
/*-------------------0.921875 <= |x| < 0.953125 ------------------------*/
else
@ -203,29 +122,8 @@ __ieee754_asin(double x){
xx*asncs.x[n+9])))))))+asncs.x[n+10];
t+=p;
res =asncs.x[n+11] +t;
cor = (asncs.x[n+11]-res)+t;
if (res == res+1.01*cor) return (m>0)?res:-res;
else {
r=asncs.x[n+11]+xx*asncs.x[n+12];
t=((asncs.x[n+11]-r)+xx*asncs.x[n+12])+(p+xx*asncs.x[n+13]);
res = r+t;
cor = (r-res)+t;
if (res == res+1.0007*cor) return (m>0)?res:-res;
else {
res1=res+1.1*cor;
z=0.5*(res1-res);
y=(hp0.x-res)-z;
z=y+hp1.x;
y=(y-z)+hp1.x;
__dubcos(z,y,w);
z=(w[0]-fabs(x))+w[1];
if (z>1.0e-27) return (m>0)?min(res,res1):-min(res,res1);
else if (z<-1.0e-27) return (m>0)?max(res,res1):-max(res,res1);
else {
/* Max ULP is 0.505. */
return (m>0)?res:-res;
}
}
}
} /* else if (k < 0x3fee8000) */
/*--------------------0.953125 <= |x| < 0.96875 ------------------------*/
@ -241,29 +139,8 @@ __ieee754_asin(double x){
xx*(asncs.x[n+9]+xx*asncs.x[n+10]))))))))+asncs.x[n+11];
t+=p;
res =asncs.x[n+12] +t;
cor = (asncs.x[n+12]-res)+t;
if (res == res+1.01*cor) return (m>0)?res:-res;
else {
r=asncs.x[n+12]+xx*asncs.x[n+13];
t=((asncs.x[n+12]-r)+xx*asncs.x[n+13])+(p+xx*asncs.x[n+14]);
res = r+t;
cor = (r-res)+t;
if (res == res+1.0007*cor) return (m>0)?res:-res;
else {
res1=res+1.1*cor;
z=0.5*(res1-res);
y=(hp0.x-res)-z;
z=y+hp1.x;
y=(y-z)+hp1.x;
__dubcos(z,y,w);
z=(w[0]-fabs(x))+w[1];
if (z>1.0e-27) return (m>0)?min(res,res1):-min(res,res1);
else if (z<-1.0e-27) return (m>0)?max(res,res1):-max(res,res1);
else {
/* Max ULP is 0.505. */
return (m>0)?res:-res;
}
}
}
} /* else if (k < 0x3fef0000) */
/*--------------------0.96875 <= |x| < 1 --------------------------------*/
else
@ -282,16 +159,8 @@ __ieee754_asin(double x){
cor = (hp1.x - 2.0*cc)-2.0*(y+cc)*p;
res1 = hp0.x - 2.0*y;
res =res1 + cor;
if (res == res+1.003*((res1-res)+cor)) return (m>0)?res:-res;
else {
c=y+cc;
cc=(y-c)+cc;
__doasin(c,cc,w);
res1=hp0.x-2.0*w[0];
cor=((hp0.x-res1)-2.0*w[0])+(hp1.x-2.0*w[1]);
res = res1+cor;
/* Max ULP is 0.5015. */
return (m>0)?res:-res;
}
} /* else if (k < 0x3ff00000) */
/*---------------------------- |x|>=1 -------------------------------*/
else if (k==0x3ff00000 && u.i[LOW_HALF]==0) return (m>0)?hp0.x:-hp0.x;
@ -315,11 +184,12 @@ libm_alias_finite (__ieee754_asin, __asin)
/* */
/*******************************************************************/
/* acos with max ULP of ~0.523 based on random sampling. */
double
SECTION
__ieee754_acos(double x)
{
double x1,x2,xx,s1,s2,res1,p,t,res,r,cor,cc,y,c,z,w[2],eps;
double x2,xx,res1,p,t,res,r,cor,cc,y,c,z;
mynumber u,v;
int4 k,m,n;
u.x = x;
@ -336,32 +206,8 @@ __ieee754_acos(double x)
r=hp0.x-x;
cor=(((hp0.x-r)-x)+hp1.x)-t;
res = r+cor;
cor = (r-res)+cor;
if (res == res+1.004*cor) return res;
else {
x1 = x+big;
xx = x*x;
x1 -= big;
x2 = x - x1;
p = x1*x1*x1;
s1 = a1.x*p;
s2 = ((((((c7*xx + c6)*xx + c5)*xx + c4)*xx + c3)*xx + c2)*xx*xx*x +
((a1.x+a2.x)*x2*x2+ 0.5*x1*x)*x2) + a2.x*p;
res1 = x+s1;
s2 = ((x-res1)+s1)+s2;
r=hp0.x-res1;
cor=(((hp0.x-r)-res1)+hp1.x)-s2;
res = r+cor;
cor = (r-res)+cor;
if (res == res+1.00004*cor) return res;
else {
__doasin(x,0,w);
r=hp0.x-w[0];
cor=((hp0.x-r)-w[0])+(hp1.x-w[1]);
res=r+cor;
/* Max ULP is 0.502. */
return res;
}
}
} /* else if (k < 0x3fc00000) */
/*---------------------- 0.125 <= |x| < 0.5 --------------------*/
else
@ -377,35 +223,16 @@ __ieee754_acos(double x)
y = (m>0)?(hp0.x-asncs.x[n+8]):(hp0.x+asncs.x[n+8]);
t = (m>0)?(hp1.x-t):(hp1.x+t);
res = y+t;
if (res == res+1.02*((y-res)+t)) return res;
else {
r=asncs.x[n+8]+xx*asncs.x[n+9];
t=((asncs.x[n+8]-r)+xx*asncs.x[n+9])+(p+xx*asncs.x[n+10]);
if (m>0)
{p = hp0.x-r; t = (((hp0.x-p)-r)-t)+hp1.x; }
else
{p = hp0.x+r; t = ((hp0.x-p)+r)+(hp1.x+t); }
res = p+t;
cor = (p-res)+t;
if (res == (res+1.0002*cor)) return res;
else {
res1=res+1.1*cor;
z=0.5*(res1-res);
__docos(res,z,w);
z=(w[0]-x)+w[1];
if (z>1.0e-27) return max(res,res1);
else if (z<-1.0e-27) return min(res,res1);
else return res;
}
}
/* Max ULP is 0.51. */
return res;
} /* else if (k < 0x3fe00000) */
/*--------------------------- 0.5 <= |x| < 0.75 ---------------------*/
else
if (k < 0x3fe80000) {
n = 1056+((k&0x000fe000)>>11)*3;
if (m>0) {xx = x - asncs.x[n]; eps=1.04; }
else {xx = -x - asncs.x[n]; eps=1.02; }
if (m>0) {xx = x - asncs.x[n]; }
else {xx = -x - asncs.x[n]; }
t = asncs.x[n+1]*xx;
p=xx*xx*(asncs.x[n+2]+xx*(asncs.x[n+3]+xx*(asncs.x[n+4]+
xx*(asncs.x[n+5]+xx*(asncs.x[n+6]+
@ -414,33 +241,16 @@ __ieee754_acos(double x)
y = (m>0)?(hp0.x-asncs.x[n+9]):(hp0.x+asncs.x[n+9]);
t = (m>0)?(hp1.x-t):(hp1.x+t);
res = y+t;
if (res == res+eps*((y-res)+t)) return res;
else {
r=asncs.x[n+9]+xx*asncs.x[n+10];
t=((asncs.x[n+9]-r)+xx*asncs.x[n+10])+(p+xx*asncs.x[n+11]);
if (m>0) {p = hp0.x-r; t = (((hp0.x-p)-r)-t)+hp1.x; eps=1.0004; }
else {p = hp0.x+r; t = ((hp0.x-p)+r)+(hp1.x+t); eps=1.0002; }
res = p+t;
cor = (p-res)+t;
if (res == (res+eps*cor)) return res;
else {
res1=res+1.1*cor;
z=0.5*(res1-res);
__docos(res,z,w);
z=(w[0]-x)+w[1];
if (z>1.0e-27) return max(res,res1);
else if (z<-1.0e-27) return min(res,res1);
else return res;
}
}
/* Max ULP is 0.523 based on random sampling. */
return res;
} /* else if (k < 0x3fe80000) */
/*------------------------- 0.75 <= |x| < 0.921875 -------------*/
else
if (k < 0x3fed8000) {
n = 992+((k&0x000fe000)>>13)*13;
if (m>0) {xx = x - asncs.x[n]; eps = 1.04; }
else {xx = -x - asncs.x[n]; eps = 1.01; }
if (m>0) {xx = x - asncs.x[n]; }
else {xx = -x - asncs.x[n]; }
t = asncs.x[n+1]*xx;
p=xx*xx*(asncs.x[n+2]+xx*(asncs.x[n+3]+xx*(asncs.x[n+4]+
xx*(asncs.x[n+5]+xx*(asncs.x[n+6]+xx*(asncs.x[n+7]+
@ -449,33 +259,16 @@ __ieee754_acos(double x)
y = (m>0)?(hp0.x-asncs.x[n+10]):(hp0.x+asncs.x[n+10]);
t = (m>0)?(hp1.x-t):(hp1.x+t);
res = y+t;
if (res == res+eps*((y-res)+t)) return res;
else {
r=asncs.x[n+10]+xx*asncs.x[n+11];
t=((asncs.x[n+10]-r)+xx*asncs.x[n+11])+(p+xx*asncs.x[n+12]);
if (m>0) {p = hp0.x-r; t = (((hp0.x-p)-r)-t)+hp1.x; eps=1.0032; }
else {p = hp0.x+r; t = ((hp0.x-p)+r)+(hp1.x+t); eps=1.0008; }
res = p+t;
cor = (p-res)+t;
if (res == (res+eps*cor)) return res;
else {
res1=res+1.1*cor;
z=0.5*(res1-res);
__docos(res,z,w);
z=(w[0]-x)+w[1];
if (z>1.0e-27) return max(res,res1);
else if (z<-1.0e-27) return min(res,res1);
else return res;
}
}
/* Max ULP is 0.523 based on random sampling. */
return res;
} /* else if (k < 0x3fed8000) */
/*-------------------0.921875 <= |x| < 0.953125 ------------------*/
else
if (k < 0x3fee8000) {
n = 884+((k&0x000fe000)>>13)*14;
if (m>0) {xx = x - asncs.x[n]; eps=1.04; }
else {xx = -x - asncs.x[n]; eps =1.005; }
if (m>0) {xx = x - asncs.x[n]; }
else {xx = -x - asncs.x[n]; }
t = asncs.x[n+1]*xx;
p=xx*xx*(asncs.x[n+2]+xx*(asncs.x[n+3]+xx*(asncs.x[n+4]+
xx*(asncs.x[n+5]+xx*(asncs.x[n+6]
@ -485,33 +278,16 @@ __ieee754_acos(double x)
y = (m>0)?(hp0.x-asncs.x[n+11]):(hp0.x+asncs.x[n+11]);
t = (m>0)?(hp1.x-t):(hp1.x+t);
res = y+t;
if (res == res+eps*((y-res)+t)) return res;
else {
r=asncs.x[n+11]+xx*asncs.x[n+12];
t=((asncs.x[n+11]-r)+xx*asncs.x[n+12])+(p+xx*asncs.x[n+13]);
if (m>0) {p = hp0.x-r; t = (((hp0.x-p)-r)-t)+hp1.x; eps=1.0030; }
else {p = hp0.x+r; t = ((hp0.x-p)+r)+(hp1.x+t); eps=1.0005; }
res = p+t;
cor = (p-res)+t;
if (res == (res+eps*cor)) return res;
else {
res1=res+1.1*cor;
z=0.5*(res1-res);
__docos(res,z,w);
z=(w[0]-x)+w[1];
if (z>1.0e-27) return max(res,res1);
else if (z<-1.0e-27) return min(res,res1);
else return res;
}
}
/* Max ULP is 0.523 based on random sampling. */
return res;
} /* else if (k < 0x3fee8000) */
/*--------------------0.953125 <= |x| < 0.96875 ----------------*/
else
if (k < 0x3fef0000) {
n = 768+((k&0x000fe000)>>13)*15;
if (m>0) {xx = x - asncs.x[n]; eps=1.04; }
else {xx = -x - asncs.x[n]; eps=1.005;}
if (m>0) {xx = x - asncs.x[n]; }
else {xx = -x - asncs.x[n]; }
t = asncs.x[n+1]*xx;
p=xx*xx*(asncs.x[n+2]+xx*(asncs.x[n+3]+xx*(asncs.x[n+4]+
xx*(asncs.x[n+5]+xx*(asncs.x[n+6]
@ -521,25 +297,8 @@ __ieee754_acos(double x)
y = (m>0)?(hp0.x-asncs.x[n+12]):(hp0.x+asncs.x[n+12]);
t = (m>0)?(hp1.x-t):(hp1.x+t);
res = y+t;
if (res == res+eps*((y-res)+t)) return res;
else {
r=asncs.x[n+12]+xx*asncs.x[n+13];
t=((asncs.x[n+12]-r)+xx*asncs.x[n+13])+(p+xx*asncs.x[n+14]);
if (m>0) {p = hp0.x-r; t = (((hp0.x-p)-r)-t)+hp1.x; eps=1.0030; }
else {p = hp0.x+r; t = ((hp0.x-p)+r)+(hp1.x+t); eps=1.0005; }
res = p+t;
cor = (p-res)+t;
if (res == (res+eps*cor)) return res;
else {
res1=res+1.1*cor;
z=0.5*(res1-res);
__docos(res,z,w);
z=(w[0]-x)+w[1];
if (z>1.0e-27) return max(res,res1);
else if (z<-1.0e-27) return min(res,res1);
else return res;
}
}
/* Max ULP is 0.523 based on random sampling. */
return res;
} /* else if (k < 0x3fef0000) */
/*-----------------0.96875 <= |x| < 1 ---------------------------*/
@ -560,29 +319,15 @@ __ieee754_acos(double x)
cor = (hp1.x - cc)-(y+cc)*p;
res1 = hp0.x - y;
res =res1 + cor;
if (res == res+1.002*((res1-res)+cor)) return (res+res);
else {
c=y+cc;
cc=(y-c)+cc;
__doasin(c,cc,w);
res1=hp0.x-w[0];
cor=((hp0.x-res1)-w[0])+(hp1.x-w[1]);
res = res1+cor;
/* Max ULP is 0.501. */
return (res+res);
}
}
else {
cor = cc+p*(y+cc);
res = y + cor;
if (res == res+1.03*((y-res)+cor)) return (res+res);
else {
c=y+cc;
cc=(y-c)+cc;
__doasin(c,cc,w);
res = w[0];
/* Max ULP is 0.515. */
return (res+res);
}
}
} /* else if (k < 0x3ff00000) */
/*---------------------------- |x|>=1 -----------------------*/

View File

@ -68,7 +68,7 @@ float128: 1
ldouble: 1
Function: "asin_upward":
double: 1
double: 2
float: 1
float128: 2
ldouble: 1
@ -98,6 +98,7 @@ float128: 4
ldouble: 5
Function: "atan":
double: 1
float: 1
float128: 1
ldouble: 1
@ -673,6 +674,7 @@ float128: 2
ldouble: 3
Function: Imaginary part of "clog":
double: 1
float: 1
float128: 1
ldouble: 1