mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-08 22:30:07 +00:00
Optimize accurate 64-bit routines for FMA4 on x86-64
This commit is contained in:
parent
58985aa92f
commit
af968f62f2
57
ChangeLog
57
ChangeLog
@ -1,5 +1,62 @@
|
|||||||
2011-10-24 Ulrich Drepper <drepper@gmail.com>
|
2011-10-24 Ulrich Drepper <drepper@gmail.com>
|
||||||
|
|
||||||
|
* config.make.in: Add have-mfma4 entry.
|
||||||
|
* configure.in: Substitute libc_cv_cc_fma4.
|
||||||
|
* math/Makefile (dbl-only-routines): Add sincostab.
|
||||||
|
* sysdeps/ieee754/dbl-64/dosincos.c: Don't include sincos.tbl.
|
||||||
|
Use __sincostab not sincos.
|
||||||
|
* sysdeps/ieee754/dbl-64/e_asin.c: Don't define aliases when function
|
||||||
|
name is a macro.
|
||||||
|
* sysdeps/ieee754/dbl-64/e_exp.c: Likewise.
|
||||||
|
* sysdeps/ieee754/dbl-64/e_log.c: Likewise.
|
||||||
|
* sysdeps/ieee754/dbl-64/e_pow.c: Likewise.
|
||||||
|
* sysdeps/ieee754/dbl-64/e_atan2.c: Likewise. Define signArctan2
|
||||||
|
using __copysign.
|
||||||
|
* sysdeps/ieee754/dbl-64/mpa.c: Don't export __acr and __inv. Don't define
|
||||||
|
__cr and __cpymn. Define __cpy unless NO___CPY is defined. Define
|
||||||
|
norm, denorm, and __mp_dbl unless NO___MP_DBL is defined.
|
||||||
|
* sysdeps/ieee754/dbl-64/mpa.h: Don't declare __acr, __cr, __cpymn,
|
||||||
|
and __inv.
|
||||||
|
* sysdeps/ieee754/dbl-64/mpsqrt.c: Make fastiroot static.
|
||||||
|
* sysdeps/ieee754/dbl-64/s_atan.c: Define __signArctan using
|
||||||
|
__copysign.
|
||||||
|
* sysdeps/ieee754/dbl-64/s_sin.c: Use __sincostab not sincos. Don't
|
||||||
|
define aliases when function name is a macro.
|
||||||
|
* sysdeps/ieee754/dbl-64/sincostab.c: Renamed from
|
||||||
|
sysdeps/ieee754/dbl-64/sincos.tbl.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/Makefile: Add entries to build
|
||||||
|
fma4-enabled routines.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/brandred-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/doasin-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/dosincos-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/e_asin-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/e_asin.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/e_atan2.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/e_exp.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/e_log-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/e_log.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/e_pow.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/mpa-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/mplog-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/mptan-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/s_atan.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/s_sin-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/s_sin.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/s_tan.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c: New file.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c: New file.
|
||||||
|
|
||||||
* sysdeps/ieee754/dbl-64/doasin.c: Adjust for DLA_FMA -> DLA_FMS
|
* sysdeps/ieee754/dbl-64/doasin.c: Adjust for DLA_FMA -> DLA_FMS
|
||||||
rename.
|
rename.
|
||||||
* sysdeps/ieee754/dbl-64/dosincos.c: Likewise.
|
* sysdeps/ieee754/dbl-64/dosincos.c: Likewise.
|
||||||
|
@ -59,6 +59,7 @@ have-cpp-asm-debuginfo = @libc_cv_cpp_asm_debuginfo@
|
|||||||
enable-check-abi = @enable_check_abi@
|
enable-check-abi = @enable_check_abi@
|
||||||
have-forced-unwind = @libc_cv_forced_unwind@
|
have-forced-unwind = @libc_cv_forced_unwind@
|
||||||
have-fpie = @libc_cv_fpie@
|
have-fpie = @libc_cv_fpie@
|
||||||
|
have-mfma4 = @libc_cv_cc_fma4@
|
||||||
gnu89-inline-CFLAGS = @gnu89_inline@
|
gnu89-inline-CFLAGS = @gnu89_inline@
|
||||||
have-ssp = @libc_cv_ssp@
|
have-ssp = @libc_cv_ssp@
|
||||||
have-selinux = @have_selinux@
|
have-selinux = @have_selinux@
|
||||||
|
2
configure
vendored
2
configure
vendored
@ -623,6 +623,7 @@ elf
|
|||||||
ldd_rewrite_script
|
ldd_rewrite_script
|
||||||
use_ldconfig
|
use_ldconfig
|
||||||
libc_cv_as_i686
|
libc_cv_as_i686
|
||||||
|
libc_cv_cc_fma4
|
||||||
libc_cv_cc_novzeroupper
|
libc_cv_cc_novzeroupper
|
||||||
libc_cv_cc_avx
|
libc_cv_cc_avx
|
||||||
libc_cv_cc_sse4
|
libc_cv_cc_sse4
|
||||||
@ -7944,6 +7945,7 @@ fi
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if test $elf = yes; then
|
if test $elf = yes; then
|
||||||
$as_echo "#define HAVE_ELF 1" >>confdefs.h
|
$as_echo "#define HAVE_ELF 1" >>confdefs.h
|
||||||
|
|
||||||
|
@ -2339,6 +2339,7 @@ AC_SUBST(libc_cv_cpp_asm_debuginfo)
|
|||||||
AC_SUBST(libc_cv_cc_sse4)
|
AC_SUBST(libc_cv_cc_sse4)
|
||||||
AC_SUBST(libc_cv_cc_avx)
|
AC_SUBST(libc_cv_cc_avx)
|
||||||
AC_SUBST(libc_cv_cc_novzeroupper)
|
AC_SUBST(libc_cv_cc_novzeroupper)
|
||||||
|
AC_SUBST(libc_cv_cc_fma4)
|
||||||
AC_SUBST(libc_cv_as_i686)
|
AC_SUBST(libc_cv_as_i686)
|
||||||
|
|
||||||
AC_SUBST(use_ldconfig)
|
AC_SUBST(use_ldconfig)
|
||||||
|
@ -66,7 +66,7 @@ include ../Makeconfig
|
|||||||
|
|
||||||
dbl-only-routines := branred doasin dosincos halfulp mpa mpatan2 \
|
dbl-only-routines := branred doasin dosincos halfulp mpa mpatan2 \
|
||||||
mpatan mpexp mplog mpsqrt mptan sincos32 slowexp \
|
mpatan mpexp mplog mpsqrt mptan sincos32 slowexp \
|
||||||
slowpow
|
slowpow sincostab
|
||||||
libm-routines = $(strip $(libm-support) $(libm-calls) \
|
libm-routines = $(strip $(libm-support) $(libm-calls) \
|
||||||
$(patsubst %_rf,%f_r,$(libm-calls:=f)) \
|
$(patsubst %_rf,%f_r,$(libm-calls:=f)) \
|
||||||
$(long-m-$(long-double-fcts))) \
|
$(long-m-$(long-double-fcts))) \
|
||||||
|
@ -35,11 +35,16 @@
|
|||||||
|
|
||||||
#include "endian.h"
|
#include "endian.h"
|
||||||
#include "mydefs.h"
|
#include "mydefs.h"
|
||||||
#include "sincos.tbl"
|
|
||||||
#include <dla.h>
|
#include <dla.h>
|
||||||
#include "dosincos.h"
|
#include "dosincos.h"
|
||||||
#include "math_private.h"
|
#include "math_private.h"
|
||||||
|
|
||||||
|
extern const union
|
||||||
|
{
|
||||||
|
int4 i[880];
|
||||||
|
double x[440];
|
||||||
|
} __sincostab attribute_hidden;
|
||||||
|
|
||||||
/***********************************************************************/
|
/***********************************************************************/
|
||||||
/* Routine receive Double-Length number (x+dx) and computing sin(x+dx) */
|
/* Routine receive Double-Length number (x+dx) and computing sin(x+dx) */
|
||||||
/* as Double-Length number and store it at array v .It computes it by */
|
/* as Double-Length number and store it at array v .It computes it by */
|
||||||
@ -66,10 +71,10 @@ void __dubsin(double x, double dx, double v[]) {
|
|||||||
dd=(x-d)+dx;
|
dd=(x-d)+dx;
|
||||||
/* sin(x+dx)=sin(Xi+t)=sin(Xi)*cos(t) + cos(Xi)sin(t) where t ->0 */
|
/* sin(x+dx)=sin(Xi+t)=sin(Xi)*cos(t) + cos(Xi)sin(t) where t ->0 */
|
||||||
MUL2(d,dd,d,dd,d2,dd2,p,hx,tx,hy,ty,q,c,cc);
|
MUL2(d,dd,d,dd,d2,dd2,p,hx,tx,hy,ty,q,c,cc);
|
||||||
sn=sincos.x[k]; /* */
|
sn=__sincostab.x[k]; /* */
|
||||||
ssn=sincos.x[k+1]; /* sin(Xi) and cos(Xi) */
|
ssn=__sincostab.x[k+1]; /* sin(Xi) and cos(Xi) */
|
||||||
cs=sincos.x[k+2]; /* */
|
cs=__sincostab.x[k+2]; /* */
|
||||||
ccs=sincos.x[k+3]; /* */
|
ccs=__sincostab.x[k+3]; /* */
|
||||||
MUL2(d2,dd2,s7.x,ss7.x,ds,dss,p,hx,tx,hy,ty,q,c,cc); /* Taylor */
|
MUL2(d2,dd2,s7.x,ss7.x,ds,dss,p,hx,tx,hy,ty,q,c,cc); /* Taylor */
|
||||||
ADD2(ds,dss,s5.x,ss5.x,ds,dss,r,s);
|
ADD2(ds,dss,s5.x,ss5.x,ds,dss,r,s);
|
||||||
MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc); /* series */
|
MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc); /* series */
|
||||||
@ -118,10 +123,10 @@ void __dubcos(double x, double dx, double v[]) {
|
|||||||
d=x+dx;
|
d=x+dx;
|
||||||
dd=(x-d)+dx; /* cos(x+dx)=cos(Xi+t)=cos(Xi)cos(t) - sin(Xi)sin(t) */
|
dd=(x-d)+dx; /* cos(x+dx)=cos(Xi+t)=cos(Xi)cos(t) - sin(Xi)sin(t) */
|
||||||
MUL2(d,dd,d,dd,d2,dd2,p,hx,tx,hy,ty,q,c,cc);
|
MUL2(d,dd,d,dd,d2,dd2,p,hx,tx,hy,ty,q,c,cc);
|
||||||
sn=sincos.x[k]; /* */
|
sn=__sincostab.x[k]; /* */
|
||||||
ssn=sincos.x[k+1]; /* sin(Xi) and cos(Xi) */
|
ssn=__sincostab.x[k+1]; /* sin(Xi) and cos(Xi) */
|
||||||
cs=sincos.x[k+2]; /* */
|
cs=__sincostab.x[k+2]; /* */
|
||||||
ccs=sincos.x[k+3]; /* */
|
ccs=__sincostab.x[k+3]; /* */
|
||||||
MUL2(d2,dd2,s7.x,ss7.x,ds,dss,p,hx,tx,hy,ty,q,c,cc);
|
MUL2(d2,dd2,s7.x,ss7.x,ds,dss,p,hx,tx,hy,ty,q,c,cc);
|
||||||
ADD2(ds,dss,s5.x,ss5.x,ds,dss,r,s);
|
ADD2(ds,dss,s5.x,ss5.x,ds,dss,r,s);
|
||||||
MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
|
MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
|
||||||
|
@ -324,7 +324,9 @@ double __ieee754_asin(double x){
|
|||||||
return u.x/v.x; /* NaN */
|
return u.x/v.x; /* NaN */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#ifndef __ieee754_asin
|
||||||
strong_alias (__ieee754_asin, __asin_finite)
|
strong_alias (__ieee754_asin, __asin_finite)
|
||||||
|
#endif
|
||||||
|
|
||||||
/*******************************************************************/
|
/*******************************************************************/
|
||||||
/* */
|
/* */
|
||||||
@ -636,4 +638,6 @@ double __ieee754_acos(double x)
|
|||||||
return u.x/v.x;
|
return u.x/v.x;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#ifndef __ieee754_acos
|
||||||
strong_alias (__ieee754_acos, __acos_finite)
|
strong_alias (__ieee754_acos, __acos_finite)
|
||||||
|
#endif
|
||||||
|
@ -51,7 +51,11 @@
|
|||||||
/* round to nearest mode of IEEE 754 standard. */
|
/* round to nearest mode of IEEE 754 standard. */
|
||||||
/************************************************************************/
|
/************************************************************************/
|
||||||
static double atan2Mp(double ,double ,const int[]);
|
static double atan2Mp(double ,double ,const int[]);
|
||||||
static double signArctan2(double ,double);
|
/* Fix the sign and return after stage 1 or stage 2 */
|
||||||
|
static double signArctan2(double y,double z)
|
||||||
|
{
|
||||||
|
return __copysign(z, y);
|
||||||
|
}
|
||||||
static double normalized(double ,double,double ,double);
|
static double normalized(double ,double,double ,double);
|
||||||
void __mpatan2(mp_no *,mp_no *,mp_no *,int);
|
void __mpatan2(mp_no *,mp_no *,mp_no *,int);
|
||||||
|
|
||||||
@ -375,7 +379,9 @@ double __ieee754_atan2(double y,double x) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#ifndef __ieee754_atan2
|
||||||
strong_alias (__ieee754_atan2, __atan2_finite)
|
strong_alias (__ieee754_atan2, __atan2_finite)
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Treat the Denormalized case */
|
/* Treat the Denormalized case */
|
||||||
static double normalized(double ax,double ay,double y, double z)
|
static double normalized(double ax,double ay,double y, double z)
|
||||||
@ -386,11 +392,6 @@ static double normalized(double ax,double ay,double y, double z)
|
|||||||
__dbl_mp(ue.d,&mpt1,p); __mul(&mpz,&mpt1,&mperr,p);
|
__dbl_mp(ue.d,&mpt1,p); __mul(&mpz,&mpt1,&mperr,p);
|
||||||
__sub(&mpz,&mperr,&mpz2,p); __mp_dbl(&mpz2,&z,p);
|
__sub(&mpz,&mperr,&mpz2,p); __mp_dbl(&mpz2,&z,p);
|
||||||
return signArctan2(y,z);
|
return signArctan2(y,z);
|
||||||
}
|
|
||||||
/* Fix the sign and return after stage 1 or stage 2 */
|
|
||||||
static double signArctan2(double y,double z)
|
|
||||||
{
|
|
||||||
return ((y<ZERO) ? -z : z);
|
|
||||||
}
|
}
|
||||||
/* Stage 3: Perform a multi-Precision computation */
|
/* Stage 3: Perform a multi-Precision computation */
|
||||||
static double atan2Mp(double x,double y,const int pr[])
|
static double atan2Mp(double x,double y,const int pr[])
|
||||||
|
@ -145,7 +145,9 @@ double __ieee754_exp(double x) {
|
|||||||
else return __slowexp(x);
|
else return __slowexp(x);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#ifndef __ieee754_exp
|
||||||
strong_alias (__ieee754_exp, __exp_finite)
|
strong_alias (__ieee754_exp, __exp_finite)
|
||||||
|
#endif
|
||||||
|
|
||||||
/************************************************************************/
|
/************************************************************************/
|
||||||
/* Compute e^(x+xx)(Double-Length number) .The routine also receive */
|
/* Compute e^(x+xx)(Double-Length number) .The routine also receive */
|
||||||
|
@ -207,4 +207,6 @@ double __ieee754_log(double x) {
|
|||||||
}
|
}
|
||||||
return y1;
|
return y1;
|
||||||
}
|
}
|
||||||
|
#ifndef __ieee754_log
|
||||||
strong_alias (__ieee754_log, __log_finite)
|
strong_alias (__ieee754_log, __log_finite)
|
||||||
|
#endif
|
||||||
|
@ -153,7 +153,9 @@ double __ieee754_pow(double x, double y) {
|
|||||||
if (y<0) return (x<1.0)?INF.x:0;
|
if (y<0) return (x<1.0)?INF.x:0;
|
||||||
return 0; /* unreachable, to make the compiler happy */
|
return 0; /* unreachable, to make the compiler happy */
|
||||||
}
|
}
|
||||||
|
#ifndef __ieee754_pow
|
||||||
strong_alias (__ieee754_pow, __pow_finite)
|
strong_alias (__ieee754_pow, __pow_finite)
|
||||||
|
#endif
|
||||||
|
|
||||||
/**************************************************************************/
|
/**************************************************************************/
|
||||||
/* Computing x^y using more accurate but more slow log routine */
|
/* Computing x^y using more accurate but more slow log routine */
|
||||||
|
@ -1,8 +1,7 @@
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* IBM Accurate Mathematical Library
|
* IBM Accurate Mathematical Library
|
||||||
* written by International Business Machines Corp.
|
* written by International Business Machines Corp.
|
||||||
* Copyright (C) 2001 Free Software Foundation
|
* Copyright (C) 2001, 2011 Free Software Foundation
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU Lesser General Public License as published by
|
* it under the terms of the GNU Lesser General Public License as published by
|
||||||
@ -64,7 +63,7 @@ static int mcr(const mp_no *x, const mp_no *y, int p) {
|
|||||||
|
|
||||||
|
|
||||||
/* acr() compares the absolute values of two multiple precision numbers */
|
/* acr() compares the absolute values of two multiple precision numbers */
|
||||||
int __acr(const mp_no *x, const mp_no *y, int p) {
|
static int __acr(const mp_no *x, const mp_no *y, int p) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (X[0] == ZERO) {
|
if (X[0] == ZERO) {
|
||||||
@ -82,8 +81,9 @@ int __acr(const mp_no *x, const mp_no *y, int p) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#if 0
|
||||||
/* cr90 compares the values of two multiple precision numbers */
|
/* cr90 compares the values of two multiple precision numbers */
|
||||||
int __cr(const mp_no *x, const mp_no *y, int p) {
|
static int __cr(const mp_no *x, const mp_no *y, int p) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (X[0] > Y[0]) i= 1;
|
if (X[0] > Y[0]) i= 1;
|
||||||
@ -93,26 +93,26 @@ int __cr(const mp_no *x, const mp_no *y, int p) {
|
|||||||
|
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef NO___CPY
|
||||||
/* Copy a multiple precision number. Set *y=*x. x=y is permissible. */
|
/* Copy a multiple precision number. Set *y=*x. x=y is permissible. */
|
||||||
void __cpy(const mp_no *x, mp_no *y, int p) {
|
void __cpy(const mp_no *x, mp_no *y, int p) {
|
||||||
int i;
|
|
||||||
|
|
||||||
EY = EX;
|
EY = EX;
|
||||||
for (i=0; i <= p; i++) Y[i] = X[i];
|
for (int i=0; i <= p; i++) Y[i] = X[i];
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#if 0
|
||||||
/* Copy a multiple precision number x of precision m into a */
|
/* Copy a multiple precision number x of precision m into a */
|
||||||
/* multiple precision number y of precision n. In case n>m, */
|
/* multiple precision number y of precision n. In case n>m, */
|
||||||
/* the digits of y beyond the m'th are set to zero. In case */
|
/* the digits of y beyond the m'th are set to zero. In case */
|
||||||
/* n<m, the digits of x beyond the n'th are ignored. */
|
/* n<m, the digits of x beyond the n'th are ignored. */
|
||||||
/* x=y is permissible. */
|
/* x=y is permissible. */
|
||||||
|
|
||||||
void __cpymn(const mp_no *x, int m, mp_no *y, int n) {
|
static void __cpymn(const mp_no *x, int m, mp_no *y, int n) {
|
||||||
|
|
||||||
int i,k;
|
int i,k;
|
||||||
|
|
||||||
@ -122,7 +122,10 @@ void __cpymn(const mp_no *x, int m, mp_no *y, int n) {
|
|||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef NO___MP_DBL
|
||||||
/* Convert a multiple precision number *x into a double precision */
|
/* Convert a multiple precision number *x into a double precision */
|
||||||
/* number *y, normalized case (|x| >= 2**(-1022))) */
|
/* number *y, normalized case (|x| >= 2**(-1022))) */
|
||||||
static void norm(const mp_no *x, double *y, int p)
|
static void norm(const mp_no *x, double *y, int p)
|
||||||
@ -242,6 +245,7 @@ void __mp_dbl(const mp_no *x, double *y, int p) {
|
|||||||
else if (EX==-42 && X[1]>=TWO10) norm(x,y,p);
|
else if (EX==-42 && X[1]>=TWO10) norm(x,y,p);
|
||||||
else denorm(x,y,p);
|
else denorm(x,y,p);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/* dbl_mp() converts a double precision number x into a multiple precision */
|
/* dbl_mp() converts a double precision number x into a multiple precision */
|
||||||
@ -466,7 +470,7 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
|
|||||||
/* 2.001*r**(1-p) for p>3. */
|
/* 2.001*r**(1-p) for p>3. */
|
||||||
/* *x=0 is not permissible. *x is left unchanged. */
|
/* *x=0 is not permissible. *x is left unchanged. */
|
||||||
|
|
||||||
void __inv(const mp_no *x, mp_no *y, int p) {
|
static void __inv(const mp_no *x, mp_no *y, int p) {
|
||||||
int i;
|
int i;
|
||||||
#if 0
|
#if 0
|
||||||
int l;
|
int l;
|
||||||
|
@ -1,8 +1,7 @@
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* IBM Accurate Mathematical Library
|
* IBM Accurate Mathematical Library
|
||||||
* Written by International Business Machines Corp.
|
* Written by International Business Machines Corp.
|
||||||
* Copyright (C) 2001 Free Software Foundation, Inc.
|
* Copyright (C) 2001, 2011 Free Software Foundation, Inc.
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU Lesser General Public License as published by
|
* it under the terms of the GNU Lesser General Public License as published by
|
||||||
@ -65,16 +64,16 @@ typedef union { int i[2]; double d; } number;
|
|||||||
|
|
||||||
#define ABS(x) ((x) < 0 ? -(x) : (x))
|
#define ABS(x) ((x) < 0 ? -(x) : (x))
|
||||||
|
|
||||||
int __acr(const mp_no *, const mp_no *, int);
|
// int __acr(const mp_no *, const mp_no *, int);
|
||||||
int __cr(const mp_no *, const mp_no *, int);
|
// int __cr(const mp_no *, const mp_no *, int);
|
||||||
void __cpy(const mp_no *, mp_no *, int);
|
void __cpy(const mp_no *, mp_no *, int);
|
||||||
void __cpymn(const mp_no *, int, mp_no *, int);
|
// void __cpymn(const mp_no *, int, mp_no *, int);
|
||||||
void __mp_dbl(const mp_no *, double *, int);
|
void __mp_dbl(const mp_no *, double *, int);
|
||||||
void __dbl_mp(double, mp_no *, int);
|
void __dbl_mp(double, mp_no *, int);
|
||||||
void __add(const mp_no *, const mp_no *, mp_no *, int);
|
void __add(const mp_no *, const mp_no *, mp_no *, int);
|
||||||
void __sub(const mp_no *, const mp_no *, mp_no *, int);
|
void __sub(const mp_no *, const mp_no *, mp_no *, int);
|
||||||
void __mul(const mp_no *, const mp_no *, mp_no *, int);
|
void __mul(const mp_no *, const mp_no *, mp_no *, int);
|
||||||
void __inv(const mp_no *, mp_no *, int);
|
// void __inv(const mp_no *, mp_no *, int);
|
||||||
void __dvd(const mp_no *, const mp_no *, mp_no *, int);
|
void __dvd(const mp_no *, const mp_no *, mp_no *, int);
|
||||||
|
|
||||||
extern void __mpatan (mp_no *, mp_no *, int);
|
extern void __mpatan (mp_no *, mp_no *, int);
|
||||||
|
@ -1,8 +1,7 @@
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* IBM Accurate Mathematical Library
|
* IBM Accurate Mathematical Library
|
||||||
* written by International Business Machines Corp.
|
* written by International Business Machines Corp.
|
||||||
* Copyright (C) 2001 Free Software Foundation
|
* Copyright (C) 2001, 2011 Free Software Foundation
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU Lesser General Public License as published by
|
* it under the terms of the GNU Lesser General Public License as published by
|
||||||
@ -41,7 +40,7 @@
|
|||||||
/* p as integer. Routine computes sqrt(*x) and stores result in *y */
|
/* p as integer. Routine computes sqrt(*x) and stores result in *y */
|
||||||
/****************************************************************************/
|
/****************************************************************************/
|
||||||
|
|
||||||
double fastiroot(double);
|
static double fastiroot(double);
|
||||||
|
|
||||||
void __mpsqrt(mp_no *x, mp_no *y, int p) {
|
void __mpsqrt(mp_no *x, mp_no *y, int p) {
|
||||||
#include "mpsqrt.h"
|
#include "mpsqrt.h"
|
||||||
@ -82,7 +81,7 @@ void __mpsqrt(mp_no *x, mp_no *y, int p) {
|
|||||||
/* Compute a double precision approximation for 1/sqrt(x) */
|
/* Compute a double precision approximation for 1/sqrt(x) */
|
||||||
/* with the relative error bounded by 2**-51. */
|
/* with the relative error bounded by 2**-51. */
|
||||||
/***********************************************************/
|
/***********************************************************/
|
||||||
double fastiroot(double x) {
|
static double fastiroot(double x) {
|
||||||
union {int i[2]; double d;} p,q;
|
union {int i[2]; double d;} p,q;
|
||||||
double y,z, t;
|
double y,z, t;
|
||||||
int n;
|
int n;
|
||||||
|
@ -46,7 +46,13 @@
|
|||||||
|
|
||||||
void __mpatan(mp_no *,mp_no *,int); /* see definition in mpatan.c */
|
void __mpatan(mp_no *,mp_no *,int); /* see definition in mpatan.c */
|
||||||
static double atanMp(double,const int[]);
|
static double atanMp(double,const int[]);
|
||||||
double __signArctan(double,double);
|
|
||||||
|
/* Fix the sign of y and return */
|
||||||
|
static double __signArctan(double x,double y){
|
||||||
|
return __copysign(y, x);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* An ultimate atan() routine. Given an IEEE double machine number x, */
|
/* An ultimate atan() routine. Given an IEEE double machine number x, */
|
||||||
/* routine computes the correctly rounded (to nearest) value of atan(x). */
|
/* routine computes the correctly rounded (to nearest) value of atan(x). */
|
||||||
double atan(double x) {
|
double atan(double x) {
|
||||||
@ -203,14 +209,6 @@ double atan(double x) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Fix the sign of y and return */
|
|
||||||
double __signArctan(double x,double y){
|
|
||||||
|
|
||||||
if (x<ZERO) return -y;
|
|
||||||
else return y;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Final stages. Compute atan(x) by multiple precision arithmetic */
|
/* Final stages. Compute atan(x) by multiple precision arithmetic */
|
||||||
static double atanMp(double x,const int pr[]){
|
static double atanMp(double x,const int pr[]){
|
||||||
mp_no mpx,mpy,mpy2,mperr,mpt1,mpy1;
|
mp_no mpx,mpy,mpy2,mperr,mpt1,mpy1;
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IBM Accurate Mathematical Library
|
* IBM Accurate Mathematical Library
|
||||||
* written by International Business Machines Corp.
|
* written by International Business Machines Corp.
|
||||||
* Copyright (C) 2001, 2009 Free Software Foundation
|
* Copyright (C) 2001, 2009, 2011 Free Software Foundation
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU Lesser General Public License as published by
|
* it under the terms of the GNU Lesser General Public License as published by
|
||||||
@ -53,9 +53,14 @@
|
|||||||
#include "mydefs.h"
|
#include "mydefs.h"
|
||||||
#include "usncs.h"
|
#include "usncs.h"
|
||||||
#include "MathLib.h"
|
#include "MathLib.h"
|
||||||
#include "sincos.tbl"
|
|
||||||
#include "math_private.h"
|
#include "math_private.h"
|
||||||
|
|
||||||
|
extern const union
|
||||||
|
{
|
||||||
|
int4 i[880];
|
||||||
|
double x[440];
|
||||||
|
} __sincostab attribute_hidden;
|
||||||
|
|
||||||
static const double
|
static const double
|
||||||
sn3 = -1.66666666666664880952546298448555E-01,
|
sn3 = -1.66666666666664880952546298448555E-01,
|
||||||
sn5 = 8.33333214285722277379541354343671E-03,
|
sn5 = 8.33333214285722277379541354343671E-03,
|
||||||
@ -120,10 +125,10 @@ double __sin(double x){
|
|||||||
s = y + y*xx*(sn3 +xx*sn5);
|
s = y + y*xx*(sn3 +xx*sn5);
|
||||||
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=(m>0)?sincos.x[k]:-sincos.x[k];
|
sn=(m>0)?__sincostab.x[k]:-__sincostab.x[k];
|
||||||
ssn=(m>0)?sincos.x[k+1]:-sincos.x[k+1];
|
ssn=(m>0)?__sincostab.x[k+1]:-__sincostab.x[k+1];
|
||||||
cs=sincos.x[k+2];
|
cs=__sincostab.x[k+2];
|
||||||
ccs=sincos.x[k+3];
|
ccs=__sincostab.x[k+3];
|
||||||
cor=(ssn+s*ccs-sn*c)+cs*s;
|
cor=(ssn+s*ccs-sn*c)+cs*s;
|
||||||
res=sn+cor;
|
res=sn+cor;
|
||||||
cor=(sn-res)+cor;
|
cor=(sn-res)+cor;
|
||||||
@ -146,10 +151,10 @@ double __sin(double x){
|
|||||||
s = y + y*xx*(sn3 +xx*sn5);
|
s = y + y*xx*(sn3 +xx*sn5);
|
||||||
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=sincos.x[k];
|
sn=__sincostab.x[k];
|
||||||
ssn=sincos.x[k+1];
|
ssn=__sincostab.x[k+1];
|
||||||
cs=sincos.x[k+2];
|
cs=__sincostab.x[k+2];
|
||||||
ccs=sincos.x[k+3];
|
ccs=__sincostab.x[k+3];
|
||||||
cor=(ccs-s*ssn-cs*c)-sn*s;
|
cor=(ccs-s*ssn-cs*c)-sn*s;
|
||||||
res=cs+cor;
|
res=cs+cor;
|
||||||
cor=(cs-res)+cor;
|
cor=(cs-res)+cor;
|
||||||
@ -192,10 +197,10 @@ double __sin(double x){
|
|||||||
s = y + (db+y*xx*(sn3 +xx*sn5));
|
s = y + (db+y*xx*(sn3 +xx*sn5));
|
||||||
c = y*db+xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = y*db+xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=sincos.x[k];
|
sn=__sincostab.x[k];
|
||||||
ssn=sincos.x[k+1];
|
ssn=__sincostab.x[k+1];
|
||||||
cs=sincos.x[k+2];
|
cs=__sincostab.x[k+2];
|
||||||
ccs=sincos.x[k+3];
|
ccs=__sincostab.x[k+3];
|
||||||
cor=(ssn+s*ccs-sn*c)+cs*s;
|
cor=(ssn+s*ccs-sn*c)+cs*s;
|
||||||
res=sn+cor;
|
res=sn+cor;
|
||||||
cor=(sn-res)+cor;
|
cor=(sn-res)+cor;
|
||||||
@ -212,10 +217,10 @@ double __sin(double x){
|
|||||||
y=a-(u.x-big.x)+da;
|
y=a-(u.x-big.x)+da;
|
||||||
xx=y*y;
|
xx=y*y;
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=sincos.x[k];
|
sn=__sincostab.x[k];
|
||||||
ssn=sincos.x[k+1];
|
ssn=__sincostab.x[k+1];
|
||||||
cs=sincos.x[k+2];
|
cs=__sincostab.x[k+2];
|
||||||
ccs=sincos.x[k+3];
|
ccs=__sincostab.x[k+3];
|
||||||
s = y + y*xx*(sn3 +xx*sn5);
|
s = y + y*xx*(sn3 +xx*sn5);
|
||||||
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
cor=(ccs-s*ssn-cs*c)-sn*s;
|
cor=(ccs-s*ssn-cs*c)-sn*s;
|
||||||
@ -269,10 +274,10 @@ double __sin(double x){
|
|||||||
s = y + (db+y*xx*(sn3 +xx*sn5));
|
s = y + (db+y*xx*(sn3 +xx*sn5));
|
||||||
c = y*db+xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = y*db+xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=sincos.x[k];
|
sn=__sincostab.x[k];
|
||||||
ssn=sincos.x[k+1];
|
ssn=__sincostab.x[k+1];
|
||||||
cs=sincos.x[k+2];
|
cs=__sincostab.x[k+2];
|
||||||
ccs=sincos.x[k+3];
|
ccs=__sincostab.x[k+3];
|
||||||
cor=(ssn+s*ccs-sn*c)+cs*s;
|
cor=(ssn+s*ccs-sn*c)+cs*s;
|
||||||
res=sn+cor;
|
res=sn+cor;
|
||||||
cor=(sn-res)+cor;
|
cor=(sn-res)+cor;
|
||||||
@ -289,10 +294,10 @@ double __sin(double x){
|
|||||||
y=a-(u.x-big.x)+da;
|
y=a-(u.x-big.x)+da;
|
||||||
xx=y*y;
|
xx=y*y;
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=sincos.x[k];
|
sn=__sincostab.x[k];
|
||||||
ssn=sincos.x[k+1];
|
ssn=__sincostab.x[k+1];
|
||||||
cs=sincos.x[k+2];
|
cs=__sincostab.x[k+2];
|
||||||
ccs=sincos.x[k+3];
|
ccs=__sincostab.x[k+3];
|
||||||
s = y + y*xx*(sn3 +xx*sn5);
|
s = y + y*xx*(sn3 +xx*sn5);
|
||||||
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
cor=(ccs-s*ssn-cs*c)-sn*s;
|
cor=(ccs-s*ssn-cs*c)-sn*s;
|
||||||
@ -364,10 +369,10 @@ double __cos(double x)
|
|||||||
s = y + y*xx*(sn3 +xx*sn5);
|
s = y + y*xx*(sn3 +xx*sn5);
|
||||||
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=sincos.x[k];
|
sn=__sincostab.x[k];
|
||||||
ssn=sincos.x[k+1];
|
ssn=__sincostab.x[k+1];
|
||||||
cs=sincos.x[k+2];
|
cs=__sincostab.x[k+2];
|
||||||
ccs=sincos.x[k+3];
|
ccs=__sincostab.x[k+3];
|
||||||
cor=(ccs-s*ssn-cs*c)-sn*s;
|
cor=(ccs-s*ssn-cs*c)-sn*s;
|
||||||
res=cs+cor;
|
res=cs+cor;
|
||||||
cor=(cs-res)+cor;
|
cor=(cs-res)+cor;
|
||||||
@ -396,10 +401,10 @@ double __cos(double x)
|
|||||||
s = y + (db+y*xx*(sn3 +xx*sn5));
|
s = y + (db+y*xx*(sn3 +xx*sn5));
|
||||||
c = y*db+xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = y*db+xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=sincos.x[k];
|
sn=__sincostab.x[k];
|
||||||
ssn=sincos.x[k+1];
|
ssn=__sincostab.x[k+1];
|
||||||
cs=sincos.x[k+2];
|
cs=__sincostab.x[k+2];
|
||||||
ccs=sincos.x[k+3];
|
ccs=__sincostab.x[k+3];
|
||||||
cor=(ssn+s*ccs-sn*c)+cs*s;
|
cor=(ssn+s*ccs-sn*c)+cs*s;
|
||||||
res=sn+cor;
|
res=sn+cor;
|
||||||
cor=(sn-res)+cor;
|
cor=(sn-res)+cor;
|
||||||
@ -442,10 +447,10 @@ double __cos(double x)
|
|||||||
s = y + (db+y*xx*(sn3 +xx*sn5));
|
s = y + (db+y*xx*(sn3 +xx*sn5));
|
||||||
c = y*db+xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = y*db+xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=sincos.x[k];
|
sn=__sincostab.x[k];
|
||||||
ssn=sincos.x[k+1];
|
ssn=__sincostab.x[k+1];
|
||||||
cs=sincos.x[k+2];
|
cs=__sincostab.x[k+2];
|
||||||
ccs=sincos.x[k+3];
|
ccs=__sincostab.x[k+3];
|
||||||
cor=(ssn+s*ccs-sn*c)+cs*s;
|
cor=(ssn+s*ccs-sn*c)+cs*s;
|
||||||
res=sn+cor;
|
res=sn+cor;
|
||||||
cor=(sn-res)+cor;
|
cor=(sn-res)+cor;
|
||||||
@ -461,10 +466,10 @@ double __cos(double x)
|
|||||||
y=a-(u.x-big.x)+da;
|
y=a-(u.x-big.x)+da;
|
||||||
xx=y*y;
|
xx=y*y;
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=sincos.x[k];
|
sn=__sincostab.x[k];
|
||||||
ssn=sincos.x[k+1];
|
ssn=__sincostab.x[k+1];
|
||||||
cs=sincos.x[k+2];
|
cs=__sincostab.x[k+2];
|
||||||
ccs=sincos.x[k+3];
|
ccs=__sincostab.x[k+3];
|
||||||
s = y + y*xx*(sn3 +xx*sn5);
|
s = y + y*xx*(sn3 +xx*sn5);
|
||||||
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
cor=(ccs-s*ssn-cs*c)-sn*s;
|
cor=(ccs-s*ssn-cs*c)-sn*s;
|
||||||
@ -517,10 +522,10 @@ double __cos(double x)
|
|||||||
s = y + (db+y*xx*(sn3 +xx*sn5));
|
s = y + (db+y*xx*(sn3 +xx*sn5));
|
||||||
c = y*db+xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = y*db+xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=sincos.x[k];
|
sn=__sincostab.x[k];
|
||||||
ssn=sincos.x[k+1];
|
ssn=__sincostab.x[k+1];
|
||||||
cs=sincos.x[k+2];
|
cs=__sincostab.x[k+2];
|
||||||
ccs=sincos.x[k+3];
|
ccs=__sincostab.x[k+3];
|
||||||
cor=(ssn+s*ccs-sn*c)+cs*s;
|
cor=(ssn+s*ccs-sn*c)+cs*s;
|
||||||
res=sn+cor;
|
res=sn+cor;
|
||||||
cor=(sn-res)+cor;
|
cor=(sn-res)+cor;
|
||||||
@ -536,10 +541,10 @@ double __cos(double x)
|
|||||||
y=a-(u.x-big.x)+da;
|
y=a-(u.x-big.x)+da;
|
||||||
xx=y*y;
|
xx=y*y;
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=sincos.x[k];
|
sn=__sincostab.x[k];
|
||||||
ssn=sincos.x[k+1];
|
ssn=__sincostab.x[k+1];
|
||||||
cs=sincos.x[k+2];
|
cs=__sincostab.x[k+2];
|
||||||
ccs=sincos.x[k+3];
|
ccs=__sincostab.x[k+3];
|
||||||
s = y + y*xx*(sn3 +xx*sn5);
|
s = y + y*xx*(sn3 +xx*sn5);
|
||||||
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
cor=(ccs-s*ssn-cs*c)-sn*s;
|
cor=(ccs-s*ssn-cs*c)-sn*s;
|
||||||
@ -611,7 +616,7 @@ static const double th2_36 = 206158430208.0; /* 1.5*2**37 */
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
/*******************************************************************************/
|
/*******************************************************************************/
|
||||||
/* Routine compute sin(x) for 0.25<|x|< 0.855469 by sincos.tbl and Taylor */
|
/* Routine computes sin(x) for 0.25<|x|< 0.855469 by the __sincostab table and Taylor */
|
||||||
/* and if the result still isn't accurate enough, by mpsin or dubsin */
|
/* and if the result still isn't accurate enough, by mpsin or dubsin */
|
||||||
/*******************************************************************************/
|
/*******************************************************************************/
|
||||||
|
|
||||||
@ -627,10 +632,10 @@ static double slow1(double x) {
|
|||||||
s = y*xx*(sn3 +xx*sn5);
|
s = y*xx*(sn3 +xx*sn5);
|
||||||
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=sincos.x[k]; /* Data */
|
sn=__sincostab.x[k]; /* Data */
|
||||||
ssn=sincos.x[k+1]; /* from */
|
ssn=__sincostab.x[k+1]; /* from */
|
||||||
cs=sincos.x[k+2]; /* tables */
|
cs=__sincostab.x[k+2]; /* tables */
|
||||||
ccs=sincos.x[k+3]; /* sincos.tbl */
|
ccs=__sincostab.x[k+3]; /* __sincostab */
|
||||||
y1 = (y+t22)-t22;
|
y1 = (y+t22)-t22;
|
||||||
y2 = y - y1;
|
y2 = y - y1;
|
||||||
c1 = (cs+t22)-t22;
|
c1 = (cs+t22)-t22;
|
||||||
@ -648,7 +653,7 @@ static double slow1(double x) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
/**************************************************************************/
|
/**************************************************************************/
|
||||||
/* Routine compute sin(x) for 0.855469 <|x|<2.426265 by sincos.tbl */
|
/* Routine computes sin(x) for 0.855469 <|x|<2.426265 by the __sincostab table */
|
||||||
/* and if the result still isn't accurate enough, by mpsin or dubsin */
|
/* and if the result still isn't accurate enough, by mpsin or dubsin */
|
||||||
/**************************************************************************/
|
/**************************************************************************/
|
||||||
static double slow2(double x) {
|
static double slow2(double x) {
|
||||||
@ -672,10 +677,10 @@ static double slow2(double x) {
|
|||||||
s = y*xx*(sn3 +xx*sn5);
|
s = y*xx*(sn3 +xx*sn5);
|
||||||
c = y*del+xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = y*del+xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=sincos.x[k];
|
sn=__sincostab.x[k];
|
||||||
ssn=sincos.x[k+1];
|
ssn=__sincostab.x[k+1];
|
||||||
cs=sincos.x[k+2];
|
cs=__sincostab.x[k+2];
|
||||||
ccs=sincos.x[k+3];
|
ccs=__sincostab.x[k+3];
|
||||||
y1 = (y+t22)-t22;
|
y1 = (y+t22)-t22;
|
||||||
y2 = (y - y1)+del;
|
y2 = (y - y1)+del;
|
||||||
e1 = (sn+t22)-t22;
|
e1 = (sn+t22)-t22;
|
||||||
@ -763,10 +768,10 @@ static double sloww1(double x, double dx, double orig) {
|
|||||||
s = y*xx*(sn3 +xx*sn5);
|
s = y*xx*(sn3 +xx*sn5);
|
||||||
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=sincos.x[k];
|
sn=__sincostab.x[k];
|
||||||
ssn=sincos.x[k+1];
|
ssn=__sincostab.x[k+1];
|
||||||
cs=sincos.x[k+2];
|
cs=__sincostab.x[k+2];
|
||||||
ccs=sincos.x[k+3];
|
ccs=__sincostab.x[k+3];
|
||||||
y1 = (y+t22)-t22;
|
y1 = (y+t22)-t22;
|
||||||
y2 = (y - y1)+dx;
|
y2 = (y - y1)+dx;
|
||||||
c1 = (cs+t22)-t22;
|
c1 = (cs+t22)-t22;
|
||||||
@ -805,10 +810,10 @@ static double sloww2(double x, double dx, double orig, int n) {
|
|||||||
s = y*xx*(sn3 +xx*sn5);
|
s = y*xx*(sn3 +xx*sn5);
|
||||||
c = y*dx+xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = y*dx+xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=sincos.x[k];
|
sn=__sincostab.x[k];
|
||||||
ssn=sincos.x[k+1];
|
ssn=__sincostab.x[k+1];
|
||||||
cs=sincos.x[k+2];
|
cs=__sincostab.x[k+2];
|
||||||
ccs=sincos.x[k+3];
|
ccs=__sincostab.x[k+3];
|
||||||
|
|
||||||
y1 = (y+t22)-t22;
|
y1 = (y+t22)-t22;
|
||||||
y2 = (y - y1)+dx;
|
y2 = (y - y1)+dx;
|
||||||
@ -882,10 +887,10 @@ mynumber u;
|
|||||||
s = y*xx*(sn3 +xx*sn5);
|
s = y*xx*(sn3 +xx*sn5);
|
||||||
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=sincos.x[k];
|
sn=__sincostab.x[k];
|
||||||
ssn=sincos.x[k+1];
|
ssn=__sincostab.x[k+1];
|
||||||
cs=sincos.x[k+2];
|
cs=__sincostab.x[k+2];
|
||||||
ccs=sincos.x[k+3];
|
ccs=__sincostab.x[k+3];
|
||||||
y1 = (y+t22)-t22;
|
y1 = (y+t22)-t22;
|
||||||
y2 = (y - y1)+dx;
|
y2 = (y - y1)+dx;
|
||||||
c1 = (cs+t22)-t22;
|
c1 = (cs+t22)-t22;
|
||||||
@ -925,10 +930,10 @@ mynumber u;
|
|||||||
s = y*xx*(sn3 +xx*sn5);
|
s = y*xx*(sn3 +xx*sn5);
|
||||||
c = y*dx+xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = y*dx+xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=sincos.x[k];
|
sn=__sincostab.x[k];
|
||||||
ssn=sincos.x[k+1];
|
ssn=__sincostab.x[k+1];
|
||||||
cs=sincos.x[k+2];
|
cs=__sincostab.x[k+2];
|
||||||
ccs=sincos.x[k+3];
|
ccs=__sincostab.x[k+3];
|
||||||
|
|
||||||
y1 = (y+t22)-t22;
|
y1 = (y+t22)-t22;
|
||||||
y2 = (y - y1)+dx;
|
y2 = (y - y1)+dx;
|
||||||
@ -966,10 +971,10 @@ static double cslow2(double x) {
|
|||||||
s = y*xx*(sn3 +xx*sn5);
|
s = y*xx*(sn3 +xx*sn5);
|
||||||
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=sincos.x[k];
|
sn=__sincostab.x[k];
|
||||||
ssn=sincos.x[k+1];
|
ssn=__sincostab.x[k+1];
|
||||||
cs=sincos.x[k+2];
|
cs=__sincostab.x[k+2];
|
||||||
ccs=sincos.x[k+3];
|
ccs=__sincostab.x[k+3];
|
||||||
y1 = (y+t22)-t22;
|
y1 = (y+t22)-t22;
|
||||||
y2 = y - y1;
|
y2 = y - y1;
|
||||||
e1 = (sn+t22)-t22;
|
e1 = (sn+t22)-t22;
|
||||||
@ -1059,10 +1064,10 @@ static double csloww1(double x, double dx, double orig) {
|
|||||||
s = y*xx*(sn3 +xx*sn5);
|
s = y*xx*(sn3 +xx*sn5);
|
||||||
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=sincos.x[k];
|
sn=__sincostab.x[k];
|
||||||
ssn=sincos.x[k+1];
|
ssn=__sincostab.x[k+1];
|
||||||
cs=sincos.x[k+2];
|
cs=__sincostab.x[k+2];
|
||||||
ccs=sincos.x[k+3];
|
ccs=__sincostab.x[k+3];
|
||||||
y1 = (y+t22)-t22;
|
y1 = (y+t22)-t22;
|
||||||
y2 = (y - y1)+dx;
|
y2 = (y - y1)+dx;
|
||||||
c1 = (cs+t22)-t22;
|
c1 = (cs+t22)-t22;
|
||||||
@ -1103,10 +1108,10 @@ static double csloww2(double x, double dx, double orig, int n) {
|
|||||||
s = y*xx*(sn3 +xx*sn5);
|
s = y*xx*(sn3 +xx*sn5);
|
||||||
c = y*dx+xx*(cs2 +xx*(cs4 + xx*cs6));
|
c = y*dx+xx*(cs2 +xx*(cs4 + xx*cs6));
|
||||||
k=u.i[LOW_HALF]<<2;
|
k=u.i[LOW_HALF]<<2;
|
||||||
sn=sincos.x[k];
|
sn=__sincostab.x[k];
|
||||||
ssn=sincos.x[k+1];
|
ssn=__sincostab.x[k+1];
|
||||||
cs=sincos.x[k+2];
|
cs=__sincostab.x[k+2];
|
||||||
ccs=sincos.x[k+3];
|
ccs=__sincostab.x[k+3];
|
||||||
|
|
||||||
y1 = (y+t22)-t22;
|
y1 = (y+t22)-t22;
|
||||||
y2 = (y - y1)+dx;
|
y2 = (y - y1)+dx;
|
||||||
@ -1127,12 +1132,17 @@ static double csloww2(double x, double dx, double orig, int n) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef __cos
|
||||||
weak_alias (__cos, cos)
|
weak_alias (__cos, cos)
|
||||||
weak_alias (__sin, sin)
|
|
||||||
|
|
||||||
# ifdef NO_LONG_DOUBLE
|
# ifdef NO_LONG_DOUBLE
|
||||||
strong_alias (__sin, __sinl)
|
|
||||||
weak_alias (__sin, sinl)
|
|
||||||
strong_alias (__cos, __cosl)
|
strong_alias (__cos, __cosl)
|
||||||
weak_alias (__cos, cosl)
|
weak_alias (__cos, cosl)
|
||||||
# endif
|
# endif
|
||||||
|
#endif
|
||||||
|
#ifndef __sin
|
||||||
|
weak_alias (__sin, sin)
|
||||||
|
# ifdef NO_LONG_DOUBLE
|
||||||
|
strong_alias (__sin, __sinl)
|
||||||
|
weak_alias (__sin, sinl)
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IBM Accurate Mathematical Library
|
* IBM Accurate Mathematical Library
|
||||||
* Written by International Business Machines Corp.
|
* Written by International Business Machines Corp.
|
||||||
* Copyright (C) 2001, 2007 Free Software Foundation, Inc.
|
* Copyright (C) 2001, 2007, 2011 Free Software Foundation, Inc.
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU Lesser General Public License as published by
|
* it under the terms of the GNU Lesser General Public License as published by
|
||||||
@ -18,13 +18,16 @@
|
|||||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <mydefs.h>
|
||||||
|
#include <endian.h>
|
||||||
|
|
||||||
/****************************************************************/
|
/****************************************************************/
|
||||||
/* TABLES FOR THE usin() and ucos() FUNCTION */
|
/* TABLES FOR THE usin() and ucos() FUNCTION */
|
||||||
/****************************************************************/
|
/****************************************************************/
|
||||||
|
|
||||||
|
|
||||||
#ifdef BIG_ENDI
|
#ifdef BIG_ENDI
|
||||||
static const union {int4 i[880]; double x[440];}sincos = { .i = {
|
const union {int4 i[880]; double x[440];}__sincostab = { .i = {
|
||||||
/**/ 0x00000000, 0x00000000,
|
/**/ 0x00000000, 0x00000000,
|
||||||
/**/ 0x00000000, 0x00000000,
|
/**/ 0x00000000, 0x00000000,
|
||||||
/**/ 0x3FF00000, 0x00000000,
|
/**/ 0x3FF00000, 0x00000000,
|
||||||
@ -467,7 +470,7 @@ static const union {int4 i[880]; double x[440];}sincos = { .i = {
|
|||||||
/**/ 0x3C747A10, 0x8073C259 } };
|
/**/ 0x3C747A10, 0x8073C259 } };
|
||||||
#else
|
#else
|
||||||
#ifdef LITTLE_ENDI
|
#ifdef LITTLE_ENDI
|
||||||
static const union {int4 i[880]; double x[440];} sincos = { .i = {
|
const union {int4 i[880]; double x[440];} __sincostab = { .i = {
|
||||||
/**/ 0x00000000, 0x00000000,
|
/**/ 0x00000000, 0x00000000,
|
||||||
/**/ 0x00000000, 0x00000000,
|
/**/ 0x00000000, 0x00000000,
|
||||||
/**/ 0x00000000, 0x3FF00000,
|
/**/ 0x00000000, 0x3FF00000,
|
@ -1,4 +1,36 @@
|
|||||||
ifeq ($(subdir),math)
|
ifeq ($(subdir),math)
|
||||||
libm-sysdep_routines += s_floor-c s_ceil-c s_floorf-c s_ceilf-c \
|
libm-sysdep_routines += s_floor-c s_ceil-c s_floorf-c s_ceilf-c \
|
||||||
s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c
|
s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c
|
||||||
|
|
||||||
|
ifeq ($(have-mfma4),yes)
|
||||||
|
libm-sysdep_routines += e_exp-fma4 e_log-fma4 e_pow-fma4 s_atan-fma4 \
|
||||||
|
e_asin-fma4 e_atan2-fma4 s_sin-fma4 s_tan-fma4 \
|
||||||
|
mplog-fma4 mpa-fma4 slowexp-fma4 slowpow-fma4 \
|
||||||
|
sincos32-fma4 doasin-fma4 dosincos-fma4 \
|
||||||
|
brandred-fma4 halfulp-fma4 mpexp-fma4 \
|
||||||
|
mpatan2-fma4 mpatan-fma4 mpsqrt-fma4 mptan-fma4
|
||||||
|
|
||||||
|
CFLAGS-brandred-fma4.c = -mfma4
|
||||||
|
CFLAGS-doasin-fma4.c = -mfma4
|
||||||
|
CFLAGS-dosincos-fma4.c = -mfma4
|
||||||
|
CFLAGS-e_asin-fma4.c = -mfma4
|
||||||
|
CFLAGS-e_atan2-fma4.c = -mfma4
|
||||||
|
CFLAGS-e_exp-fma4.c = -mfma4
|
||||||
|
CFLAGS-e_log-fma4.c = -mfma4
|
||||||
|
CFLAGS-e_pow-fma4.c = -mfma4
|
||||||
|
CFLAGS-halfulp-fma4.c = -mfma4
|
||||||
|
CFLAGS-mpa-fma4.c = -mfma4
|
||||||
|
CFLAGS-mpatan-fma4.c = -mfma4
|
||||||
|
CFLAGS-mpatan2-fma4.c = -mfma4
|
||||||
|
CFLAGS-mpexp-fma4.c = -mfma4
|
||||||
|
CFLAGS-mplog-fma4.c = -mfma4
|
||||||
|
CFLAGS-mpsqrt-fma4.c = -mfma4
|
||||||
|
CFLAGS-mptan-fma4.c = -mfma4
|
||||||
|
CFLAGS-s_atan-fma4.c = -mfma4
|
||||||
|
CFLAGS-sincos32-fma4.c = -mfma4
|
||||||
|
CFLAGS-slowexp-fma4.c = -mfma4
|
||||||
|
CFLAGS-slowpow-fma4.c = -mfma4
|
||||||
|
CFLAGS-s_sin-fma4.c = -mfma4
|
||||||
|
CFLAGS-s_tan-fma4.c = -mfma4
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
3
sysdeps/x86_64/fpu/multiarch/brandred-fma4.c
Normal file
3
sysdeps/x86_64/fpu/multiarch/brandred-fma4.c
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
#define __branred __branred_fma4
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/branred.c>
|
3
sysdeps/x86_64/fpu/multiarch/doasin-fma4.c
Normal file
3
sysdeps/x86_64/fpu/multiarch/doasin-fma4.c
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
#define __doasin __doasin_fma4
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/doasin.c>
|
5
sysdeps/x86_64/fpu/multiarch/dosincos-fma4.c
Normal file
5
sysdeps/x86_64/fpu/multiarch/dosincos-fma4.c
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
#define __docos __docos_fma4
|
||||||
|
#define __dubcos __dubcos_fma4
|
||||||
|
#define __dubsin __dubsin_fma4
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/dosincos.c>
|
10
sysdeps/x86_64/fpu/multiarch/e_asin-fma4.c
Normal file
10
sysdeps/x86_64/fpu/multiarch/e_asin-fma4.c
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
#define __ieee754_acos __ieee754_acos_fma4
|
||||||
|
#define __ieee754_asin __ieee754_asin_fma4
|
||||||
|
#define __cos32 __cos32_fma4
|
||||||
|
#define __doasin __doasin_fma4
|
||||||
|
#define __docos __docos_fma4
|
||||||
|
#define __dubcos __dubcos_fma4
|
||||||
|
#define __dubsin __dubsin_fma4
|
||||||
|
#define __sin32 __sin32_fma4
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/e_asin.c>
|
23
sysdeps/x86_64/fpu/multiarch/e_asin.c
Normal file
23
sysdeps/x86_64/fpu/multiarch/e_asin.c
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
#ifdef HAVE_FMA4_SUPPORT
|
||||||
|
# include <init-arch.h>
|
||||||
|
# include <math_private.h>
|
||||||
|
|
||||||
|
extern double __ieee754_acos_sse2 (double);
|
||||||
|
extern double __ieee754_acos_fma4 (double);
|
||||||
|
extern double __ieee754_asin_sse2 (double);
|
||||||
|
extern double __ieee754_asin_fma4 (double);
|
||||||
|
|
||||||
|
libm_ifunc (__ieee754_acos,
|
||||||
|
HAS_FMA4 ? __ieee754_acos_fma4 : __ieee754_acos_sse2);
|
||||||
|
strong_alias (__ieee754_acos, __acos_finite)
|
||||||
|
|
||||||
|
libm_ifunc (__ieee754_asin,
|
||||||
|
HAS_FMA4 ? __ieee754_asin_fma4 : __ieee754_asin_sse2);
|
||||||
|
strong_alias (__ieee754_asin, __asin_finite)
|
||||||
|
|
||||||
|
# define __ieee754_acos __ieee754_acos_sse2
|
||||||
|
# define __ieee754_asin __ieee754_asin_sse2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/e_asin.c>
|
9
sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c
Normal file
9
sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
#define __ieee754_atan2 __ieee754_atan2_fma4
|
||||||
|
#define __add __add_fma4
|
||||||
|
#define __dbl_mp __dbl_mp_fma4
|
||||||
|
#define __dvd __dvd_fma4
|
||||||
|
#define __mpatan2 __mpatan2_fma4
|
||||||
|
#define __mul __mul_fma4
|
||||||
|
#define __sub __sub_fma4
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/e_atan2.c>
|
16
sysdeps/x86_64/fpu/multiarch/e_atan2.c
Normal file
16
sysdeps/x86_64/fpu/multiarch/e_atan2.c
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
#ifdef HAVE_FMA4_SUPPORT
|
||||||
|
# include <init-arch.h>
|
||||||
|
# include <math_private.h>
|
||||||
|
|
||||||
|
extern double __ieee754_atan2_sse2 (double, double);
|
||||||
|
extern double __ieee754_atan2_fma4 (double, double);
|
||||||
|
|
||||||
|
libm_ifunc (__ieee754_atan2,
|
||||||
|
HAS_FMA4 ? __ieee754_atan2_fma4 : __ieee754_atan2_sse2);
|
||||||
|
strong_alias (__ieee754_atan2, __atan2_finite)
|
||||||
|
|
||||||
|
# define __ieee754_atan2 __ieee754_atan2_sse2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/e_atan2.c>
|
5
sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c
Normal file
5
sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
#define __ieee754_exp __ieee754_exp_fma4
|
||||||
|
#define __exp1 __exp1_fma4
|
||||||
|
#define __slowexp __slowexp_fma4
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/e_exp.c>
|
15
sysdeps/x86_64/fpu/multiarch/e_exp.c
Normal file
15
sysdeps/x86_64/fpu/multiarch/e_exp.c
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
#ifdef HAVE_FMA4_SUPPORT
|
||||||
|
# include <init-arch.h>
|
||||||
|
# include <math_private.h>
|
||||||
|
|
||||||
|
extern double __ieee754_exp_sse2 (double);
|
||||||
|
extern double __ieee754_exp_fma4 (double);
|
||||||
|
|
||||||
|
libm_ifunc (__ieee754_exp, HAS_FMA4 ? __ieee754_exp_fma4 : __ieee754_exp_sse2);
|
||||||
|
strong_alias (__ieee754_exp, __exp_finite)
|
||||||
|
|
||||||
|
# define __ieee754_exp __ieee754_exp_sse2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/e_exp.c>
|
7
sysdeps/x86_64/fpu/multiarch/e_log-fma4.c
Normal file
7
sysdeps/x86_64/fpu/multiarch/e_log-fma4.c
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
#define __ieee754_log __ieee754_log_fma4
|
||||||
|
#define __mplog __mplog_fma4
|
||||||
|
#define __add __add_fma4
|
||||||
|
#define __dbl_mp __dbl_mp_fma4
|
||||||
|
#define __sub __sub_fma4
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/e_log.c>
|
15
sysdeps/x86_64/fpu/multiarch/e_log.c
Normal file
15
sysdeps/x86_64/fpu/multiarch/e_log.c
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
#ifdef HAVE_FMA4_SUPPORT
|
||||||
|
# include <init-arch.h>
|
||||||
|
# include <math_private.h>
|
||||||
|
|
||||||
|
extern double __ieee754_log_sse2 (double);
|
||||||
|
extern double __ieee754_log_fma4 (double);
|
||||||
|
|
||||||
|
libm_ifunc (__ieee754_log, HAS_FMA4 ? __ieee754_log_fma4 : __ieee754_log_sse2);
|
||||||
|
strong_alias (__ieee754_log, __log_finite)
|
||||||
|
|
||||||
|
# define __ieee754_log __ieee754_log_sse2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/e_log.c>
|
5
sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c
Normal file
5
sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
#define __ieee754_pow __ieee754_pow_fma4
|
||||||
|
#define __exp1 __exp1_fma4
|
||||||
|
#define __slowpow __slowpow_fma4
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/e_pow.c>
|
15
sysdeps/x86_64/fpu/multiarch/e_pow.c
Normal file
15
sysdeps/x86_64/fpu/multiarch/e_pow.c
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
#ifdef HAVE_FMA4_SUPPORT
|
||||||
|
# include <init-arch.h>
|
||||||
|
# include <math_private.h>
|
||||||
|
|
||||||
|
extern double __ieee754_pow_sse2 (double, double);
|
||||||
|
extern double __ieee754_pow_fma4 (double, double);
|
||||||
|
|
||||||
|
libm_ifunc (__ieee754_pow, HAS_FMA4 ? __ieee754_pow_fma4 : __ieee754_pow_sse2);
|
||||||
|
strong_alias (__ieee754_pow, __pow_finite)
|
||||||
|
|
||||||
|
# define __ieee754_pow __ieee754_pow_sse2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/e_pow.c>
|
3
sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c
Normal file
3
sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
#define __halfulp __halfulp_fma4
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/halfulp.c>
|
10
sysdeps/x86_64/fpu/multiarch/mpa-fma4.c
Normal file
10
sysdeps/x86_64/fpu/multiarch/mpa-fma4.c
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
#define __add __add_fma4
|
||||||
|
#define __mul __mul_fma4
|
||||||
|
#define __sub __sub_fma4
|
||||||
|
#define __dbl_mp __dbl_mp_fma4
|
||||||
|
#define __dvd __dvd_fma4
|
||||||
|
|
||||||
|
#define NO___CPY 1
|
||||||
|
#define NO___MP_DBL 1
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/mpa.c>
|
8
sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c
Normal file
8
sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
#define __mpatan __mpatan_fma4
|
||||||
|
#define __add __add_fma4
|
||||||
|
#define __dvd __dvd_fma4
|
||||||
|
#define __mpsqrt __mpsqrt_fma4
|
||||||
|
#define __mul __mul_fma4
|
||||||
|
#define __sub __sub_fma4
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/mpatan.c>
|
8
sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c
Normal file
8
sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
#define __mpatan2 __mpatan2_fma4
|
||||||
|
#define __add __add_fma4
|
||||||
|
#define __dvd __dvd_fma4
|
||||||
|
#define __mpatan __mpatan_fma4
|
||||||
|
#define __mpsqrt __mpsqrt_fma4
|
||||||
|
#define __mul __mul_fma4
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/mpatan2.c>
|
7
sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c
Normal file
7
sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
#define __mpexp __mpexp_fma4
|
||||||
|
#define __add __add_fma4
|
||||||
|
#define __dbl_mp __dbl_mp_fma4
|
||||||
|
#define __dvd __dvd_fma4
|
||||||
|
#define __mul __mul_fma4
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/mpexp.c>
|
7
sysdeps/x86_64/fpu/multiarch/mplog-fma4.c
Normal file
7
sysdeps/x86_64/fpu/multiarch/mplog-fma4.c
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
#define __mplog __mplog_fma4
|
||||||
|
#define __add __add_fma4
|
||||||
|
#define __mpexp __mpexp_fma4
|
||||||
|
#define __mul __mul_fma4
|
||||||
|
#define __sub __sub_fma4
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/mplog.c>
|
6
sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c
Normal file
6
sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#define __mpsqrt __mpsqrt_fma4
|
||||||
|
#define __dbl_mp __dbl_mp_fma4
|
||||||
|
#define __mul __mul_fma4
|
||||||
|
#define __sub __sub_fma4
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/mpsqrt.c>
|
6
sysdeps/x86_64/fpu/multiarch/mptan-fma4.c
Normal file
6
sysdeps/x86_64/fpu/multiarch/mptan-fma4.c
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#define __mptan __mptan_fma4
|
||||||
|
#define __c32 __c32_fma4
|
||||||
|
#define __dvd __dvd_fma4
|
||||||
|
#define __mpranred __mpranred_fma4
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/mptan.c>
|
8
sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c
Normal file
8
sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
#define atan __atan_fma4
|
||||||
|
#define __add __add_fma4
|
||||||
|
#define __dbl_mp __dbl_mp_fma4
|
||||||
|
#define __mpatan __mpatan_fma4
|
||||||
|
#define __mul __mul_fma4
|
||||||
|
#define __sub __sub_fma4
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/s_atan.c>
|
14
sysdeps/x86_64/fpu/multiarch/s_atan.c
Normal file
14
sysdeps/x86_64/fpu/multiarch/s_atan.c
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
#ifdef HAVE_FMA4_SUPPORT
|
||||||
|
# include <init-arch.h>
|
||||||
|
# include <math.h>
|
||||||
|
|
||||||
|
extern double __atan_sse2 (double);
|
||||||
|
extern double __atan_fma4 (double);
|
||||||
|
|
||||||
|
libm_ifunc (atan, HAS_FMA4 ? __atan_fma4 : __atan_sse2);
|
||||||
|
|
||||||
|
# define atan __atan_sse2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/s_atan.c>
|
11
sysdeps/x86_64/fpu/multiarch/s_sin-fma4.c
Normal file
11
sysdeps/x86_64/fpu/multiarch/s_sin-fma4.c
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
#define __cos __cos_fma4
|
||||||
|
#define __sin __sin_fma4
|
||||||
|
#define __branred __branred_fma4
|
||||||
|
#define __docos __docos_fma4
|
||||||
|
#define __dubsin __dubsin_fma4
|
||||||
|
#define __mpcos __mpcos_fma4
|
||||||
|
#define __mpcos1 __mpcos1_fma4
|
||||||
|
#define __mpsin __mpsin_fma4
|
||||||
|
#define __mpsin1 __mpsin1_fma4
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/s_sin.c>
|
22
sysdeps/x86_64/fpu/multiarch/s_sin.c
Normal file
22
sysdeps/x86_64/fpu/multiarch/s_sin.c
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
#ifdef HAVE_FMA4_SUPPORT
|
||||||
|
# include <init-arch.h>
|
||||||
|
# include <math.h>
|
||||||
|
# undef NAN
|
||||||
|
|
||||||
|
extern double __cos_sse2 (double);
|
||||||
|
extern double __cos_fma4 (double);
|
||||||
|
extern double __sin_sse2 (double);
|
||||||
|
extern double __sin_fma4 (double);
|
||||||
|
|
||||||
|
libm_ifunc (__cos, HAS_FMA4 ? __cos_fma4 : __cos_sse2);
|
||||||
|
weak_alias (__cos, cos)
|
||||||
|
|
||||||
|
libm_ifunc (__sin, HAS_FMA4 ? __sin_fma4 : __sin_sse2);
|
||||||
|
weak_alias (__sin, sin)
|
||||||
|
|
||||||
|
# define __cos __cos_sse2
|
||||||
|
# define __sin __sin_sse2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/s_sin.c>
|
9
sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c
Normal file
9
sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
#define tan __tan_fma4
|
||||||
|
#define __branred __branred_fma4
|
||||||
|
#define __dbl_mp __dbl_mp_fma4
|
||||||
|
#define __mpranred __mpranred_fma4
|
||||||
|
#define __mptan __mptan_fma4
|
||||||
|
#define __sub __sub_fma4
|
||||||
|
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/s_tan.c>
|
14
sysdeps/x86_64/fpu/multiarch/s_tan.c
Normal file
14
sysdeps/x86_64/fpu/multiarch/s_tan.c
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
#ifdef HAVE_FMA4_SUPPORT
|
||||||
|
# include <init-arch.h>
|
||||||
|
# include <math.h>
|
||||||
|
|
||||||
|
extern double __tan_sse2 (double);
|
||||||
|
extern double __tan_fma4 (double);
|
||||||
|
|
||||||
|
libm_ifunc (tan, HAS_FMA4 ? __tan_fma4 : __tan_sse2);
|
||||||
|
|
||||||
|
# define tan __tan_sse2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/s_tan.c>
|
14
sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c
Normal file
14
sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
#define __cos32 __cos32_fma4
|
||||||
|
#define __sin32 __sin32_fma4
|
||||||
|
#define __c32 __c32_fma4
|
||||||
|
#define __mpsin __mpsin_fma4
|
||||||
|
#define __mpsin1 __mpsin1_fma4
|
||||||
|
#define __mpcos __mpcos_fma4
|
||||||
|
#define __mpcos1 __mpcos1_fma4
|
||||||
|
#define __mpranred __mpranred_fma4
|
||||||
|
#define __add __add_fma4
|
||||||
|
#define __dbl_mp __dbl_mp_fma4
|
||||||
|
#define __mul __mul_fma4
|
||||||
|
#define __sub __sub_fma4
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/sincos32.c>
|
8
sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c
Normal file
8
sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
#define __slowexp __slowexp_fma4
|
||||||
|
#define __add __add_fma4
|
||||||
|
#define __dbl_mp __dbl_mp_fma4
|
||||||
|
#define __mpexp __mpexp_fma4
|
||||||
|
#define __mul __mul_fma4
|
||||||
|
#define __sub __sub_fma4
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/slowexp.c>
|
10
sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c
Normal file
10
sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
#define __slowpow __slowpow_fma4
|
||||||
|
#define __add __add_fma4
|
||||||
|
#define __dbl_mp __dbl_mp_fma4
|
||||||
|
#define __mpexp __mpexp_fma4
|
||||||
|
#define __mplog __mplog_fma4
|
||||||
|
#define __mul __mul_fma4
|
||||||
|
#define __sub __sub_fma4
|
||||||
|
#define __halfulp __halfulp_fma4
|
||||||
|
|
||||||
|
#include <sysdeps/ieee754/dbl-64/slowpow.c>
|
Loading…
Reference in New Issue
Block a user