mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-24 03:31:07 +00:00
x86-64: Add vector tanh/tanhf implementation to libmvec
Implement vectorized tanh/tanhf containing SSE, AVX, AVX2 and AVX512 versions for libmvec as per vector ABI. It also contains accuracy and ABI tests for vector tanh/tanhf with regenerated ulps. Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
This commit is contained in:
parent
f9ce13fdac
commit
c0f36fc303
@ -285,4 +285,15 @@
|
||||
#define __DECL_SIMD_erff32x
|
||||
#define __DECL_SIMD_erff64x
|
||||
#define __DECL_SIMD_erff128x
|
||||
|
||||
#define __DECL_SIMD_tanh
|
||||
#define __DECL_SIMD_tanhf
|
||||
#define __DECL_SIMD_tanhl
|
||||
#define __DECL_SIMD_tanhf16
|
||||
#define __DECL_SIMD_tanhf32
|
||||
#define __DECL_SIMD_tanhf64
|
||||
#define __DECL_SIMD_tanhf128
|
||||
#define __DECL_SIMD_tanhf32x
|
||||
#define __DECL_SIMD_tanhf64x
|
||||
#define __DECL_SIMD_tanhf128x
|
||||
#endif
|
||||
|
@ -72,7 +72,7 @@ __MATHCALL_VEC (cosh,, (_Mdouble_ __x));
|
||||
/* Hyperbolic sine of X. */
|
||||
__MATHCALL_VEC (sinh,, (_Mdouble_ __x));
|
||||
/* Hyperbolic tangent of X. */
|
||||
__MATHCALL (tanh,, (_Mdouble_ __x));
|
||||
__MATHCALL_VEC (tanh,, (_Mdouble_ __x));
|
||||
|
||||
#ifdef __USE_GNU
|
||||
/* Cosine and sine of X. */
|
||||
|
@ -61,6 +61,7 @@ GLIBC_2.35 _ZGVbN2v_log10 F
|
||||
GLIBC_2.35 _ZGVbN2v_log1p F
|
||||
GLIBC_2.35 _ZGVbN2v_log2 F
|
||||
GLIBC_2.35 _ZGVbN2v_sinh F
|
||||
GLIBC_2.35 _ZGVbN2v_tanh F
|
||||
GLIBC_2.35 _ZGVbN2vv_atan2 F
|
||||
GLIBC_2.35 _ZGVbN2vv_hypot F
|
||||
GLIBC_2.35 _ZGVbN4v_acosf F
|
||||
@ -78,6 +79,7 @@ GLIBC_2.35 _ZGVbN4v_log10f F
|
||||
GLIBC_2.35 _ZGVbN4v_log1pf F
|
||||
GLIBC_2.35 _ZGVbN4v_log2f F
|
||||
GLIBC_2.35 _ZGVbN4v_sinhf F
|
||||
GLIBC_2.35 _ZGVbN4v_tanhf F
|
||||
GLIBC_2.35 _ZGVbN4vv_atan2f F
|
||||
GLIBC_2.35 _ZGVbN4vv_hypotf F
|
||||
GLIBC_2.35 _ZGVcN4v_acos F
|
||||
@ -95,6 +97,7 @@ GLIBC_2.35 _ZGVcN4v_log10 F
|
||||
GLIBC_2.35 _ZGVcN4v_log1p F
|
||||
GLIBC_2.35 _ZGVcN4v_log2 F
|
||||
GLIBC_2.35 _ZGVcN4v_sinh F
|
||||
GLIBC_2.35 _ZGVcN4v_tanh F
|
||||
GLIBC_2.35 _ZGVcN4vv_atan2 F
|
||||
GLIBC_2.35 _ZGVcN4vv_hypot F
|
||||
GLIBC_2.35 _ZGVcN8v_acosf F
|
||||
@ -112,6 +115,7 @@ GLIBC_2.35 _ZGVcN8v_log10f F
|
||||
GLIBC_2.35 _ZGVcN8v_log1pf F
|
||||
GLIBC_2.35 _ZGVcN8v_log2f F
|
||||
GLIBC_2.35 _ZGVcN8v_sinhf F
|
||||
GLIBC_2.35 _ZGVcN8v_tanhf F
|
||||
GLIBC_2.35 _ZGVcN8vv_atan2f F
|
||||
GLIBC_2.35 _ZGVcN8vv_hypotf F
|
||||
GLIBC_2.35 _ZGVdN4v_acos F
|
||||
@ -129,6 +133,7 @@ GLIBC_2.35 _ZGVdN4v_log10 F
|
||||
GLIBC_2.35 _ZGVdN4v_log1p F
|
||||
GLIBC_2.35 _ZGVdN4v_log2 F
|
||||
GLIBC_2.35 _ZGVdN4v_sinh F
|
||||
GLIBC_2.35 _ZGVdN4v_tanh F
|
||||
GLIBC_2.35 _ZGVdN4vv_atan2 F
|
||||
GLIBC_2.35 _ZGVdN4vv_hypot F
|
||||
GLIBC_2.35 _ZGVdN8v_acosf F
|
||||
@ -146,6 +151,7 @@ GLIBC_2.35 _ZGVdN8v_log10f F
|
||||
GLIBC_2.35 _ZGVdN8v_log1pf F
|
||||
GLIBC_2.35 _ZGVdN8v_log2f F
|
||||
GLIBC_2.35 _ZGVdN8v_sinhf F
|
||||
GLIBC_2.35 _ZGVdN8v_tanhf F
|
||||
GLIBC_2.35 _ZGVdN8vv_atan2f F
|
||||
GLIBC_2.35 _ZGVdN8vv_hypotf F
|
||||
GLIBC_2.35 _ZGVeN16v_acosf F
|
||||
@ -163,6 +169,7 @@ GLIBC_2.35 _ZGVeN16v_log10f F
|
||||
GLIBC_2.35 _ZGVeN16v_log1pf F
|
||||
GLIBC_2.35 _ZGVeN16v_log2f F
|
||||
GLIBC_2.35 _ZGVeN16v_sinhf F
|
||||
GLIBC_2.35 _ZGVeN16v_tanhf F
|
||||
GLIBC_2.35 _ZGVeN16vv_atan2f F
|
||||
GLIBC_2.35 _ZGVeN16vv_hypotf F
|
||||
GLIBC_2.35 _ZGVeN8v_acos F
|
||||
@ -180,5 +187,6 @@ GLIBC_2.35 _ZGVeN8v_log10 F
|
||||
GLIBC_2.35 _ZGVeN8v_log1p F
|
||||
GLIBC_2.35 _ZGVeN8v_log2 F
|
||||
GLIBC_2.35 _ZGVeN8v_sinh F
|
||||
GLIBC_2.35 _ZGVeN8v_tanh F
|
||||
GLIBC_2.35 _ZGVeN8vv_atan2 F
|
||||
GLIBC_2.35 _ZGVeN8vv_hypot F
|
||||
|
@ -126,6 +126,10 @@
|
||||
# define __DECL_SIMD_erf __DECL_SIMD_x86_64
|
||||
# undef __DECL_SIMD_erff
|
||||
# define __DECL_SIMD_erff __DECL_SIMD_x86_64
|
||||
# undef __DECL_SIMD_tanh
|
||||
# define __DECL_SIMD_tanh __DECL_SIMD_x86_64
|
||||
# undef __DECL_SIMD_tanhf
|
||||
# define __DECL_SIMD_tanhf __DECL_SIMD_x86_64
|
||||
|
||||
# endif
|
||||
#endif
|
||||
|
@ -62,6 +62,8 @@
|
||||
!GCC$ builtin (acoshf) attributes simd (notinbranch) if('x86_64')
|
||||
!GCC$ builtin (erf) attributes simd (notinbranch) if('x86_64')
|
||||
!GCC$ builtin (erff) attributes simd (notinbranch) if('x86_64')
|
||||
!GCC$ builtin (tanh) attributes simd (notinbranch) if('x86_64')
|
||||
!GCC$ builtin (tanhf) attributes simd (notinbranch) if('x86_64')
|
||||
|
||||
!GCC$ builtin (cos) attributes simd (notinbranch) if('x32')
|
||||
!GCC$ builtin (cosf) attributes simd (notinbranch) if('x32')
|
||||
@ -109,3 +111,5 @@
|
||||
!GCC$ builtin (acoshf) attributes simd (notinbranch) if('x32')
|
||||
!GCC$ builtin (erf) attributes simd (notinbranch) if('x32')
|
||||
!GCC$ builtin (erff) attributes simd (notinbranch) if('x32')
|
||||
!GCC$ builtin (tanh) attributes simd (notinbranch) if('x32')
|
||||
!GCC$ builtin (tanhf) attributes simd (notinbranch) if('x32')
|
||||
|
@ -45,6 +45,7 @@ libmvec-funcs = \
|
||||
sin \
|
||||
sincos \
|
||||
sinh \
|
||||
tanh \
|
||||
|
||||
# Define libmvec function for benchtests directory.
|
||||
libmvec-bench-funcs = \
|
||||
|
@ -29,6 +29,7 @@ libmvec {
|
||||
_ZGVbN2v_log1p; _ZGVcN4v_log1p; _ZGVdN4v_log1p; _ZGVeN8v_log1p;
|
||||
_ZGVbN2v_log2; _ZGVcN4v_log2; _ZGVdN4v_log2; _ZGVeN8v_log2;
|
||||
_ZGVbN2v_sinh; _ZGVcN4v_sinh; _ZGVdN4v_sinh; _ZGVeN8v_sinh;
|
||||
_ZGVbN2v_tanh; _ZGVcN4v_tanh; _ZGVdN4v_tanh; _ZGVeN8v_tanh;
|
||||
_ZGVbN2vv_atan2; _ZGVcN4vv_atan2; _ZGVdN4vv_atan2; _ZGVeN8vv_atan2;
|
||||
_ZGVbN2vv_hypot; _ZGVcN4vv_hypot; _ZGVdN4vv_hypot; _ZGVeN8vv_hypot;
|
||||
_ZGVbN4v_acosf; _ZGVcN8v_acosf; _ZGVdN8v_acosf; _ZGVeN16v_acosf;
|
||||
@ -46,6 +47,7 @@ libmvec {
|
||||
_ZGVbN4v_log1pf; _ZGVcN8v_log1pf; _ZGVdN8v_log1pf; _ZGVeN16v_log1pf;
|
||||
_ZGVbN4v_log2f; _ZGVcN8v_log2f; _ZGVdN8v_log2f; _ZGVeN16v_log2f;
|
||||
_ZGVbN4v_sinhf; _ZGVcN8v_sinhf; _ZGVdN8v_sinhf; _ZGVeN16v_sinhf;
|
||||
_ZGVbN4v_tanhf; _ZGVcN8v_tanhf; _ZGVdN8v_tanhf; _ZGVeN16v_tanhf;
|
||||
_ZGVbN4vv_atan2f; _ZGVcN8vv_atan2f; _ZGVdN8vv_atan2f; _ZGVeN16vv_atan2f;
|
||||
_ZGVbN4vv_hypotf; _ZGVcN8vv_hypotf; _ZGVdN8vv_hypotf; _ZGVeN16vv_hypotf;
|
||||
}
|
||||
|
@ -2067,6 +2067,21 @@ float: 3
|
||||
float128: 3
|
||||
ldouble: 4
|
||||
|
||||
Function: "tanh_vlen16":
|
||||
float: 1
|
||||
|
||||
Function: "tanh_vlen2":
|
||||
double: 1
|
||||
|
||||
Function: "tanh_vlen4":
|
||||
double: 1
|
||||
|
||||
Function: "tanh_vlen4_avx2":
|
||||
double: 1
|
||||
|
||||
Function: "tanh_vlen8":
|
||||
double: 1
|
||||
|
||||
Function: "tgamma":
|
||||
double: 9
|
||||
float: 8
|
||||
|
20
sysdeps/x86_64/fpu/multiarch/svml_d_tanh2_core-sse2.S
Normal file
20
sysdeps/x86_64/fpu/multiarch/svml_d_tanh2_core-sse2.S
Normal file
@ -0,0 +1,20 @@
|
||||
/* SSE2 version of vectorized tanh, vector length is 2.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#define _ZGVbN2v_tanh _ZGVbN2v_tanh_sse2
|
||||
#include "../svml_d_tanh2_core.S"
|
27
sysdeps/x86_64/fpu/multiarch/svml_d_tanh2_core.c
Normal file
27
sysdeps/x86_64/fpu/multiarch/svml_d_tanh2_core.c
Normal file
@ -0,0 +1,27 @@
|
||||
/* Multiple versions of vectorized tanh, vector length is 2.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Name of the exported vector-length-2 tanh entry point that this file
   defines through the ifunc machinery below.  */
#define SYMBOL_NAME _ZGVbN2v_tanh
|
||||
/* NOTE(review): REDIRECT_NAME and IFUNC_SELECTOR are not defined in this
   file; presumably this header provides them (SSE4.1 vs. SSE2 selection,
   judging by its name) -- confirm against the header.  */
#include "ifunc-mathvec-sse4_1.h"
|
||||
|
||||
/* Define SYMBOL_NAME via libc_ifunc_redirected, passing the result of
   IFUNC_SELECTOR () as the implementation to use.  */
libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
|
||||
|
||||
/* For shared builds only: declare the hidden-visibility __GI_ alias of
   the symbol via __hidden_ver1.  */
#ifdef SHARED
|
||||
__hidden_ver1 (_ZGVbN2v_tanh, __GI__ZGVbN2v_tanh, __redirect__ZGVbN2v_tanh)
|
||||
__attribute__ ((visibility ("hidden")));
|
||||
#endif
|
1272
sysdeps/x86_64/fpu/multiarch/svml_d_tanh2_core_sse4.S
Normal file
1272
sysdeps/x86_64/fpu/multiarch/svml_d_tanh2_core_sse4.S
Normal file
File diff suppressed because it is too large
Load Diff
20
sysdeps/x86_64/fpu/multiarch/svml_d_tanh4_core-sse.S
Normal file
20
sysdeps/x86_64/fpu/multiarch/svml_d_tanh4_core-sse.S
Normal file
@ -0,0 +1,20 @@
|
||||
/* SSE version of vectorized tanh, vector length is 4.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#define _ZGVdN4v_tanh _ZGVdN4v_tanh_sse_wrapper
|
||||
#include "../svml_d_tanh4_core.S"
|
27
sysdeps/x86_64/fpu/multiarch/svml_d_tanh4_core.c
Normal file
27
sysdeps/x86_64/fpu/multiarch/svml_d_tanh4_core.c
Normal file
@ -0,0 +1,27 @@
|
||||
/* Multiple versions of vectorized tanh, vector length is 4.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Name of the exported vector-length-4 tanh entry point that this file
   defines through the ifunc machinery below.  */
#define SYMBOL_NAME _ZGVdN4v_tanh
|
||||
/* NOTE(review): REDIRECT_NAME and IFUNC_SELECTOR are not defined in this
   file; presumably this header provides them (AVX2 vs. SSE-wrapper
   selection, judging by its name) -- confirm against the header.  */
#include "ifunc-mathvec-avx2.h"
|
||||
|
||||
/* Define SYMBOL_NAME via libc_ifunc_redirected, passing the result of
   IFUNC_SELECTOR () as the implementation to use.  */
libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
|
||||
|
||||
/* For shared builds only: declare the hidden-visibility __GI_ alias of
   the symbol via __hidden_ver1.  */
#ifdef SHARED
|
||||
__hidden_ver1 (_ZGVdN4v_tanh, __GI__ZGVdN4v_tanh, __redirect__ZGVdN4v_tanh)
|
||||
__attribute__ ((visibility ("hidden")));
|
||||
#endif
|
1279
sysdeps/x86_64/fpu/multiarch/svml_d_tanh4_core_avx2.S
Normal file
1279
sysdeps/x86_64/fpu/multiarch/svml_d_tanh4_core_avx2.S
Normal file
File diff suppressed because it is too large
Load Diff
20
sysdeps/x86_64/fpu/multiarch/svml_d_tanh8_core-avx2.S
Normal file
20
sysdeps/x86_64/fpu/multiarch/svml_d_tanh8_core-avx2.S
Normal file
@ -0,0 +1,20 @@
|
||||
/* AVX2 version of vectorized tanh, vector length is 8.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#define _ZGVeN8v_tanh _ZGVeN8v_tanh_avx2_wrapper
|
||||
#include "../svml_d_tanh8_core.S"
|
27
sysdeps/x86_64/fpu/multiarch/svml_d_tanh8_core.c
Normal file
27
sysdeps/x86_64/fpu/multiarch/svml_d_tanh8_core.c
Normal file
@ -0,0 +1,27 @@
|
||||
/* Multiple versions of vectorized tanh, vector length is 8.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Name of the exported vector-length-8 tanh entry point that this file
   defines through the ifunc machinery below.  */
#define SYMBOL_NAME _ZGVeN8v_tanh
|
||||
/* NOTE(review): REDIRECT_NAME and IFUNC_SELECTOR are not defined in this
   file; presumably this header provides them (AVX-512 "skx" vs.
   AVX2-wrapper selection, judging by its name) -- confirm against the
   header.  */
#include "ifunc-mathvec-avx512-skx.h"
|
||||
|
||||
/* Define SYMBOL_NAME via libc_ifunc_redirected, passing the result of
   IFUNC_SELECTOR () as the implementation to use.  */
libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
|
||||
|
||||
/* For shared builds only: declare the hidden-visibility __GI_ alias of
   the symbol via __hidden_ver1.  */
#ifdef SHARED
|
||||
__hidden_ver1 (_ZGVeN8v_tanh, __GI__ZGVeN8v_tanh, __redirect__ZGVeN8v_tanh)
|
||||
__attribute__ ((visibility ("hidden")));
|
||||
#endif
|
472
sysdeps/x86_64/fpu/multiarch/svml_d_tanh8_core_avx512.S
Normal file
472
sysdeps/x86_64/fpu/multiarch/svml_d_tanh8_core_avx512.S
Normal file
@ -0,0 +1,472 @@
|
||||
/* Function tanh vectorized with AVX-512.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
https://www.gnu.org/licenses/. */
|
||||
|
||||
/*
|
||||
* ALGORITHM DESCRIPTION:
|
||||
*
|
||||
* NOTE: Since the hyperbolic tangent function is odd
|
||||
* (tanh(x) = -tanh(-x)), below algorithm deals with the absolute
|
||||
* value of the argument |x|: tanh(x) = sign(x) * tanh(|x|)
|
||||
*
|
||||
* We use a table lookup method to compute tanh(|x|).
|
||||
* The basic idea is to split the input range into a number of subintervals
|
||||
* and to approximate tanh(.) with a polynomial on each of them.
|
||||
*
|
||||
* IEEE SPECIAL CONDITIONS:
|
||||
* x = [+,-]0, r = [+,-]0
|
||||
* x = +Inf, r = +1
|
||||
* x = -Inf, r = -1
|
||||
* x = QNaN, r = QNaN
|
||||
* x = SNaN, r = QNaN
|
||||
*
|
||||
*
|
||||
* ALGORITHM DETAILS
|
||||
* We handle special values in a callout function, aside from main path
|
||||
* computations. "Special" for this algorithm are:
|
||||
* INF, NAN, |x| > HUGE_THRESHOLD
|
||||
*
|
||||
*
|
||||
* Main path computations are organized as follows:
|
||||
* Actually we split the interval [0, SATURATION_THRESHOLD)
|
||||
* into a number of subintervals. On each subinterval we approximate tanh(.)
|
||||
* with a minimax polynomial of pre-defined degree. Polynomial coefficients
|
||||
* are computed beforehand and stored in table. We also use
|
||||
*
|
||||
* y := |x| + B,
|
||||
*
|
||||
* here B depends on subinterval and is used to make argument
|
||||
* closer to zero.
|
||||
* We also add large fake interval [SATURATION_THRESHOLD, HUGE_THRESHOLD],
|
||||
* where 1.0 + 0.0*y + 0.0*y^2 ... coefficients are stored - just to
|
||||
* preserve main path computation logic but return 1.0 for all arguments.
|
||||
*
|
||||
* Hence reconstruction looks as follows:
|
||||
* we extract proper polynomial and range reduction coefficients
|
||||
* (Pj and B), corresponding to subinterval, to which |x| belongs,
|
||||
* and return
|
||||
*
|
||||
* r := sign(x) * (P0 + P1 * y + ... + Pn * y^n)
|
||||
*
|
||||
* NOTE: we use multiprecision technique to multiply and sum the first
|
||||
* K terms of the polynomial. So Pj, j = 0..K are stored in
|
||||
* table each as a pair of target precision numbers (Pj and PLj) to
|
||||
* achieve wider than target precision.
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
/* Offsets for data table __svml_dtanh_data_internal
|
||||
*/
|
||||
#define _dC 0
|
||||
#define _dP0 128
|
||||
#define _dP1 256
|
||||
#define _dP2 384
|
||||
#define _dP3 512
|
||||
#define _dP4 640
|
||||
#define _dP5 768
|
||||
#define _dP6 896
|
||||
#define _dP7 1024
|
||||
#define _dP8 1152
|
||||
#define _dP9 1280
|
||||
#define _dP10 1408
|
||||
#define _dP11 1536
|
||||
#define _dP12 1664
|
||||
#define _dP13 1792
|
||||
#define _dP14 1920
|
||||
#define _dP15 2048
|
||||
#define _dP16 2176
|
||||
#define _dP17 2304
|
||||
#define _iExpMantMask_UISA 2432
|
||||
#define _iMinIdxOfsMask_UISA 2496
|
||||
#define _iMaxIdxMask_UISA 2560
|
||||
#define _dbSignMask 2624
|
||||
#define _dbAbsMask 2688
|
||||
#define _iExpMantMask 2752
|
||||
#define _iExpMask 2816
|
||||
#define _iMinIdxOfsMask 2880
|
||||
#define _iMaxIdxMask 2944
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.text
|
||||
.section .text.evex512,"ax",@progbits
|
||||
/* Main path of the 8 x double tanh.  The argument vector arrives in
   %zmm0 and the result is produced in %zmm0.  On exit from this part
   %edx holds one bit per lane that needs the scalar fallback.  */
ENTRY(_ZGVeN8v_tanh_skx)
|
||||
/* Prologue: set up %rbp as frame pointer, align %rsp down to 64 bytes
   and reserve 320 bytes of scratch space.  */
pushq %rbp
|
||||
cfi_def_cfa_offset(16)
|
||||
movq %rsp, %rbp
|
||||
cfi_def_cfa(6, 16)
|
||||
cfi_offset(6, -16)
|
||||
andq $-64, %rsp
|
||||
subq $320, %rsp
|
||||
/* Move the upper 32 bits of every input double into the low dword of
   its lane; vpmovqd below packs them into %ymm5.  */
vpsrlq $32, %zmm0, %zmm4
|
||||
/* Keep the original argument at (%rsp); the special-values branch
   reloads it from there.  */
vmovups %zmm0, (%rsp)
|
||||
/* First 64 bytes (entries 0..7) of the _dC and _dP0 tables.  */
vmovups __svml_dtanh_data_internal(%rip), %zmm14
|
||||
vmovups _dP0+__svml_dtanh_data_internal(%rip), %zmm15
|
||||
vpmovqd %zmm4, %ymm5
|
||||
|
||||
/* Constant loading */
|
||||
/* %zmm13 = x & _dbAbsMask, %zmm3 = x & _dbSignMask.  The mask values
   are outside this view; by their names these are |x| and sign(x).  */
vandpd _dbAbsMask+__svml_dtanh_data_internal(%rip), %zmm0, %zmm13
|
||||
vandpd _dbSignMask+__svml_dtanh_data_internal(%rip), %zmm0, %zmm3
|
||||
|
||||
/* Here huge arguments, INF and NaNs are filtered out to callout. */
|
||||
/* %ymm7 = masked high dwords; used both for the table index and for
   the special-input compare further down.  */
vpand _iExpMantMask_UISA+__svml_dtanh_data_internal(%rip), %ymm5, %ymm7
|
||||
vmovups _dP2+__svml_dtanh_data_internal(%rip), %zmm0
|
||||
vmovups _dP16+__svml_dtanh_data_internal(%rip), %zmm4
|
||||
vmovups _dP15+__svml_dtanh_data_internal(%rip), %zmm5
|
||||
/* Spill the sign bits to 64(%rsp); they are OR-ed into the result at
   the end of the main path.  */
vmovups %zmm3, 64(%rsp)
|
||||
vmovups _dP3+__svml_dtanh_data_internal(%rip), %zmm3
|
||||
vpsubd _iMinIdxOfsMask_UISA+__svml_dtanh_data_internal(%rip), %ymm7, %ymm8
|
||||
|
||||
/* if VMIN, VMAX is defined for I type */
|
||||
/* Clamp the biased value to [0, _iMaxIdxMask_UISA] (signed), shift it
   right by 19 to get the table index and zero-extend the indices to
   qwords in %zmm2 for use as vpermt2pd selectors.  */
vxorps %ymm9, %ymm9, %ymm9
|
||||
vpmaxsd %ymm9, %ymm8, %ymm10
|
||||
vpminsd _iMaxIdxMask_UISA+__svml_dtanh_data_internal(%rip), %ymm10, %ymm11
|
||||
vpsrld $19, %ymm11, %ymm12
|
||||
vmovups _dP12+__svml_dtanh_data_internal(%rip), %zmm8
|
||||
vmovups _dP11+__svml_dtanh_data_internal(%rip), %zmm9
|
||||
vmovups _dP10+__svml_dtanh_data_internal(%rip), %zmm10
|
||||
vmovups _dP9+__svml_dtanh_data_internal(%rip), %zmm11
|
||||
vpmovzxdq %ymm12, %zmm2
|
||||
vmovups _dP8+__svml_dtanh_data_internal(%rip), %zmm12
|
||||
/* Each vpermt2pd performs a 16-entry lookup: the destination register
   already holds entries 0..7 of a table, the memory operand at +64
   supplies entries 8..15, and %zmm2 selects one entry per lane.  */
vpermt2pd _dP2+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm0
|
||||
vpermt2pd _dC+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm14
|
||||
vpermt2pd _dP16+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm4
|
||||
vpermt2pd _dP15+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm5
|
||||
/* Reduced argument: %zmm1 = %zmm13 - _dC[index].  */
vsubpd {rn-sae}, %zmm14, %zmm13, %zmm1
|
||||
vpermt2pd _dP12+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm8
|
||||
vpermt2pd _dP11+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm9
|
||||
vpermt2pd _dP10+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm10
|
||||
vpermt2pd _dP9+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm11
|
||||
vpermt2pd _dP8+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm12
|
||||
vpermt2pd _dP3+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm3
|
||||
vpermt2pd _dP0+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm15
|
||||
/* Spill the looked-up _dP2, _dP3 and _dP0 coefficients to 192(%rsp),
   256(%rsp) and 128(%rsp) so their registers can be reused for the
   remaining table loads.  */
vmovups %zmm0, 192(%rsp)
|
||||
vmovups _dP17+__svml_dtanh_data_internal(%rip), %zmm0
|
||||
vmovups _dP7+__svml_dtanh_data_internal(%rip), %zmm13
|
||||
vmovups _dP6+__svml_dtanh_data_internal(%rip), %zmm14
|
||||
vmovups %zmm3, 256(%rsp)
|
||||
vmovups _dP5+__svml_dtanh_data_internal(%rip), %zmm3
|
||||
vmovups %zmm15, 128(%rsp)
|
||||
vmovups _dP4+__svml_dtanh_data_internal(%rip), %zmm15
|
||||
vpermt2pd _dP17+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm0
|
||||
vpermt2pd _dP7+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm13
|
||||
vpermt2pd _dP6+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm14
|
||||
vpermt2pd _dP5+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm3
|
||||
vpermt2pd _dP4+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm15
|
||||
/* Horner evaluation in the reduced argument %zmm1.  Every
   vfmadd213pd computes %zmm0 = %zmm0 * %zmm1 + coefficient, starting
   from _dP17 and consuming _dP16, _dP15, ..., _dP2 and finally _dP0.
   The _dP1 table is not referenced anywhere in this function.  */
vfmadd213pd {rn-sae}, %zmm4, %zmm1, %zmm0
|
||||
/* Special-input detection: a lane is flagged when its masked high
   dword (%ymm7) is greater than _iExpMask (signed compare); the
   per-lane flags are collected as an 8-bit mask in %edx.  */
vpcmpgtd _iExpMask+__svml_dtanh_data_internal(%rip), %ymm7, %ymm6
|
||||
vmovmskps %ymm6, %edx
|
||||
vmovups _dP14+__svml_dtanh_data_internal(%rip), %zmm6
|
||||
vfmadd213pd {rn-sae}, %zmm5, %zmm1, %zmm0
|
||||
vmovups _dP13+__svml_dtanh_data_internal(%rip), %zmm7
|
||||
vpermt2pd _dP14+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm6
|
||||
vpermt2pd _dP13+64+__svml_dtanh_data_internal(%rip), %zmm2, %zmm7
|
||||
vfmadd213pd {rn-sae}, %zmm6, %zmm1, %zmm0
|
||||
/* The index vector in %zmm2 is no longer needed: reuse the register
   for the spilled _dP3 coefficient.  */
vmovups 256(%rsp), %zmm2
|
||||
vfmadd213pd {rn-sae}, %zmm7, %zmm1, %zmm0
|
||||
vfmadd213pd {rn-sae}, %zmm8, %zmm1, %zmm0
|
||||
vfmadd213pd {rn-sae}, %zmm9, %zmm1, %zmm0
|
||||
vfmadd213pd {rn-sae}, %zmm10, %zmm1, %zmm0
|
||||
vfmadd213pd {rn-sae}, %zmm11, %zmm1, %zmm0
|
||||
vfmadd213pd {rn-sae}, %zmm12, %zmm1, %zmm0
|
||||
vfmadd213pd {rn-sae}, %zmm13, %zmm1, %zmm0
|
||||
vfmadd213pd {rn-sae}, %zmm14, %zmm1, %zmm0
|
||||
vfmadd213pd {rn-sae}, %zmm3, %zmm1, %zmm0
|
||||
/* Reload the spilled _dP0 coefficient.  */
vmovups 128(%rsp), %zmm3
|
||||
vfmadd213pd {rn-sae}, %zmm15, %zmm1, %zmm0
|
||||
vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
|
||||
/* Reload the spilled _dP2 coefficient.  */
vmovups 192(%rsp), %zmm2
|
||||
vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
|
||||
vfmadd213pd {rn-sae}, %zmm3, %zmm1, %zmm0
|
||||
/* OR the saved sign bits of the argument into the polynomial value.  */
vorpd 64(%rsp), %zmm0, %zmm0
|
||||
testl %edx, %edx
|
||||
|
||||
/* Go to special inputs processing branch */
|
||||
jne L(SPECIAL_VALUES_BRANCH)
|
||||
# LOE rbx r12 r13 r14 r15 edx zmm0
|
||||
|
||||
/* Restore registers
|
||||
* and exit the function
|
||||
*/
|
||||
|
||||
/* Common exit: the result is expected in %zmm0.  Release the aligned
   scratch area by restoring %rsp from %rbp, then pop the saved %rbp.  */
L(EXIT):
|
||||
movq %rbp, %rsp
|
||||
popq %rbp
|
||||
/* After the pop the CFA is %rsp (DWARF register 7) + 8 again.  */
cfi_def_cfa(7, 8)
|
||||
cfi_restore(6)
|
||||
ret
|
||||
/* The code placed after this return is entered by jumps while the
   frame is still set up, so re-describe that frame for the unwinder:
   CFA = %rbp (DWARF register 6) + 16, saved %rbp at CFA - 16.  */
cfi_def_cfa(6, 16)
|
||||
cfi_offset(6, -16)
|
||||
|
||||
/* Branch to process
|
||||
* special inputs
|
||||
*/
|
||||
|
||||
/* Entered when %edx is non-zero, i.e. at least one lane was flagged as
   special.  Lays out the scratch area for the per-lane fallback loop:
   64(%rsp) receives the original arguments, 128(%rsp) the main-path
   results, and the low 24 bytes at (%rsp) are then reused to save
   %r12, %r13 and %r14.  */
L(SPECIAL_VALUES_BRANCH):
|
||||
/* Reload the original argument saved at function entry; it must be
   copied out of (%rsp) before that slot is overwritten below.  */
vmovups (%rsp), %zmm1
|
||||
vmovups %zmm0, 128(%rsp)
|
||||
vmovups %zmm1, 64(%rsp)
|
||||
# LOE rbx r12 r13 r14 r15 edx zmm0
|
||||
|
||||
/* %eax = 0 is the initial lane index (copied to %r12d below).  */
xorl %eax, %eax
|
||||
# LOE rbx r12 r13 r14 r15 eax edx
|
||||
|
||||
vzeroupper
|
||||
/* Save %r12 and use it as the lane counter.  */
movq %r12, 16(%rsp)
|
||||
/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -304; DW_OP_plus) */
|
||||
.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xfe, 0xff, 0xff, 0x22
|
||||
movl %eax, %r12d
|
||||
/* Save %r13 and use it to hold the special-lane bit mask.  */
movq %r13, 8(%rsp)
|
||||
/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -312; DW_OP_plus) */
|
||||
.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xfe, 0xff, 0xff, 0x22
|
||||
movl %edx, %r13d
|
||||
/* Save %r14; the scalar-call path uses it as the element index.  */
movq %r14, (%rsp)
|
||||
/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -320; DW_OP_plus) */
|
||||
.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xfe, 0xff, 0xff, 0x22
|
||||
# LOE rbx r15 r12d r13d
|
||||
|
||||
/* Range mask
|
||||
* bits check
|
||||
*/
|
||||
|
||||
/* Test bit number %r12d (current lane) of the special-lane mask in
   %r13d; btl copies that bit into the carry flag.  */
L(RANGEMASK_CHECK):
|
||||
btl %r12d, %r13d
|
||||
|
||||
/* Call scalar math function */
|
||||
/* Carry set: this lane was flagged, so recompute it with scalar tanh.
   Otherwise fall through to the loop increment.  */
jc L(SCALAR_MATH_CALL)
|
||||
# LOE rbx r15 r12d r13d
|
||||
|
||||
/* Special inputs
|
||||
* processing loop
|
||||
*/
|
||||
|
||||
/* Advance to the next lane; loop over all 8 lanes of the vector.  */
L(SPECIAL_VALUES_LOOP):
|
||||
incl %r12d
|
||||
cmpl $8, %r12d
|
||||
|
||||
/* Check bits in range mask */
|
||||
jl L(RANGEMASK_CHECK)
|
||||
# LOE rbx r15 r12d r13d
|
||||
|
||||
/* All lanes processed: restore the registers saved on entry to the
   special-values branch.  */
movq 16(%rsp), %r12
|
||||
cfi_restore(12)
|
||||
movq 8(%rsp), %r13
|
||||
cfi_restore(13)
|
||||
movq (%rsp), %r14
|
||||
cfi_restore(14)
|
||||
/* Reload the result vector, which now contains the scalar results
   stored for the flagged lanes.  */
vmovups 128(%rsp), %zmm0
|
||||
|
||||
/* Go to exit */
|
||||
jmp L(EXIT)
|
||||
/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -304; DW_OP_plus) */
|
||||
.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xfe, 0xff, 0xff, 0x22
|
||||
/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -312; DW_OP_plus) */
|
||||
.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xfe, 0xff, 0xff, 0x22
|
||||
/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -320; DW_OP_plus) */
|
||||
.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xfe, 0xff, 0xff, 0x22
|
||||
# LOE rbx r12 r13 r14 r15 zmm0
|
||||
|
||||
/* Scalar math function call
|
||||
* to process special input
|
||||
*/
|
||||
|
||||
/* Recompute one flagged lane with the scalar tanh routine.  */
L(SCALAR_MATH_CALL):
|
||||
/* Copy the lane number to %r14 (the 32-bit move zero-extends) for use
   as an index into the 8-byte element arrays on the stack.  */
movl %r12d, %r14d
|
||||
/* Load the original argument of this lane from the copy at 64(%rsp).  */
movsd 64(%rsp,%r14,8), %xmm0
|
||||
call tanh@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
/* Overwrite this lane of the result vector kept at 128(%rsp).  */
movsd %xmm0, 128(%rsp,%r14,8)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
# LOE rbx r15 r12d r13d
|
||||
END(_ZGVeN8v_tanh_skx)
|
||||
|
||||
.section .rodata, "a"
|
||||
.align 64
|
||||
|
||||
/* Layout description of the data table that follows.  It is only
   compiled when __svml_dtanh_data_internal_typedef is defined.
   With a 4-byte VUINT32, each [16][2] member occupies 128 bytes and
   each [16][1] or [8][2] member occupies 64 bytes, which matches the
   spacing of the _dC ... _iMaxIdxMask offset macros defined near the
   top of this file (0, 128, ..., 2304, then 2432, 2496, ..., 2944).  */
#ifdef __svml_dtanh_data_internal_typedef
|
||||
typedef unsigned int VUINT32;
|
||||
typedef struct
|
||||
{
|
||||
/* Per-interval constant subtracted from the masked argument, followed
   by the per-interval polynomial coefficient tables; 16 entries each,
   stored as pairs of 32-bit words.  */
__declspec(align(64)) VUINT32 _dC[16][2];
|
||||
__declspec(align(64)) VUINT32 _dP0[16][2];
|
||||
__declspec(align(64)) VUINT32 _dP1[16][2];
|
||||
__declspec(align(64)) VUINT32 _dP2[16][2];
|
||||
__declspec(align(64)) VUINT32 _dP3[16][2];
|
||||
__declspec(align(64)) VUINT32 _dP4[16][2];
|
||||
__declspec(align(64)) VUINT32 _dP5[16][2];
|
||||
__declspec(align(64)) VUINT32 _dP6[16][2];
|
||||
__declspec(align(64)) VUINT32 _dP7[16][2];
|
||||
__declspec(align(64)) VUINT32 _dP8[16][2];
|
||||
__declspec(align(64)) VUINT32 _dP9[16][2];
|
||||
__declspec(align(64)) VUINT32 _dP10[16][2];
|
||||
__declspec(align(64)) VUINT32 _dP11[16][2];
|
||||
__declspec(align(64)) VUINT32 _dP12[16][2];
|
||||
__declspec(align(64)) VUINT32 _dP13[16][2];
|
||||
__declspec(align(64)) VUINT32 _dP14[16][2];
|
||||
__declspec(align(64)) VUINT32 _dP15[16][2];
|
||||
__declspec(align(64)) VUINT32 _dP16[16][2];
|
||||
__declspec(align(64)) VUINT32 _dP17[16][2];
|
||||
/* 32-bit integer masks used by the AVX-512 code for index computation
   (the _UISA variants).  */
__declspec(align(64)) VUINT32 _iExpMantMask_UISA[16][1];
|
||||
__declspec(align(64)) VUINT32 _iMinIdxOfsMask_UISA[16][1];
|
||||
__declspec(align(64)) VUINT32 _iMaxIdxMask_UISA[16][1];
|
||||
/* 64-bit masks applied to the argument with vandpd.  */
__declspec(align(64)) VUINT32 _dbSignMask[8][2];
|
||||
__declspec(align(64)) VUINT32 _dbAbsMask[8][2];
|
||||
/* Further 32-bit integer constants; of these, the AVX-512 code above
   references only _iExpMask (threshold for the special-input compare).  */
__declspec(align(64)) VUINT32 _iExpMantMask[16][1];
|
||||
__declspec(align(64)) VUINT32 _iExpMask[16][1];
|
||||
__declspec(align(64)) VUINT32 _iMinIdxOfsMask[16][1];
|
||||
__declspec(align(64)) VUINT32 _iMaxIdxMask[16][1];
|
||||
} __svml_dtanh_data_internal;
|
||||
#endif
|
||||
__svml_dtanh_data_internal:
|
||||
/*== _dC ==*/
|
||||
.quad 0x0000000000000000, 0x3fcc000000000000, 0x3fd4000000000000, 0x3fdc000000000000
|
||||
.quad 0x3fe4000000000000, 0x3fec000000000000, 0x3ff4000000000000, 0x3ffc000000000000
|
||||
.quad 0x4004000000000000, 0x400c000000000000, 0x4014000000000000, 0x401c000000000000
|
||||
.quad 0x4024000000000000, 0x402c000000000000, 0x4034000000000000, 0x0000000000000000
|
||||
/*== p0 ==*/
|
||||
.align 64
|
||||
.quad 0x0000000000000000, 0x3fcb8fd0416a7c92, 0x3fd35f98a0ea650e, 0x3fda5729ee488037
|
||||
.quad 0x3fe1bf47eabb8f95, 0x3fe686650b8c2015, 0x3feb2523bb6b2dee, 0x3fee1fbf97e33527
|
||||
.quad 0x3fef9258260a71c2, 0x3feff112c63a9077, 0x3fefff419668df11, 0x3feffffc832750f2
|
||||
.quad 0x3feffffffdc96f35, 0x3fefffffffffcf58, 0x3ff0000000000000, 0x3ff0000000000000
|
||||
/*== p1 ==*/
|
||||
.align 64
|
||||
.quad 0x0000000000000000, 0x3c65e23ebcd3bcbe, 0xbc4c600bac3adf00, 0x3c6c44091785d040
|
||||
.quad 0x3c8221d7a6e3674b, 0x3c69f89d2cf6b85c, 0x3c73b3e9ec0b8f1c, 0xbc7f8d4b0428aada
|
||||
.quad 0xbc7c52d880cf43c0, 0x3c7dd36e37096480, 0x3c7b4f6380c442ca, 0xbc729755de470096
|
||||
.quad 0x3c84cf852845efbd, 0x3c6fc4fb440a5378, 0xbc63981083b55870, 0x0000000000000000
|
||||
/*== p2 ==*/
|
||||
.align 64
|
||||
.quad 0x3ff0000000000000, 0x3fee842ca3f08532, 0x3fed11574af58f1b, 0x3fea945b9c24e4f9
|
||||
.quad 0x3fe6284c3374f815, 0x3fe02500a09f8d6e, 0x3fd1f25131e3a8c0, 0x3fbd22ca1c24a139
|
||||
.quad 0x3f9b3afe1fba5c76, 0x3f6dd37d19b22b21, 0x3f27ccec13a9ef96, 0x3ecbe6c3f33250ae
|
||||
.quad 0x3e41b4865394f75f, 0x3d8853f01bda5f28, 0x3c73953c0197ef58, 0x0000000000000000
|
||||
/*== p3 ==*/
|
||||
.align 64
|
||||
.quad 0xbbf0b3ea3fdfaa19, 0xbfca48aaeb53bc21, 0xbfd19921f4329916, 0xbfd5e0f09bef8011
|
||||
.quad 0xbfd893b59c35c882, 0xbfd6ba7cb7576538, 0xbfce7291743d7555, 0xbfbb6d85a01efb80
|
||||
.quad 0xbf9addae58c7141a, 0xbf6dc59376c7aa19, 0xbf27cc5e74677410, 0xbecbe6c0e8b4cc87
|
||||
.quad 0xbe41b486526b0565, 0xbd8853f01bef63a4, 0xbc73955be519be31, 0x0000000000000000
|
||||
/*== p4 ==*/
|
||||
.align 64
|
||||
.quad 0xbfd5555555555555, 0xbfd183afc292ba11, 0xbfcc1a4b039c9bfa, 0xbfc16e1e6d8d0be6
|
||||
.quad 0xbf92426c751e48a2, 0x3fb4f152b2bad124, 0x3fbbba40cbef72be, 0x3fb01ba038be6a3d
|
||||
.quad 0x3f916df44871efc8, 0x3f63c6869dfc8870, 0x3f1fb9aef915d828, 0x3ec299d1e27c6e11
|
||||
.quad 0x3e379b5ddcca334c, 0x3d8037f57bc62c9a, 0x3c6a2d4b50a2cff7, 0x0000000000000000
|
||||
/*== p5 ==*/
|
||||
.align 64
|
||||
.quad 0xbce6863ee44ed636, 0x3fc04dcd0476c75e, 0x3fc43d3449a80f08, 0x3fc5c26f3699b7e7
|
||||
.quad 0x3fc1a686f6ab2533, 0x3faf203c316ce730, 0xbf89c7a02788557c, 0xbf98157e26e0d541
|
||||
.quad 0xbf807b55c1c7d278, 0xbf53a18d5843190f, 0xbf0fb6bbc89b1a5b, 0xbeb299c9c684a963
|
||||
.quad 0xbe279b5dd4fb3d01, 0xbd7037f57ae72aa6, 0xbc5a2ca2bba78e86, 0x0000000000000000
|
||||
/*== p6 ==*/
|
||||
.align 64
|
||||
.quad 0x3fc1111111112ab5, 0x3fb5c19efdfc08ad, 0x3fa74c98dc34fbac, 0xbf790d6a8eff0a77
|
||||
.quad 0xbfac3c021789a786, 0xbfae2196b7326859, 0xbf93a7a011ff8c2a, 0x3f6e4709c7e8430e
|
||||
.quad 0x3f67682afa611151, 0x3f3ef2ee77717cbf, 0x3ef95a4482f180b7, 0x3e9dc2c27da3b603
|
||||
.quad 0x3e12e2afd9f7433e, 0x3d59f320348679ba, 0x3c44b61d9bbcc940, 0x0000000000000000
|
||||
/*== p7 ==*/
|
||||
.align 64
|
||||
.quad 0xbda1ea19ddddb3b4, 0xbfb0b8df995ce4df, 0xbfb2955cf41e8164, 0xbfaf9d05c309f7c6
|
||||
.quad 0xbf987d27ccff4291, 0x3f8b2ca62572b098, 0x3f8f1cf6c7f5b00a, 0x3f60379811e43dd5
|
||||
.quad 0xbf4793826f78537e, 0xbf2405695e36240f, 0xbee0e08de39ce756, 0xbe83d709ba5f714e
|
||||
.quad 0xbdf92e3fc5ee63e0, 0xbd414cc030f2110e, 0xbc2ba022e8d82a87, 0x0000000000000000
|
||||
/*== p8 ==*/
|
||||
.align 64
|
||||
.quad 0xbfaba1ba1990520b, 0xbf96e37bba52f6fc, 0x3ecff7df18455399, 0x3f97362834d33a4e
|
||||
.quad 0x3f9e7f8380184b45, 0x3f869543e7c420d4, 0xbf7326bd4914222a, 0xbf5fc15b0a9d98fa
|
||||
.quad 0x3f14cffcfa69fbb6, 0x3f057e48e5b79d10, 0x3ec33b66d7d77264, 0x3e66ac4e578b9b10
|
||||
.quad 0x3ddcc74b8d3d5c42, 0x3d23c589137f92b4, 0x3c107f8e2c8707a1, 0x0000000000000000
|
||||
/*== p9 ==*/
|
||||
.align 64
|
||||
.quad 0xbe351ca7f096011f, 0x3f9eaaf3320c3851, 0x3f9cf823fe761fc1, 0x3f9022271754ff1f
|
||||
.quad 0xbf731fe77c9c60af, 0xbf84a6046865ec7d, 0xbf4ca3f1f2b9192b, 0x3f4c77dee0afd227
|
||||
.quad 0x3f04055bce68597a, 0xbee2bf0cb4a71647, 0xbea31eaafe73efd5, 0xbe46abb02c4368ed
|
||||
.quad 0xbdbcc749ca8079dd, 0xbd03c5883836b9d2, 0xbbf07a5416264aec, 0x0000000000000000
|
||||
/*== p10 ==*/
|
||||
.align 64
|
||||
.quad 0x3f9664f94e6ac14e, 0xbf94d3343bae39dd, 0xbf7bc748e60df843, 0xbf8c89372b43ba85
|
||||
.quad 0xbf8129a092de747a, 0x3f60c85b4d538746, 0x3f5be9392199ec18, 0xbf2a0c68a4489f10
|
||||
.quad 0xbf00462601dc2faa, 0x3eb7b6a219dea9f4, 0x3e80cbcc8d4c5c8a, 0x3e2425bb231a5e29
|
||||
.quad 0x3d9992a4beac8662, 0x3ce191ba5ed3fb67, 0x3bc892450bad44c4, 0x0000000000000000
|
||||
/*== p11 ==*/
|
||||
.align 64
|
||||
.quad 0xbea8c4c1fd7852fe, 0xbfccce16b1046f13, 0xbf81a16f224bb7b6, 0xbf62cbf00406bc09
|
||||
.quad 0x3f75b29bb02cf69b, 0x3f607df0f9f90c17, 0xbf4b852a6e0758d5, 0xbf0078c63d1b8445
|
||||
.quad 0x3eec12eadd55be7a, 0xbe6fa600f593181b, 0xbe5a3c935dce3f7d, 0xbe001c6d95e3ae96
|
||||
.quad 0xbd74755a00ea1fd3, 0xbcbc1c6c063bb7ac, 0xbba3be9a4460fe00, 0x0000000000000000
|
||||
/*== p12 ==*/
|
||||
.align 64
|
||||
.quad 0xbf822404577aa9dd, 0x403d8b07f7a82aa3, 0xbf9f44ab92fbab0a, 0x3fb2eac604473d6a
|
||||
.quad 0x3f45f87d903aaac8, 0xbf5e104671036300, 0x3f19bc98ddf0f340, 0x3f0d4304bc9246e8
|
||||
.quad 0xbed13c415f7b9d41, 0xbe722b8d9720cdb0, 0x3e322666d739bec0, 0x3dd76a553d7e7918
|
||||
.quad 0x3d4de0fa59416a39, 0x3c948716cf3681b4, 0x3b873f9f2d2fda99, 0x0000000000000000
|
||||
/*== p13 ==*/
|
||||
.align 64
|
||||
.quad 0xbefdd99a221ed573, 0x4070593a3735bab4, 0xbfccab654e44835e, 0x3fd13ed80037dbac
|
||||
.quad 0xbf6045b9076cc487, 0x3f2085ee7e8ac170, 0x3f23524622610430, 0xbeff12a6626911b4
|
||||
.quad 0x3eab9008bca408af, 0x3e634df71865f620, 0xbe05bb1bcf83ca73, 0xbdaf2ac143fb6762
|
||||
.quad 0xbd23eae52a3dbf57, 0xbc6b5e3e9ca0955e, 0xbb5eca68e2c1ba2e, 0x0000000000000000
|
||||
/*== p14 ==*/
|
||||
.align 64
|
||||
.quad 0x3f6e3be689423841, 0xc0d263511f5baac1, 0x40169f73b15ebe5c, 0xc025c1dd41cd6cb5
|
||||
.quad 0xbf58fd89fe05e0d1, 0x3f73f7af01d5af7a, 0xbf1e40bdead17e6b, 0x3ee224cd6c4513e5
|
||||
.quad 0xbe24b645e68eeaa3, 0xbe4abfebfb72bc83, 0x3dd51c38f8695ed3, 0x3d8313ac38c6832b
|
||||
.quad 0x3cf7787935626685, 0x3c401ffc49c6bc29, 0xbabf0b21acfa52ab, 0x0000000000000000
|
||||
/*== p15 ==*/
|
||||
.align 64
|
||||
.quad 0xbf2a1306713a4f3a, 0xc1045e509116b066, 0x4041fab9250984ce, 0xc0458d090ec3de95
|
||||
.quad 0xbf74949d60113d63, 0x3f7c9fd6200d0ade, 0x3f02cd40e0ad0a9f, 0xbe858ab8e019f311
|
||||
.quad 0xbe792fa6323b7cf8, 0x3e2df04d67876402, 0xbd95c72be95e4d2c, 0xbd55a89c30203106
|
||||
.quad 0xbccad6b3bb9eff65, 0xbc12705ccd3dd884, 0xba8e0a4c47ae75f5, 0x0000000000000000
|
||||
/*== p16 ==*/
|
||||
.align 64
|
||||
.quad 0xbf55d7e76dc56871, 0x41528c38809c90c7, 0xc076d57fb5190b02, 0x4085f09f888f8ada
|
||||
.quad 0x3fa246332a2fcba5, 0xbfb29d851a896fcd, 0x3ed9065ae369b212, 0xbeb8e1ba4c98a030
|
||||
.quad 0x3e6ffd0766ad4016, 0xbe0c63c29f505f5b, 0xbd7fab216b9e0e49, 0x3d2826b62056aa27
|
||||
.quad 0x3ca313e31762f523, 0x3bea37aa21895319, 0x3ae5c7f1fd871496, 0x0000000000000000
|
||||
/*== p17 ==*/
|
||||
.align 64
|
||||
.quad 0x3f35e67ab76a26e7, 0x41848ee0627d8206, 0xc0a216d618b489ec, 0x40a5b89107c8af4f
|
||||
.quad 0x3fb69d8374520eda, 0xbfbded519f981716, 0xbef02d288b5b3371, 0x3eb290981209c1a6
|
||||
.quad 0xbe567e924bf5ff6e, 0x3de3f7f7de6b0eb6, 0x3d69ed18bae3ebbc, 0xbcf7534c4f3dfa71
|
||||
.quad 0xbc730b73f1eaff20, 0xbbba2cff8135d462, 0xbab5a71b5f7d9035, 0x0000000000000000
|
||||
.align 64
|
||||
.long 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000 /* _iExpMantMask_UISA */
|
||||
.align 64
|
||||
.long 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000 /* _iMinIdxOfsMask_UISA */
|
||||
.align 64
|
||||
.long 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000, 0x00780000 /* _iMaxIdxMask_UISA */
|
||||
.align 64
|
||||
.quad 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000 /* _dbSignMask */
|
||||
.align 64
|
||||
.quad 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff /* _dbAbsMask */
|
||||
.align 64
|
||||
.long 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000 /* _iExpMantMask */
|
||||
.align 64
|
||||
.long 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000 /* _iExpMask */
|
||||
.align 64
|
||||
.long 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000 /* _iMinIdxOfsMask */
|
||||
.align 64
|
||||
.long 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000, 0x00760000 /* _iMaxIdxMask */
|
||||
.align 64
|
||||
.type __svml_dtanh_data_internal,@object
|
||||
.size __svml_dtanh_data_internal,.-__svml_dtanh_data_internal
|
20
sysdeps/x86_64/fpu/multiarch/svml_s_tanhf16_core-avx2.S
Normal file
20
sysdeps/x86_64/fpu/multiarch/svml_s_tanhf16_core-avx2.S
Normal file
@ -0,0 +1,20 @@
|
||||
/* AVX2 version of vectorized tanhf.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Rename _ZGVeN16v_tanhf before pulling in the generic core file, so that
   whatever that file emits under the name _ZGVeN16v_tanhf is emitted here
   as _ZGVeN16v_tanhf_avx2_wrapper instead.
   NOTE(review): the included file is not visible here; presumably it
   defines the 16-lane entry point in terms of narrower AVX2 calls --
   confirm.  */
#define _ZGVeN16v_tanhf _ZGVeN16v_tanhf_avx2_wrapper
|
||||
#include "../svml_s_tanhf16_core.S"
|
28
sysdeps/x86_64/fpu/multiarch/svml_s_tanhf16_core.c
Normal file
28
sysdeps/x86_64/fpu/multiarch/svml_s_tanhf16_core.c
Normal file
@ -0,0 +1,28 @@
|
||||
/* Multiple versions of vectorized tanhf, vector length is 16.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* SYMBOL_NAME must be defined before the selector header is included.
   NOTE(review): REDIRECT_NAME and IFUNC_SELECTOR are not defined in this
   file; presumably the header below derives them from SYMBOL_NAME --
   confirm.  */
#define SYMBOL_NAME _ZGVeN16v_tanhf
|
||||
#include "ifunc-mathvec-avx512-skx.h"
|
||||
|
||||
/* Bind the symbol to whatever IFUNC_SELECTOR () returns.
   NOTE(review): presumably this makes _ZGVeN16v_tanhf an indirect
   function resolved at load time between the _skx and _avx2_wrapper
   implementations -- confirm against the selector header.  */
libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
|
||||
|
||||
/* Shared builds only: declare the __GI_-prefixed alias with hidden
   visibility.  */
#ifdef SHARED
|
||||
__hidden_ver1 (_ZGVeN16v_tanhf, __GI__ZGVeN16v_tanhf,
|
||||
__redirect__ZGVeN16v_tanhf)
|
||||
__attribute__ ((visibility ("hidden")));
|
||||
#endif
|
381
sysdeps/x86_64/fpu/multiarch/svml_s_tanhf16_core_avx512.S
Normal file
381
sysdeps/x86_64/fpu/multiarch/svml_s_tanhf16_core_avx512.S
Normal file
@ -0,0 +1,381 @@
|
||||
/* Function tanhf vectorized with AVX-512.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
https://www.gnu.org/licenses/. */
|
||||
|
||||
/*
|
||||
* ALGORITHM DESCRIPTION:
|
||||
*
|
||||
* NOTE: Since the hyperbolic tangent function is odd
|
||||
* (tanh(x) = -tanh(-x)), below algorithm deals with the absolute
|
||||
* value of the argument |x|: tanh(x) = sign(x) * tanh(|x|)
|
||||
*
|
||||
* We use a table lookup method to compute tanh(|x|).
|
||||
* The basic idea is to split the input range into a number of subintervals
|
||||
* and to approximate tanh(.) with a polynomial on each of them.
|
||||
*
|
||||
* IEEE SPECIAL CONDITIONS:
|
||||
* x = [+,-]0, r = [+,-]0
|
||||
* x = +Inf, r = +1
|
||||
* x = -Inf, r = -1
|
||||
* x = QNaN, r = QNaN
|
||||
* x = SNaN, r = QNaN
|
||||
*
|
||||
*
|
||||
* ALGORITHM DETAILS
|
||||
* We handle special values in a callout function, aside from main path
|
||||
* computations. "Special" for this algorithm are:
|
||||
* INF, NAN, |x| > HUGE_THRESHOLD
|
||||
*
|
||||
*
|
||||
* Main path computations are organized as follows:
|
||||
* Actually we split the interval [0, SATURATION_THRESHOLD)
|
||||
* into a number of subintervals. On each subinterval we approximate tanh(.)
|
||||
* with a minimax polynomial of pre-defined degree. Polynomial coefficients
|
||||
* are computed beforehand and stored in table. We also use
|
||||
*
|
||||
* y := |x| + B,
|
||||
*
|
||||
* here B depends on subinterval and is used to make argument
|
||||
* closer to zero.
|
||||
* We also add large fake interval [SATURATION_THRESHOLD, HUGE_THRESHOLD],
|
||||
* where 1.0 + 0.0*y + 0.0*y^2 ... coefficients are stored - just to
|
||||
* preserve main path computation logic but return 1.0 for all arguments.
|
||||
*
|
||||
* Hence reconstruction looks as follows:
|
||||
* we extract proper polynomial and range reduction coefficients
|
||||
* (Pj and B), corresponding to subinterval, to which |x| belongs,
|
||||
* and return
|
||||
*
|
||||
* r := sign(x) * (P0 + P1 * y + ... + Pn * y^n)
|
||||
*
|
||||
* NOTE: we use multiprecision technique to multiply and sum the first
|
||||
* K terms of the polynomial. So Pj, j = 0..K are stored in
|
||||
* table each as a pair of target precision numbers (Pj and PLj) to
|
||||
* achieve wider than target precision.
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
/* Byte offsets of the sections of the __svml_stanh_data_internal table.

   The coefficient sections (_sC, _sP0, _sP2 ... _sP7) are 128 bytes
   apart: each holds 32 `.long` entries, one per polynomial subinterval.
   The mask sections that follow are 64 bytes apart: each holds 16
   identical `.long` entries, i.e. one full 512-bit vector.

   There is no _sP1 section in this table.  _iExpMantMask,
   _iMinIdxOfsMask and _iMaxIdxMask are not referenced by
   _ZGVeN16v_tanhf_skx below; it uses the *_UISA variants and _iExpMask
   instead.  */
|
||||
#define _sC 0
|
||||
#define _sP0 128
|
||||
#define _sP2 256
|
||||
#define _sP3 384
|
||||
#define _sP4 512
|
||||
#define _sP5 640
|
||||
#define _sP6 768
|
||||
#define _sP7 896
|
||||
#define _iExpMantMask_UISA 1024
|
||||
#define _iMinIdxOfsMask_UISA 1088
|
||||
#define _iMaxIdxMask_UISA 1152
|
||||
#define _sSignMask 1216
|
||||
#define _sAbsMask 1280
|
||||
#define _iExpMantMask 1344
|
||||
#define _iExpMask 1408
|
||||
#define _iMinIdxOfsMask 1472
|
||||
#define _iMaxIdxMask 1536
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.text
|
||||
.section .text.exex512,"ax",@progbits
|
||||
ENTRY(_ZGVeN16v_tanhf_skx)
|
||||
pushq %rbp
|
||||
cfi_def_cfa_offset(16)
|
||||
movq %rsp, %rbp
|
||||
cfi_def_cfa(6, 16)
|
||||
cfi_offset(6, -16)
|
||||
andq $-64, %rsp
|
||||
subq $192, %rsp
|
||||
vmovaps %zmm0, %zmm1
|
||||
vmovups __svml_stanh_data_internal(%rip), %zmm9
|
||||
vmovups _sP6+__svml_stanh_data_internal(%rip), %zmm11
|
||||
vmovups _sP5+__svml_stanh_data_internal(%rip), %zmm12
|
||||
vmovups _sP4+__svml_stanh_data_internal(%rip), %zmm13
|
||||
vmovups _sP3+__svml_stanh_data_internal(%rip), %zmm14
|
||||
vmovups _sP2+__svml_stanh_data_internal(%rip), %zmm15
|
||||
vpternlogd $255, %zmm2, %zmm2, %zmm2
|
||||
vandps _sAbsMask+__svml_stanh_data_internal(%rip), %zmm1, %zmm8
|
||||
vandps _sSignMask+__svml_stanh_data_internal(%rip), %zmm1, %zmm0
|
||||
|
||||
/* Here huge arguments, INF and NaNs are filtered out to callout. */
|
||||
vpandd _iExpMantMask_UISA+__svml_stanh_data_internal(%rip), %zmm1, %zmm3
|
||||
vpsubd _iMinIdxOfsMask_UISA+__svml_stanh_data_internal(%rip), %zmm3, %zmm4
|
||||
vpcmpd $2, _iExpMask+__svml_stanh_data_internal(%rip), %zmm3, %k1
|
||||
|
||||
/*
|
||||
* small table specific variables *
|
||||
* Constant loading
|
||||
*/
|
||||
vpxord %zmm5, %zmm5, %zmm5
|
||||
|
||||
/* if VMIN, VMAX is defined for I type */
|
||||
vpmaxsd %zmm5, %zmm4, %zmm6
|
||||
vpminsd _iMaxIdxMask_UISA+__svml_stanh_data_internal(%rip), %zmm6, %zmm7
|
||||
vpsrld $21, %zmm7, %zmm10
|
||||
vmovups _sP7+__svml_stanh_data_internal(%rip), %zmm4
|
||||
vpermt2ps _sC+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm9
|
||||
vpermt2ps _sP6+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm11
|
||||
vpermt2ps _sP7+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm4
|
||||
vpermt2ps _sP5+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm12
|
||||
vpermt2ps _sP4+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm13
|
||||
vpermt2ps _sP3+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm14
|
||||
vpermt2ps _sP2+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm15
|
||||
vpandnd %zmm3, %zmm3, %zmm2{%k1}
|
||||
vptestmd %zmm2, %zmm2, %k0
|
||||
vmovups _sP0+__svml_stanh_data_internal(%rip), %zmm3
|
||||
vsubps {rn-sae}, %zmm9, %zmm8, %zmm2
|
||||
kmovw %k0, %edx
|
||||
vfmadd213ps {rn-sae}, %zmm11, %zmm2, %zmm4
|
||||
vpermt2ps _sP0+64+__svml_stanh_data_internal(%rip), %zmm10, %zmm3
|
||||
vfmadd213ps {rn-sae}, %zmm12, %zmm2, %zmm4
|
||||
vfmadd213ps {rn-sae}, %zmm13, %zmm2, %zmm4
|
||||
vfmadd213ps {rn-sae}, %zmm14, %zmm2, %zmm4
|
||||
vfmadd213ps {rn-sae}, %zmm15, %zmm2, %zmm4
|
||||
vfmadd213ps {rn-sae}, %zmm3, %zmm2, %zmm4
|
||||
vorps %zmm0, %zmm4, %zmm0
|
||||
testl %edx, %edx
|
||||
|
||||
/* Go to special inputs processing branch */
|
||||
jne L(SPECIAL_VALUES_BRANCH)
|
||||
# LOE rbx r12 r13 r14 r15 edx zmm0 zmm1
|
||||
|
||||
/* Restore registers
|
||||
* and exit the function
|
||||
*/
|
||||
|
||||
L(EXIT):
|
||||
movq %rbp, %rsp
|
||||
popq %rbp
|
||||
cfi_def_cfa(7, 8)
|
||||
cfi_restore(6)
|
||||
ret
|
||||
cfi_def_cfa(6, 16)
|
||||
cfi_offset(6, -16)
|
||||
|
||||
/* Branch to process
|
||||
* special inputs
|
||||
*/
|
||||
|
||||
L(SPECIAL_VALUES_BRANCH):
|
||||
vmovups %zmm1, 64(%rsp)
|
||||
vmovups %zmm0, 128(%rsp)
|
||||
# LOE rbx r12 r13 r14 r15 edx zmm0
|
||||
|
||||
xorl %eax, %eax
|
||||
# LOE rbx r12 r13 r14 r15 eax edx
|
||||
|
||||
vzeroupper
|
||||
movq %r12, 16(%rsp)
|
||||
/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
|
||||
.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
|
||||
movl %eax, %r12d
|
||||
movq %r13, 8(%rsp)
|
||||
/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
|
||||
.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
|
||||
movl %edx, %r13d
|
||||
movq %r14, (%rsp)
|
||||
/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
|
||||
.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
|
||||
# LOE rbx r15 r12d r13d
|
||||
|
||||
/* Range mask
|
||||
* bits check
|
||||
*/
|
||||
|
||||
L(RANGEMASK_CHECK):
|
||||
btl %r12d, %r13d
|
||||
|
||||
/* Call scalar math function */
|
||||
jc L(SCALAR_MATH_CALL)
|
||||
# LOE rbx r15 r12d r13d
|
||||
|
||||
/* Special inputs
|
||||
* processing loop
|
||||
*/
|
||||
|
||||
L(SPECIAL_VALUES_LOOP):
|
||||
incl %r12d
|
||||
cmpl $16, %r12d
|
||||
|
||||
/* Check bits in range mask */
|
||||
jl L(RANGEMASK_CHECK)
|
||||
# LOE rbx r15 r12d r13d
|
||||
|
||||
movq 16(%rsp), %r12
|
||||
cfi_restore(12)
|
||||
movq 8(%rsp), %r13
|
||||
cfi_restore(13)
|
||||
movq (%rsp), %r14
|
||||
cfi_restore(14)
|
||||
vmovups 128(%rsp), %zmm0
|
||||
|
||||
/* Go to exit */
|
||||
jmp L(EXIT)
|
||||
/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
|
||||
.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
|
||||
/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
|
||||
.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
|
||||
/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
|
||||
.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
|
||||
# LOE rbx r12 r13 r14 r15 zmm0
|
||||
|
||||
/* Scalar math fucntion call
|
||||
* to process special input
|
||||
*/
|
||||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d
|
||||
movss 64(%rsp,%r14,4), %xmm0
|
||||
call tanhf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 128(%rsp,%r14,4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
# LOE rbx r15 r12d r13d
|
||||
END(_ZGVeN16v_tanhf_skx)
|
||||
|
||||
.section .rodata, "a"
|
||||
.align 64
|
||||
|
||||
/* C-style description of the layout of the __svml_stanh_data_internal
   table that follows.  Every member is declared with 64-byte alignment,
   mirroring the `.align 64` directive placed before each section of the
   table.  The whole description is guarded by
   __svml_stanh_data_internal_typedef.
   NOTE(review): nothing visible here defines that guard macro, so this
   looks like layout documentation only -- confirm.  */
#ifdef __svml_stanh_data_internal_typedef
|
||||
typedef unsigned int VUINT32;
|
||||
typedef struct
|
||||
{
|
||||
/* [32][1] = 32 entries of 32 bits each (128 bytes); the matching table
   sections hold 32 `.long` values.  There is no _sP1 member.  */
__declspec(align(64)) VUINT32 _sC[32][1];
|
||||
__declspec(align(64)) VUINT32 _sP0[32][1];
|
||||
__declspec(align(64)) VUINT32 _sP2[32][1];
|
||||
__declspec(align(64)) VUINT32 _sP3[32][1];
|
||||
__declspec(align(64)) VUINT32 _sP4[32][1];
|
||||
__declspec(align(64)) VUINT32 _sP5[32][1];
|
||||
__declspec(align(64)) VUINT32 _sP6[32][1];
|
||||
__declspec(align(64)) VUINT32 _sP7[32][1];
|
||||
/* [16][1] = 16 entries of 32 bits each (64 bytes); the matching table
   sections hold 16 `.long` values.  */
__declspec(align(64)) VUINT32 _iExpMantMask_UISA[16][1];
|
||||
__declspec(align(64)) VUINT32 _iMinIdxOfsMask_UISA[16][1];
|
||||
__declspec(align(64)) VUINT32 _iMaxIdxMask_UISA[16][1];
|
||||
__declspec(align(64)) VUINT32 _sSignMask[16][1];
|
||||
__declspec(align(64)) VUINT32 _sAbsMask[16][1];
|
||||
__declspec(align(64)) VUINT32 _iExpMantMask[16][1];
|
||||
__declspec(align(64)) VUINT32 _iExpMask[16][1];
|
||||
__declspec(align(64)) VUINT32 _iMinIdxOfsMask[16][1];
|
||||
__declspec(align(64)) VUINT32 _iMaxIdxMask[16][1];
|
||||
} __svml_stanh_data_internal;
|
||||
#endif
|
||||
__svml_stanh_data_internal:
|
||||
/*== _sC ==*/
|
||||
.long 0x00000000, 0x3d700000, 0x3d900000, 0x3db00000
|
||||
.long 0x3dd00000, 0x3df00000, 0x3e100000, 0x3e300000
|
||||
.long 0x3e500000, 0x3e700000, 0x3e900000, 0x3eb00000
|
||||
.long 0x3ed00000, 0x3ef00000, 0x3f100000, 0x3f300000
|
||||
.long 0x3f500000, 0x3f700000, 0x3f900000, 0x3fb00000
|
||||
.long 0x3fd00000, 0x3ff00000, 0x40100000, 0x40300000
|
||||
.long 0x40500000, 0x40700000, 0x40900000, 0x40b00000
|
||||
.long 0x40d00000, 0x40f00000, 0x41100000, 0x00000000
|
||||
/*== p0 ==*/
|
||||
.align 64
|
||||
.long 0x00000000, 0x3d6fb9c9, 0x3d8fc35f, 0x3daf9169
|
||||
.long 0x3dcf49ab, 0x3deee849, 0x3e0f0ee8, 0x3e2e4984
|
||||
.long 0x3e4d2f8e, 0x3e6bb32e, 0x3e8c51cd, 0x3ea96163
|
||||
.long 0x3ec543f1, 0x3edfd735, 0x3f028438, 0x3f18abf0
|
||||
.long 0x3f2bc480, 0x3f3bec1c, 0x3f4f2e5b, 0x3f613c53
|
||||
.long 0x3f6ce37d, 0x3f743c4f, 0x3f7a5feb, 0x3f7dea85
|
||||
.long 0x3f7f3b3d, 0x3f7fb78c, 0x3f7fefd4, 0x3f7ffdd0
|
||||
.long 0x3f7fffb4, 0x3f7ffff6, 0x3f7fffff, 0x3f800000
|
||||
/*== p2 ==*/
|
||||
.align 64
|
||||
.long 0x3f800000, 0x3f7f1f84, 0x3f7ebd11, 0x3f7e1e5f
|
||||
.long 0x3f7d609f, 0x3f7c842d, 0x3f7b00e5, 0x3f789580
|
||||
.long 0x3f75b8ad, 0x3f726fd9, 0x3f6cc59b, 0x3f63fb92
|
||||
.long 0x3f59ff97, 0x3f4f11d7, 0x3f3d7573, 0x3f24f360
|
||||
.long 0x3f0cbfe7, 0x3eec1a69, 0x3eb0a801, 0x3e6753a2
|
||||
.long 0x3e132f1a, 0x3db7e7d3, 0x3d320845, 0x3c84d3d4
|
||||
.long 0x3bc477b7, 0x3b10d3da, 0x3a01601e, 0x388c1a3b
|
||||
.long 0x3717b0da, 0x35a43bce, 0x338306c6, 0x00000000
|
||||
/*== p3 ==*/
|
||||
.align 64
|
||||
.long 0xb0343c7b, 0xbd6ee69d, 0xbd8f0da7, 0xbdae477d
|
||||
.long 0xbdcd2a1f, 0xbdeba80d, 0xbe0c443b, 0xbe293cf3
|
||||
.long 0xbe44f282, 0xbe5f3651, 0xbe81c7c0, 0xbe96d7ca
|
||||
.long 0xbea7fb8e, 0xbeb50e9e, 0xbec12efe, 0xbec4be92
|
||||
.long 0xbebce070, 0xbead510e, 0xbe8ef7d6, 0xbe4b8704
|
||||
.long 0xbe083237, 0xbdaf7449, 0xbd2e1ec4, 0xbc83bf06
|
||||
.long 0xbbc3e0b5, 0xbb10aadc, 0xba0157db, 0xb88c18f2
|
||||
.long 0xb717b096, 0xb5a43bae, 0xb383012c, 0x00000000
|
||||
/*== p4 ==*/
|
||||
.align 64
|
||||
.long 0xbeaaaaa5, 0xbeab0612, 0xbea7f01f, 0xbea4e120
|
||||
.long 0xbea387b7, 0xbea15962, 0xbe9d57f7, 0xbe976b5a
|
||||
.long 0xbe90230d, 0xbe880dff, 0xbe7479b3, 0xbe4c3d88
|
||||
.long 0xbe212482, 0xbdeb8cba, 0xbd5e78ad, 0x3c6b5e6e
|
||||
.long 0x3d839143, 0x3dc21ee1, 0x3de347af, 0x3dcbec96
|
||||
.long 0x3d99ef2d, 0x3d542ea1, 0x3cdde701, 0x3c2cca67
|
||||
.long 0x3b81cb27, 0x3ac073a1, 0x39ac3032, 0x383a94d9
|
||||
.long 0x36ca081d, 0x355abd4c, 0x332b3cb6, 0x00000000
|
||||
/*== p5 ==*/
|
||||
.align 64
|
||||
.long 0xb76dd6b9, 0xbe1c276d, 0x3c1dcf2f, 0x3dc1a78d
|
||||
.long 0x3d96f985, 0x3da2b61b, 0x3dc13397, 0x3dd2f670
|
||||
.long 0x3df48a0a, 0x3e06c5a8, 0x3e1a3aba, 0x3e27c405
|
||||
.long 0x3e2e78d0, 0x3e2c3e44, 0x3e1d3097, 0x3df4a8f4
|
||||
.long 0x3da38508, 0x3d31416a, 0x3b562657, 0xbcaeeac9
|
||||
.long 0xbcce9419, 0xbcaaeac4, 0xbc49e7d0, 0xbba71ddd
|
||||
.long 0xbb003b0e, 0xba3f9a05, 0xb92c08a7, 0xb7ba9232
|
||||
.long 0xb64a0b0f, 0xb4dac169, 0xb2ab78ac, 0x00000000
|
||||
/*== p6 ==*/
|
||||
.align 64
|
||||
.long 0x3e0910e9, 0x43761143, 0x4165ecdc, 0xc190f756
|
||||
.long 0xc08c097d, 0xc02ba813, 0xbf7f6bda, 0x3f2b1dc0
|
||||
.long 0x3ece105d, 0x3f426a94, 0xbadb0dc4, 0x3da43b17
|
||||
.long 0xbd51ab88, 0xbcaea23d, 0xbd3b6d8d, 0xbd6caaad
|
||||
.long 0xbd795bed, 0xbd5fddda, 0xbd038f3b, 0xbc1cad63
|
||||
.long 0x3abb4766, 0x3b95f10b, 0x3b825873, 0x3afaea66
|
||||
.long 0x3a49f878, 0x39996bf3, 0x388f3e6c, 0x371bb0e3
|
||||
.long 0x35a8a5e6, 0x34369b17, 0x322487b0, 0x00000000
|
||||
/*== p7 ==*/
|
||||
.align 64
|
||||
.long 0xbc0e2f66, 0x460bda12, 0x43d638ef, 0xc3e11c3e
|
||||
.long 0xc2baa4e9, 0xc249da2d, 0xc1859b82, 0x40dd5b57
|
||||
.long 0x40494640, 0x40c730a8, 0xbf0f160e, 0x3e30e76f
|
||||
.long 0xbea81387, 0xbdb26a1c, 0xbd351e57, 0xbb4c01a0
|
||||
.long 0x3c1d7bfb, 0x3c722cd1, 0x3c973f1c, 0x3c33a31b
|
||||
.long 0x3b862ef4, 0x3a27b3d0, 0xba3b5907, 0xba0efc22
|
||||
.long 0xb97f9f0f, 0xb8c8af50, 0xb7bdddfb, 0xb64f2950
|
||||
.long 0xb4e085b1, 0xb3731dfa, 0xb15a1f04, 0x00000000
|
||||
.align 64
|
||||
.long 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000 /* _iExpMantMask_UISA */
|
||||
.align 64
|
||||
.long 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000, 0x3d400000 /* _iMinIdxOfsMask_UISA */
|
||||
.align 64
|
||||
.long 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000, 0x03e00000 /* _iMaxIdxMask_UISA */
|
||||
.align 64
|
||||
.long 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000 /* _sSignMask */
|
||||
.align 64
|
||||
.long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff /* _sAbsMask */
|
||||
.align 64
|
||||
.long 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000 /* _iExpMantMask */
|
||||
.align 64
|
||||
.long 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000 /* _iExpMask */
|
||||
.align 64
|
||||
.long 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000 /* _iMinIdxOfsMask */
|
||||
.align 64
|
||||
.long 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000 /* _iMaxIdxMask */
|
||||
.align 64
|
||||
.type __svml_stanh_data_internal,@object
|
||||
.size __svml_stanh_data_internal,.-__svml_stanh_data_internal
|
20
sysdeps/x86_64/fpu/multiarch/svml_s_tanhf4_core-sse2.S
Normal file
20
sysdeps/x86_64/fpu/multiarch/svml_s_tanhf4_core-sse2.S
Normal file
@ -0,0 +1,20 @@
|
||||
/* SSE2 version of vectorized tanhf, vector length is 4.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Rename _ZGVbN4v_tanhf before pulling in the generic core file, so that
   whatever that file emits under the name _ZGVbN4v_tanhf is emitted here
   as _ZGVbN4v_tanhf_sse2 instead.
   NOTE(review): the included file is not visible here; presumably it
   holds the baseline 4-lane implementation -- confirm.  */
#define _ZGVbN4v_tanhf _ZGVbN4v_tanhf_sse2
|
||||
#include "../svml_s_tanhf4_core.S"
|
28
sysdeps/x86_64/fpu/multiarch/svml_s_tanhf4_core.c
Normal file
28
sysdeps/x86_64/fpu/multiarch/svml_s_tanhf4_core.c
Normal file
@ -0,0 +1,28 @@
|
||||
/* Multiple versions of vectorized tanhf, vector length is 4.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Public symbol provided by this file; must be defined before the selector
   header is included.  */
#define SYMBOL_NAME _ZGVbN4v_tanhf
|
||||
/* NOTE(review): REDIRECT_NAME and IFUNC_SELECTOR are not defined in this
   file; presumably they come from this header -- confirm.  */
#include "ifunc-mathvec-sse4_1.h"
|
||||
|
||||
/* Pass the redirect name, the symbol and the selector call to
   libc_ifunc_redirected (presumably this emits the ifunc resolver -- confirm
   against the macro definition).  */
libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
|
||||
|
||||
/* Shared builds only: invoke __hidden_ver1 for the symbol, its __GI_ name and
   its __redirect_ name, and mark the result with hidden visibility.  */
#ifdef SHARED
|
||||
__hidden_ver1 (_ZGVbN4v_tanhf, __GI__ZGVbN4v_tanhf,
|
||||
__redirect__ZGVbN4v_tanhf)
|
||||
__attribute__ ((visibility ("hidden")));
|
||||
#endif
|
832
sysdeps/x86_64/fpu/multiarch/svml_s_tanhf4_core_sse4.S
Normal file
832
sysdeps/x86_64/fpu/multiarch/svml_s_tanhf4_core_sse4.S
Normal file
@ -0,0 +1,832 @@
|
||||
/* Function tanhf vectorized with SSE4.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
/*
|
||||
* ALGORITHM DESCRIPTION:
|
||||
*
|
||||
* NOTE: Since the hyperbolic tangent function is odd
|
||||
* (tanh(x) = -tanh(-x)), below algorithm deals with the absolute
|
||||
* value of the argument |x|: tanh(x) = sign(x) * tanh(|x|)
|
||||
*
|
||||
* We use a table lookup method to compute tanh(|x|).
|
||||
* The basic idea is to split the input range into a number of subintervals
|
||||
* and to approximate tanh(.) with a polynomial on each of them.
|
||||
*
|
||||
* IEEE SPECIAL CONDITIONS:
|
||||
* x = [+,-]0, r = [+,-]0
|
||||
* x = +Inf, r = +1
|
||||
* x = -Inf, r = -1
|
||||
* x = QNaN, r = QNaN
|
||||
* x = SNaN, r = QNaN
|
||||
*
|
||||
*
|
||||
* ALGORITHM DETAILS
|
||||
* We handle special values in a callout function, aside from main path
|
||||
* computations. "Special" for this algorithm are:
|
||||
* INF, NAN, |x| > HUGE_THRESHOLD
|
||||
*
|
||||
*
|
||||
* Main path computations are organized as follows:
|
||||
* Actually we split the interval [0, SATURATION_THRESHOLD)
|
||||
* into a number of subintervals. On each subinterval we approximate tanh(.)
|
||||
* with a minimax polynomial of pre-defined degree. Polynomial coefficients
|
||||
* are computed beforehand and stored in table. We also use
|
||||
*
|
||||
* y := |x| + B,
|
||||
*
|
||||
* here B depends on subinterval and is used to make argument
|
||||
* closer to zero.
|
||||
* We also add large fake interval [SATURATION_THRESHOLD, HUGE_THRESHOLD],
|
||||
* where 1.0 + 0.0*y + 0.0*y^2 ... coefficients are stored - just to
|
||||
* preserve main path computation logic but return 1.0 for all arguments.
|
||||
*
|
||||
* Hence reconstruction looks as follows:
|
||||
* we extract proper polynomial and range reduction coefficients
|
||||
* (Pj and B), corresponding to subinterval, to which |x| belongs,
|
||||
* and return
|
||||
*
|
||||
* r := sign(x) * (P0 + P1 * y + ... + Pn * y^n)
|
||||
*
|
||||
* NOTE: we use multiprecision technique to multiply and sum the first
|
||||
* K terms of the polynomial. So Pj, j = 0..K are stored in
|
||||
* table each as a pair of target precision numbers (Pj and PLj) to
|
||||
* achieve wider than target precision.
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
/* Offsets for data table __svml_stanh_data_internal
|
||||
*/
|
||||
/* Byte offsets from __svml_stanh_data_internal; each is added to that symbol
   to form a %rip-relative address in the function below.  */
#define _dbP 0 /* polynomial coefficient table, four doubles (A00..A03) per entry */
|
||||
#define _sSignMask 4288 /* ANDed with x; the result is ORed into the final value */
|
||||
#define _sAbsMask 4304 /* ANDed with x to form the polynomial argument */
|
||||
#define _iExpMantMask 4320 /* ANDed with x to select the bits used for the table index */
|
||||
#define _iExpMask 4336 /* lanes whose masked bits compare greater go to the scalar path */
|
||||
#define _iMinIdxOfsMask 4352 /* subtracted from the masked bits to rebase the index */
|
||||
#define _iMaxIdxMask 4368 /* upper clamp for the rebased index */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.text
|
||||
.section .text.sse4,"ax",@progbits
|
||||
/* _ZGVbN4v_tanhf_sse4: compute tanhf for the four floats in %xmm0 and return
   the four results in %xmm0.

   Main path, per lane: the bits of x selected by _iExpMantMask are rebased by
   _iMinIdxOfsMask, clamped to [0, _iMaxIdxMask] and shifted right by 14 to
   give a byte offset into __svml_stanh_data_internal.  The four doubles found
   there (labelled A00..A03 in the table) are evaluated in double precision as
   ((A03*|x| + A02)*|x| + A01)*|x| + A00, narrowed to float and combined with
   the sign bits of x.
   NOTE(review): the algorithm description above mentions y := |x| + B, but no
   B is added on this path; the polynomial argument is |x| itself.

   Special path: lanes whose masked bits compare greater than _iExpMask are
   recomputed one at a time by calling scalar tanhf.  */
ENTRY(_ZGVbN4v_tanhf_sse4)
|
||||
subq $72, %rsp /* 72-byte frame: register saves at 0..23, input at 32, results at 48 */
|
||||
cfi_def_cfa_offset(80)
|
||||
movaps %xmm0, %xmm5 /* xmm5 = x, kept intact for the sign and the special path */
|
||||
|
||||
/* Here huge arguments, INF and NaNs are filtered out to callout. */
|
||||
movdqu _iExpMantMask+__svml_stanh_data_internal(%rip), %xmm9
|
||||
lea _dbP+16+__svml_stanh_data_internal(%rip), %r8 /* r8 = table + 16, so -16(..) and 0(..) reach both halves of an entry */
|
||||
pand %xmm5, %xmm9 /* xmm9 = x & _iExpMantMask */
|
||||
|
||||
/* if VMIN, VMAX is defined for I type */
|
||||
pxor %xmm7, %xmm7 /* xmm7 = 0, lower clamp bound */
|
||||
movdqa %xmm9, %xmm6 /* keep the masked bits for the special-lane test */
|
||||
psubd _iMinIdxOfsMask+__svml_stanh_data_internal(%rip), %xmm9 /* rebase the index */
|
||||
|
||||
/*
|
||||
* small table specific variables *
|
||||
* Constant loading
|
||||
*/
|
||||
movdqu _iMaxIdxMask+__svml_stanh_data_internal(%rip), %xmm10
|
||||
movdqa %xmm9, %xmm11
|
||||
movdqa %xmm9, %xmm8
|
||||
pcmpgtd %xmm10, %xmm11 /* xmm11 = lanes where index > max (signed compare) */
|
||||
pcmpgtd %xmm7, %xmm8 /* xmm8 = lanes where index > 0 */
|
||||
movdqa %xmm11, %xmm14
|
||||
pand %xmm8, %xmm9 /* non-positive indices become 0 */
|
||||
andps %xmm11, %xmm10 /* max, only in lanes that exceeded it */
|
||||
andnps %xmm9, %xmm14 /* index, only in lanes that did not exceed max */
|
||||
orps %xmm10, %xmm14 /* xmm14 = index clamped to [0, max] */
|
||||
psrld $14, %xmm14 /* convert to a byte offset into the table */
|
||||
movd %xmm14, %edx /* edx = lane 0 offset */
|
||||
pshufd $1, %xmm14, %xmm12 /* lane 1 offset into the low dword */
|
||||
pshufd $2, %xmm14, %xmm13 /* lane 2 offset into the low dword */
|
||||
movd %xmm12, %ecx /* ecx = lane 1 offset */
|
||||
pshufd $3, %xmm14, %xmm15 /* lane 3 offset into the low dword */
|
||||
movups _sAbsMask+__svml_stanh_data_internal(%rip), %xmm3
|
||||
movslq %edx, %rdx
|
||||
andps %xmm5, %xmm3 /* xmm3 = x & _sAbsMask, the polynomial argument */
|
||||
movslq %ecx, %rcx
|
||||
pcmpgtd _iExpMask+__svml_stanh_data_internal(%rip), %xmm6 /* special lanes: masked bits > _iExpMask */
|
||||
movd %xmm13, %esi /* esi = lane 2 offset */
|
||||
movups -16(%rdx,%r8), %xmm2 /* lane 0: {A00, A01} */
|
||||
movaps %xmm2, %xmm0
|
||||
movd %xmm15, %edi /* edi = lane 3 offset */
|
||||
movmskps %xmm6, %eax /* eax = one bit per special lane */
|
||||
movups -16(%rcx,%r8), %xmm6 /* lane 1: {A00, A01} */
|
||||
unpcklpd %xmm6, %xmm0 /* xmm0 = A00 for lanes 0,1 */
|
||||
unpckhpd %xmm6, %xmm2 /* xmm2 = A01 for lanes 0,1 */
|
||||
cvtps2pd %xmm3, %xmm6 /* xmm6 = argument for lanes 0,1 as double */
|
||||
movhlps %xmm3, %xmm3 /* bring lanes 2,3 down to the low half */
|
||||
cvtps2pd %xmm3, %xmm3 /* xmm3 = argument for lanes 2,3 as double */
|
||||
movslq %esi, %rsi
|
||||
movslq %edi, %rdi
|
||||
movups (%rcx,%r8), %xmm8 /* lane 1: {A02, A03} */
|
||||
movups (%rdx,%r8), %xmm12 /* lane 0: {A02, A03} */
|
||||
movups (%rsi,%r8), %xmm13 /* lane 2: {A02, A03} */
|
||||
movaps %xmm12, %xmm10
|
||||
movups (%rdi,%r8), %xmm9 /* lane 3: {A02, A03} */
|
||||
movaps %xmm13, %xmm11
|
||||
unpckhpd %xmm8, %xmm12 /* xmm12 = A03 for lanes 0,1 */
|
||||
unpckhpd %xmm9, %xmm13 /* xmm13 = A03 for lanes 2,3 */
|
||||
mulpd %xmm6, %xmm12 /* A03*y */
|
||||
mulpd %xmm3, %xmm13
|
||||
unpcklpd %xmm8, %xmm10 /* xmm10 = A02 for lanes 0,1 */
|
||||
unpcklpd %xmm9, %xmm11 /* xmm11 = A02 for lanes 2,3 */
|
||||
addpd %xmm10, %xmm12 /* A03*y + A02 */
|
||||
addpd %xmm11, %xmm13
|
||||
mulpd %xmm6, %xmm12 /* (A03*y + A02)*y */
|
||||
mulpd %xmm3, %xmm13
|
||||
addpd %xmm2, %xmm12 /* ... + A01 (lanes 0,1) */
|
||||
movups -16(%rsi,%r8), %xmm1 /* lane 2: {A00, A01} */
|
||||
movups -16(%rdi,%r8), %xmm7 /* lane 3: {A00, A01} */
|
||||
movaps %xmm1, %xmm14
|
||||
unpckhpd %xmm7, %xmm1 /* xmm1 = A01 for lanes 2,3 */
|
||||
addpd %xmm1, %xmm13 /* ... + A01 (lanes 2,3) */
|
||||
mulpd %xmm12, %xmm6 /* multiply by y once more (lanes 0,1) */
|
||||
mulpd %xmm13, %xmm3 /* multiply by y once more (lanes 2,3) */
|
||||
addpd %xmm0, %xmm6 /* ... + A00 (lanes 0,1) */
|
||||
unpcklpd %xmm7, %xmm14 /* xmm14 = A00 for lanes 2,3 */
|
||||
addpd %xmm14, %xmm3 /* ... + A00 (lanes 2,3) */
|
||||
cvtpd2ps %xmm6, %xmm0 /* narrow lanes 0,1 to float */
|
||||
cvtpd2ps %xmm3, %xmm1 /* narrow lanes 2,3 to float */
|
||||
movups _sSignMask+__svml_stanh_data_internal(%rip), %xmm4
|
||||
movlhps %xmm1, %xmm0 /* xmm0 = all four results */
|
||||
andps %xmm5, %xmm4 /* xmm4 = x & _sSignMask */
|
||||
orps %xmm4, %xmm0 /* give each result the sign bits of its input */
|
||||
testl %eax, %eax /* any special lanes? */
|
||||
|
||||
/* Go to special inputs processing branch */
|
||||
jne L(SPECIAL_VALUES_BRANCH)
|
||||
# LOE rbx rbp r12 r13 r14 r15 eax xmm0 xmm5
|
||||
|
||||
/* Restore registers
|
||||
* and exit the function
|
||||
*/
|
||||
|
||||
L(EXIT):
|
||||
addq $72, %rsp /* release the frame allocated at entry */
|
||||
cfi_def_cfa_offset(8)
|
||||
ret
|
||||
cfi_def_cfa_offset(80) /* the code below runs with the 72-byte frame still allocated */
|
||||
|
||||
/* Branch to process
|
||||
* special inputs
|
||||
*/
|
||||
|
||||
L(SPECIAL_VALUES_BRANCH):
|
||||
movups %xmm5, 32(%rsp) /* spill the four inputs */
|
||||
movups %xmm0, 48(%rsp) /* spill the four main-path results */
|
||||
# LOE rbx rbp r12 r13 r14 r15 eax
|
||||
|
||||
xorl %edx, %edx
|
||||
movq %r12, 16(%rsp) /* save r12; it is reused as the lane counter */
|
||||
cfi_offset(12, -64)
|
||||
movl %edx, %r12d /* lane counter = 0 */
|
||||
movq %r13, 8(%rsp) /* save r13; it is reused for the special-lane mask */
|
||||
cfi_offset(13, -72)
|
||||
movl %eax, %r13d /* r13d = special-lane mask */
|
||||
movq %r14, (%rsp) /* save r14; it is reused as the slot index around the call */
|
||||
cfi_offset(14, -80)
|
||||
# LOE rbx rbp r15 r12d r13d
|
||||
|
||||
/* Range mask
|
||||
* bits check
|
||||
*/
|
||||
|
||||
L(RANGEMASK_CHECK):
|
||||
btl %r12d, %r13d /* CF = mask bit for the current lane */
|
||||
|
||||
/* Call scalar math function */
|
||||
jc L(SCALAR_MATH_CALL)
|
||||
# LOE rbx rbp r15 r12d r13d
|
||||
|
||||
/* Special inputs
|
||||
* processing loop
|
||||
*/
|
||||
|
||||
L(SPECIAL_VALUES_LOOP):
|
||||
incl %r12d /* next lane */
|
||||
cmpl $4, %r12d /* four lanes in total */
|
||||
|
||||
/* Check bits in range mask */
|
||||
jl L(RANGEMASK_CHECK)
|
||||
# LOE rbx rbp r15 r12d r13d
|
||||
|
||||
movq 16(%rsp), %r12 /* all lanes done: restore the saved registers */
|
||||
cfi_restore(12)
|
||||
movq 8(%rsp), %r13
|
||||
cfi_restore(13)
|
||||
movq (%rsp), %r14
|
||||
cfi_restore(14)
|
||||
movups 48(%rsp), %xmm0 /* reload the results, including any scalar replacements */
|
||||
|
||||
/* Go to exit */
|
||||
jmp L(EXIT)
|
||||
cfi_offset(12, -64) /* re-state the save slots for the code below, reached while they are in use */
|
||||
cfi_offset(13, -72)
|
||||
cfi_offset(14, -80)
|
||||
# LOE rbx rbp r12 r13 r14 r15 xmm0
|
||||
|
||||
/* Scalar math function call
|
||||
* to process special input
|
||||
*/
|
||||
|
||||
L(SCALAR_MATH_CALL):
|
||||
movl %r12d, %r14d /* r14 = lane index, used to address the spill slots */
|
||||
movss 32(%rsp,%r14,4), %xmm0 /* load this lane's input */
|
||||
call tanhf@PLT
|
||||
# LOE rbx rbp r14 r15 r12d r13d xmm0
|
||||
|
||||
movss %xmm0, 48(%rsp,%r14,4) /* overwrite this lane's result with the scalar one */
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
# LOE rbx rbp r15 r12d r13d
|
||||
END(_ZGVbN4v_tanhf_sse4)
|
||||
|
||||
.section .rodata, "a"
|
||||
.align 16
|
||||
|
||||
/* C view of the data table that follows; only compiled when
   __svml_stanh_data_internal_typedef is defined.  Member sizes line up with
   the byte offsets #defined near the top of this file: assuming a 4-byte
   unsigned int, _dbP is 134*4 entries * 2 words * 4 bytes = 4288 bytes and
   every mask member is 4 * 4 = 16 bytes.  */
#ifdef __svml_stanh_data_internal_typedef
|
||||
typedef unsigned int VUINT32;
|
||||
typedef struct
|
||||
{
|
||||
__declspec(align(16)) VUINT32 _dbP[(134*4)][2]; /* coefficient table; each row is one .quad stored as two words */
|
||||
__declspec(align(16)) VUINT32 _sSignMask[4][1]; /* one word per vector lane */
|
||||
__declspec(align(16)) VUINT32 _sAbsMask[4][1];
|
||||
__declspec(align(16)) VUINT32 _iExpMantMask[4][1];
|
||||
__declspec(align(16)) VUINT32 _iExpMask[4][1];
|
||||
__declspec(align(16)) VUINT32 _iMinIdxOfsMask[4][1];
|
||||
__declspec(align(16)) VUINT32 _iMaxIdxMask[4][1];
|
||||
} __svml_stanh_data_internal;
|
||||
#endif
|
||||
__svml_stanh_data_internal:
|
||||
/* Pol_000: err=7.93e-09, x in [0.0000000; 0.0312500]. */
|
||||
.quad 0x0000000000000000 /* A00 = +0.000000000000000000000e-01 */
|
||||
.quad 0x3FF00000022C70EB /* A01 = +1.000000008097283510367e+00 */
|
||||
.quad 0xBED00E878CFFA194 /* A02 = -3.828228912518614443549e-06 */
|
||||
.quad 0xBFD551766D0607A9 /* A03 = -3.330970825846813476723e-01 */
|
||||
.quad 0xBE53D60CE3E4C297 /* A00 = -1.847383956330407336230e-08 */
|
||||
.quad 0x3FF000024177CF5C /* A01 = +1.000002151235967140508e+00 */
|
||||
.quad 0xBF1758BC94A51A25 /* A02 = -8.906031613262943753568e-05 */
|
||||
.quad 0xBFD53EAE67E0D4F0 /* A03 = -3.319507612644221339337e-01 */
|
||||
.quad 0xBE5A9E47EF32D6FE /* A00 = -2.479020984039698285657e-08 */
|
||||
.quad 0x3FF00002DA983057 /* A01 = +1.000002721676556793895e+00 */
|
||||
.quad 0xBF1BD953509E94AA /* A02 = -1.062352277175377670507e-04 */
|
||||
.quad 0xBFD53BDB562EEDD5 /* A03 = -3.317783681520414806876e-01 */
|
||||
.quad 0xBE6191BBE496D294 /* A00 = -3.272532162914017685901e-08 */
|
||||
.quad 0x3FF0000390492017 /* A01 = +1.000003398528866105366e+00 */
|
||||
.quad 0xBF20727E814A57CE /* A02 = -1.254825043772153972919e-04 */
|
||||
.quad 0xBFD538DE060A6F22 /* A03 = -3.315959033004550748913e-01 */
|
||||
.quad 0xBE66DAFA2A893A25 /* A00 = -4.257146219278012568149e-08 */
|
||||
.quad 0x3FF0000465E08CD1 /* A01 = +1.000004194219219266770e+00 */
|
||||
.quad 0xBF2341C765EF91B6 /* A02 = -1.469188600530365522261e-04 */
|
||||
.quad 0xBFD535B6841FAF9E /* A03 = -3.314033785124993469751e-01 */
|
||||
.quad 0xBE6D5794E361E964 /* A00 = -5.465394929765249413434e-08 */
|
||||
.quad 0x3FF000055EE2A0CB /* A01 = +1.000005121846742950353e+00 */
|
||||
.quad 0xBF265E6C77E66C8B /* A02 = -1.706607253709506650304e-04 */
|
||||
.quad 0xBFD53264DDCCEDA6 /* A03 = -3.312008062382240103361e-01 */
|
||||
.quad 0xBE729C844D374A6E /* A00 = -6.933284462462096107184e-08 */
|
||||
.quad 0x3FF000067F019093 /* A01 = +1.000006195180536350264e+00 */
|
||||
.quad 0xBF29CC5348D6DCE5 /* A02 = -1.968242326435338705130e-04 */
|
||||
.quad 0xBFD52EE92121ED35 /* A03 = -3.309881995734998416658e-01 */
|
||||
.quad 0xBE775AEA17EAA872 /* A00 = -8.700465590574974405858e-08 */
|
||||
.quad 0x3FF00007CA1D66B8 /* A01 = +1.000007428656699559610e+00 */
|
||||
.quad 0xBF2D8F5EB98A2637 /* A02 = -2.255252009216044881395e-04 */
|
||||
.quad 0xBFD52B435CDF9128 /* A03 = -3.307655722585587376727e-01 */
|
||||
.quad 0xBE7D04DA28C343F0 /* A00 = -1.081040272327705484794e-07 */
|
||||
.quad 0x3FF000094443CCF5 /* A01 = +1.000008837375216730337e+00 */
|
||||
.quad 0xBF30D5B76C947AE5 /* A02 = -2.568791210978817814332e-04 */
|
||||
.quad 0xBFD52773A0776FAD /* A03 = -3.305329386764651045105e-01 */
|
||||
.quad 0xBE81DD77A12C51C7 /* A00 = -1.331054169875768625701e-07 */
|
||||
.quad 0x3FF0000AF1AFD2DA /* A01 = +1.000010437096696680470e+00 */
|
||||
.quad 0xBF331230624C1680 /* A02 = -2.910011410651516805537e-04 */
|
||||
.quad 0xBFD52379FC0B61DF /* A03 = -3.302903138515186909352e-01 */
|
||||
.quad 0xBE85D04EEEB3C435 /* A00 = -1.625247628488202841012e-07 */
|
||||
.quad 0x3FF0000CD6C9B1F2 /* A01 = +1.000012244238970726684e+00 */
|
||||
.quad 0xBF357F0742FADDD4 /* A02 = -3.280060509313874068243e-04 */
|
||||
.quad 0xBFD51F56806D0E81 /* A03 = -3.300377134475880880338e-01 */
|
||||
.quad 0xBE8A6E289B59681B /* A00 = -1.969211333326924655065e-07 */
|
||||
.quad 0x3FF0000EF8268F72 /* A01 = +1.000014275873550406715e+00 */
|
||||
.quad 0xBF381E277A1B747A /* A02 = -3.680082682942575423093e-04 */
|
||||
.quad 0xBFD51B093F1D6FD4 /* A03 = -3.297751537663746734808e-01 */
|
||||
.quad 0xBE8FCBC40EE9ABD5 /* A00 = -2.368983653301529373887e-07 */
|
||||
.quad 0x3FF000115A883B6C /* A01 = +1.000016549721943981410e+00 */
|
||||
.quad 0xBF3AF17AC974B3D9 /* A02 = -4.111218235774406434303e-04 */
|
||||
.quad 0xBFD516924A4C549C /* A03 = -3.295026517456081105450e-01 */
|
||||
.quad 0xBE92FFBC60A3F956 /* A00 = -2.831066871072026054144e-07 */
|
||||
.quad 0x3FF0001402DCED8A /* A01 = +1.000019084151832604590e+00 */
|
||||
.quad 0xBF3DFAE9390C4801 /* A02 = -4.574603454311488280083e-04 */
|
||||
.quad 0xBFD511F1B4D7DC3A /* A03 = -3.292202249571719585575e-01 */
|
||||
.quad 0xBE9690A22F96D5AD /* A00 = -3.362443262393081632612e-07 */
|
||||
.quad 0x3FF00016F63EFF5D /* A01 = +1.000021898173108825247e+00 */
|
||||
.quad 0xBF409E2C839605BB /* A02 = -5.071370461992499986334e-04 */
|
||||
.quad 0xBFD50D27924BEE00 /* A03 = -3.289278916051614487515e-01 */
|
||||
.quad 0xBE9AA56C65E72A73 /* A00 = -3.970591019557469835586e-07 */
|
||||
.quad 0x3FF0001A39F4A43E /* A01 = +1.000025011433776978009e+00 */
|
||||
.quad 0xBF425BD74C3D6667 /* A02 = -5.602647074553602319844e-04 */
|
||||
.quad 0xBFD50833F6E1ABA2 /* A03 = -3.286256705238718156536e-01 */
|
||||
.quad 0xBE9F4BD4FF1A83B0 /* A00 = -4.663500013744687071912e-07 */
|
||||
.quad 0x3FF0001DD36F9EC2 /* A01 = +1.000028444215715683896e+00 */
|
||||
.quad 0xBF44376634149405 /* A02 = -6.169556656102642569831e-04 */
|
||||
.quad 0xBFD50316F77EDEE5 /* A03 = -3.283135811757190158922e-01 */
|
||||
.quad 0xBEA3B625387BB079 /* A00 = -5.874486399249461304297e-07 */
|
||||
.quad 0x3FF00023E14CFBA9 /* A01 = +1.000034217911642153709e+00 */
|
||||
.quad 0xBF47392F923218D2 /* A02 = -7.087213783883111826306e-04 */
|
||||
.quad 0xBFD4FB1FACDEB938 /* A03 = -3.278273761924483942209e-01 */
|
||||
.quad 0xBEAA6E24F543500A /* A00 = -7.876828740601738750574e-07 */
|
||||
.quad 0x3FF0002D5C6E8412 /* A01 = +1.000043259679163742959e+00 */
|
||||
.quad 0xBF4BAF02BD7FDD70 /* A02 = -8.448375110664940040861e-04 */
|
||||
.quad 0xBFD4EFEE6527A7DE /* A03 = -3.271442401734229177279e-01 */
|
||||
.quad 0xBEB16E3EBE2157D0 /* A00 = -1.038947396133402500647e-06 */
|
||||
.quad 0x3FF00038990FEE2F /* A01 = +1.000053975962952312884e+00 */
|
||||
.quad 0xBF50569481C574CB /* A02 = -9.972048056490652716971e-04 */
|
||||
.quad 0xBFD4E419278DA2B4 /* A03 = -3.264220129263251113372e-01 */
|
||||
.quad 0xBEB6A7B6723165D4 /* A00 = -1.350350836279403750524e-06 */
|
||||
.quad 0x3FF00045CAB4158E /* A01 = +1.000066558657042303793e+00 */
|
||||
.quad 0xBF531D7C9C849108 /* A02 = -1.166698160951775212202e-03 */
|
||||
.quad 0xBFD4D7A0BB33B152 /* A03 = -3.256608799117844954552e-01 */
|
||||
.quad 0xBEBD0EE2A8654AFD /* A00 = -1.732000471561702711532e-06 */
|
||||
.quad 0x3FF00055276F18D6 /* A01 = +1.000081209219890521211e+00 */
|
||||
.quad 0xBF562FDBA3FB6C6C /* A02 = -1.354183666925102939860e-03 */
|
||||
.quad 0xBFD4CA85F1B93DB2 /* A03 = -3.248610363561638125773e-01 */
|
||||
.quad 0xBEC269D4036A207E /* A00 = -2.195047297096822741730e-06 */
|
||||
.quad 0x3FF00066E7DA6E4E /* A01 = +1.000098138500919997540e+00 */
|
||||
.quad 0xBF5991499FC36B3A /* A02 = -1.560518167983372759405e-03 */
|
||||
.quad 0xBFD4BCC9A72283D6 /* A03 = -3.240226871658341556426e-01 */
|
||||
.quad 0xBEC7154B6C09CFE1 /* A00 = -2.751729738565190291276e-06 */
|
||||
.quad 0x3FF0007B47086B80 /* A01 = +1.000117566559055148900e+00 */
|
||||
.quad 0xBF5D455433B4F8F4 /* A02 = -1.786548832412968197680e-03 */
|
||||
.quad 0xBFD4AE6CC1BFE145 /* A03 = -3.231460468373550942722e-01 */
|
||||
.quad 0xBECCA68CC64A0F8A /* A00 = -3.415415948561670285790e-06 */
|
||||
.quad 0x3FF00092827742F7 /* A01 = +1.000139722473418535387e+00 */
|
||||
.quad 0xBF60A7BF15A527AF /* A02 = -2.033112728132522705610e-03 */
|
||||
.quad 0xBFD49F703214084C /* A03 = -3.222313393636155876010e-01 */
|
||||
.quad 0xBED19E68676B241B /* A00 = -4.200644630977303616698e-06 */
|
||||
.quad 0x3FF000ACDA037B26 /* A01 = +1.000164844146362863597e+00 */
|
||||
.quad 0xBF62D99F836A02F8 /* A02 = -2.301036405072284102280e-03 */
|
||||
.quad 0xBFD48FD4F2B91B28 /* A03 = -3.212787981359945810311e-01 */
|
||||
.quad 0xBED57CF4B0C7AA54 /* A00 = -5.123164339408145209103e-06 */
|
||||
.quad 0x3FF000CA8FD9E1A1 /* A01 = +1.000193178099017865534e+00 */
|
||||
.quad 0xBF653A014548E686 /* A02 = -2.591135484433962181405e-03 */
|
||||
.quad 0xBFD47F9C0844B38F /* A03 = -3.202886658426046806447e-01 */
|
||||
.quad 0xBEDA012B1B1A41E2 /* A00 = -6.199971197454598722328e-06 */
|
||||
.quad 0x3FF000EBE868FDF4 /* A01 = +1.000224979259539459520e+00 */
|
||||
.quad 0xBF67CA9427E0A544 /* A02 = -2.904214255086275467410e-03 */
|
||||
.quad 0xBFD46EC6812ADB37 /* A03 = -3.192611943626845749655e-01 */
|
||||
.quad 0xBEDF3EAC5BF12194 /* A00 = -7.449344990702664567927e-06 */
|
||||
.quad 0x3FF001112A520784 /* A01 = +1.000260510744255704196e+00 */
|
||||
.quad 0xBF6A8D01ABDA4DC4 /* A02 = -3.241065277345108255891e-03 */
|
||||
.quad 0xBFD45D55759FFA4A /* A03 = -3.181966446572103146551e-01 */
|
||||
.quad 0xBEE2A541BC274267 /* A00 = -8.890883582164319970972e-06 */
|
||||
.quad 0x3FF0013A9E5961F2 /* A01 = +1.000300043631906721231e+00 */
|
||||
.quad 0xBF6D82ECD080C540 /* A02 = -3.602468994380686462264e-03 */
|
||||
.quad 0xBFD44B4A0779C0AD /* A03 = -3.170952866557950611259e-01 */
|
||||
.quad 0xBEE61D97609A27F4 /* A00 = -1.054553560499505625520e-05 */
|
||||
.quad 0x3FF001688F56A3AF /* A01 = +1.000343856731187974773e+00 */
|
||||
.quad 0xBF7056F8EFB683EC /* A02 = -3.989193351487490407647e-03 */
|
||||
.quad 0xBFD438A5620F0F74 /* A03 = -3.159573991399533543500e-01 */
|
||||
.quad 0xBEEA145429EDD370 /* A00 = -1.243563138839952927732e-05 */
|
||||
.quad 0x3FF0019B4A242A67 /* A01 = +1.000392236341804297339e+00 */
|
||||
.quad 0xBF7207D31CA78D9B /* A02 = -4.401993423445739288258e-03 */
|
||||
.quad 0xBFD42568BA16E7CD /* A03 = -3.147832696228050619602e-01 */
|
||||
.quad 0xBEEE96370D52680F /* A00 = -1.458491207477835326165e-05 */
|
||||
.quad 0x3FF001D31D8E4115 /* A01 = +1.000445476009251821736e+00 */
|
||||
.quad 0xBF73D4CC11EDC094 /* A02 = -4.841611050196221316400e-03 */
|
||||
.quad 0xBFD411954D8664E7 /* A03 = -3.135731942252974469021e-01 */
|
||||
.quad 0xBEF338C046215EF8 /* A00 = -1.833122622260562810219e-05 */
|
||||
.quad 0x3FF00230C32C2EC1 /* A01 = +1.000534784691737621998e+00 */
|
||||
.quad 0xBF76BD019BCC5DAF /* A02 = -5.551344188254799492943e-03 */
|
||||
.quad 0xBFD3F2C7156DC21E /* A03 = -3.116929730668135389848e-01 */
|
||||
.quad 0xBEF9B15EAE411EAE /* A00 = -2.450261207822986676092e-05 */
|
||||
.quad 0x3FF002C2DF057A4D /* A01 = +1.000674124886830940184e+00 */
|
||||
.quad 0xBF7B08CCD9AC1E30 /* A02 = -6.600189396301511801646e-03 */
|
||||
.quad 0xBFD3C7A7A114FED8 /* A03 = -3.090609620157755976777e-01 */
|
||||
.quad 0xBF00E36483C373B3 /* A00 = -3.221178528332122595812e-05 */
|
||||
.quad 0x3FF0036F419480D7 /* A01 = +1.000838524028997644777e+00 */
|
||||
.quad 0xBF7FD255D1777007 /* A02 = -7.768950679260206403087e-03 */
|
||||
.quad 0xBFD39A453911D6CE /* A03 = -3.062909180947429588215e-01 */
|
||||
.quad 0xBF05DFA04DD12059 /* A00 = -4.172046622180685472624e-05 */
|
||||
.quad 0x3FF00438B2A03D8D /* A01 = +1.001030633695197069599e+00 */
|
||||
.quad 0xBF828F8DBB4A9D10 /* A02 = -9.062869337255224921890e-03 */
|
||||
.quad 0xBFD36AAB704697D9 /* A03 = -3.033856007044711255993e-01 */
|
||||
.quad 0xBF0BF3E0C647DEFB /* A00 = -5.331544597092331081714e-05 */
|
||||
.quad 0x3FF005221063D36D /* A01 = +1.001253189109060359741e+00 */
|
||||
.quad 0xBF857A2CB3C96102 /* A02 = -1.048693584122917590862e-02 */
|
||||
.quad 0xBFD338E65BBB4FEC /* A03 = -3.003478904549854444639e-01 */
|
||||
.quad 0xBF11A506ED7C9D31 /* A00 = -6.730894835681591541979e-05 */
|
||||
.quad 0x3FF0062E4D0EA92A /* A01 = +1.001508999829250345925e+00 */
|
||||
.quad 0xBF88AB82C2761AF3 /* A02 = -1.204588085125866091241e-02 */
|
||||
.quad 0xBFD305028D6BD206 /* A03 = -2.971807843271395688234e-01 */
|
||||
.quad 0xBF1607C0922D9BF1 /* A00 = -8.403885708006799337092e-05 */
|
||||
.quad 0x3FF007606C341961 /* A01 = +1.001800940198869449560e+00 */
|
||||
.quad 0xBF8C25E6DA487BCF /* A02 = -1.374416688582682892494e-02 */
|
||||
.quad 0xBFD2CF0D0EE8F7B5 /* A03 = -2.938873906713255768075e-01 */
|
||||
.quad 0xBF1B3A8480A0A16D /* A00 = -1.038688061788578038307e-04 */
|
||||
.quad 0x3FF008BB802D02D6 /* A01 = +1.002131939589323561535e+00 */
|
||||
.quad 0xBF8FEB8AE99FD100 /* A02 = -1.558598065819483124983e-02 */
|
||||
.quad 0xBFD297135BD0911B /* A03 = -2.904709240558688843059e-01 */
|
||||
.quad 0xBF20ABB9BDB75C65 /* A00 = -1.271881327357976163798e-04 */
|
||||
.quad 0x3FF00A42A76D8CD1 /* A01 = +1.002504972472525901495e+00 */
|
||||
.quad 0xBF91FF3D752BB9E6 /* A02 = -1.757522609380570560722e-02 */
|
||||
.quad 0xBFD25D235C1F88B4 /* A03 = -2.869346999779154305799e-01 */
|
||||
.quad 0xBF243D3254425461 /* A00 = -1.544116913733432829448e-04 */
|
||||
.quad 0x3FF00BF909D1795E /* A01 = +1.002923048355647051011e+00 */
|
||||
.quad 0xBF94304E04D44942 /* A02 = -1.971551804042204897316e-02 */
|
||||
.quad 0xBFD2214B5E61CFA6 /* A03 = -2.832821294498394371075e-01 */
|
||||
.quad 0xBF286070011B61CE /* A00 = -1.859795307186510085994e-04 */
|
||||
.quad 0x3FF00DE1D5E1627E /* A01 = +1.003389201612804537689e+00 */
|
||||
.quad 0xBF9689D5F4163F59 /* A02 = -2.201017668045266231780e-02 */
|
||||
.quad 0xBFD1E39A11C3B42C /* A03 = -2.795167134743816728104e-01 */
|
||||
.quad 0xBF2D250B366A79E8 /* A00 = -2.223564326486314902259e-04 */
|
||||
.quad 0x3FF010003E134001 /* A01 = +1.003906481248123094829e+00 */
|
||||
.quad 0xBF990C9FF91F6F81 /* A02 = -2.446222265267250853271e-02 */
|
||||
.quad 0xBFD1A41E80084CDC /* A03 = -2.756420374218586655246e-01 */
|
||||
.quad 0xBF314DB5DDC2A30E /* A00 = -2.640313157465248123865e-04 */
|
||||
.quad 0x3FF012577608921B /* A01 = +1.004477940624503018441e+00 */
|
||||
.quad 0xBF9BB9626875B0C9 /* A02 = -2.707437288829409385849e-02 */
|
||||
.quad 0xBFD162E80768A9D0 /* A03 = -2.716617653228725615122e-01 */
|
||||
.quad 0xBF346A6133808864 /* A00 = -3.115165050094957730625e-04 */
|
||||
.quad 0x3FF014EAAFCC88A3 /* A01 = +1.005106627192198898157e+00 */
|
||||
.quad 0xBF9E90BEF9BF7419 /* A02 = -2.984903716411588595059e-02 */
|
||||
.quad 0xBFD12006545F7FAD /* A03 = -2.675796340899932457269e-01 */
|
||||
.quad 0xBF37F180DC3848EA /* A00 = -3.653468704395550778821e-04 */
|
||||
.quad 0x3FF017BD19147861 /* A01 = +1.005795572250939295955e+00 */
|
||||
.quad 0xBFA0C9A14C702E07 /* A02 = -3.278831537326359207851e-02 */
|
||||
.quad 0xBFD0DB895B650092 /* A03 = -2.633994476818851682154e-01 */
|
||||
.quad 0xBF3BEC6AAC6D7635 /* A00 = -4.260788377246944457107e-04 */
|
||||
.quad 0x3FF01AD1D884E719 /* A01 = +1.006547780778822565040e+00 */
|
||||
.quad 0xBFA260B2A1B1434A /* A02 = -3.589399551186163439542e-02 */
|
||||
.quad 0xBFD09581529E93D6 /* A03 = -2.591250712233067465817e-01 */
|
||||
.quad 0xBF4164E26167882B /* A00 = -5.308251737086202562063e-04 */
|
||||
.quad 0x3FF01FEF14B62B81 /* A01 = +1.007796364693348545316e+00 */
|
||||
.quad 0xBFA4EB014538AA42 /* A02 = -4.085544557559163403315e-02 */
|
||||
.quad 0xBFD029D36FEAF41F /* A03 = -2.525528519580024222613e-01 */
|
||||
.quad 0xBF46F6FFF4E53DC8 /* A00 = -7.008313930700277652464e-04 */
|
||||
.quad 0x3FF027CBB51CBBA0 /* A01 = +1.009715754956893363214e+00 */
|
||||
.quad 0xBFA89DEC9FEC112E /* A02 = -4.807986690687680864098e-02 */
|
||||
.quad 0xBFCF2A99464D0DB4 /* A03 = -2.434875100390009317053e-01 */
|
||||
.quad 0xBF4DCC9C4F66A4D9 /* A00 = -9.094012482836712945103e-04 */
|
||||
.quad 0x3FF030E7CFCCD583 /* A01 = +1.011939822882909068014e+00 */
|
||||
.quad 0xBFACAA3B95814081 /* A02 = -5.598627281199331645611e-02 */
|
||||
.quad 0xBFCDF78F156BE7CF /* A03 = -2.341173987004467604844e-01 */
|
||||
.quad 0xBF5308ED74E5C7A6 /* A00 = -1.161796466103906435435e-03 */
|
||||
.quad 0x3FF03B5986412ECB /* A01 = +1.014489674026594512313e+00 */
|
||||
.quad 0xBFB087EBA88DCC3F /* A02 = -6.457398285947223148806e-02 */
|
||||
.quad 0xBFCCBB9BD134862F /* A03 = -2.244753619680052991736e-01 */
|
||||
.quad 0xBF57FA23C00DF4B5 /* A00 = -1.463446533505758208674e-03 */
|
||||
.quad 0x3FF0473558A1BCC0 /* A01 = +1.017384859292903342975e+00 */
|
||||
.quad 0xBFB2E702BC6360EF /* A02 = -7.383744334527241048871e-02 */
|
||||
.quad 0xBFCB77D546379288 /* A03 = -2.145945160729250122955e-01 */
|
||||
.quad 0xBF5DD12971557F71 /* A00 = -1.819887610814388068450e-03 */
|
||||
.quad 0x3FF0548DDF5000A8 /* A01 = +1.020643112482540360020e+00 */
|
||||
.quad 0xBFB571B63DA186E1 /* A02 = -8.376635555898871710045e-02 */
|
||||
.quad 0xBFCA2D5202605148 /* A03 = -2.045080672838912594358e-01 */
|
||||
.quad 0xBF6252B1AD5D4F17 /* A00 = -2.236697221556737096709e-03 */
|
||||
.quad 0x3FF063738A910BF7 /* A01 = +1.024280110622155737232e+00 */
|
||||
.quad 0xBFB8270C8E6B601B /* A02 = -9.434584118878357184013e-02 */
|
||||
.quad 0xBFC8DD27D950A07E /* A03 = -1.942491351230763441116e-01 */
|
||||
.quad 0xBF66470C91730CFC /* A00 = -2.719425723258004842786e-03 */
|
||||
.quad 0x3FF073F468FCF331 /* A01 = +1.028309259519300633556e+00 */
|
||||
.quad 0xBFBB05C2952191E4 /* A02 = -1.055566419686964629854e-01 */
|
||||
.quad 0xBFC7886A770DE2BD /* A03 = -1.838505822486435070662e-01 */
|
||||
.quad 0xBF6AD114AC8E98EC /* A00 = -3.273525599485007861467e-03 */
|
||||
.quad 0x3FF0861BF53E5226 /* A01 = +1.032741506559554434119e+00 */
|
||||
.quad 0xBFBE0C4F9B461507 /* A02 = -1.173753503881763554650e-01 */
|
||||
.quad 0xBFC6302A037CDE3A /* A03 = -1.733448521642786954722e-01 */
|
||||
.quad 0xBF6FFBDE2A6C2AF8 /* A00 = -3.904279630096648551207e-03 */
|
||||
.quad 0x3FF099F2EB8E7DA3 /* A01 = +1.037585182326304034106e+00 */
|
||||
.quad 0xBFC09C74D192DDF0 /* A02 = -1.297746680554463516444e-01 */
|
||||
.quad 0xBFC4D571D8E3079F /* A03 = -1.627638157861470424859e-01 */
|
||||
.quad 0xBF72E8FDC0B952AA /* A00 = -4.616728994353872309042e-03 */
|
||||
.quad 0x3FF0AF7F273C9533 /* A01 = +1.042845872181101141152e+00 */
|
||||
.quad 0xBFC244C512736F10 /* A02 = -1.427236881344176033792e-01 */
|
||||
.quad 0xBFC379474F58B902 /* A03 = -1.521386277613104298645e-01 */
|
||||
.quad 0xBF762EABAF17395B /* A00 = -5.415602341101023557701e-03 */
|
||||
.quad 0x3FF0C6C3886F63FB /* A01 = +1.048526318502125631582e+00 */
|
||||
.quad 0xBFC3FDF9918EA12A /* A02 = -1.561881981590514389957e-01 */
|
||||
.quad 0xBFC21CA89ECAB895 /* A03 = -1.414995932913753196036e-01 */
|
||||
.quad 0xBF79D387CE5B2BAE /* A00 = -6.305246822828998107258e-03 */
|
||||
.quad 0x3FF0DFBFE2346376 /* A01 = +1.054626353847394337748e+00 */
|
||||
.quad 0xBFC5C6DA43602620 /* A02 = -1.701309994680721970894e-01 */
|
||||
.quad 0xBFC0C08BD8DB6631 /* A03 = -1.308760460731704100557e-01 */
|
||||
.quad 0xBF7DDBA8E8DA9060 /* A00 = -7.289562037531366334164e-03 */
|
||||
.quad 0x3FF0FA70F0D1B464 /* A01 = +1.061142864894713433443e+00 */
|
||||
.quad 0xBFC79E18D92BAA7C /* A02 = -1.845122394946264732241e-01 */
|
||||
.quad 0xBFBECBBBF74C2669 /* A03 = -1.202962378266875381749e-01 */
|
||||
.quad 0xBF81254E76EA25DA /* A00 = -8.371937755572145950511e-03 */
|
||||
.quad 0x3FF116D05835EBD0 /* A01 = +1.068069786618014660462e+00 */
|
||||
.quad 0xBFC982539E2ED224 /* A02 = -1.992897531869327609755e-01 */
|
||||
.quad 0xBFBC1B043C350159 /* A03 = -1.097872397413132278254e-01 */
|
||||
.quad 0xBF8391ACBA863403 /* A00 = -9.555196230190082448686e-03 */
|
||||
.quad 0x3FF134D4AA477FE2 /* A01 = +1.075398125794884141015e+00 */
|
||||
.quad 0xBFCB7218609FEAFB /* A02 = -2.144194099235717521079e-01 */
|
||||
.quad 0xBFB970A16CB88329 /* A03 = -9.937485603633135211599e-02 */
|
||||
.quad 0xBF87935088E48E8B /* A00 = -1.151144902957603431692e-02 */
|
||||
.quad 0x3FF1649892AD7DD3 /* A01 = +1.087059567413110938716e+00 */
|
||||
.quad 0xBFCE6971DDE75409 /* A02 = -2.375929196847723912089e-01 */
|
||||
.quad 0xBFB58291E88CB251 /* A03 = -8.402358939628952472223e-02 */
|
||||
.quad 0xBF8DB3A62C325325 /* A00 = -1.450280973794233242702e-02 */
|
||||
.quad 0x3FF1A9C900C6DEEA /* A01 = +1.103951457056548068891e+00 */
|
||||
.quad 0xBFD13DBC65B0E08E /* A02 = -2.693930619311765140012e-01 */
|
||||
.quad 0xBFB06696F62696D1 /* A03 = -6.406539449252625362252e-02 */
|
||||
.quad 0xBF92583699F2E27A /* A00 = -1.791463198307716858659e-02 */
|
||||
.quad 0x3FF1F451B85AA9F0 /* A01 = +1.122148246892376022288e+00 */
|
||||
.quad 0xBFD34FD5F8288180 /* A02 = -3.017477916164565954205e-01 */
|
||||
.quad 0xBFA6FB692825B683 /* A03 = -4.488686194495718900788e-02 */
|
||||
.quad 0xBF9641C26E673D6F /* A00 = -2.173522757385398448959e-02 */
|
||||
.quad 0x3FF24364DA5E2B07 /* A01 = +1.141453602790251542487e+00 */
|
||||
.quad 0xBFD564A5A5EF5890 /* A02 = -3.342680092295120530821e-01 */
|
||||
.quad 0xBF9B43712011A982 /* A03 = -2.662445791467283467968e-02 */
|
||||
.quad 0xBF9A901038EC2F39 /* A00 = -2.594018313816024226548e-02 */
|
||||
.quad 0x3FF2961356DFFEBA /* A01 = +1.161639537196534011088e+00 */
|
||||
.quad 0xBFD775EBB17198C7 /* A02 = -3.665723069046972759644e-01 */
|
||||
.quad 0xBF833B1A926CD462 /* A03 = -9.390075295963199591975e-03 */
|
||||
.quad 0xBF9F396A6A461B91 /* A00 = -3.049246095317987084727e-02 */
|
||||
.quad 0x3FF2EB53BAEF534B /* A01 = +1.182452898229899629357e+00 */
|
||||
.quad 0xBFD97DABF8AD8BBD /* A02 = -3.982953957076310058660e-01 */
|
||||
.quad 0x3F7B8F6A3E0F8837 /* A03 = +6.728568086119371925713e-03 */
|
||||
.quad 0xBFA21878590F8BAA /* A00 = -3.534294211546946951064e-02 */
|
||||
.quad 0x3FF34209790236E1 /* A01 = +1.203622315111197105253e+00 */
|
||||
.quad 0xBFDB764C0E71BECB /* A02 = -4.290952817018306997277e-01 */
|
||||
.quad 0x3F962FE0C03F84C0 /* A03 = +2.166701482190513949888e-02 */
|
||||
.quad 0xBFA4B36B9AD27ECC /* A00 = -4.043136849327097492868e-02 */
|
||||
.quad 0x3FF3990C5B12FC16 /* A01 = +1.224865298994477935679e+00 */
|
||||
.quad 0xBFDD5AABB0D01390 /* A02 = -4.586590983092770912322e-01 */
|
||||
.quad 0x3FA21DAF5CA162DB /* A03 = +3.538272863142363083844e-02 */
|
||||
.quad 0xBFA7645E4D7BF28B /* A00 = -4.568762489177399105378e-02 */
|
||||
.quad 0x3FF3EF2FD51C0D9F /* A01 = +1.245895225962932562069e+00 */
|
||||
.quad 0xBFDF26377E1B686E /* A02 = -4.867075664057044503963e-01 */
|
||||
.quad 0x3FA8803E756EE812 /* A03 = +4.785342391501513914509e-02 */
|
||||
.quad 0xBFAA210925C64413 /* A00 = -5.103329263796054643398e-02 */
|
||||
.quad 0x3FF44349F897D8E7 /* A01 = +1.266427966181760345066e+00 */
|
||||
.quad 0xBFE06A7B02C6D8E2 /* A02 = -5.129981092675530707226e-01 */
|
||||
.quad 0x3FAE3F194734F5D0 /* A03 = +5.907515520309980505687e-02 */
|
||||
.quad 0xBFACDE48F8A19BBB /* A00 = -5.638340029764018351832e-02 */
|
||||
.quad 0x3FF49439D5466582 /* A01 = +1.286187966447272845727e+00 */
|
||||
.quad 0xBFE131C7C1063DDC /* A02 = -5.373266954429101183166e-01 */
|
||||
.quad 0x3FB1ADEEC36AD805 /* A03 = +6.906025191241844940482e-02 */
|
||||
.quad 0xBFAF905D8F585680 /* A00 = -6.164829611604449866036e-02 */
|
||||
.quad 0x3FF4E0ED1FD27F99 /* A01 = +1.304913639360142818546e+00 */
|
||||
.quad 0xBFE1E7A859DC1D3D /* A02 = -5.595285182070380836095e-01 */
|
||||
.quad 0x3FB3ED018E4642A1 /* A03 = +7.783517573831001679086e-02 */
|
||||
.quad 0xBFB11595104160BA /* A00 = -6.673556944713512906198e-02 */
|
||||
.quad 0x3FF528650340490B /* A01 = +1.322361958217302513319e+00 */
|
||||
.quad 0xBFE28B14B40BC974 /* A02 = -5.794776455425521000109e-01 */
|
||||
.quad 0x3FB5DF49F5BAF6D7 /* A03 = +8.543836831355676453281e-02 */
|
||||
.quad 0xBFB2513A97344BA4 /* A00 = -7.155195418844911836587e-02 */
|
||||
.quad 0x3FF569BA0DB5EE14 /* A01 = +1.338312200124055273420e+00 */
|
||||
.quad 0xBFE31B53A8B67B20 /* A02 = -5.970857901737396389308e-01 */
|
||||
.quad 0x3FB787F297BB0544 /* A03 = +9.191814617499455275507e-02 */
|
||||
.quad 0xBFB37512E848FAFA /* A00 = -7.600515528700305112331e-02 */
|
||||
.quad 0x3FF5A41F33B403C8 /* A01 = +1.352568819013173495591e+00 */
|
||||
.quad 0xBFE397F6EA9A58A5 /* A02 = -6.123003561103997904880e-01 */
|
||||
.quad 0x3FB8EAA9FF25CA06 /* A03 = +9.733068923177520814782e-02 */
|
||||
.quad 0xBFB47B3E603AFC5D /* A00 = -8.000554894805263217439e-02 */
|
||||
.quad 0x3FF5D6E3EDE40487 /* A01 = +1.364963464031718975988e+00 */
|
||||
.quad 0xBFE400D5BCA6D631 /* A02 = -6.251019177058819709103e-01 */
|
||||
.quad 0x3FBA0B830ED567FE /* A03 = +1.017381583418739132707e-01 */
|
||||
.quad 0xBFB5BBFE8AC90496 /* A00 = -8.489981544791400103200e-02 */
|
||||
.quad 0x3FF612BA70107E95 /* A01 = +1.379572332145390989311e+00 */
|
||||
.quad 0xBFE477EAF1FA7693 /* A02 = -6.396383978023599814478e-01 */
|
||||
.quad 0x3FBB4784B7C08A95 /* A03 = +1.065600346196709652391e-01 */
|
||||
.quad 0xBFB6D5D940743939 /* A00 = -8.920057128509463473254e-02 */
|
||||
.quad 0x3FF644A8748F70CE /* A01 = +1.391762214006166953340e+00 */
|
||||
.quad 0xBFE4D646AB07EA37 /* A02 = -6.511567440459832267763e-01 */
|
||||
.quad 0x3FBC354F4E1D5292 /* A03 = +1.101884427747086558913e-01 */
|
||||
.quad 0xBFB7223D19E4F3D1 /* A00 = -9.036619074045339206069e-02 */
|
||||
.quad 0x3FF6518FEB42B7FA /* A01 = +1.394912642466350494175e+00 */
|
||||
.quad 0xBFE4ED86CB87498C /* A02 = -6.539949393430091184598e-01 */
|
||||
.quad 0x3FBC6D29F28CCA9B /* A03 = +1.110407082713131127205e-01 */
|
||||
.quad 0xBFB6878652FF6312 /* A00 = -8.800544287022329936754e-02 */
|
||||
.quad 0x3FF63948C302D040 /* A01 = +1.388985406648330922508e+00 */
|
||||
.quad 0xBFE4C4E2E7904E17 /* A02 = -6.490339777687407218920e-01 */
|
||||
.quad 0x3FBC127356CA1ABE /* A03 = +1.096565329445224612481e-01 */
|
||||
.quad 0xBFB4F5D18B0C91D6 /* A00 = -8.187589306596207427980e-02 */
|
||||
.quad 0x3FF5FD27EB7DD0B8 /* A01 = +1.374305648697413673176e+00 */
|
||||
.quad 0xBFE464E01A2B2FC6 /* A02 = -6.373138915164353601739e-01 */
|
||||
.quad 0x3FBB460547674A30 /* A03 = +1.065371798825160976065e-01 */
|
||||
.quad 0xBFB26642FA16A685 /* A00 = -7.187288861919156890412e-02 */
|
||||
.quad 0x3FF59F9BEDE1C95A /* A01 = +1.351467065073470141812e+00 */
|
||||
.quad 0xBFE3D67920C8FBEA /* A02 = -6.199308052381387046381e-01 */
|
||||
.quad 0x3FBA24F6A8D3CBC1 /* A03 = +1.021265184570401413078e-01 */
|
||||
.quad 0xBFADB5294794F097 /* A00 = -5.802277563859197656582e-02 */
|
||||
.quad 0x3FF523EA7B9CF453 /* A01 = +1.321268542159732772845e+00 */
|
||||
.quad 0xBFE322A8B55E35DB /* A02 = -5.979808370918208160205e-01 */
|
||||
.quad 0x3FB8C8673B1B3E37 /* A03 = +9.680791085269722928697e-02 */
|
||||
.quad 0xBFA4B7D661965C6A /* A00 = -4.046506825687219699450e-02 */
|
||||
.quad 0x3FF48DE3E2CE3122 /* A01 = +1.284641157110919085227e+00 */
|
||||
.quad 0xBFE251FED1A7F445 /* A02 = -5.725092024655472622285e-01 */
|
||||
.quad 0x3FB745699FCABDB9 /* A03 = +9.090290213747821701507e-02 */
|
||||
.quad 0xBF93E60456E4EE1D /* A00 = -1.943213253365004902773e-02 */
|
||||
.quad 0x3FF3E1A14E628A59 /* A01 = +1.242585474196536532432e+00 */
|
||||
.quad 0xBFE16C5AB660E876 /* A02 = -5.444768488007543094653e-01 */
|
||||
.quad 0x3FB5AD33AA8C188F /* A03 = +8.467410005332197397987e-02 */
|
||||
.quad 0x3F738C17C47C7961 /* A00 = +4.772274820224659853951e-03 */
|
||||
.quad 0x3FF3234DDE3BD146 /* A01 = +1.196119182682268355933e+00 */
|
||||
.quad 0xBFE078C0D77A9D3B /* A02 = -5.147403915952176722826e-01 */
|
||||
.quad 0x3FB40D74B3E276B8 /* A03 = +7.833032027925923568290e-02 */
|
||||
.quad 0x3FA0474BECC689C7 /* A00 = +3.179394975019849550746e-02 */
|
||||
.quad 0x3FF256FB4FA7D18A /* A01 = +1.146235762743432307076e+00 */
|
||||
.quad 0xBFDEFA8E3FB285E2 /* A02 = -4.840427038235174395098e-01 */
|
||||
.quad 0x3FB270C007493D59 /* A03 = +7.203293016322244446403e-02 */
|
||||
.quad 0x3FAF5BD51E479BDC /* A00 = +6.124750132203590768931e-02 */
|
||||
.quad 0x3FF18081D0B53BC5 /* A01 = +1.093873801484492647162e+00 */
|
||||
.quad 0xBFDCFE2439BD0C03 /* A02 = -4.530115665294831006626e-01 */
|
||||
.quad 0x3FB0DEFE5A45AFDD /* A03 = +6.590261176978580437424e-02 */
|
||||
.quad 0x3FB7BD5D2806EA26 /* A00 = +9.273321368429118805032e-02 */
|
||||
.quad 0x3FF0A369E35B4440 /* A01 = +1.039895904647224256223e+00 */
|
||||
.quad 0xBFDB04BC5C9951E7 /* A02 = -4.221640495573226181669e-01 */
|
||||
.quad 0x3FAEBBBAA9D6DEEF /* A03 = +6.002600978120919278380e-02 */
|
||||
.quad 0x3FC01BE411098DBC /* A00 = +1.258511622610124502941e-01 */
|
||||
.quad 0x3FEF85BDABC031C1 /* A01 = +9.850757936961188621083e-01 */
|
||||
.quad 0xBFD91521375097C2 /* A02 = -3.919146576102968682065e-01 */
|
||||
.quad 0x3FABE26F0086D982 /* A03 = +5.446192628317005068883e-02 */
|
||||
.quad 0x3FC481D7FF5776B9 /* A00 = +1.602125164781023347604e-01 */
|
||||
.quad 0x3FEDC3506C1E7218 /* A01 = +9.300920592973538347792e-01 */
|
||||
.quad 0xBFD7349A88DA7D4F /* A02 = -3.625856720409119104964e-01 */
|
||||
.quad 0x3FA936E2DFF8E2AE /* A03 = +4.924687370334389358018e-02 */
|
||||
.quad 0x3FC90471F96FA27A /* A00 = +1.954481571149420671141e-01 */
|
||||
.quad 0x3FEC0451601987A2 /* A01 = +8.755270840595026360376e-01 */
|
||||
.quad 0xBFD5671CD4B898DC /* A02 = -3.344184949259110251063e-01 */
|
||||
.quad 0x3FA6BB9594603B67 /* A03 = +4.439990459660841243261e-02 */
|
||||
.quad 0x3FCFD8ADB9ED944C /* A00 = +2.488000066615846384011e-01 */
|
||||
.quad 0x3FE978C073F6809A /* A01 = +7.959902062321078108909e-01 */
|
||||
.quad 0xBFD2DF7E00BCD5A9 /* A02 = -2.948908812716931060471e-01 */
|
||||
.quad 0x3FA3614033D490B2 /* A03 = +3.785133965200894456959e-02 */
|
||||
.quad 0x3FD4846A12AFE5A0 /* A00 = +3.205819303981005674586e-01 */
|
||||
.quad 0x3FE63A1147D40472 /* A01 = +6.945883181471244061100e-01 */
|
||||
.quad 0xBFCFA2268AD34450 /* A02 = -2.471359422548027318101e-01 */
|
||||
.quad 0x3F9F150201D9FFE0 /* A03 = +3.035357605267552383310e-02 */
|
||||
.quad 0x3FD9018641F82BEB /* A00 = +3.907180446846598154131e-01 */
|
||||
.quad 0x3FE33B7C220FFBDC /* A01 = +6.010113396913498995389e-01 */
|
||||
.quad 0xBFCA4E4187E29C86 /* A02 = -2.055131829740483584423e-01 */
|
||||
.quad 0x3F98C30CED19F8F4 /* A03 = +2.418155858185229434287e-02 */
|
||||
.quad 0x3FDD4B8255BEB078 /* A00 = +4.577337109901757905561e-01 */
|
||||
.quad 0x3FE0858B19D3A49B /* A01 = +5.163016800335243905451e-01 */
|
||||
.quad 0xBFC5BC929EACE564 /* A02 = -1.698172831327539045176e-01 */
|
||||
.quad 0x3F93A083CE57DE2B /* A03 = +1.916700312537337677621e-02 */
|
||||
.quad 0x3FE0A8E5E039295C /* A00 = +5.206174258576470315063e-01 */
|
||||
.quad 0x3FDC35E1234583FE /* A01 = +4.407885403107342225937e-01 */
|
||||
.quad 0xBFC1DE034E31AEB9 /* A02 = -1.395877963835710222629e-01 */
|
||||
.quad 0x3F8EFDEBB3471BDC /* A03 = +1.513275280821162888101e-02 */
|
||||
.quad 0x3FE2851B603CB2A5 /* A00 = +5.787484054213406503564e-01 */
|
||||
.quad 0x3FD7F4A44ABBB286 /* A01 = +3.743067483726821853551e-01 */
|
||||
.quad 0xBFBD3EEB67087DE7 /* A02 = -1.142413260026767657385e-01 */
|
||||
.quad 0x3F8864F38329E8BD /* A03 = +1.191129917173260922836e-02 */
|
||||
.quad 0x3FE437DBE3C34AC1 /* A00 = +6.318187187665317283702e-01 */
|
||||
.quad 0x3FD43F6F789441B5 /* A01 = +3.163717916040938438194e-01 */
|
||||
.quad 0xBFB7D92E7901B9A4 /* A02 = -9.315767721429907277653e-02 */
|
||||
.quad 0x3F8327ED342308E1 /* A03 = +9.353497651663324544136e-03 */
|
||||
.quad 0x3FE5C0977766D55C /* A00 = +6.797597248138731451661e-01 */
|
||||
.quad 0x3FD10B42A764D8F9 /* A01 = +2.663122782427219115142e-01 */
|
||||
.quad 0xBFB3633351D3D70F /* A02 = -7.573242900602060456716e-02 */
|
||||
.quad 0x3F7E079E30FF899C /* A03 = +7.331483779099558922843e-03 */
|
||||
.quad 0x3FE7202CE08A88C4 /* A00 = +7.226776490754436288455e-01 */
|
||||
.quad 0x3FCC973EB5662B01 /* A01 = +2.233656297433626314319e-01 */
|
||||
.quad 0xBFAF70A455F9920B /* A02 = -6.140626477716545211782e-02 */
|
||||
.quad 0x3F77812411CE99B6 /* A03 = +5.738392731393584730859e-03 */
|
||||
.quad 0x3FE85879424095B1 /* A00 = +7.608000082006382003286e-01 */
|
||||
.quad 0x3FC7E73BD1674D84 /* A01 = +1.867441914060742336190e-01 */
|
||||
.quad 0xBFA96F84E4BF333B /* A02 = -4.967894832916504993525e-02 */
|
||||
.quad 0x3F72606DDCA6E117 /* A03 = +4.486493251924870105662e-03 */
|
||||
.quad 0x3FE96BFE4957F4DD /* A00 = +7.944327766887472330737e-01 */
|
||||
.quad 0x3FC3ED4780D25478 /* A01 = +1.556786898624158421711e-01 */
|
||||
.quad 0xBFA489C5F9A56B58 /* A02 = -4.011362717093075458408e-02 */
|
||||
.quad 0x3F6CB5DC17E9AD2A /* A03 = +3.504686231556104931972e-03 */
|
||||
.quad 0x3FEA5D9CB2F41234 /* A00 = +8.239272589858672724006e-01 */
|
||||
.quad 0x3FC091A758374DCF /* A01 = +1.294449978582705440555e-01 */
|
||||
.quad 0xBFA08E436D4B5CE0 /* A02 = -3.233538350257858517978e-02 */
|
||||
.quad 0x3F666997AD53E6B7 /* A03 = +2.735897297154145629133e-03 */
|
||||
.quad 0x3FEB3060342CB850 /* A00 = +8.496552485501158713532e-01 */
|
||||
.quad 0x3FBB7D30BBC7DC1B /* A01 = +1.073790033768634993860e-01 */
|
||||
.quad 0xBF9AA6BA3443D9E3 /* A02 = -2.602663940430173170060e-02 */
|
||||
.quad 0x3F617CA764B7850B /* A03 = +2.134634914668814050648e-03 */
|
||||
.quad 0x3FEBE759A6A0C7B8 /* A00 = +8.719909910635044170135e-01 */
|
||||
.quad 0x3FB6C10DE6A703FF /* A01 = +8.888327485239243264115e-02 */
|
||||
.quad 0xBF956C566D8BE1F6 /* A02 = -2.092108768099084498138e-02 */
|
||||
.quad 0x3F5B46D1A4A59CF8 /* A03 = +1.664833764687232917079e-03 */
|
||||
.quad 0x3FEC858494887A04 /* A00 = +8.912985707318630268503e-01 */
|
||||
.quad 0x3FB2CC31F543394D /* A01 = +7.342827070099140762682e-02 */
|
||||
.quad 0xBF9133477FF69137 /* A02 = -1.679717749142747504343e-02 */
|
||||
.quad 0x3F5544482FBB4DA5 /* A03 = +1.298017973501022466823e-03 */
|
||||
.quad 0x3FED0DB59D0E32E9 /* A00 = +9.079235141267335551518e-01 */
|
||||
.quad 0x3FAF006BAFFC6EF4 /* A01 = +6.055008433597022787787e-02 */
|
||||
.quad 0xBF8B97146FA2B97A /* A02 = -1.347175565419144252499e-02 */
|
||||
.quad 0x3F5093B01F4CDC69 /* A03 = +1.011774057770665211434e-03 */
|
||||
.quad 0x3FEDB487C3EC457C /* A00 = +9.282873942012623835751e-01 */
|
||||
.quad 0x3FA7390C09D0BD1D /* A01 = +4.535710925881118044112e-02 */
|
||||
.quad 0xBF83D9F7C3181106 /* A02 = -9.693084374710735778846e-03 */
|
||||
.quad 0x3F46E34A0A3C0E64 /* A03 = +6.984817050299072134500e-04 */
|
||||
.quad 0x3FEE5FFCB4E6EB00 /* A00 = +9.492171796076434020506e-01 */
|
||||
.quad 0x3F9F4913ED00AADF /* A01 = +3.055220731782070861526e-02 */
|
||||
.quad 0xBF79670BD0E59B5C /* A02 = -6.201788097633133961528e-03 */
|
||||
.quad 0x3F3BC998EBCAF96D /* A03 = +4.240034429975534616304e-04 */
|
||||
.quad 0x3FEEDBA41E9542FE /* A00 = +9.643116566968215064293e-01 */
|
||||
.quad 0x3F94F5DD18D9C24D /* A01 = +2.046914543319848858727e-02 */
|
||||
.quad 0xBF7034896AA122B9 /* A02 = -3.956352980886528904192e-03 */
|
||||
.quad 0x3F30DCCB47810B39 /* A03 = +2.573009765038273091199e-04 */
|
||||
.quad 0x3FEF33F2882520ED /* A00 = +9.750912341196716903724e-01 */
|
||||
.quad 0x3F8BF37F2CF553FF /* A01 = +1.364802699996836392315e-02 */
|
||||
.quad 0xBF649F6F05A69619 /* A02 = -2.517430152880317534986e-03 */
|
||||
.quad 0x3F247623C950AAC9 /* A03 = +1.561087307505231250044e-04 */
|
||||
.quad 0x3FEF727757751741 /* A00 = +9.827229221489021115943e-01 */
|
||||
.quad 0x3F828E67912C4400 /* A01 = +9.060677640748693306705e-03 */
|
||||
.quad 0xBF5A2F51A806CC2C /* A02 = -1.598195784123355826789e-03 */
|
||||
.quad 0x3F18D35D7687E613 /* A03 = +9.470231965016282719549e-05 */
|
||||
.quad 0x3FEF9E6325C5942A /* A00 = +9.880843866091073568469e-01 */
|
||||
.quad 0x3F788AB117618F76 /* A01 = +5.991641772286606867914e-03 */
|
||||
.quad 0xBF5096EAB0B1EA89 /* A02 = -1.012543859160305046233e-03 */
|
||||
.quad 0x3F0E1E50EC4435AB /* A03 = +5.744633156910412119652e-05 */
|
||||
.quad 0x3FEFBD0784049369 /* A00 = +9.918248728250605994461e-01 */
|
||||
.quad 0x3F702BBD8294035F /* A01 = +3.947963975634432264028e-03 */
|
||||
.quad 0xBF44FB55E0F00593 /* A02 = -6.403130845457509273330e-04 */
|
||||
.quad 0x3F0244DCD723230A /* A03 = +3.484534217219031730379e-05 */
|
||||
.quad 0x3FEFD245E2366A43 /* A00 = +9.944180887426415926811e-01 */
|
||||
.quad 0x3F653D82EC088433 /* A01 = +2.592807490387838333795e-03 */
|
||||
.quad 0xBF3A7DF75E013CB8 /* A02 = -4.042366908878036561859e-04 */
|
||||
.quad 0x3EF6298E69F991CD /* A03 = +2.113564425911141559972e-05 */
|
||||
.quad 0x3FEFE0EAA508BC69 /* A00 = +9.962056372950317539861e-01 */
|
||||
.quad 0x3F5BD0771AF3FDDA /* A01 = +1.697651208644282514598e-03 */
|
||||
.quad 0xBF30B2E1254DE571 /* A02 = -2.548026725928887099328e-04 */
|
||||
.quad 0x3EEAE28B70EC0256 /* A03 = +1.281973848454955042307e-05 */
|
||||
.quad 0x3FEFEAF5303D7F96 /* A00 = +9.974313680831865536192e-01 */
|
||||
.quad 0x3F5229111365657E /* A01 = +1.108423877289460134782e-03 */
|
||||
.quad 0xBF250572D04DFE66 /* A02 = -1.603796628408704519168e-04 */
|
||||
.quad 0x3EE04E89BB57C981 /* A03 = +7.775682983689149966743e-06 */
|
||||
.quad 0x3FEFF1CF52F1CF44 /* A00 = +9.982678051005469122003e-01 */
|
||||
.quad 0x3F47A71316147CEB /* A01 = +7.218211359577819110842e-04 */
|
||||
.quad 0xBF1A6D7604055719 /* A02 = -1.008132248946049582547e-04 */
|
||||
.quad 0x3ED3C8047586A85C /* A03 = +4.716233739913014633626e-06 */
|
||||
.quad 0x3FEFF6770369EF69 /* A00 = +9.988360468555416149528e-01 */
|
||||
.quad 0x3F3EBB261180FBF0 /* A01 = +4.689186039321105101130e-04 */
|
||||
.quad 0xBF1097754FE19D7F /* A02 = -6.329206004950480057066e-05 */
|
||||
.quad 0x3EC7FEFF83BCA0A7 /* A03 = +2.860556404988488738366e-06 */
|
||||
.quad 0x3FEFF99D42371AC4 /* A00 = +9.992204945818561334647e-01 */
|
||||
.quad 0x3F33EB2AEC271F59 /* A01 = +3.039340773764907474054e-04 */
|
||||
.quad 0xBF04CF18E0FC0D79 /* A02 = -3.968996690952969588805e-05 */
|
||||
.quad 0x3EBD1BDBD6019BE9 /* A03 = +1.735021065507727833886e-06 */
|
||||
.quad 0x3FEFFBBCA32B0D91 /* A00 = +9.994795977476532700123e-01 */
|
||||
.quad 0x3F29C41E1615110A /* A01 = +1.965796209707565346710e-04 */
|
||||
.quad 0xBEFA11F93D9DCB5A /* A02 = -2.486248909101414873235e-05 */
|
||||
.quad 0x3EB1A7CA4546F7A7 /* A03 = +1.052345642723709228769e-06 */
|
||||
.quad 0x3FEFFD298B8E8DE2 /* A00 = +9.996535993308806045121e-01 */
|
||||
.quad 0x3F20A1C42D523C5B /* A01 = +1.268913244172078754520e-04 */
|
||||
.quad 0xBEF0507A364AFAE4 /* A02 = -1.555859070622834605755e-05 */
|
||||
.quad 0x3EA56ACA17E7CDF4 /* A03 = +6.382806956848098872313e-07 */
|
||||
.quad 0x3FEFFE1DC82BA5A3 /* A00 = +9.997700604991915929176e-01 */
|
||||
.quad 0x3F156E73B90F1769 /* A01 = +8.175450626798714452801e-05 */
|
||||
.quad 0xBEE4663579D0A09F /* A02 = -9.727122057226747625365e-06 */
|
||||
.quad 0x3E99FAF6FEC5D4C1 /* A03 = +3.871371052824002996020e-07 */
|
||||
.quad 0x3FEFFEF8D0BB5E81 /* A00 = +9.998745037837154514548e-01 */
|
||||
.quad 0x3F06686DA18D39C3 /* A01 = +4.273972098777251447726e-05 */
|
||||
.quad 0xBED46BC298073E90 /* A02 = -4.868731025855742842491e-06 */
|
||||
.quad 0x3E88E42286B9D0FD /* A03 = +1.854535328530838170114e-07 */
|
||||
.quad 0x3FEFFF8DBC68DDC7 /* A00 = +9.999455146670975791423e-01 */
|
||||
.quad 0x3EF26B2953A80AF0 /* A01 = +1.756534514108903368909e-05 */
|
||||
.quad 0xBEBFC4472D580F83 /* A02 = -1.893443529411295465239e-06 */
|
||||
.quad 0x3E72505B4553D19F /* A03 = +6.822456673547912277047e-08 */
|
||||
.quad 0x3FEFFFCED1276609 /* A00 = +9.999765477215883935358e-01 */
|
||||
.quad 0x3EDE1A94C7CC58F5 /* A01 = +7.177313020153979672606e-06 */
|
||||
.quad 0xBEA8A2C988744E57 /* A02 = -7.342066660497443762363e-07 */
|
||||
.quad 0x3E5AF30036BBBAF4 /* A03 = +2.509841882843541084885e-08 */
|
||||
.quad 0x3FEFFFEAFE70FCFC /* A00 = +9.999899835164849370983e-01 */
|
||||
.quad 0x3EC879175E3549F5 /* A01 = +2.917410471128503564412e-06 */
|
||||
.quad 0xBE930E36677D1813 /* A02 = -2.839493400307523115929e-07 */
|
||||
.quad 0x3E43D4005B42D48F /* A03 = +9.233192745401904898013e-09 */
|
||||
.quad 0x3ff0000000000000
|
||||
.quad 0x0000000000000000
|
||||
.quad 0x0000000000000000
|
||||
.quad 0x0000000000000000
|
||||
.align 16
|
||||
.long 0x80000000, 0x80000000, 0x80000000, 0x80000000 /* _sSignMask */
|
||||
.align 16
|
||||
.long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff /* _sAbsMask */
|
||||
.align 16
|
||||
.long 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000 /* _iExpMantMask */
|
||||
.align 16
|
||||
.long 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000 /* _iExpMask */
|
||||
.align 16
|
||||
.long 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000 /* _iMinIdxOfsMask */
|
||||
.align 16
|
||||
.long 0x04280000, 0x04280000, 0x04280000, 0x04280000 /* _iMaxIdxMask */
|
||||
.align 16
|
||||
.type __svml_stanh_data_internal,@object
|
||||
.size __svml_stanh_data_internal,.-__svml_stanh_data_internal
|
20
sysdeps/x86_64/fpu/multiarch/svml_s_tanhf8_core-sse.S
Normal file
20
sysdeps/x86_64/fpu/multiarch/svml_s_tanhf8_core-sse.S
Normal file
@ -0,0 +1,20 @@
|
||||
/* SSE version of vectorized tanhf, vector length is 8.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Rename the symbol before pulling in the shared source: every use of
   _ZGVdN8v_tanhf in the included file is macro-replaced, so whatever
   ../svml_s_tanhf8_core.S defines under that name is emitted here as
   _ZGVdN8v_tanhf_sse_wrapper instead.  */
#define _ZGVdN8v_tanhf _ZGVdN8v_tanhf_sse_wrapper
|
||||
#include "../svml_s_tanhf8_core.S"
|
28
sysdeps/x86_64/fpu/multiarch/svml_s_tanhf8_core.c
Normal file
28
sysdeps/x86_64/fpu/multiarch/svml_s_tanhf8_core.c
Normal file
@ -0,0 +1,28 @@
|
||||
/* Multiple versions of vectorized tanhf, vector length is 8.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Name of the public symbol this file provides.  It is defined before the
   include below, so the header can refer to it.  */
#define SYMBOL_NAME _ZGVdN8v_tanhf
|
||||
/* REDIRECT_NAME and IFUNC_SELECTOR are not defined in this file;
   presumably they come from this header -- TODO confirm.  */
#include "ifunc-mathvec-avx2.h"
|
||||
|
||||
/* NOTE(review): looks like this declares SYMBOL_NAME as an indirect
   function whose implementation is picked by IFUNC_SELECTOR () -- confirm
   against the libc_ifunc_redirected definition.  */
libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
|
||||
|
||||
/* Only when building the shared library: declare the internal name
   __GI__ZGVdN8v_tanhf with hidden visibility (the attribute below).  */
#ifdef SHARED
|
||||
__hidden_ver1 (_ZGVdN8v_tanhf, __GI__ZGVdN8v_tanhf,
|
||||
__redirect__ZGVdN8v_tanhf)
|
||||
__attribute__ ((visibility ("hidden")));
|
||||
#endif
|
844
sysdeps/x86_64/fpu/multiarch/svml_s_tanhf8_core_avx2.S
Normal file
844
sysdeps/x86_64/fpu/multiarch/svml_s_tanhf8_core_avx2.S
Normal file
@ -0,0 +1,844 @@
|
||||
/* Function tanhf vectorized with AVX2.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
/*
|
||||
* ALGORITHM DESCRIPTION:
|
||||
*
|
||||
* NOTE: Since the hyperbolic tangent function is odd
|
||||
* (tanh(x) = -tanh(-x)), below algorithm deals with the absolute
|
||||
* value of the argument |x|: tanh(x) = sign(x) * tanh(|x|)
|
||||
*
|
||||
* We use a table lookup method to compute tanh(|x|).
|
||||
* The basic idea is to split the input range into a number of subintervals
|
||||
* and to approximate tanh(.) with a polynomial on each of them.
|
||||
*
|
||||
* IEEE SPECIAL CONDITIONS:
|
||||
* x = [+,-]0, r = [+,-]0
|
||||
* x = +Inf, r = +1
|
||||
* x = -Inf, r = -1
|
||||
* x = QNaN, r = QNaN
|
||||
* x = SNaN, r = QNaN
|
||||
*
|
||||
*
|
||||
* ALGORITHM DETAILS
|
||||
* We handle special values in a callout function, aside from main path
|
||||
* computations. "Special" for this algorithm are:
|
||||
* INF, NAN, |x| > HUGE_THRESHOLD
|
||||
*
|
||||
*
|
||||
* Main path computations are organized as follows:
|
||||
* Actually we split the interval [0, SATURATION_THRESHOLD)
|
||||
* into a number of subintervals. On each subinterval we approximate tanh(.)
|
||||
* with a minimax polynomial of pre-defined degree. Polynomial coefficients
|
||||
* are computed beforehand and stored in table. We also use
|
||||
*
|
||||
* y := |x| + B,
|
||||
*
|
||||
* here B depends on subinterval and is used to make argument
|
||||
* closer to zero.
|
||||
* We also add large fake interval [SATURATION_THRESHOLD, HUGE_THRESHOLD],
|
||||
* where 1.0 + 0.0*y + 0.0*y^2 ... coefficients are stored - just to
|
||||
* preserve main path computation logic but return 1.0 for all arguments.
|
||||
*
|
||||
* Hence reconstruction looks as follows:
|
||||
* we extract proper polynomial and range reduction coefficients
|
||||
* (Pj and B), corresponding to subinterval, to which |x| belongs,
|
||||
* and return
|
||||
*
|
||||
* r := sign(x) * (P0 + P1 * y + ... + Pn * y^n)
|
||||
*
|
||||
* NOTE: we use multiprecision technique to multiply and sum the first
|
||||
* K terms of the polynomial. So Pj, j = 0..K are stored in
|
||||
* table each as a pair of target precision numbers (Pj and PLj) to
|
||||
* achieve wider than target precision.
|
||||
*
|
||||
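* NOTE(review): the AVX2 code in this file appears to evaluate a single
* cubic A00 + A01*|x| + A02*|x|^2 + A03*|x|^3 in double precision
* directly on |x|; no B shift and no Pj/PLj coefficient pairs are
* visible in it. The two paragraphs above may describe a more general
* variant of the algorithm -- confirm.
*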
|
||||
*/
|
||||
|
||||
/* Offsets for data table __svml_stanh_data_internal
|
||||
*/
/* These are byte offsets added to the table symbol in the code below.
   They mirror the layout in the typedef next to the table: _dbP is
   declared as (134*4) pairs of 32-bit words, i.e. 134*4*8 = 4288 bytes,
   and each following mask is 8 32-bit words with 32-byte alignment,
   hence the 32-byte step between the remaining offsets.  */
|
||||
#define _dbP 0
|
||||
#define _sSignMask 4288
|
||||
#define _sAbsMask 4320
|
||||
#define _iExpMantMask 4352
|
||||
#define _iExpMask 4384
|
||||
#define _iMinIdxOfsMask 4416
|
||||
#define _iMaxIdxMask 4448
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
.text
|
||||
.section .text.avx2,"ax",@progbits
|
||||
/* 8-lane single-precision tanh.
   Input:  %ymm0 = 8 floats.  Output: %ymm0 = 8 floats.
   Fast path: for every lane a table entry of four doubles (A00..A03) is
   selected from bits of the input, the cubic
   A00 + |x|*(A01 + |x|*(A02 + |x|*A03)) is evaluated in double precision,
   rounded to float, and the sign bits of the input are ORed back in.
   Slow path: lanes flagged by the _iExpMask comparison are recomputed one
   at a time with a call to scalar tanhf and overwrite the fast-path
   result for that lane.  */
ENTRY(_ZGVdN8v_tanhf_avx2)
|
||||
pushq %rbp
|
||||
cfi_def_cfa_offset(16)
|
||||
movq %rsp, %rbp
|
||||
cfi_def_cfa(6, 16)
|
||||
cfi_offset(6, -16)
|
||||
/* Align the stack to 32 bytes; after the push and the 120-byte frame
   below %rsp is again a multiple of 32, so the 32(%rsp) and 64(%rsp)
   spill slots used by the special-values path are 32-byte aligned.  */
andq $-32, %rsp
|
||||
pushq %r12
|
||||
subq $120, %rsp
|
||||
/* %r10 = table base + 16: loads at -16(%r10,off) fetch the first two
   quadwords of an entry, loads at (%r10,off) the last two.  */
lea _dbP+16+__svml_stanh_data_internal(%rip), %r10
|
||||
/* Keep the original argument in %ymm12; %ymm0 is reused as scratch.  */
vmovaps %ymm0, %ymm12
|
||||
|
||||
/* Here huge arguments, INF and NaNs are filtered out to callout. */
|
||||
/* %ymm14 = x & _iExpMantMask (per-lane integer view of the input).  */
vpand _iExpMantMask+__svml_stanh_data_internal(%rip), %ymm12, %ymm14
|
||||
|
||||
/*
|
||||
* small table specific variables *
|
||||
* Constant loading
|
||||
*/
|
||||
vmovups _iMaxIdxMask+__svml_stanh_data_internal(%rip), %ymm8
|
||||
/* %ymm9 = masked bits - _iMinIdxOfsMask (may be negative).  */
vpsubd _iMinIdxOfsMask+__svml_stanh_data_internal(%rip), %ymm14, %ymm9
|
||||
|
||||
/* if VMIN, VMAX is defined for I type */
|
||||
/* Clamp %ymm9 to [0, _iMaxIdxMask] with signed compares:
   %ymm7 = (%ymm9 > 0) ? %ymm9 : 0, then
   %ymm3 = (%ymm9 > max) ? max : %ymm7.  */
vxorps %ymm15, %ymm15, %ymm15
|
||||
vpcmpgtd %ymm15, %ymm9, %ymm0
|
||||
vpand %ymm0, %ymm9, %ymm7
|
||||
vpcmpgtd %ymm8, %ymm9, %ymm6
|
||||
vblendvps %ymm6, %ymm8, %ymm7, %ymm3
|
||||
/* %ymm1 = clamped value >> 14; used below directly as a per-lane byte
   offset from %r10.  */
vpsrld $14, %ymm3, %ymm1
|
||||
/* %r11d = bitmask of lanes whose masked bits exceed _iExpMask; a
   non-zero mask sends the call through the special-values branch.  */
vpcmpgtd _iExpMask+__svml_stanh_data_internal(%rip), %ymm14, %ymm13
|
||||
vmovmskps %ymm13, %r11d
|
||||
/* %ymm10 = x & _sAbsMask, %ymm11 = x & _sSignMask (magnitude and sign,
   going by the mask names).  */
vandps _sAbsMask+__svml_stanh_data_internal(%rip), %ymm12, %ymm10
|
||||
vandps _sSignMask+__svml_stanh_data_internal(%rip), %ymm12, %ymm11
|
||||
/* Move the eight offsets to general registers (there is no gather here):
   lanes 0..3 come from %xmm1 -> r9, r8, rdi, rsi;
   lanes 4..7 come from %xmm2 -> rcx, rdx, rax, r12.
   Each is sign-extended to 64 bits for use in addressing.  */
vextractf128 $1, %ymm1, %xmm2
|
||||
vmovd %xmm1, %r9d
|
||||
vmovd %xmm2, %ecx
|
||||
vpextrd $1, %xmm2, %edx
|
||||
vpextrd $1, %xmm1, %r8d
|
||||
movslq %r9d, %r9
|
||||
movslq %edx, %rdx
|
||||
movslq %r8d, %r8
|
||||
vpextrd $2, %xmm1, %edi
|
||||
movslq %ecx, %rcx
|
||||
/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -8; DW_OP_plus) */
|
||||
.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf8, 0xff, 0xff, 0xff, 0x22
|
||||
vpextrd $3, %xmm2, %r12d
|
||||
vpextrd $3, %xmm1, %esi
|
||||
vpextrd $2, %xmm2, %eax
|
||||
movslq %edi, %rdi
|
||||
movslq %r12d, %r12
|
||||
movslq %esi, %rsi
|
||||
movslq %eax, %rax
|
||||
/* Load the coefficient pairs.  With %r10 = base + 16, the -16(...) loads
   fetch {A00, A01} and the (...) loads fetch {A02, A03} of an entry.
   vinsertf128 pairs lane k (low half) with lane k+2 (high half) so that
   the unpck{l,h}pd below yield one coefficient for four consecutive
   lanes in lane order.  */
vmovupd -16(%r9,%r10), %xmm5
|
||||
vmovupd -16(%rdx,%r10), %xmm14
|
||||
vmovupd -16(%rcx,%r10), %xmm13
|
||||
vmovupd (%r9,%r10), %xmm1
|
||||
vmovupd (%r8,%r10), %xmm2
|
||||
vmovupd -16(%r8,%r10), %xmm4
|
||||
vinsertf128 $1, -16(%rdi,%r10), %ymm5, %ymm15
|
||||
vinsertf128 $1, -16(%r12,%r10), %ymm14, %ymm3
|
||||
vinsertf128 $1, -16(%rax,%r10), %ymm13, %ymm6
|
||||
vinsertf128 $1, (%rdi,%r10), %ymm1, %ymm5
|
||||
vinsertf128 $1, (%rsi,%r10), %ymm2, %ymm14
|
||||
/* %ymm8 = A00 and %ymm6 = A01 for lanes 4..7.  */
vunpcklpd %ymm3, %ymm6, %ymm8
|
||||
vunpckhpd %ymm3, %ymm6, %ymm6
|
||||
/* %ymm3 = A02 and %ymm2 = A03 for lanes 0..3.  */
vunpcklpd %ymm14, %ymm5, %ymm3
|
||||
vunpckhpd %ymm14, %ymm5, %ymm2
|
||||
vmovupd (%rcx,%r10), %xmm13
|
||||
/* %ymm5 = masked input of lanes 0..3 widened to double; %xmm10 then
   becomes the masked input of lanes 4..7.  */
vcvtps2pd %xmm10, %ymm5
|
||||
vextractf128 $1, %ymm10, %xmm10
|
||||
/* Horner step for lanes 0..3: %ymm2 = A03*x + A02.  */
vfmadd213pd %ymm3, %ymm5, %ymm2
|
||||
vinsertf128 $1, -16(%rsi,%r10), %ymm4, %ymm0
|
||||
vmovupd (%rdx,%r10), %xmm4
|
||||
/* %ymm9 = A00 and %ymm7 = A01 for lanes 0..3.  */
vunpcklpd %ymm0, %ymm15, %ymm9
|
||||
vunpckhpd %ymm0, %ymm15, %ymm7
|
||||
/* %ymm2 = (%ymm2*x + A01)*x + A00: finished cubic for lanes 0..3.  */
vfmadd213pd %ymm7, %ymm5, %ymm2
|
||||
vfmadd213pd %ymm9, %ymm5, %ymm2
|
||||
vinsertf128 $1, (%r12,%r10), %ymm4, %ymm0
|
||||
/* %ymm4 = masked input of lanes 4..7 widened to double.  */
vcvtps2pd %xmm10, %ymm4
|
||||
vinsertf128 $1, (%rax,%r10), %ymm13, %ymm15
|
||||
/* %ymm1 = A02 and %ymm0 = A03 for lanes 4..7.  */
vunpcklpd %ymm0, %ymm15, %ymm1
|
||||
vunpckhpd %ymm0, %ymm15, %ymm0
|
||||
/* Same Horner evaluation for lanes 4..7, interleaved with narrowing the
   lanes 0..3 result to float in %xmm1.  */
vfmadd213pd %ymm1, %ymm4, %ymm0
|
||||
vcvtpd2ps %ymm2, %xmm1
|
||||
vfmadd213pd %ymm6, %ymm4, %ymm0
|
||||
vfmadd213pd %ymm8, %ymm4, %ymm0
|
||||
vcvtpd2ps %ymm0, %xmm0
|
||||
/* Join both halves and OR the saved sign bits back in.  */
vinsertf128 $1, %xmm0, %ymm1, %ymm2
|
||||
vorps %ymm11, %ymm2, %ymm0
|
||||
testl %r11d, %r11d
|
||||
|
||||
/* Go to special inputs processing branch */
|
||||
jne L(SPECIAL_VALUES_BRANCH)
|
||||
# LOE rbx r13 r14 r15 r11d ymm0 ymm12
|
||||
|
||||
/* Restore registers
|
||||
* and exit the function
|
||||
*/
|
||||
|
||||
L(EXIT):
|
||||
addq $120, %rsp
|
||||
cfi_restore(12)
|
||||
popq %r12
|
||||
movq %rbp, %rsp
|
||||
popq %rbp
|
||||
cfi_def_cfa(7, 8)
|
||||
cfi_restore(6)
|
||||
ret
|
||||
cfi_def_cfa(6, 16)
|
||||
cfi_offset(6, -16)
|
||||
/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -8; DW_OP_plus) */
|
||||
.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xf8, 0xff, 0xff, 0xff, 0x22
|
||||
|
||||
/* Branch to process
|
||||
* special inputs
|
||||
*/
|
||||
|
||||
L(SPECIAL_VALUES_BRANCH):
|
||||
/* Spill the original input to 32(%rsp) and the fast-path result to
   64(%rsp) so individual lanes can be read and patched as scalars.  */
vmovups %ymm12, 32(%rsp)
|
||||
vmovups %ymm0, 64(%rsp)
|
||||
# LOE rbx r13 r14 r15 r11d ymm0
|
||||
|
||||
/* %r12d = current lane index, starting at 0.  */
xorl %r12d, %r12d
|
||||
# LOE rbx r13 r14 r15 r11d r12d
|
||||
|
||||
vzeroupper
|
||||
/* Save %r13 and %r14 in the frame; they hold the lane mask and the lane
   index across the tanhf calls.  */
movq %r13, 8(%rsp)
|
||||
/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -120; DW_OP_plus) */
|
||||
.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x88, 0xff, 0xff, 0xff, 0x22
|
||||
/* %r13d = bitmask of special lanes computed on the fast path.  */
movl %r11d, %r13d
|
||||
movq %r14, (%rsp)
|
||||
/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -128; DW_OP_plus) */
|
||||
.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x22
|
||||
# LOE rbx r15 r12d r13d
|
||||
|
||||
/* Range mask
|
||||
* bits check
|
||||
*/
|
||||
|
||||
L(RANGEMASK_CHECK):
|
||||
/* CF = bit %r12d of the lane mask.  */
btl %r12d, %r13d
|
||||
|
||||
/* Call scalar math function */
|
||||
jc L(SCALAR_MATH_CALL)
|
||||
# LOE rbx r15 r12d r13d
|
||||
|
||||
/* Special inputs
|
||||
* processing loop
|
||||
*/
|
||||
|
||||
L(SPECIAL_VALUES_LOOP):
|
||||
/* Advance to the next lane; 8 lanes in total.  */
incl %r12d
|
||||
cmpl $8, %r12d
|
||||
|
||||
/* Check bits in range mask */
|
||||
jl L(RANGEMASK_CHECK)
|
||||
# LOE rbx r15 r12d r13d
|
||||
|
||||
/* All lanes visited: restore %r13/%r14 and reload the patched result
   vector before leaving through the common epilogue.  */
movq 8(%rsp), %r13
|
||||
cfi_restore(13)
|
||||
movq (%rsp), %r14
|
||||
cfi_restore(14)
|
||||
vmovups 64(%rsp), %ymm0
|
||||
|
||||
/* Go to exit */
|
||||
jmp L(EXIT)
|
||||
/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -120; DW_OP_plus) */
|
||||
.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x88, 0xff, 0xff, 0xff, 0x22
|
||||
/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -128; DW_OP_plus) */
|
||||
.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x22
|
||||
# LOE rbx r13 r14 r15 ymm0
|
||||
|
||||
/* Scalar math function call
|
||||
* to process special input
|
||||
*/
|
||||
|
||||
L(SCALAR_MATH_CALL):
|
||||
/* %r14 = lane index (zero-extended by the 32-bit move), used to address
   the 4-byte element inside the spilled vectors.  */
movl %r12d, %r14d
|
||||
/* Load the original input of this lane and call scalar tanhf.  */
movss 32(%rsp,%r14,4), %xmm0
|
||||
call tanhf@PLT
|
||||
# LOE rbx r14 r15 r12d r13d xmm0
|
||||
|
||||
/* Overwrite this lane of the spilled result with the scalar value.  */
movss %xmm0, 64(%rsp,%r14,4)
|
||||
|
||||
/* Process special inputs in loop */
|
||||
jmp L(SPECIAL_VALUES_LOOP)
|
||||
# LOE rbx r15 r12d r13d
|
||||
END(_ZGVdN8v_tanhf_avx2)
|
||||
|
||||
.section .rodata, "a"
|
||||
.align 32
|
||||
|
||||
#ifdef __svml_stanh_data_internal_typedef
/* C-style description of the layout of the __svml_stanh_data_internal
 * table.  It is only seen past the preprocessor when
 * __svml_stanh_data_internal_typedef is defined.
 * NOTE(review): presumably that macro is never defined when this file
 * is assembled, so the typedef acts as layout documentation only --
 * confirm.
 */
|
||||
/* Word type used to describe every field of the table.  */
typedef unsigned int VUINT32;
|
||||
typedef struct
|
||||
{
|
||||
/* (134*4) entries of two VUINT32 words each.  The table at the label
 * of the same name starts with .quad values annotated A00..A03 per
 * "Pol_NNN" group, i.e. four 64-bit coefficients per polynomial.
 * NOTE(review): presumably 134 such groups -- confirm against the
 * full table.
 */
__declspec(align(32)) VUINT32 _dbP[(134*4)][2];
|
||||
/* The remaining fields each hold 8 VUINT32 elements with 32-byte
 * alignment requested.
 * NOTE(review): looks like one constant replicated per 32-bit lane of
 * a ymm register; the names suggest sign / absolute-value / exponent
 * masks and table-index bounds -- confirm against the data and the
 * code that loads them.
 */
__declspec(align(32)) VUINT32 _sSignMask[8][1];
|
||||
__declspec(align(32)) VUINT32 _sAbsMask[8][1];
|
||||
__declspec(align(32)) VUINT32 _iExpMantMask[8][1];
|
||||
__declspec(align(32)) VUINT32 _iExpMask[8][1];
|
||||
__declspec(align(32)) VUINT32 _iMinIdxOfsMask[8][1];
|
||||
__declspec(align(32)) VUINT32 _iMaxIdxMask[8][1];
|
||||
} __svml_stanh_data_internal;
|
||||
#endif
|
||||
__svml_stanh_data_internal:
|
||||
/* Pol_000: err=7.93e-09, x in [0.0000000; 0.0312500]. */
|
||||
.quad 0x0000000000000000 /* A00 = +0.000000000000000000000e-01 */
|
||||
.quad 0x3FF00000022C70EB /* A01 = +1.000000008097283510367e+00 */
|
||||
.quad 0xBED00E878CFFA194 /* A02 = -3.828228912518614443549e-06 */
|
||||
.quad 0xBFD551766D0607A9 /* A03 = -3.330970825846813476723e-01 */
|
||||
.quad 0xBE53D60CE3E4C297 /* A00 = -1.847383956330407336230e-08 */
|
||||
.quad 0x3FF000024177CF5C /* A01 = +1.000002151235967140508e+00 */
|
||||
.quad 0xBF1758BC94A51A25 /* A02 = -8.906031613262943753568e-05 */
|
||||
.quad 0xBFD53EAE67E0D4F0 /* A03 = -3.319507612644221339337e-01 */
|
||||
.quad 0xBE5A9E47EF32D6FE /* A00 = -2.479020984039698285657e-08 */
|
||||
.quad 0x3FF00002DA983057 /* A01 = +1.000002721676556793895e+00 */
|
||||
.quad 0xBF1BD953509E94AA /* A02 = -1.062352277175377670507e-04 */
|
||||
.quad 0xBFD53BDB562EEDD5 /* A03 = -3.317783681520414806876e-01 */
|
||||
.quad 0xBE6191BBE496D294 /* A00 = -3.272532162914017685901e-08 */
|
||||
.quad 0x3FF0000390492017 /* A01 = +1.000003398528866105366e+00 */
|
||||
.quad 0xBF20727E814A57CE /* A02 = -1.254825043772153972919e-04 */
|
||||
.quad 0xBFD538DE060A6F22 /* A03 = -3.315959033004550748913e-01 */
|
||||
.quad 0xBE66DAFA2A893A25 /* A00 = -4.257146219278012568149e-08 */
|
||||
.quad 0x3FF0000465E08CD1 /* A01 = +1.000004194219219266770e+00 */
|
||||
.quad 0xBF2341C765EF91B6 /* A02 = -1.469188600530365522261e-04 */
|
||||
.quad 0xBFD535B6841FAF9E /* A03 = -3.314033785124993469751e-01 */
|
||||
.quad 0xBE6D5794E361E964 /* A00 = -5.465394929765249413434e-08 */
|
||||
.quad 0x3FF000055EE2A0CB /* A01 = +1.000005121846742950353e+00 */
|
||||
.quad 0xBF265E6C77E66C8B /* A02 = -1.706607253709506650304e-04 */
|
||||
.quad 0xBFD53264DDCCEDA6 /* A03 = -3.312008062382240103361e-01 */
|
||||
.quad 0xBE729C844D374A6E /* A00 = -6.933284462462096107184e-08 */
|
||||
.quad 0x3FF000067F019093 /* A01 = +1.000006195180536350264e+00 */
|
||||
.quad 0xBF29CC5348D6DCE5 /* A02 = -1.968242326435338705130e-04 */
|
||||
.quad 0xBFD52EE92121ED35 /* A03 = -3.309881995734998416658e-01 */
|
||||
.quad 0xBE775AEA17EAA872 /* A00 = -8.700465590574974405858e-08 */
|
||||
.quad 0x3FF00007CA1D66B8 /* A01 = +1.000007428656699559610e+00 */
|
||||
.quad 0xBF2D8F5EB98A2637 /* A02 = -2.255252009216044881395e-04 */
|
||||
.quad 0xBFD52B435CDF9128 /* A03 = -3.307655722585587376727e-01 */
|
||||
.quad 0xBE7D04DA28C343F0 /* A00 = -1.081040272327705484794e-07 */
|
||||
.quad 0x3FF000094443CCF5 /* A01 = +1.000008837375216730337e+00 */
|
||||
.quad 0xBF30D5B76C947AE5 /* A02 = -2.568791210978817814332e-04 */
|
||||
.quad 0xBFD52773A0776FAD /* A03 = -3.305329386764651045105e-01 */
|
||||
.quad 0xBE81DD77A12C51C7 /* A00 = -1.331054169875768625701e-07 */
|
||||
.quad 0x3FF0000AF1AFD2DA /* A01 = +1.000010437096696680470e+00 */
|
||||
.quad 0xBF331230624C1680 /* A02 = -2.910011410651516805537e-04 */
|
||||
.quad 0xBFD52379FC0B61DF /* A03 = -3.302903138515186909352e-01 */
|
||||
.quad 0xBE85D04EEEB3C435 /* A00 = -1.625247628488202841012e-07 */
|
||||
.quad 0x3FF0000CD6C9B1F2 /* A01 = +1.000012244238970726684e+00 */
|
||||
.quad 0xBF357F0742FADDD4 /* A02 = -3.280060509313874068243e-04 */
|
||||
.quad 0xBFD51F56806D0E81 /* A03 = -3.300377134475880880338e-01 */
|
||||
.quad 0xBE8A6E289B59681B /* A00 = -1.969211333326924655065e-07 */
|
||||
.quad 0x3FF0000EF8268F72 /* A01 = +1.000014275873550406715e+00 */
|
||||
.quad 0xBF381E277A1B747A /* A02 = -3.680082682942575423093e-04 */
|
||||
.quad 0xBFD51B093F1D6FD4 /* A03 = -3.297751537663746734808e-01 */
|
||||
.quad 0xBE8FCBC40EE9ABD5 /* A00 = -2.368983653301529373887e-07 */
|
||||
.quad 0x3FF000115A883B6C /* A01 = +1.000016549721943981410e+00 */
|
||||
.quad 0xBF3AF17AC974B3D9 /* A02 = -4.111218235774406434303e-04 */
|
||||
.quad 0xBFD516924A4C549C /* A03 = -3.295026517456081105450e-01 */
|
||||
.quad 0xBE92FFBC60A3F956 /* A00 = -2.831066871072026054144e-07 */
|
||||
.quad 0x3FF0001402DCED8A /* A01 = +1.000019084151832604590e+00 */
|
||||
.quad 0xBF3DFAE9390C4801 /* A02 = -4.574603454311488280083e-04 */
|
||||
.quad 0xBFD511F1B4D7DC3A /* A03 = -3.292202249571719585575e-01 */
|
||||
.quad 0xBE9690A22F96D5AD /* A00 = -3.362443262393081632612e-07 */
|
||||
.quad 0x3FF00016F63EFF5D /* A01 = +1.000021898173108825247e+00 */
|
||||
.quad 0xBF409E2C839605BB /* A02 = -5.071370461992499986334e-04 */
|
||||
.quad 0xBFD50D27924BEE00 /* A03 = -3.289278916051614487515e-01 */
|
||||
.quad 0xBE9AA56C65E72A73 /* A00 = -3.970591019557469835586e-07 */
|
||||
.quad 0x3FF0001A39F4A43E /* A01 = +1.000025011433776978009e+00 */
|
||||
.quad 0xBF425BD74C3D6667 /* A02 = -5.602647074553602319844e-04 */
|
||||
.quad 0xBFD50833F6E1ABA2 /* A03 = -3.286256705238718156536e-01 */
|
||||
.quad 0xBE9F4BD4FF1A83B0 /* A00 = -4.663500013744687071912e-07 */
|
||||
.quad 0x3FF0001DD36F9EC2 /* A01 = +1.000028444215715683896e+00 */
|
||||
.quad 0xBF44376634149405 /* A02 = -6.169556656102642569831e-04 */
|
||||
.quad 0xBFD50316F77EDEE5 /* A03 = -3.283135811757190158922e-01 */
|
||||
.quad 0xBEA3B625387BB079 /* A00 = -5.874486399249461304297e-07 */
|
||||
.quad 0x3FF00023E14CFBA9 /* A01 = +1.000034217911642153709e+00 */
|
||||
.quad 0xBF47392F923218D2 /* A02 = -7.087213783883111826306e-04 */
|
||||
.quad 0xBFD4FB1FACDEB938 /* A03 = -3.278273761924483942209e-01 */
|
||||
.quad 0xBEAA6E24F543500A /* A00 = -7.876828740601738750574e-07 */
|
||||
.quad 0x3FF0002D5C6E8412 /* A01 = +1.000043259679163742959e+00 */
|
||||
.quad 0xBF4BAF02BD7FDD70 /* A02 = -8.448375110664940040861e-04 */
|
||||
.quad 0xBFD4EFEE6527A7DE /* A03 = -3.271442401734229177279e-01 */
|
||||
.quad 0xBEB16E3EBE2157D0 /* A00 = -1.038947396133402500647e-06 */
|
||||
.quad 0x3FF00038990FEE2F /* A01 = +1.000053975962952312884e+00 */
|
||||
.quad 0xBF50569481C574CB /* A02 = -9.972048056490652716971e-04 */
|
||||
.quad 0xBFD4E419278DA2B4 /* A03 = -3.264220129263251113372e-01 */
|
||||
.quad 0xBEB6A7B6723165D4 /* A00 = -1.350350836279403750524e-06 */
|
||||
.quad 0x3FF00045CAB4158E /* A01 = +1.000066558657042303793e+00 */
|
||||
.quad 0xBF531D7C9C849108 /* A02 = -1.166698160951775212202e-03 */
|
||||
.quad 0xBFD4D7A0BB33B152 /* A03 = -3.256608799117844954552e-01 */
|
||||
.quad 0xBEBD0EE2A8654AFD /* A00 = -1.732000471561702711532e-06 */
|
||||
.quad 0x3FF00055276F18D6 /* A01 = +1.000081209219890521211e+00 */
|
||||
.quad 0xBF562FDBA3FB6C6C /* A02 = -1.354183666925102939860e-03 */
|
||||
.quad 0xBFD4CA85F1B93DB2 /* A03 = -3.248610363561638125773e-01 */
|
||||
.quad 0xBEC269D4036A207E /* A00 = -2.195047297096822741730e-06 */
|
||||
.quad 0x3FF00066E7DA6E4E /* A01 = +1.000098138500919997540e+00 */
|
||||
.quad 0xBF5991499FC36B3A /* A02 = -1.560518167983372759405e-03 */
|
||||
.quad 0xBFD4BCC9A72283D6 /* A03 = -3.240226871658341556426e-01 */
|
||||
.quad 0xBEC7154B6C09CFE1 /* A00 = -2.751729738565190291276e-06 */
|
||||
.quad 0x3FF0007B47086B80 /* A01 = +1.000117566559055148900e+00 */
|
||||
.quad 0xBF5D455433B4F8F4 /* A02 = -1.786548832412968197680e-03 */
|
||||
.quad 0xBFD4AE6CC1BFE145 /* A03 = -3.231460468373550942722e-01 */
|
||||
.quad 0xBECCA68CC64A0F8A /* A00 = -3.415415948561670285790e-06 */
|
||||
.quad 0x3FF00092827742F7 /* A01 = +1.000139722473418535387e+00 */
|
||||
.quad 0xBF60A7BF15A527AF /* A02 = -2.033112728132522705610e-03 */
|
||||
.quad 0xBFD49F703214084C /* A03 = -3.222313393636155876010e-01 */
|
||||
.quad 0xBED19E68676B241B /* A00 = -4.200644630977303616698e-06 */
|
||||
.quad 0x3FF000ACDA037B26 /* A01 = +1.000164844146362863597e+00 */
|
||||
.quad 0xBF62D99F836A02F8 /* A02 = -2.301036405072284102280e-03 */
|
||||
.quad 0xBFD48FD4F2B91B28 /* A03 = -3.212787981359945810311e-01 */
|
||||
.quad 0xBED57CF4B0C7AA54 /* A00 = -5.123164339408145209103e-06 */
|
||||
.quad 0x3FF000CA8FD9E1A1 /* A01 = +1.000193178099017865534e+00 */
|
||||
.quad 0xBF653A014548E686 /* A02 = -2.591135484433962181405e-03 */
|
||||
.quad 0xBFD47F9C0844B38F /* A03 = -3.202886658426046806447e-01 */
|
||||
.quad 0xBEDA012B1B1A41E2 /* A00 = -6.199971197454598722328e-06 */
|
||||
.quad 0x3FF000EBE868FDF4 /* A01 = +1.000224979259539459520e+00 */
|
||||
.quad 0xBF67CA9427E0A544 /* A02 = -2.904214255086275467410e-03 */
|
||||
.quad 0xBFD46EC6812ADB37 /* A03 = -3.192611943626845749655e-01 */
|
||||
.quad 0xBEDF3EAC5BF12194 /* A00 = -7.449344990702664567927e-06 */
|
||||
.quad 0x3FF001112A520784 /* A01 = +1.000260510744255704196e+00 */
|
||||
.quad 0xBF6A8D01ABDA4DC4 /* A02 = -3.241065277345108255891e-03 */
|
||||
.quad 0xBFD45D55759FFA4A /* A03 = -3.181966446572103146551e-01 */
|
||||
.quad 0xBEE2A541BC274267 /* A00 = -8.890883582164319970972e-06 */
|
||||
.quad 0x3FF0013A9E5961F2 /* A01 = +1.000300043631906721231e+00 */
|
||||
.quad 0xBF6D82ECD080C540 /* A02 = -3.602468994380686462264e-03 */
|
||||
.quad 0xBFD44B4A0779C0AD /* A03 = -3.170952866557950611259e-01 */
|
||||
.quad 0xBEE61D97609A27F4 /* A00 = -1.054553560499505625520e-05 */
|
||||
.quad 0x3FF001688F56A3AF /* A01 = +1.000343856731187974773e+00 */
|
||||
.quad 0xBF7056F8EFB683EC /* A02 = -3.989193351487490407647e-03 */
|
||||
.quad 0xBFD438A5620F0F74 /* A03 = -3.159573991399533543500e-01 */
|
||||
.quad 0xBEEA145429EDD370 /* A00 = -1.243563138839952927732e-05 */
|
||||
.quad 0x3FF0019B4A242A67 /* A01 = +1.000392236341804297339e+00 */
|
||||
.quad 0xBF7207D31CA78D9B /* A02 = -4.401993423445739288258e-03 */
|
||||
.quad 0xBFD42568BA16E7CD /* A03 = -3.147832696228050619602e-01 */
|
||||
.quad 0xBEEE96370D52680F /* A00 = -1.458491207477835326165e-05 */
|
||||
.quad 0x3FF001D31D8E4115 /* A01 = +1.000445476009251821736e+00 */
|
||||
.quad 0xBF73D4CC11EDC094 /* A02 = -4.841611050196221316400e-03 */
|
||||
.quad 0xBFD411954D8664E7 /* A03 = -3.135731942252974469021e-01 */
|
||||
.quad 0xBEF338C046215EF8 /* A00 = -1.833122622260562810219e-05 */
|
||||
.quad 0x3FF00230C32C2EC1 /* A01 = +1.000534784691737621998e+00 */
|
||||
.quad 0xBF76BD019BCC5DAF /* A02 = -5.551344188254799492943e-03 */
|
||||
.quad 0xBFD3F2C7156DC21E /* A03 = -3.116929730668135389848e-01 */
|
||||
.quad 0xBEF9B15EAE411EAE /* A00 = -2.450261207822986676092e-05 */
|
||||
.quad 0x3FF002C2DF057A4D /* A01 = +1.000674124886830940184e+00 */
|
||||
.quad 0xBF7B08CCD9AC1E30 /* A02 = -6.600189396301511801646e-03 */
|
||||
.quad 0xBFD3C7A7A114FED8 /* A03 = -3.090609620157755976777e-01 */
|
||||
.quad 0xBF00E36483C373B3 /* A00 = -3.221178528332122595812e-05 */
|
||||
.quad 0x3FF0036F419480D7 /* A01 = +1.000838524028997644777e+00 */
|
||||
.quad 0xBF7FD255D1777007 /* A02 = -7.768950679260206403087e-03 */
|
||||
.quad 0xBFD39A453911D6CE /* A03 = -3.062909180947429588215e-01 */
|
||||
.quad 0xBF05DFA04DD12059 /* A00 = -4.172046622180685472624e-05 */
|
||||
.quad 0x3FF00438B2A03D8D /* A01 = +1.001030633695197069599e+00 */
|
||||
.quad 0xBF828F8DBB4A9D10 /* A02 = -9.062869337255224921890e-03 */
|
||||
.quad 0xBFD36AAB704697D9 /* A03 = -3.033856007044711255993e-01 */
|
||||
.quad 0xBF0BF3E0C647DEFB /* A00 = -5.331544597092331081714e-05 */
|
||||
.quad 0x3FF005221063D36D /* A01 = +1.001253189109060359741e+00 */
|
||||
.quad 0xBF857A2CB3C96102 /* A02 = -1.048693584122917590862e-02 */
|
||||
.quad 0xBFD338E65BBB4FEC /* A03 = -3.003478904549854444639e-01 */
|
||||
.quad 0xBF11A506ED7C9D31 /* A00 = -6.730894835681591541979e-05 */
|
||||
.quad 0x3FF0062E4D0EA92A /* A01 = +1.001508999829250345925e+00 */
|
||||
.quad 0xBF88AB82C2761AF3 /* A02 = -1.204588085125866091241e-02 */
|
||||
.quad 0xBFD305028D6BD206 /* A03 = -2.971807843271395688234e-01 */
|
||||
.quad 0xBF1607C0922D9BF1 /* A00 = -8.403885708006799337092e-05 */
|
||||
.quad 0x3FF007606C341961 /* A01 = +1.001800940198869449560e+00 */
|
||||
.quad 0xBF8C25E6DA487BCF /* A02 = -1.374416688582682892494e-02 */
|
||||
.quad 0xBFD2CF0D0EE8F7B5 /* A03 = -2.938873906713255768075e-01 */
|
||||
.quad 0xBF1B3A8480A0A16D /* A00 = -1.038688061788578038307e-04 */
|
||||
.quad 0x3FF008BB802D02D6 /* A01 = +1.002131939589323561535e+00 */
|
||||
.quad 0xBF8FEB8AE99FD100 /* A02 = -1.558598065819483124983e-02 */
|
||||
.quad 0xBFD297135BD0911B /* A03 = -2.904709240558688843059e-01 */
|
||||
.quad 0xBF20ABB9BDB75C65 /* A00 = -1.271881327357976163798e-04 */
|
||||
.quad 0x3FF00A42A76D8CD1 /* A01 = +1.002504972472525901495e+00 */
|
||||
.quad 0xBF91FF3D752BB9E6 /* A02 = -1.757522609380570560722e-02 */
|
||||
.quad 0xBFD25D235C1F88B4 /* A03 = -2.869346999779154305799e-01 */
|
||||
.quad 0xBF243D3254425461 /* A00 = -1.544116913733432829448e-04 */
|
||||
.quad 0x3FF00BF909D1795E /* A01 = +1.002923048355647051011e+00 */
|
||||
.quad 0xBF94304E04D44942 /* A02 = -1.971551804042204897316e-02 */
|
||||
.quad 0xBFD2214B5E61CFA6 /* A03 = -2.832821294498394371075e-01 */
|
||||
.quad 0xBF286070011B61CE /* A00 = -1.859795307186510085994e-04 */
|
||||
.quad 0x3FF00DE1D5E1627E /* A01 = +1.003389201612804537689e+00 */
|
||||
.quad 0xBF9689D5F4163F59 /* A02 = -2.201017668045266231780e-02 */
|
||||
.quad 0xBFD1E39A11C3B42C /* A03 = -2.795167134743816728104e-01 */
|
||||
.quad 0xBF2D250B366A79E8 /* A00 = -2.223564326486314902259e-04 */
|
||||
.quad 0x3FF010003E134001 /* A01 = +1.003906481248123094829e+00 */
|
||||
.quad 0xBF990C9FF91F6F81 /* A02 = -2.446222265267250853271e-02 */
|
||||
.quad 0xBFD1A41E80084CDC /* A03 = -2.756420374218586655246e-01 */
|
||||
.quad 0xBF314DB5DDC2A30E /* A00 = -2.640313157465248123865e-04 */
|
||||
.quad 0x3FF012577608921B /* A01 = +1.004477940624503018441e+00 */
|
||||
.quad 0xBF9BB9626875B0C9 /* A02 = -2.707437288829409385849e-02 */
|
||||
.quad 0xBFD162E80768A9D0 /* A03 = -2.716617653228725615122e-01 */
|
||||
.quad 0xBF346A6133808864 /* A00 = -3.115165050094957730625e-04 */
|
||||
.quad 0x3FF014EAAFCC88A3 /* A01 = +1.005106627192198898157e+00 */
|
||||
.quad 0xBF9E90BEF9BF7419 /* A02 = -2.984903716411588595059e-02 */
|
||||
.quad 0xBFD12006545F7FAD /* A03 = -2.675796340899932457269e-01 */
|
||||
.quad 0xBF37F180DC3848EA /* A00 = -3.653468704395550778821e-04 */
|
||||
.quad 0x3FF017BD19147861 /* A01 = +1.005795572250939295955e+00 */
|
||||
.quad 0xBFA0C9A14C702E07 /* A02 = -3.278831537326359207851e-02 */
|
||||
.quad 0xBFD0DB895B650092 /* A03 = -2.633994476818851682154e-01 */
|
||||
.quad 0xBF3BEC6AAC6D7635 /* A00 = -4.260788377246944457107e-04 */
|
||||
.quad 0x3FF01AD1D884E719 /* A01 = +1.006547780778822565040e+00 */
|
||||
.quad 0xBFA260B2A1B1434A /* A02 = -3.589399551186163439542e-02 */
|
||||
.quad 0xBFD09581529E93D6 /* A03 = -2.591250712233067465817e-01 */
|
||||
.quad 0xBF4164E26167882B /* A00 = -5.308251737086202562063e-04 */
|
||||
.quad 0x3FF01FEF14B62B81 /* A01 = +1.007796364693348545316e+00 */
|
||||
.quad 0xBFA4EB014538AA42 /* A02 = -4.085544557559163403315e-02 */
|
||||
.quad 0xBFD029D36FEAF41F /* A03 = -2.525528519580024222613e-01 */
|
||||
.quad 0xBF46F6FFF4E53DC8 /* A00 = -7.008313930700277652464e-04 */
|
||||
.quad 0x3FF027CBB51CBBA0 /* A01 = +1.009715754956893363214e+00 */
|
||||
.quad 0xBFA89DEC9FEC112E /* A02 = -4.807986690687680864098e-02 */
|
||||
.quad 0xBFCF2A99464D0DB4 /* A03 = -2.434875100390009317053e-01 */
|
||||
.quad 0xBF4DCC9C4F66A4D9 /* A00 = -9.094012482836712945103e-04 */
|
||||
.quad 0x3FF030E7CFCCD583 /* A01 = +1.011939822882909068014e+00 */
|
||||
.quad 0xBFACAA3B95814081 /* A02 = -5.598627281199331645611e-02 */
|
||||
.quad 0xBFCDF78F156BE7CF /* A03 = -2.341173987004467604844e-01 */
|
||||
.quad 0xBF5308ED74E5C7A6 /* A00 = -1.161796466103906435435e-03 */
|
||||
.quad 0x3FF03B5986412ECB /* A01 = +1.014489674026594512313e+00 */
|
||||
.quad 0xBFB087EBA88DCC3F /* A02 = -6.457398285947223148806e-02 */
|
||||
.quad 0xBFCCBB9BD134862F /* A03 = -2.244753619680052991736e-01 */
|
||||
.quad 0xBF57FA23C00DF4B5 /* A00 = -1.463446533505758208674e-03 */
|
||||
.quad 0x3FF0473558A1BCC0 /* A01 = +1.017384859292903342975e+00 */
|
||||
.quad 0xBFB2E702BC6360EF /* A02 = -7.383744334527241048871e-02 */
|
||||
.quad 0xBFCB77D546379288 /* A03 = -2.145945160729250122955e-01 */
|
||||
.quad 0xBF5DD12971557F71 /* A00 = -1.819887610814388068450e-03 */
|
||||
.quad 0x3FF0548DDF5000A8 /* A01 = +1.020643112482540360020e+00 */
|
||||
.quad 0xBFB571B63DA186E1 /* A02 = -8.376635555898871710045e-02 */
|
||||
.quad 0xBFCA2D5202605148 /* A03 = -2.045080672838912594358e-01 */
|
||||
.quad 0xBF6252B1AD5D4F17 /* A00 = -2.236697221556737096709e-03 */
|
||||
.quad 0x3FF063738A910BF7 /* A01 = +1.024280110622155737232e+00 */
|
||||
.quad 0xBFB8270C8E6B601B /* A02 = -9.434584118878357184013e-02 */
|
||||
.quad 0xBFC8DD27D950A07E /* A03 = -1.942491351230763441116e-01 */
|
||||
.quad 0xBF66470C91730CFC /* A00 = -2.719425723258004842786e-03 */
|
||||
.quad 0x3FF073F468FCF331 /* A01 = +1.028309259519300633556e+00 */
|
||||
.quad 0xBFBB05C2952191E4 /* A02 = -1.055566419686964629854e-01 */
|
||||
.quad 0xBFC7886A770DE2BD /* A03 = -1.838505822486435070662e-01 */
|
||||
.quad 0xBF6AD114AC8E98EC /* A00 = -3.273525599485007861467e-03 */
|
||||
.quad 0x3FF0861BF53E5226 /* A01 = +1.032741506559554434119e+00 */
|
||||
.quad 0xBFBE0C4F9B461507 /* A02 = -1.173753503881763554650e-01 */
|
||||
.quad 0xBFC6302A037CDE3A /* A03 = -1.733448521642786954722e-01 */
|
||||
.quad 0xBF6FFBDE2A6C2AF8 /* A00 = -3.904279630096648551207e-03 */
|
||||
.quad 0x3FF099F2EB8E7DA3 /* A01 = +1.037585182326304034106e+00 */
|
||||
.quad 0xBFC09C74D192DDF0 /* A02 = -1.297746680554463516444e-01 */
|
||||
.quad 0xBFC4D571D8E3079F /* A03 = -1.627638157861470424859e-01 */
|
||||
.quad 0xBF72E8FDC0B952AA /* A00 = -4.616728994353872309042e-03 */
|
||||
.quad 0x3FF0AF7F273C9533 /* A01 = +1.042845872181101141152e+00 */
|
||||
.quad 0xBFC244C512736F10 /* A02 = -1.427236881344176033792e-01 */
|
||||
.quad 0xBFC379474F58B902 /* A03 = -1.521386277613104298645e-01 */
|
||||
.quad 0xBF762EABAF17395B /* A00 = -5.415602341101023557701e-03 */
|
||||
.quad 0x3FF0C6C3886F63FB /* A01 = +1.048526318502125631582e+00 */
|
||||
.quad 0xBFC3FDF9918EA12A /* A02 = -1.561881981590514389957e-01 */
|
||||
.quad 0xBFC21CA89ECAB895 /* A03 = -1.414995932913753196036e-01 */
|
||||
.quad 0xBF79D387CE5B2BAE /* A00 = -6.305246822828998107258e-03 */
|
||||
.quad 0x3FF0DFBFE2346376 /* A01 = +1.054626353847394337748e+00 */
|
||||
.quad 0xBFC5C6DA43602620 /* A02 = -1.701309994680721970894e-01 */
|
||||
.quad 0xBFC0C08BD8DB6631 /* A03 = -1.308760460731704100557e-01 */
|
||||
.quad 0xBF7DDBA8E8DA9060 /* A00 = -7.289562037531366334164e-03 */
|
||||
.quad 0x3FF0FA70F0D1B464 /* A01 = +1.061142864894713433443e+00 */
|
||||
.quad 0xBFC79E18D92BAA7C /* A02 = -1.845122394946264732241e-01 */
|
||||
.quad 0xBFBECBBBF74C2669 /* A03 = -1.202962378266875381749e-01 */
|
||||
.quad 0xBF81254E76EA25DA /* A00 = -8.371937755572145950511e-03 */
|
||||
.quad 0x3FF116D05835EBD0 /* A01 = +1.068069786618014660462e+00 */
|
||||
.quad 0xBFC982539E2ED224 /* A02 = -1.992897531869327609755e-01 */
|
||||
.quad 0xBFBC1B043C350159 /* A03 = -1.097872397413132278254e-01 */
|
||||
.quad 0xBF8391ACBA863403 /* A00 = -9.555196230190082448686e-03 */
|
||||
.quad 0x3FF134D4AA477FE2 /* A01 = +1.075398125794884141015e+00 */
|
||||
.quad 0xBFCB7218609FEAFB /* A02 = -2.144194099235717521079e-01 */
|
||||
.quad 0xBFB970A16CB88329 /* A03 = -9.937485603633135211599e-02 */
|
||||
.quad 0xBF87935088E48E8B /* A00 = -1.151144902957603431692e-02 */
|
||||
.quad 0x3FF1649892AD7DD3 /* A01 = +1.087059567413110938716e+00 */
|
||||
.quad 0xBFCE6971DDE75409 /* A02 = -2.375929196847723912089e-01 */
|
||||
.quad 0xBFB58291E88CB251 /* A03 = -8.402358939628952472223e-02 */
|
||||
.quad 0xBF8DB3A62C325325 /* A00 = -1.450280973794233242702e-02 */
|
||||
.quad 0x3FF1A9C900C6DEEA /* A01 = +1.103951457056548068891e+00 */
|
||||
.quad 0xBFD13DBC65B0E08E /* A02 = -2.693930619311765140012e-01 */
|
||||
.quad 0xBFB06696F62696D1 /* A03 = -6.406539449252625362252e-02 */
|
||||
.quad 0xBF92583699F2E27A /* A00 = -1.791463198307716858659e-02 */
|
||||
.quad 0x3FF1F451B85AA9F0 /* A01 = +1.122148246892376022288e+00 */
|
||||
.quad 0xBFD34FD5F8288180 /* A02 = -3.017477916164565954205e-01 */
|
||||
.quad 0xBFA6FB692825B683 /* A03 = -4.488686194495718900788e-02 */
|
||||
.quad 0xBF9641C26E673D6F /* A00 = -2.173522757385398448959e-02 */
|
||||
.quad 0x3FF24364DA5E2B07 /* A01 = +1.141453602790251542487e+00 */
|
||||
.quad 0xBFD564A5A5EF5890 /* A02 = -3.342680092295120530821e-01 */
|
||||
.quad 0xBF9B43712011A982 /* A03 = -2.662445791467283467968e-02 */
|
||||
.quad 0xBF9A901038EC2F39 /* A00 = -2.594018313816024226548e-02 */
|
||||
.quad 0x3FF2961356DFFEBA /* A01 = +1.161639537196534011088e+00 */
|
||||
.quad 0xBFD775EBB17198C7 /* A02 = -3.665723069046972759644e-01 */
|
||||
.quad 0xBF833B1A926CD462 /* A03 = -9.390075295963199591975e-03 */
|
||||
.quad 0xBF9F396A6A461B91 /* A00 = -3.049246095317987084727e-02 */
|
||||
.quad 0x3FF2EB53BAEF534B /* A01 = +1.182452898229899629357e+00 */
|
||||
.quad 0xBFD97DABF8AD8BBD /* A02 = -3.982953957076310058660e-01 */
|
||||
.quad 0x3F7B8F6A3E0F8837 /* A03 = +6.728568086119371925713e-03 */
|
||||
.quad 0xBFA21878590F8BAA /* A00 = -3.534294211546946951064e-02 */
|
||||
.quad 0x3FF34209790236E1 /* A01 = +1.203622315111197105253e+00 */
|
||||
.quad 0xBFDB764C0E71BECB /* A02 = -4.290952817018306997277e-01 */
|
||||
.quad 0x3F962FE0C03F84C0 /* A03 = +2.166701482190513949888e-02 */
|
||||
.quad 0xBFA4B36B9AD27ECC /* A00 = -4.043136849327097492868e-02 */
|
||||
.quad 0x3FF3990C5B12FC16 /* A01 = +1.224865298994477935679e+00 */
|
||||
.quad 0xBFDD5AABB0D01390 /* A02 = -4.586590983092770912322e-01 */
|
||||
.quad 0x3FA21DAF5CA162DB /* A03 = +3.538272863142363083844e-02 */
|
||||
.quad 0xBFA7645E4D7BF28B /* A00 = -4.568762489177399105378e-02 */
|
||||
.quad 0x3FF3EF2FD51C0D9F /* A01 = +1.245895225962932562069e+00 */
|
||||
.quad 0xBFDF26377E1B686E /* A02 = -4.867075664057044503963e-01 */
|
||||
.quad 0x3FA8803E756EE812 /* A03 = +4.785342391501513914509e-02 */
|
||||
.quad 0xBFAA210925C64413 /* A00 = -5.103329263796054643398e-02 */
|
||||
.quad 0x3FF44349F897D8E7 /* A01 = +1.266427966181760345066e+00 */
|
||||
.quad 0xBFE06A7B02C6D8E2 /* A02 = -5.129981092675530707226e-01 */
|
||||
.quad 0x3FAE3F194734F5D0 /* A03 = +5.907515520309980505687e-02 */
|
||||
.quad 0xBFACDE48F8A19BBB /* A00 = -5.638340029764018351832e-02 */
|
||||
.quad 0x3FF49439D5466582 /* A01 = +1.286187966447272845727e+00 */
|
||||
.quad 0xBFE131C7C1063DDC /* A02 = -5.373266954429101183166e-01 */
|
||||
.quad 0x3FB1ADEEC36AD805 /* A03 = +6.906025191241844940482e-02 */
|
||||
.quad 0xBFAF905D8F585680 /* A00 = -6.164829611604449866036e-02 */
|
||||
.quad 0x3FF4E0ED1FD27F99 /* A01 = +1.304913639360142818546e+00 */
|
||||
.quad 0xBFE1E7A859DC1D3D /* A02 = -5.595285182070380836095e-01 */
|
||||
.quad 0x3FB3ED018E4642A1 /* A03 = +7.783517573831001679086e-02 */
|
||||
.quad 0xBFB11595104160BA /* A00 = -6.673556944713512906198e-02 */
|
||||
.quad 0x3FF528650340490B /* A01 = +1.322361958217302513319e+00 */
|
||||
.quad 0xBFE28B14B40BC974 /* A02 = -5.794776455425521000109e-01 */
|
||||
.quad 0x3FB5DF49F5BAF6D7 /* A03 = +8.543836831355676453281e-02 */
|
||||
.quad 0xBFB2513A97344BA4 /* A00 = -7.155195418844911836587e-02 */
|
||||
.quad 0x3FF569BA0DB5EE14 /* A01 = +1.338312200124055273420e+00 */
|
||||
.quad 0xBFE31B53A8B67B20 /* A02 = -5.970857901737396389308e-01 */
|
||||
.quad 0x3FB787F297BB0544 /* A03 = +9.191814617499455275507e-02 */
|
||||
.quad 0xBFB37512E848FAFA /* A00 = -7.600515528700305112331e-02 */
|
||||
.quad 0x3FF5A41F33B403C8 /* A01 = +1.352568819013173495591e+00 */
|
||||
.quad 0xBFE397F6EA9A58A5 /* A02 = -6.123003561103997904880e-01 */
|
||||
.quad 0x3FB8EAA9FF25CA06 /* A03 = +9.733068923177520814782e-02 */
|
||||
.quad 0xBFB47B3E603AFC5D /* A00 = -8.000554894805263217439e-02 */
|
||||
.quad 0x3FF5D6E3EDE40487 /* A01 = +1.364963464031718975988e+00 */
|
||||
.quad 0xBFE400D5BCA6D631 /* A02 = -6.251019177058819709103e-01 */
|
||||
.quad 0x3FBA0B830ED567FE /* A03 = +1.017381583418739132707e-01 */
|
||||
.quad 0xBFB5BBFE8AC90496 /* A00 = -8.489981544791400103200e-02 */
|
||||
.quad 0x3FF612BA70107E95 /* A01 = +1.379572332145390989311e+00 */
|
||||
.quad 0xBFE477EAF1FA7693 /* A02 = -6.396383978023599814478e-01 */
|
||||
.quad 0x3FBB4784B7C08A95 /* A03 = +1.065600346196709652391e-01 */
|
||||
.quad 0xBFB6D5D940743939 /* A00 = -8.920057128509463473254e-02 */
|
||||
.quad 0x3FF644A8748F70CE /* A01 = +1.391762214006166953340e+00 */
|
||||
.quad 0xBFE4D646AB07EA37 /* A02 = -6.511567440459832267763e-01 */
|
||||
.quad 0x3FBC354F4E1D5292 /* A03 = +1.101884427747086558913e-01 */
|
||||
.quad 0xBFB7223D19E4F3D1 /* A00 = -9.036619074045339206069e-02 */
|
||||
.quad 0x3FF6518FEB42B7FA /* A01 = +1.394912642466350494175e+00 */
|
||||
.quad 0xBFE4ED86CB87498C /* A02 = -6.539949393430091184598e-01 */
|
||||
.quad 0x3FBC6D29F28CCA9B /* A03 = +1.110407082713131127205e-01 */
|
||||
.quad 0xBFB6878652FF6312 /* A00 = -8.800544287022329936754e-02 */
|
||||
.quad 0x3FF63948C302D040 /* A01 = +1.388985406648330922508e+00 */
|
||||
.quad 0xBFE4C4E2E7904E17 /* A02 = -6.490339777687407218920e-01 */
|
||||
.quad 0x3FBC127356CA1ABE /* A03 = +1.096565329445224612481e-01 */
|
||||
.quad 0xBFB4F5D18B0C91D6 /* A00 = -8.187589306596207427980e-02 */
|
||||
.quad 0x3FF5FD27EB7DD0B8 /* A01 = +1.374305648697413673176e+00 */
|
||||
.quad 0xBFE464E01A2B2FC6 /* A02 = -6.373138915164353601739e-01 */
|
||||
.quad 0x3FBB460547674A30 /* A03 = +1.065371798825160976065e-01 */
|
||||
.quad 0xBFB26642FA16A685 /* A00 = -7.187288861919156890412e-02 */
|
||||
.quad 0x3FF59F9BEDE1C95A /* A01 = +1.351467065073470141812e+00 */
|
||||
.quad 0xBFE3D67920C8FBEA /* A02 = -6.199308052381387046381e-01 */
|
||||
.quad 0x3FBA24F6A8D3CBC1 /* A03 = +1.021265184570401413078e-01 */
|
||||
.quad 0xBFADB5294794F097 /* A00 = -5.802277563859197656582e-02 */
|
||||
.quad 0x3FF523EA7B9CF453 /* A01 = +1.321268542159732772845e+00 */
|
||||
.quad 0xBFE322A8B55E35DB /* A02 = -5.979808370918208160205e-01 */
|
||||
.quad 0x3FB8C8673B1B3E37 /* A03 = +9.680791085269722928697e-02 */
|
||||
.quad 0xBFA4B7D661965C6A /* A00 = -4.046506825687219699450e-02 */
|
||||
.quad 0x3FF48DE3E2CE3122 /* A01 = +1.284641157110919085227e+00 */
|
||||
.quad 0xBFE251FED1A7F445 /* A02 = -5.725092024655472622285e-01 */
|
||||
.quad 0x3FB745699FCABDB9 /* A03 = +9.090290213747821701507e-02 */
|
||||
.quad 0xBF93E60456E4EE1D /* A00 = -1.943213253365004902773e-02 */
|
||||
.quad 0x3FF3E1A14E628A59 /* A01 = +1.242585474196536532432e+00 */
|
||||
.quad 0xBFE16C5AB660E876 /* A02 = -5.444768488007543094653e-01 */
|
||||
.quad 0x3FB5AD33AA8C188F /* A03 = +8.467410005332197397987e-02 */
|
||||
.quad 0x3F738C17C47C7961 /* A00 = +4.772274820224659853951e-03 */
|
||||
.quad 0x3FF3234DDE3BD146 /* A01 = +1.196119182682268355933e+00 */
|
||||
.quad 0xBFE078C0D77A9D3B /* A02 = -5.147403915952176722826e-01 */
|
||||
.quad 0x3FB40D74B3E276B8 /* A03 = +7.833032027925923568290e-02 */
|
||||
.quad 0x3FA0474BECC689C7 /* A00 = +3.179394975019849550746e-02 */
|
||||
.quad 0x3FF256FB4FA7D18A /* A01 = +1.146235762743432307076e+00 */
|
||||
.quad 0xBFDEFA8E3FB285E2 /* A02 = -4.840427038235174395098e-01 */
|
||||
.quad 0x3FB270C007493D59 /* A03 = +7.203293016322244446403e-02 */
|
||||
.quad 0x3FAF5BD51E479BDC /* A00 = +6.124750132203590768931e-02 */
|
||||
.quad 0x3FF18081D0B53BC5 /* A01 = +1.093873801484492647162e+00 */
|
||||
.quad 0xBFDCFE2439BD0C03 /* A02 = -4.530115665294831006626e-01 */
|
||||
.quad 0x3FB0DEFE5A45AFDD /* A03 = +6.590261176978580437424e-02 */
|
||||
.quad 0x3FB7BD5D2806EA26 /* A00 = +9.273321368429118805032e-02 */
|
||||
.quad 0x3FF0A369E35B4440 /* A01 = +1.039895904647224256223e+00 */
|
||||
.quad 0xBFDB04BC5C9951E7 /* A02 = -4.221640495573226181669e-01 */
|
||||
.quad 0x3FAEBBBAA9D6DEEF /* A03 = +6.002600978120919278380e-02 */
|
||||
.quad 0x3FC01BE411098DBC /* A00 = +1.258511622610124502941e-01 */
|
||||
.quad 0x3FEF85BDABC031C1 /* A01 = +9.850757936961188621083e-01 */
|
||||
.quad 0xBFD91521375097C2 /* A02 = -3.919146576102968682065e-01 */
|
||||
.quad 0x3FABE26F0086D982 /* A03 = +5.446192628317005068883e-02 */
|
||||
.quad 0x3FC481D7FF5776B9 /* A00 = +1.602125164781023347604e-01 */
|
||||
.quad 0x3FEDC3506C1E7218 /* A01 = +9.300920592973538347792e-01 */
|
||||
.quad 0xBFD7349A88DA7D4F /* A02 = -3.625856720409119104964e-01 */
|
||||
.quad 0x3FA936E2DFF8E2AE /* A03 = +4.924687370334389358018e-02 */
|
||||
.quad 0x3FC90471F96FA27A /* A00 = +1.954481571149420671141e-01 */
|
||||
.quad 0x3FEC0451601987A2 /* A01 = +8.755270840595026360376e-01 */
|
||||
.quad 0xBFD5671CD4B898DC /* A02 = -3.344184949259110251063e-01 */
|
||||
.quad 0x3FA6BB9594603B67 /* A03 = +4.439990459660841243261e-02 */
|
||||
.quad 0x3FCFD8ADB9ED944C /* A00 = +2.488000066615846384011e-01 */
|
||||
.quad 0x3FE978C073F6809A /* A01 = +7.959902062321078108909e-01 */
|
||||
.quad 0xBFD2DF7E00BCD5A9 /* A02 = -2.948908812716931060471e-01 */
|
||||
.quad 0x3FA3614033D490B2 /* A03 = +3.785133965200894456959e-02 */
|
||||
.quad 0x3FD4846A12AFE5A0 /* A00 = +3.205819303981005674586e-01 */
|
||||
.quad 0x3FE63A1147D40472 /* A01 = +6.945883181471244061100e-01 */
|
||||
.quad 0xBFCFA2268AD34450 /* A02 = -2.471359422548027318101e-01 */
|
||||
.quad 0x3F9F150201D9FFE0 /* A03 = +3.035357605267552383310e-02 */
|
||||
.quad 0x3FD9018641F82BEB /* A00 = +3.907180446846598154131e-01 */
|
||||
.quad 0x3FE33B7C220FFBDC /* A01 = +6.010113396913498995389e-01 */
|
||||
.quad 0xBFCA4E4187E29C86 /* A02 = -2.055131829740483584423e-01 */
|
||||
.quad 0x3F98C30CED19F8F4 /* A03 = +2.418155858185229434287e-02 */
|
||||
.quad 0x3FDD4B8255BEB078 /* A00 = +4.577337109901757905561e-01 */
|
||||
.quad 0x3FE0858B19D3A49B /* A01 = +5.163016800335243905451e-01 */
|
||||
.quad 0xBFC5BC929EACE564 /* A02 = -1.698172831327539045176e-01 */
|
||||
.quad 0x3F93A083CE57DE2B /* A03 = +1.916700312537337677621e-02 */
|
||||
.quad 0x3FE0A8E5E039295C /* A00 = +5.206174258576470315063e-01 */
|
||||
.quad 0x3FDC35E1234583FE /* A01 = +4.407885403107342225937e-01 */
|
||||
.quad 0xBFC1DE034E31AEB9 /* A02 = -1.395877963835710222629e-01 */
|
||||
.quad 0x3F8EFDEBB3471BDC /* A03 = +1.513275280821162888101e-02 */
|
||||
.quad 0x3FE2851B603CB2A5 /* A00 = +5.787484054213406503564e-01 */
|
||||
.quad 0x3FD7F4A44ABBB286 /* A01 = +3.743067483726821853551e-01 */
|
||||
.quad 0xBFBD3EEB67087DE7 /* A02 = -1.142413260026767657385e-01 */
|
||||
.quad 0x3F8864F38329E8BD /* A03 = +1.191129917173260922836e-02 */
|
||||
.quad 0x3FE437DBE3C34AC1 /* A00 = +6.318187187665317283702e-01 */
|
||||
.quad 0x3FD43F6F789441B5 /* A01 = +3.163717916040938438194e-01 */
|
||||
.quad 0xBFB7D92E7901B9A4 /* A02 = -9.315767721429907277653e-02 */
|
||||
.quad 0x3F8327ED342308E1 /* A03 = +9.353497651663324544136e-03 */
|
||||
.quad 0x3FE5C0977766D55C /* A00 = +6.797597248138731451661e-01 */
|
||||
.quad 0x3FD10B42A764D8F9 /* A01 = +2.663122782427219115142e-01 */
|
||||
.quad 0xBFB3633351D3D70F /* A02 = -7.573242900602060456716e-02 */
|
||||
.quad 0x3F7E079E30FF899C /* A03 = +7.331483779099558922843e-03 */
|
||||
.quad 0x3FE7202CE08A88C4 /* A00 = +7.226776490754436288455e-01 */
|
||||
.quad 0x3FCC973EB5662B01 /* A01 = +2.233656297433626314319e-01 */
|
||||
.quad 0xBFAF70A455F9920B /* A02 = -6.140626477716545211782e-02 */
|
||||
.quad 0x3F77812411CE99B6 /* A03 = +5.738392731393584730859e-03 */
|
||||
.quad 0x3FE85879424095B1 /* A00 = +7.608000082006382003286e-01 */
|
||||
.quad 0x3FC7E73BD1674D84 /* A01 = +1.867441914060742336190e-01 */
|
||||
.quad 0xBFA96F84E4BF333B /* A02 = -4.967894832916504993525e-02 */
|
||||
.quad 0x3F72606DDCA6E117 /* A03 = +4.486493251924870105662e-03 */
|
||||
.quad 0x3FE96BFE4957F4DD /* A00 = +7.944327766887472330737e-01 */
|
||||
.quad 0x3FC3ED4780D25478 /* A01 = +1.556786898624158421711e-01 */
|
||||
.quad 0xBFA489C5F9A56B58 /* A02 = -4.011362717093075458408e-02 */
|
||||
.quad 0x3F6CB5DC17E9AD2A /* A03 = +3.504686231556104931972e-03 */
|
||||
.quad 0x3FEA5D9CB2F41234 /* A00 = +8.239272589858672724006e-01 */
|
||||
.quad 0x3FC091A758374DCF /* A01 = +1.294449978582705440555e-01 */
|
||||
.quad 0xBFA08E436D4B5CE0 /* A02 = -3.233538350257858517978e-02 */
|
||||
.quad 0x3F666997AD53E6B7 /* A03 = +2.735897297154145629133e-03 */
|
||||
.quad 0x3FEB3060342CB850 /* A00 = +8.496552485501158713532e-01 */
|
||||
.quad 0x3FBB7D30BBC7DC1B /* A01 = +1.073790033768634993860e-01 */
|
||||
.quad 0xBF9AA6BA3443D9E3 /* A02 = -2.602663940430173170060e-02 */
|
||||
.quad 0x3F617CA764B7850B /* A03 = +2.134634914668814050648e-03 */
|
||||
.quad 0x3FEBE759A6A0C7B8 /* A00 = +8.719909910635044170135e-01 */
|
||||
.quad 0x3FB6C10DE6A703FF /* A01 = +8.888327485239243264115e-02 */
|
||||
.quad 0xBF956C566D8BE1F6 /* A02 = -2.092108768099084498138e-02 */
|
||||
.quad 0x3F5B46D1A4A59CF8 /* A03 = +1.664833764687232917079e-03 */
|
||||
.quad 0x3FEC858494887A04 /* A00 = +8.912985707318630268503e-01 */
|
||||
.quad 0x3FB2CC31F543394D /* A01 = +7.342827070099140762682e-02 */
|
||||
.quad 0xBF9133477FF69137 /* A02 = -1.679717749142747504343e-02 */
|
||||
.quad 0x3F5544482FBB4DA5 /* A03 = +1.298017973501022466823e-03 */
|
||||
.quad 0x3FED0DB59D0E32E9 /* A00 = +9.079235141267335551518e-01 */
|
||||
.quad 0x3FAF006BAFFC6EF4 /* A01 = +6.055008433597022787787e-02 */
|
||||
.quad 0xBF8B97146FA2B97A /* A02 = -1.347175565419144252499e-02 */
|
||||
.quad 0x3F5093B01F4CDC69 /* A03 = +1.011774057770665211434e-03 */
|
||||
.quad 0x3FEDB487C3EC457C /* A00 = +9.282873942012623835751e-01 */
|
||||
.quad 0x3FA7390C09D0BD1D /* A01 = +4.535710925881118044112e-02 */
|
||||
.quad 0xBF83D9F7C3181106 /* A02 = -9.693084374710735778846e-03 */
|
||||
.quad 0x3F46E34A0A3C0E64 /* A03 = +6.984817050299072134500e-04 */
|
||||
.quad 0x3FEE5FFCB4E6EB00 /* A00 = +9.492171796076434020506e-01 */
|
||||
.quad 0x3F9F4913ED00AADF /* A01 = +3.055220731782070861526e-02 */
|
||||
.quad 0xBF79670BD0E59B5C /* A02 = -6.201788097633133961528e-03 */
|
||||
.quad 0x3F3BC998EBCAF96D /* A03 = +4.240034429975534616304e-04 */
|
||||
.quad 0x3FEEDBA41E9542FE /* A00 = +9.643116566968215064293e-01 */
|
||||
.quad 0x3F94F5DD18D9C24D /* A01 = +2.046914543319848858727e-02 */
|
||||
.quad 0xBF7034896AA122B9 /* A02 = -3.956352980886528904192e-03 */
|
||||
.quad 0x3F30DCCB47810B39 /* A03 = +2.573009765038273091199e-04 */
|
||||
.quad 0x3FEF33F2882520ED /* A00 = +9.750912341196716903724e-01 */
|
||||
.quad 0x3F8BF37F2CF553FF /* A01 = +1.364802699996836392315e-02 */
|
||||
.quad 0xBF649F6F05A69619 /* A02 = -2.517430152880317534986e-03 */
|
||||
.quad 0x3F247623C950AAC9 /* A03 = +1.561087307505231250044e-04 */
|
||||
.quad 0x3FEF727757751741 /* A00 = +9.827229221489021115943e-01 */
|
||||
.quad 0x3F828E67912C4400 /* A01 = +9.060677640748693306705e-03 */
|
||||
.quad 0xBF5A2F51A806CC2C /* A02 = -1.598195784123355826789e-03 */
|
||||
.quad 0x3F18D35D7687E613 /* A03 = +9.470231965016282719549e-05 */
|
||||
.quad 0x3FEF9E6325C5942A /* A00 = +9.880843866091073568469e-01 */
|
||||
.quad 0x3F788AB117618F76 /* A01 = +5.991641772286606867914e-03 */
|
||||
.quad 0xBF5096EAB0B1EA89 /* A02 = -1.012543859160305046233e-03 */
|
||||
.quad 0x3F0E1E50EC4435AB /* A03 = +5.744633156910412119652e-05 */
|
||||
.quad 0x3FEFBD0784049369 /* A00 = +9.918248728250605994461e-01 */
|
||||
.quad 0x3F702BBD8294035F /* A01 = +3.947963975634432264028e-03 */
|
||||
.quad 0xBF44FB55E0F00593 /* A02 = -6.403130845457509273330e-04 */
|
||||
.quad 0x3F0244DCD723230A /* A03 = +3.484534217219031730379e-05 */
|
||||
.quad 0x3FEFD245E2366A43 /* A00 = +9.944180887426415926811e-01 */
|
||||
.quad 0x3F653D82EC088433 /* A01 = +2.592807490387838333795e-03 */
|
||||
.quad 0xBF3A7DF75E013CB8 /* A02 = -4.042366908878036561859e-04 */
|
||||
.quad 0x3EF6298E69F991CD /* A03 = +2.113564425911141559972e-05 */
|
||||
.quad 0x3FEFE0EAA508BC69 /* A00 = +9.962056372950317539861e-01 */
|
||||
.quad 0x3F5BD0771AF3FDDA /* A01 = +1.697651208644282514598e-03 */
|
||||
.quad 0xBF30B2E1254DE571 /* A02 = -2.548026725928887099328e-04 */
|
||||
.quad 0x3EEAE28B70EC0256 /* A03 = +1.281973848454955042307e-05 */
|
||||
.quad 0x3FEFEAF5303D7F96 /* A00 = +9.974313680831865536192e-01 */
|
||||
.quad 0x3F5229111365657E /* A01 = +1.108423877289460134782e-03 */
|
||||
.quad 0xBF250572D04DFE66 /* A02 = -1.603796628408704519168e-04 */
|
||||
.quad 0x3EE04E89BB57C981 /* A03 = +7.775682983689149966743e-06 */
|
||||
.quad 0x3FEFF1CF52F1CF44 /* A00 = +9.982678051005469122003e-01 */
|
||||
.quad 0x3F47A71316147CEB /* A01 = +7.218211359577819110842e-04 */
|
||||
.quad 0xBF1A6D7604055719 /* A02 = -1.008132248946049582547e-04 */
|
||||
.quad 0x3ED3C8047586A85C /* A03 = +4.716233739913014633626e-06 */
|
||||
.quad 0x3FEFF6770369EF69 /* A00 = +9.988360468555416149528e-01 */
|
||||
.quad 0x3F3EBB261180FBF0 /* A01 = +4.689186039321105101130e-04 */
|
||||
.quad 0xBF1097754FE19D7F /* A02 = -6.329206004950480057066e-05 */
|
||||
.quad 0x3EC7FEFF83BCA0A7 /* A03 = +2.860556404988488738366e-06 */
|
||||
.quad 0x3FEFF99D42371AC4 /* A00 = +9.992204945818561334647e-01 */
|
||||
.quad 0x3F33EB2AEC271F59 /* A01 = +3.039340773764907474054e-04 */
|
||||
.quad 0xBF04CF18E0FC0D79 /* A02 = -3.968996690952969588805e-05 */
|
||||
.quad 0x3EBD1BDBD6019BE9 /* A03 = +1.735021065507727833886e-06 */
|
||||
.quad 0x3FEFFBBCA32B0D91 /* A00 = +9.994795977476532700123e-01 */
|
||||
.quad 0x3F29C41E1615110A /* A01 = +1.965796209707565346710e-04 */
|
||||
.quad 0xBEFA11F93D9DCB5A /* A02 = -2.486248909101414873235e-05 */
|
||||
.quad 0x3EB1A7CA4546F7A7 /* A03 = +1.052345642723709228769e-06 */
|
||||
.quad 0x3FEFFD298B8E8DE2 /* A00 = +9.996535993308806045121e-01 */
|
||||
.quad 0x3F20A1C42D523C5B /* A01 = +1.268913244172078754520e-04 */
|
||||
.quad 0xBEF0507A364AFAE4 /* A02 = -1.555859070622834605755e-05 */
|
||||
.quad 0x3EA56ACA17E7CDF4 /* A03 = +6.382806956848098872313e-07 */
|
||||
.quad 0x3FEFFE1DC82BA5A3 /* A00 = +9.997700604991915929176e-01 */
|
||||
.quad 0x3F156E73B90F1769 /* A01 = +8.175450626798714452801e-05 */
|
||||
.quad 0xBEE4663579D0A09F /* A02 = -9.727122057226747625365e-06 */
|
||||
.quad 0x3E99FAF6FEC5D4C1 /* A03 = +3.871371052824002996020e-07 */
|
||||
.quad 0x3FEFFEF8D0BB5E81 /* A00 = +9.998745037837154514548e-01 */
|
||||
.quad 0x3F06686DA18D39C3 /* A01 = +4.273972098777251447726e-05 */
|
||||
.quad 0xBED46BC298073E90 /* A02 = -4.868731025855742842491e-06 */
|
||||
.quad 0x3E88E42286B9D0FD /* A03 = +1.854535328530838170114e-07 */
|
||||
.quad 0x3FEFFF8DBC68DDC7 /* A00 = +9.999455146670975791423e-01 */
|
||||
.quad 0x3EF26B2953A80AF0 /* A01 = +1.756534514108903368909e-05 */
|
||||
.quad 0xBEBFC4472D580F83 /* A02 = -1.893443529411295465239e-06 */
|
||||
.quad 0x3E72505B4553D19F /* A03 = +6.822456673547912277047e-08 */
|
||||
.quad 0x3FEFFFCED1276609 /* A00 = +9.999765477215883935358e-01 */
|
||||
.quad 0x3EDE1A94C7CC58F5 /* A01 = +7.177313020153979672606e-06 */
|
||||
.quad 0xBEA8A2C988744E57 /* A02 = -7.342066660497443762363e-07 */
|
||||
.quad 0x3E5AF30036BBBAF4 /* A03 = +2.509841882843541084885e-08 */
|
||||
.quad 0x3FEFFFEAFE70FCFC /* A00 = +9.999899835164849370983e-01 */
|
||||
.quad 0x3EC879175E3549F5 /* A01 = +2.917410471128503564412e-06 */
|
||||
.quad 0xBE930E36677D1813 /* A02 = -2.839493400307523115929e-07 */
|
||||
.quad 0x3E43D4005B42D48F /* A03 = +9.233192745401904898013e-09 */
|
||||
.quad 0x3ff0000000000000
|
||||
.quad 0x0000000000000000
|
||||
.quad 0x0000000000000000
|
||||
.quad 0x0000000000000000
|
||||
.align 32
|
||||
.long 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000 /* _sSignMask */
|
||||
.align 32
|
||||
.long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff /* _sAbsMask */
|
||||
.align 32
|
||||
.long 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000 /* _iExpMantMask */
|
||||
.align 32
|
||||
.long 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000 /* _iExpMask */
|
||||
.align 32
|
||||
.long 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000 /* _iMinIdxOfsMask */
|
||||
.align 32
|
||||
.long 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000, 0x04280000 /* _iMaxIdxMask */
|
||||
.align 32
|
||||
.type __svml_stanh_data_internal,@object
|
||||
.size __svml_stanh_data_internal,.-__svml_stanh_data_internal
|
29
sysdeps/x86_64/fpu/svml_d_tanh2_core.S
Normal file
29
sysdeps/x86_64/fpu/svml_d_tanh2_core.S
Normal file
@ -0,0 +1,29 @@
|
||||
/* Function tanh vectorized with SSE2, wrapper version.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "svml_d_wrapper_impl.h"
|
||||
|
||||
.text
|
||||
/* Define _ZGVbN2v_tanh.  Its body is the WRAPPER_IMPL_SSE2 macro
   (from svml_d_wrapper_impl.h) instantiated with the scalar tanh.
   NOTE(review): presumably the macro calls tanh once per vector
   lane -- confirm against the macro definition.  */
ENTRY (_ZGVbN2v_tanh)
|
||||
WRAPPER_IMPL_SSE2 tanh
|
||||
END (_ZGVbN2v_tanh)
|
||||
|
||||
/* The hidden definition is emitted here only when USE_MULTIARCH is
   not defined.  NOTE(review): presumably multiarch builds provide it
   from a different file -- confirm.  */
#ifndef USE_MULTIARCH
|
||||
libmvec_hidden_def (_ZGVbN2v_tanh)
|
||||
#endif
|
29
sysdeps/x86_64/fpu/svml_d_tanh4_core.S
Normal file
29
sysdeps/x86_64/fpu/svml_d_tanh4_core.S
Normal file
@ -0,0 +1,29 @@
|
||||
/* Function tanh vectorized with AVX2, wrapper version.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "svml_d_wrapper_impl.h"
|
||||
|
||||
.text
|
||||
/* Define _ZGVdN4v_tanh.  Its body is the WRAPPER_IMPL_AVX macro
   (from svml_d_wrapper_impl.h) instantiated with _ZGVbN2v_tanh, so
   this variant is built on top of the 2-lane routine.
   NOTE(review): presumably the macro splits the input into halves
   and calls the narrower routine on each -- confirm against the
   macro definition.  */
ENTRY (_ZGVdN4v_tanh)
|
||||
WRAPPER_IMPL_AVX _ZGVbN2v_tanh
|
||||
END (_ZGVdN4v_tanh)
|
||||
|
||||
/* The hidden definition is emitted here only when USE_MULTIARCH is
   not defined.  NOTE(review): presumably multiarch builds provide it
   from a different file -- confirm.  */
#ifndef USE_MULTIARCH
|
||||
libmvec_hidden_def (_ZGVdN4v_tanh)
|
||||
#endif
|
25
sysdeps/x86_64/fpu/svml_d_tanh4_core_avx.S
Normal file
25
sysdeps/x86_64/fpu/svml_d_tanh4_core_avx.S
Normal file
@ -0,0 +1,25 @@
|
||||
/* Function tanh vectorized in AVX ISA as wrapper to SSE4 ISA version.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "svml_d_wrapper_impl.h"
|
||||
|
||||
.text
|
||||
/* Define _ZGVcN4v_tanh.  Its body is the WRAPPER_IMPL_AVX macro
   (from svml_d_wrapper_impl.h) instantiated with _ZGVbN2v_tanh, so
   this variant is built on top of the 2-lane routine.  No
   libmvec_hidden_def is emitted for this symbol in this file.
   NOTE(review): presumably the macro splits the input into halves
   and calls the narrower routine on each -- confirm against the
   macro definition.  */
ENTRY (_ZGVcN4v_tanh)
|
||||
WRAPPER_IMPL_AVX _ZGVbN2v_tanh
|
||||
END (_ZGVcN4v_tanh)
|
25
sysdeps/x86_64/fpu/svml_d_tanh8_core.S
Normal file
25
sysdeps/x86_64/fpu/svml_d_tanh8_core.S
Normal file
@ -0,0 +1,25 @@
|
||||
/* Function tanh vectorized with AVX-512, wrapper to AVX2.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "svml_d_wrapper_impl.h"
|
||||
|
||||
.text
|
||||
/* Define _ZGVeN8v_tanh.  Its body is the WRAPPER_IMPL_AVX512 macro
   (from svml_d_wrapper_impl.h) instantiated with _ZGVdN4v_tanh, so
   this variant is built on top of the 4-lane routine.  No
   libmvec_hidden_def is emitted for this symbol in this file.
   NOTE(review): presumably the macro splits the input into halves
   and calls the narrower routine on each -- confirm against the
   macro definition.  */
ENTRY (_ZGVeN8v_tanh)
|
||||
WRAPPER_IMPL_AVX512 _ZGVdN4v_tanh
|
||||
END (_ZGVeN8v_tanh)
|
25
sysdeps/x86_64/fpu/svml_s_tanhf16_core.S
Normal file
25
sysdeps/x86_64/fpu/svml_s_tanhf16_core.S
Normal file
@ -0,0 +1,25 @@
|
||||
/* Function tanhf vectorized with AVX-512. Wrapper to AVX2 version.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "svml_s_wrapper_impl.h"
|
||||
|
||||
.text
|
||||
/* Define _ZGVeN16v_tanhf.  Its body is the WRAPPER_IMPL_AVX512 macro
   (from svml_s_wrapper_impl.h) instantiated with _ZGVdN8v_tanhf, so
   this variant is built on top of the 8-lane routine.  No
   libmvec_hidden_def is emitted for this symbol in this file.
   NOTE(review): presumably the macro splits the input into halves
   and calls the narrower routine on each -- confirm against the
   macro definition.  */
ENTRY (_ZGVeN16v_tanhf)
|
||||
WRAPPER_IMPL_AVX512 _ZGVdN8v_tanhf
|
||||
END (_ZGVeN16v_tanhf)
|
29
sysdeps/x86_64/fpu/svml_s_tanhf4_core.S
Normal file
29
sysdeps/x86_64/fpu/svml_s_tanhf4_core.S
Normal file
@ -0,0 +1,29 @@
|
||||
/* Function tanhf vectorized with SSE2, wrapper version.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "svml_s_wrapper_impl.h"
|
||||
|
||||
.text
|
||||
/* Define _ZGVbN4v_tanhf.  Its body is the WRAPPER_IMPL_SSE2 macro
   (from svml_s_wrapper_impl.h) instantiated with the scalar tanhf.
   NOTE(review): presumably the macro calls tanhf once per vector
   lane -- confirm against the macro definition.  */
ENTRY (_ZGVbN4v_tanhf)
|
||||
WRAPPER_IMPL_SSE2 tanhf
|
||||
END (_ZGVbN4v_tanhf)
|
||||
|
||||
/* The hidden definition is emitted here only when USE_MULTIARCH is
   not defined.  NOTE(review): presumably multiarch builds provide it
   from a different file -- confirm.  */
#ifndef USE_MULTIARCH
|
||||
libmvec_hidden_def (_ZGVbN4v_tanhf)
|
||||
#endif
|
29
sysdeps/x86_64/fpu/svml_s_tanhf8_core.S
Normal file
29
sysdeps/x86_64/fpu/svml_s_tanhf8_core.S
Normal file
@ -0,0 +1,29 @@
|
||||
/* Function tanhf vectorized with AVX2, wrapper version.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "svml_s_wrapper_impl.h"
|
||||
|
||||
.text
|
||||
/* Define _ZGVdN8v_tanhf.  Its body is the WRAPPER_IMPL_AVX macro
   (from svml_s_wrapper_impl.h) instantiated with _ZGVbN4v_tanhf, so
   this variant is built on top of the 4-lane routine.
   NOTE(review): presumably the macro splits the input into halves
   and calls the narrower routine on each -- confirm against the
   macro definition.  */
ENTRY (_ZGVdN8v_tanhf)
|
||||
WRAPPER_IMPL_AVX _ZGVbN4v_tanhf
|
||||
END (_ZGVdN8v_tanhf)
|
||||
|
||||
/* The hidden definition is emitted here only when USE_MULTIARCH is
   not defined.  NOTE(review): presumably multiarch builds provide it
   from a different file -- confirm.  */
#ifndef USE_MULTIARCH
|
||||
libmvec_hidden_def (_ZGVdN8v_tanhf)
|
||||
#endif
|
25
sysdeps/x86_64/fpu/svml_s_tanhf8_core_avx.S
Normal file
25
sysdeps/x86_64/fpu/svml_s_tanhf8_core_avx.S
Normal file
@ -0,0 +1,25 @@
|
||||
/* Function tanhf vectorized in AVX ISA as wrapper to SSE4 ISA version.
|
||||
Copyright (C) 2021 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "svml_s_wrapper_impl.h"
|
||||
|
||||
.text
|
||||
/* Define _ZGVcN8v_tanhf.  Its body is the WRAPPER_IMPL_AVX macro
   (from svml_s_wrapper_impl.h) instantiated with _ZGVbN4v_tanhf, so
   this variant is built on top of the 4-lane routine.  No
   libmvec_hidden_def is emitted for this symbol in this file.
   NOTE(review): presumably the macro splits the input into halves
   and calls the narrower routine on each -- confirm against the
   macro definition.  */
ENTRY (_ZGVcN8v_tanhf)
|
||||
WRAPPER_IMPL_AVX _ZGVbN4v_tanhf
|
||||
END (_ZGVcN8v_tanhf)
|
1
sysdeps/x86_64/fpu/test-double-libmvec-tanh-avx.c
Normal file
1
sysdeps/x86_64/fpu/test-double-libmvec-tanh-avx.c
Normal file
@ -0,0 +1 @@
|
||||
#include "test-double-libmvec-tanh.c"
|
1
sysdeps/x86_64/fpu/test-double-libmvec-tanh-avx2.c
Normal file
1
sysdeps/x86_64/fpu/test-double-libmvec-tanh-avx2.c
Normal file
@ -0,0 +1 @@
|
||||
#include "test-double-libmvec-tanh.c"
|
1
sysdeps/x86_64/fpu/test-double-libmvec-tanh-avx512f.c
Normal file
1
sysdeps/x86_64/fpu/test-double-libmvec-tanh-avx512f.c
Normal file
@ -0,0 +1 @@
|
||||
#include "test-double-libmvec-tanh.c"
|
3
sysdeps/x86_64/fpu/test-double-libmvec-tanh.c
Normal file
3
sysdeps/x86_64/fpu/test-double-libmvec-tanh.c
Normal file
@ -0,0 +1,3 @@
|
||||
/* Select the element type (double) and the function under test (tanh)
   before pulling in test-vector-abi-arg1.h.
   NOTE(review): presumably the header expands these two macros into
   the one-argument vector ABI test -- confirm against the header.  */
#define LIBMVEC_TYPE double
|
||||
#define LIBMVEC_FUNC tanh
|
||||
#include "test-vector-abi-arg1.h"
|
@ -44,6 +44,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVbN2v_log1p)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (atanh), _ZGVbN2v_atanh)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (acosh), _ZGVbN2v_acosh)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (erf), _ZGVbN2v_erf)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (tanh), _ZGVbN2v_tanh)
|
||||
|
||||
#define VEC_INT_TYPE __m128i
|
||||
|
||||
|
@ -47,6 +47,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVdN4v_log1p)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (atanh), _ZGVdN4v_atanh)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (acosh), _ZGVdN4v_acosh)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (erf), _ZGVdN4v_erf)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (tanh), _ZGVdN4v_tanh)
|
||||
|
||||
#ifndef __ILP32__
|
||||
# define VEC_INT_TYPE __m256i
|
||||
|
@ -44,6 +44,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVcN4v_log1p)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (atanh), _ZGVcN4v_atanh)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (acosh), _ZGVcN4v_acosh)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (erf), _ZGVcN4v_erf)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (tanh), _ZGVcN4v_tanh)
|
||||
|
||||
#define VEC_INT_TYPE __m128i
|
||||
|
||||
|
@ -44,6 +44,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (log1p), _ZGVeN8v_log1p)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (atanh), _ZGVeN8v_atanh)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (acosh), _ZGVeN8v_acosh)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (erf), _ZGVeN8v_erf)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (tanh), _ZGVeN8v_tanh)
|
||||
|
||||
#ifndef __ILP32__
|
||||
# define VEC_INT_TYPE __m512i
|
||||
|
1
sysdeps/x86_64/fpu/test-float-libmvec-tanhf-avx.c
Normal file
1
sysdeps/x86_64/fpu/test-float-libmvec-tanhf-avx.c
Normal file
@ -0,0 +1 @@
|
||||
#include "test-float-libmvec-tanhf.c"
|
1
sysdeps/x86_64/fpu/test-float-libmvec-tanhf-avx2.c
Normal file
1
sysdeps/x86_64/fpu/test-float-libmvec-tanhf-avx2.c
Normal file
@ -0,0 +1 @@
|
||||
#include "test-float-libmvec-tanhf.c"
|
1
sysdeps/x86_64/fpu/test-float-libmvec-tanhf-avx512f.c
Normal file
1
sysdeps/x86_64/fpu/test-float-libmvec-tanhf-avx512f.c
Normal file
@ -0,0 +1 @@
|
||||
#include "test-float-libmvec-tanhf.c"
|
3
sysdeps/x86_64/fpu/test-float-libmvec-tanhf.c
Normal file
3
sysdeps/x86_64/fpu/test-float-libmvec-tanhf.c
Normal file
@ -0,0 +1,3 @@
|
||||
/* Select the element type (float) and the function under test (tanhf)
   before pulling in test-vector-abi-arg1.h.
   NOTE(review): presumably the header expands these two macros into
   the one-argument vector ABI test -- confirm against the header.  */
#define LIBMVEC_TYPE float
|
||||
#define LIBMVEC_FUNC tanhf
|
||||
#include "test-vector-abi-arg1.h"
|
@ -44,6 +44,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVeN16v_log1pf)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (atanhf), _ZGVeN16v_atanhf)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (acoshf), _ZGVeN16v_acoshf)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (erff), _ZGVeN16v_erff)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (tanhf), _ZGVeN16v_tanhf)
|
||||
|
||||
#define VEC_INT_TYPE __m512i
|
||||
|
||||
|
@ -44,6 +44,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVbN4v_log1pf)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (atanhf), _ZGVbN4v_atanhf)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (acoshf), _ZGVbN4v_acoshf)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (erff), _ZGVbN4v_erff)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (tanhf), _ZGVbN4v_tanhf)
|
||||
|
||||
#define VEC_INT_TYPE __m128i
|
||||
|
||||
|
@ -47,6 +47,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVdN8v_log1pf)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (atanhf), _ZGVdN8v_atanhf)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (acoshf), _ZGVdN8v_acoshf)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (erff), _ZGVdN8v_erff)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (tanhf), _ZGVdN8v_tanhf)
|
||||
|
||||
/* Redefinition of wrapper to be compatible with _ZGVdN8vvv_sincosf. */
|
||||
#undef VECTOR_WRAPPER_fFF
|
||||
|
@ -44,6 +44,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (log1pf), _ZGVcN8v_log1pf)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (atanhf), _ZGVcN8v_atanhf)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (acoshf), _ZGVcN8v_acoshf)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (erff), _ZGVcN8v_erff)
|
||||
VECTOR_WRAPPER (WRAPPER_NAME (tanhf), _ZGVcN8v_tanhf)
|
||||
|
||||
#define VEC_INT_TYPE __m128i
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user