x86-64: Add vector tan/tanf implementation to libmvec

Implement vectorized tan/tanf containing SSE, AVX, AVX2 and AVX512 versions for libmvec as per vector ABI. It also contains accuracy and ABI tests for vector tan/tanf with regenerated ulps. Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
2024-11-21 12:30:06 +00:00 · 2021-12-29 10:19:39 -08:00 · 2021-12-29 10:19:39 -08:00 · c21c7bc24e
commit c21c7bc24e
parent 8881cca8fb
50 changed files with 21913 additions and 1 deletions
--- a/bits/libm-simd-decl-stubs.h
+++ b/bits/libm-simd-decl-stubs.h
@ -318,4 +318,15 @@
 #define __DECL_SIMD_erfcf32x
 #define __DECL_SIMD_erfcf64x
 #define __DECL_SIMD_erfcf128x
+
+#define __DECL_SIMD_tan
+#define __DECL_SIMD_tanf
+#define __DECL_SIMD_tanl
+#define __DECL_SIMD_tanf16
+#define __DECL_SIMD_tanf32
+#define __DECL_SIMD_tanf64
+#define __DECL_SIMD_tanf128
+#define __DECL_SIMD_tanf32x
+#define __DECL_SIMD_tanf64x
+#define __DECL_SIMD_tanf128x
 #endif
--- a/math/bits/mathcalls.h
+++ b/math/bits/mathcalls.h
@ -63,7 +63,7 @@ __MATHCALL_VEC (cos,, (_Mdouble_ __x));
 /* Sine of X.  */
 __MATHCALL_VEC (sin,, (_Mdouble_ __x));
 /* Tangent of X.  */
-__MATHCALL (tan,, (_Mdouble_ __x));
+__MATHCALL_VEC (tan,, (_Mdouble_ __x));

 /* Hyperbolic functions.  */

--- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
@ -63,6 +63,7 @@ GLIBC_2.35 _ZGVbN2v_log10 F
 GLIBC_2.35 _ZGVbN2v_log1p F
 GLIBC_2.35 _ZGVbN2v_log2 F
 GLIBC_2.35 _ZGVbN2v_sinh F
+GLIBC_2.35 _ZGVbN2v_tan F
 GLIBC_2.35 _ZGVbN2v_tanh F
 GLIBC_2.35 _ZGVbN2vv_atan2 F
 GLIBC_2.35 _ZGVbN2vv_hypot F
@ -83,6 +84,7 @@ GLIBC_2.35 _ZGVbN4v_log10f F
 GLIBC_2.35 _ZGVbN4v_log1pf F
 GLIBC_2.35 _ZGVbN4v_log2f F
 GLIBC_2.35 _ZGVbN4v_sinhf F
+GLIBC_2.35 _ZGVbN4v_tanf F
 GLIBC_2.35 _ZGVbN4v_tanhf F
 GLIBC_2.35 _ZGVbN4vv_atan2f F
 GLIBC_2.35 _ZGVbN4vv_hypotf F
@ -103,6 +105,7 @@ GLIBC_2.35 _ZGVcN4v_log10 F
 GLIBC_2.35 _ZGVcN4v_log1p F
 GLIBC_2.35 _ZGVcN4v_log2 F
 GLIBC_2.35 _ZGVcN4v_sinh F
+GLIBC_2.35 _ZGVcN4v_tan F
 GLIBC_2.35 _ZGVcN4v_tanh F
 GLIBC_2.35 _ZGVcN4vv_atan2 F
 GLIBC_2.35 _ZGVcN4vv_hypot F
@ -123,6 +126,7 @@ GLIBC_2.35 _ZGVcN8v_log10f F
 GLIBC_2.35 _ZGVcN8v_log1pf F
 GLIBC_2.35 _ZGVcN8v_log2f F
 GLIBC_2.35 _ZGVcN8v_sinhf F
+GLIBC_2.35 _ZGVcN8v_tanf F
 GLIBC_2.35 _ZGVcN8v_tanhf F
 GLIBC_2.35 _ZGVcN8vv_atan2f F
 GLIBC_2.35 _ZGVcN8vv_hypotf F
@ -143,6 +147,7 @@ GLIBC_2.35 _ZGVdN4v_log10 F
 GLIBC_2.35 _ZGVdN4v_log1p F
 GLIBC_2.35 _ZGVdN4v_log2 F
 GLIBC_2.35 _ZGVdN4v_sinh F
+GLIBC_2.35 _ZGVdN4v_tan F
 GLIBC_2.35 _ZGVdN4v_tanh F
 GLIBC_2.35 _ZGVdN4vv_atan2 F
 GLIBC_2.35 _ZGVdN4vv_hypot F
@ -163,6 +168,7 @@ GLIBC_2.35 _ZGVdN8v_log10f F
 GLIBC_2.35 _ZGVdN8v_log1pf F
 GLIBC_2.35 _ZGVdN8v_log2f F
 GLIBC_2.35 _ZGVdN8v_sinhf F
+GLIBC_2.35 _ZGVdN8v_tanf F
 GLIBC_2.35 _ZGVdN8v_tanhf F
 GLIBC_2.35 _ZGVdN8vv_atan2f F
 GLIBC_2.35 _ZGVdN8vv_hypotf F
@ -183,6 +189,7 @@ GLIBC_2.35 _ZGVeN16v_log10f F
 GLIBC_2.35 _ZGVeN16v_log1pf F
 GLIBC_2.35 _ZGVeN16v_log2f F
 GLIBC_2.35 _ZGVeN16v_sinhf F
+GLIBC_2.35 _ZGVeN16v_tanf F
 GLIBC_2.35 _ZGVeN16v_tanhf F
 GLIBC_2.35 _ZGVeN16vv_atan2f F
 GLIBC_2.35 _ZGVeN16vv_hypotf F
@ -203,6 +210,7 @@ GLIBC_2.35 _ZGVeN8v_log10 F
 GLIBC_2.35 _ZGVeN8v_log1p F
 GLIBC_2.35 _ZGVeN8v_log2 F
 GLIBC_2.35 _ZGVeN8v_sinh F
+GLIBC_2.35 _ZGVeN8v_tan F
 GLIBC_2.35 _ZGVeN8v_tanh F
 GLIBC_2.35 _ZGVeN8vv_atan2 F
 GLIBC_2.35 _ZGVeN8vv_hypot F
--- a/sysdeps/x86/fpu/bits/math-vector.h
+++ b/sysdeps/x86/fpu/bits/math-vector.h
@ -138,6 +138,10 @@
 #  define __DECL_SIMD_erfc __DECL_SIMD_x86_64
 #  undef __DECL_SIMD_erfcf
 #  define __DECL_SIMD_erfcf __DECL_SIMD_x86_64
+#  undef __DECL_SIMD_tan
+#  define __DECL_SIMD_tan __DECL_SIMD_x86_64
+#  undef __DECL_SIMD_tanf
+#  define __DECL_SIMD_tanf __DECL_SIMD_x86_64

 # endif
 #endif
--- a/sysdeps/x86/fpu/finclude/math-vector-fortran.h
+++ b/sysdeps/x86/fpu/finclude/math-vector-fortran.h
@ -68,6 +68,8 @@
 !GCC$ builtin (asinhf) attributes simd (notinbranch) if('x86_64')
 !GCC$ builtin (erfc) attributes simd (notinbranch) if('x86_64')
 !GCC$ builtin (erfcf) attributes simd (notinbranch) if('x86_64')
+!GCC$ builtin (tan) attributes simd (notinbranch) if('x86_64')
+!GCC$ builtin (tanf) attributes simd (notinbranch) if('x86_64')

 !GCC$ builtin (cos) attributes simd (notinbranch) if('x32')
 !GCC$ builtin (cosf) attributes simd (notinbranch) if('x32')
@ -121,3 +123,5 @@
 !GCC$ builtin (asinhf) attributes simd (notinbranch) if('x32')
 !GCC$ builtin (erfc) attributes simd (notinbranch) if('x32')
 !GCC$ builtin (erfcf) attributes simd (notinbranch) if('x32')
+!GCC$ builtin (tan) attributes simd (notinbranch) if('x32')
+!GCC$ builtin (tanf) attributes simd (notinbranch) if('x32')
--- a/sysdeps/x86_64/fpu/Makeconfig
+++ b/sysdeps/x86_64/fpu/Makeconfig
@ -47,6 +47,7 @@ libmvec-funcs = \
  sin \
  sincos \
  sinh \
+  tan \
  tanh \

 # Define libmvec function for benchtests directory.
--- a/sysdeps/x86_64/fpu/Versions
+++ b/sysdeps/x86_64/fpu/Versions
@ -31,6 +31,7 @@ libmvec {
    _ZGVbN2v_log1p; _ZGVcN4v_log1p; _ZGVdN4v_log1p; _ZGVeN8v_log1p;
    _ZGVbN2v_log2; _ZGVcN4v_log2; _ZGVdN4v_log2; _ZGVeN8v_log2;
    _ZGVbN2v_sinh; _ZGVcN4v_sinh; _ZGVdN4v_sinh; _ZGVeN8v_sinh;
+    _ZGVbN2v_tan; _ZGVcN4v_tan; _ZGVdN4v_tan; _ZGVeN8v_tan;
    _ZGVbN2v_tanh; _ZGVcN4v_tanh; _ZGVdN4v_tanh; _ZGVeN8v_tanh;
    _ZGVbN2vv_atan2; _ZGVcN4vv_atan2; _ZGVdN4vv_atan2; _ZGVeN8vv_atan2;
    _ZGVbN2vv_hypot; _ZGVcN4vv_hypot; _ZGVdN4vv_hypot; _ZGVeN8vv_hypot;
@ -51,6 +52,7 @@ libmvec {
    _ZGVbN4v_log1pf; _ZGVcN8v_log1pf; _ZGVdN8v_log1pf; _ZGVeN16v_log1pf;
    _ZGVbN4v_log2f; _ZGVcN8v_log2f; _ZGVdN8v_log2f; _ZGVeN16v_log2f;
    _ZGVbN4v_sinhf; _ZGVcN8v_sinhf; _ZGVdN8v_sinhf; _ZGVeN16v_sinhf;
+    _ZGVbN4v_tanf; _ZGVcN8v_tanf; _ZGVdN8v_tanf; _ZGVeN16v_tanf;
    _ZGVbN4v_tanhf; _ZGVcN8v_tanhf; _ZGVdN8v_tanhf; _ZGVeN16v_tanhf;
    _ZGVbN4vv_atan2f; _ZGVcN8vv_atan2f; _ZGVdN8vv_atan2f; _ZGVeN16vv_atan2f;
    _ZGVbN4vv_hypotf; _ZGVcN8vv_hypotf; _ZGVdN8vv_hypotf; _ZGVeN16vv_hypotf;
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@ -2080,6 +2080,26 @@ float: 1
 float128: 1
 ldouble: 2

+Function: "tan_vlen16":
+float: 1
+
+Function: "tan_vlen2":
+double: 2
+
+Function: "tan_vlen4":
+double: 2
+float: 2
+
+Function: "tan_vlen4_avx2":
+double: 1
+
+Function: "tan_vlen8":
+double: 2
+float: 2
+
+Function: "tan_vlen8_avx2":
+float: 2
+
 Function: "tanh":
 double: 2
 float: 2
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core-sse2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core-sse2.S
@ -0,0 +1,20 @@
+/* SSE2 version of vectorized tan, vector length is 2.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define _ZGVbN2v_tan _ZGVbN2v_tan_sse2
+#include "../svml_d_tan2_core.S"
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core.c
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core.c
@ -0,0 +1,27 @@
+/* Multiple versions of vectorized tan, vector length is 2.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVbN2v_tan
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN2v_tan, __GI__ZGVbN2v_tan, __redirect__ZGVbN2v_tan)
+  __attribute__ ((visibility ("hidden")));
+#endif
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_tan2_core_sse4.S
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_tan4_core-sse.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_tan4_core-sse.S
@ -0,0 +1,20 @@
+/* SSE version of vectorized tan, vector length is 4.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define _ZGVdN4v_tan _ZGVdN4v_tan_sse_wrapper
+#include "../svml_d_tan4_core.S"
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_tan4_core.c
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_tan4_core.c
@ -0,0 +1,27 @@
+/* Multiple versions of vectorized tan, vector length is 4.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVdN4v_tan
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN4v_tan, __GI__ZGVdN4v_tan, __redirect__ZGVdN4v_tan)
+  __attribute__ ((visibility ("hidden")));
+#endif
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_tan4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_tan4_core_avx2.S
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_tan8_core-avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_tan8_core-avx2.S
@ -0,0 +1,20 @@
+/* AVX2 version of vectorized tan, vector length is 8.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define _ZGVeN8v_tan _ZGVeN8v_tan_avx2_wrapper
+#include "../svml_d_tan8_core.S"
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_tan8_core.c
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_tan8_core.c
@ -0,0 +1,27 @@
+/* Multiple versions of vectorized tan, vector length is 8.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVeN8v_tan
+#include "ifunc-mathvec-avx512-skx.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN8v_tan, __GI__ZGVeN8v_tan, __redirect__ZGVeN8v_tan)
+  __attribute__ ((visibility ("hidden")));
+#endif
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_tan8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_tan8_core_avx512.S
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_tanf16_core-avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_tanf16_core-avx2.S
@ -0,0 +1,20 @@
+/* AVX2 version of vectorized tanf, vector length is 16.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define _ZGVeN16v_tanf _ZGVeN16v_tanf_avx2_wrapper
+#include "../svml_s_tanf16_core.S"
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_tanf16_core.c
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_tanf16_core.c
@ -0,0 +1,28 @@
+/* Multiple versions of vectorized tanf, vector length is 16.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVeN16v_tanf
+#include "ifunc-mathvec-avx512-skx.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN16v_tanf, __GI__ZGVeN16v_tanf,
+	       __redirect__ZGVeN16v_tanf)
+  __attribute__ ((visibility ("hidden")));
+#endif
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_tanf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_tanf16_core_avx512.S
@ -0,0 +1,927 @@
+/* Function tanf vectorized with AVX-512.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   https://www.gnu.org/licenses/.  */
+
+/*
+ * ALGORITHM DESCRIPTION:
+ *
+ *      ( optimized for throughput, with small table lookup, works when HW FMA is available )
+ *
+ *       Implementation reduces argument x to |R|<pi/64
+ *       32-entry tables used to store high and low parts of tan(x0)
+ *       Argument x = N*pi + x0 + (R);   x0 = k*pi/32, with k in {0, 1, ..., 31}
+ *       (very large arguments reduction resolved in _vsreduction_core.i)
+ *       Compute result as (tan(x0) + tan(R))/(1-tan(x0)*tan(R))
+ *       _HA_ version keeps extra precision for numerator, denominator, and during
+ *       final NR-iteration computing quotient.
+ *
+ *
+ */
+
+/* Byte offsets of the fields inside data table __svml_stan_data_internal.
+ * Consecutive offsets are 64 bytes apart (the size of one ZMM register),
+ * except that Th_tbl_uisa occupies 128 bytes.
+ */
+#define _sInvPI_uisa                  	0
+#define _sPI1_uisa                    	64
+#define _sPI2_uisa                    	128
+#define _sPI3_uisa                    	192
+#define Th_tbl_uisa                   	256
+#define _sPC3_uisa                    	384
+#define _sPC5_uisa                    	448
+#define _sRangeReductionVal_uisa      	512
+#define _sAbsMask                     	576
+#define _sRangeVal                    	640
+#define _sRShifter                    	704
+#define _sOne                         	768
+#define _sRangeReductionVal           	832
+#define _sPI1                         	896
+#define _sPI2                         	960
+#define _sPI3                         	1024
+
+#include <sysdep.h>
+
+        .text
+/* Place the EVEX-encoded (AVX-512) code in its own text subsection.
+   The name was previously misspelled as ".text.exex512".  */
+	.section .text.evex512,"ax",@progbits
+ENTRY(_ZGVeN16v_tanf_skx)
+        pushq     %rbp
+        cfi_def_cfa_offset(16)
+        movq      %rsp, %rbp
+        cfi_def_cfa(6, 16)
+        cfi_offset(6, -16)
+        andq      $-64, %rsp
+        subq      $192, %rsp
+        xorl      %edx, %edx
+
+/* Large values check: load the threshold that the masked input is
+ * compared with below.  Lanes that are not less-than-or-equal to it
+ * (vcmpps predicate 22) are flagged in k6.
+ */
+        vmovups   _sRangeReductionVal_uisa+__svml_stan_data_internal(%rip), %zmm10
+
+/*
+ *
+ * Main path
+ *
+ * Start argument reduction.  zmm11 keeps the original input and
+ * zmm0 receives the input ANDed with _sAbsMask.
+ */
+        vmovups   _sRShifter+__svml_stan_data_internal(%rip), %zmm1
+        vmovups   _sPI1_uisa+__svml_stan_data_internal(%rip), %zmm4
+        vmovups   _sPI2_uisa+__svml_stan_data_internal(%rip), %zmm2
+        vmovups   _sPI3_uisa+__svml_stan_data_internal(%rip), %zmm3
+        vmovaps   %zmm0, %zmm11
+        vandps    _sAbsMask+__svml_stan_data_internal(%rip), %zmm11, %zmm0
+        vcmpps    $22, {sae}, %zmm10, %zmm0, %k6
+        vmovups   __svml_stan_data_internal(%rip), %zmm10
+
+/*
+ *
+ * End of main path
+ *
+ * kortestw sets the flags that the jne below consumes.
+ */
+
+        kortestw  %k6, %k6
+        vfmadd213ps {rn-sae}, %zmm1, %zmm11, %zmm10
+        vsubps    {rn-sae}, %zmm1, %zmm10, %zmm5
+        vfnmadd213ps {rn-sae}, %zmm11, %zmm5, %zmm4
+        vfnmadd231ps {rn-sae}, %zmm5, %zmm2, %zmm4
+        vfnmadd213ps {rn-sae}, %zmm4, %zmm3, %zmm5
+
+/* Go to auxiliary branch if any lane is flagged in k6 */
+        jne       L(AUX_BRANCH)
+                                # LOE rbx r12 r13 r14 r15 edx zmm0 zmm5 zmm10 zmm11 k6
+
+/* Return from auxiliary branch
+ * for out of main path inputs
+ */
+
+L(AUX_BRANCH_RETURN):
+/* Table lookup: vpermt2ps selects entries from the two 64-byte halves
+ * of Th_tbl_uisa using zmm10 as the index vector.
+ */
+        vmovups   Th_tbl_uisa+__svml_stan_data_internal(%rip), %zmm3
+        vmovups   _sPC3_uisa+__svml_stan_data_internal(%rip), %zmm0
+        vmulps    {rn-sae}, %zmm5, %zmm5, %zmm1
+        vpermt2ps Th_tbl_uisa+64+__svml_stan_data_internal(%rip), %zmm10, %zmm3
+        vmovups   _sPC5_uisa+__svml_stan_data_internal(%rip), %zmm10
+        vfmadd231ps {rn-sae}, %zmm1, %zmm10, %zmm0
+        vmulps    {rn-sae}, %zmm5, %zmm0, %zmm4
+        vfmadd213ps {rn-sae}, %zmm5, %zmm1, %zmm4
+
+/*
+ * Compute Denominator:
+ * sDenominator - sDlow ~= 1-(sTh+sTl)*(sP+sPlow)
+ */
+        vmovups   _sOne+__svml_stan_data_internal(%rip), %zmm5
+        vmulps    {rn-sae}, %zmm4, %zmm3, %zmm7
+
+/*
+ * Compute Numerator:
+ * sNumerator + sNlow ~= sTh+sTl+sP+sPlow
+ */
+        vaddps    {rn-sae}, %zmm3, %zmm4, %zmm8
+        vsubps    {rn-sae}, %zmm7, %zmm5, %zmm9
+        vsubps    {rn-sae}, %zmm3, %zmm8, %zmm2
+
+/*
+ * Now compute (sNumerator + sNlow)/(sDenominator - sDlow).
+ * Choose an NR iteration instead of hardware division; the
+ * initial reciprocal estimate comes from vrcp14ps.
+ */
+        vrcp14ps  %zmm9, %zmm14
+        vsubps    {rn-sae}, %zmm5, %zmm9, %zmm6
+        vsubps    {rn-sae}, %zmm2, %zmm4, %zmm13
+        vmulps    {rn-sae}, %zmm8, %zmm14, %zmm15
+        vaddps    {rn-sae}, %zmm7, %zmm6, %zmm12
+
+/* One NR iteration to refine sQuotient */
+        vfmsub213ps {rn-sae}, %zmm8, %zmm15, %zmm9
+        vfnmadd213ps {rn-sae}, %zmm9, %zmm15, %zmm12
+        vsubps    {rn-sae}, %zmm13, %zmm12, %zmm0
+        vfnmadd213ps {rn-sae}, %zmm15, %zmm14, %zmm0
+        testl     %edx, %edx
+
+/* Go to special inputs processing branch if any bit of edx is set */
+        jne       L(SPECIAL_VALUES_BRANCH)
+                                # LOE rbx r12 r13 r14 r15 edx zmm0 zmm11
+
+/* Restore registers
+ * and exit the function
+ */
+
+L(EXIT):
+        movq      %rbp, %rsp
+        popq      %rbp
+        cfi_def_cfa(7, 8)
+        cfi_restore(6)
+        ret
+        cfi_def_cfa(6, 16)
+        cfi_offset(6, -16)
+
+/* Branch to process
+ * special inputs.
+ * The original input (zmm11) and the vector result (zmm0) are spilled
+ * to the stack, r12/r13/r14 are saved, and the 16 lanes are then
+ * walked with r12d as the lane index and r13d holding the mask from edx.
+ */
+
+L(SPECIAL_VALUES_BRANCH):
+        vmovups   %zmm11, 64(%rsp)
+        vmovups   %zmm0, 128(%rsp)
+                                # LOE rbx r12 r13 r14 r15 edx zmm0
+
+        xorl      %eax, %eax
+                                # LOE rbx r12 r13 r14 r15 eax edx
+
+        vzeroupper
+        movq      %r12, 16(%rsp)
+        /*  DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus)  */
+        .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
+        movl      %eax, %r12d
+        movq      %r13, 8(%rsp)
+        /*  DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus)  */
+        .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
+        movl      %edx, %r13d
+        movq      %r14, (%rsp)
+        /*  DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus)  */
+        .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
+                                # LOE rbx r15 r12d r13d
+
+/* Range mask
+ * bits check: test bit number r12d of the mask in r13d
+ */
+
+L(RANGEMASK_CHECK):
+        btl       %r12d, %r13d
+
+/* Call scalar math function if the tested bit is set */
+        jc        L(SCALAR_MATH_CALL)
+                                # LOE rbx r15 r12d r13d
+
+/* Special inputs
+ * processing loop: advance to the next lane until all 16 are done
+ */
+
+L(SPECIAL_VALUES_LOOP):
+        incl      %r12d
+        cmpl      $16, %r12d
+
+/* Check bits in range mask */
+        jl        L(RANGEMASK_CHECK)
+                                # LOE rbx r15 r12d r13d
+
+        movq      16(%rsp), %r12
+        cfi_restore(12)
+        movq      8(%rsp), %r13
+        cfi_restore(13)
+        movq      (%rsp), %r14
+        cfi_restore(14)
+        vmovups   128(%rsp), %zmm0
+
+/* Go to exit */
+        jmp       L(EXIT)
+        /*  DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus)  */
+        .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
+        /*  DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus)  */
+        .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
+        /*  DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus)  */
+        .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
+                                # LOE rbx r12 r13 r14 r15 zmm0
+
+/* Scalar math function call
+ * to process special input: the lane's input is read from the spilled
+ * input vector, passed to tanf, and the result is written into the
+ * same lane of the spilled result vector
+ */
+
+L(SCALAR_MATH_CALL):
+        movl      %r12d, %r14d
+        movss     64(%rsp,%r14,4), %xmm0
+        call      tanf@PLT
+                                # LOE rbx r14 r15 r12d r13d xmm0
+
+        movss     %xmm0, 128(%rsp,%r14,4)
+
+/* Process special inputs in loop */
+        jmp       L(SPECIAL_VALUES_LOOP)
+        cfi_restore(12)
+        cfi_restore(13)
+        cfi_restore(14)
+                                # LOE rbx r15 r12d r13d
+
+/* Auxiliary branch
+ * for out of main path inputs.
+ * Besides the large-argument reduction, this branch records in edx the
+ * lanes for which (zmm0 AND _sRangeVal) compares equal to _sRangeVal;
+ * a non-zero edx later routes those lanes to the scalar tanf call.
+ */
+
+L(AUX_BRANCH):
+        vmovups   _sRangeVal+__svml_stan_data_internal(%rip), %zmm6
+
+/*
+ * Get the (2^a / 2pi) mod 1 values from the table.
+ * Because there is no I-type gather, we need a trivial cast
+ */
+        lea       __svml_stan_reduction_data_internal(%rip), %rax
+        vmovups   %zmm5, (%rsp)
+        vandps    %zmm0, %zmm6, %zmm14
+        vcmpps    $0, {sae}, %zmm6, %zmm14, %k0
+
+/*
+ * Break the P_xxx and m into 16-bit chunks ready for
+ * the long multiplication via 16x16->32 multiplications
+ */
+        vmovups   .FLT_15(%rip), %zmm6
+        kxnorw    %k0, %k0, %k1
+        kxnorw    %k0, %k0, %k2
+        kxnorw    %k0, %k0, %k3
+        kmovw     %k0, %edx
+        vpandd    .FLT_12(%rip), %zmm11, %zmm5
+        vpsrld    $23, %zmm5, %zmm7
+        vpslld    $1, %zmm7, %zmm8
+        vpaddd    %zmm7, %zmm8, %zmm9
+        vpslld    $2, %zmm9, %zmm4
+        vpxord    %zmm3, %zmm3, %zmm3
+        vpxord    %zmm15, %zmm15, %zmm15
+        vpxord    %zmm2, %zmm2, %zmm2
+        vgatherdps (%rax,%zmm4), %zmm3{%k1}
+        vgatherdps 4(%rax,%zmm4), %zmm15{%k2}
+        vgatherdps 8(%rax,%zmm4), %zmm2{%k3}
+        vpsrld    $16, %zmm3, %zmm5
+        vpsrld    $16, %zmm2, %zmm13
+
+/*
+ * Also get the significand as an integer
+ * NB: adding in the integer bit is wrong for denorms!
+ * To make this work for denorms we should do something slightly different
+ */
+        vpandd    .FLT_13(%rip), %zmm11, %zmm0
+        vpaddd    .FLT_14(%rip), %zmm0, %zmm1
+        vpsrld    $16, %zmm15, %zmm0
+        vpsrld    $16, %zmm1, %zmm8
+        vpandd    %zmm6, %zmm3, %zmm9
+        vpandd    %zmm6, %zmm15, %zmm12
+        vpandd    %zmm6, %zmm2, %zmm7
+        vpandd    %zmm6, %zmm1, %zmm14
+
+/* Now do the big multiplication and carry propagation */
+        vpmulld   %zmm9, %zmm8, %zmm4
+        vpmulld   %zmm0, %zmm8, %zmm3
+        vpmulld   %zmm12, %zmm8, %zmm2
+        vpmulld   %zmm13, %zmm8, %zmm1
+        vpmulld   %zmm7, %zmm8, %zmm8
+        vpmulld   %zmm5, %zmm14, %zmm7
+        vpmulld   %zmm9, %zmm14, %zmm5
+        vpmulld   %zmm0, %zmm14, %zmm9
+        vpmulld   %zmm12, %zmm14, %zmm0
+        vpmulld   %zmm13, %zmm14, %zmm12
+        vpsrld    $16, %zmm12, %zmm14
+        vpsrld    $16, %zmm0, %zmm13
+        vpsrld    $16, %zmm9, %zmm15
+        vpsrld    $16, %zmm5, %zmm12
+        vpsrld    $16, %zmm8, %zmm8
+        vpaddd    %zmm14, %zmm1, %zmm1
+        vpaddd    %zmm13, %zmm2, %zmm2
+        vpaddd    %zmm15, %zmm3, %zmm15
+        vpaddd    %zmm12, %zmm4, %zmm3
+        vpandd    %zmm6, %zmm0, %zmm13
+        vpaddd    %zmm1, %zmm13, %zmm4
+        vpaddd    %zmm4, %zmm8, %zmm14
+        vpsrld    $16, %zmm14, %zmm0
+        vpandd    %zmm6, %zmm9, %zmm9
+        vpaddd    %zmm2, %zmm9, %zmm1
+        vpaddd    %zmm1, %zmm0, %zmm8
+
+/*
+ * Now round at the 2^-8 bit position for reduction mod pi/2^7
+ * instead of the original 2pi (but still with the same 2pi scaling).
+ * Use a shifter of 2^15 + 2^14.
+ * The N we get is our final version; it has an offset of
+ * 2^8 because of the implicit integer bit, and anyway for negative
+ * starting value it's a 2s complement thing. But we need to mask
+ * off the exponent part anyway so it's fine.
+ */
+        vmovups   .FLT_18(%rip), %zmm1
+        vpandd    %zmm6, %zmm7, %zmm7
+        vpaddd    %zmm3, %zmm7, %zmm13
+        vpsrld    $16, %zmm8, %zmm3
+        vpandd    %zmm6, %zmm5, %zmm5
+        vpaddd    %zmm15, %zmm5, %zmm2
+        vpaddd    %zmm2, %zmm3, %zmm15
+        vpsrld    $16, %zmm15, %zmm12
+        vpaddd    %zmm13, %zmm12, %zmm5
+
+/* Assemble reduced argument from the pieces */
+        vpandd    %zmm6, %zmm14, %zmm9
+        vpandd    %zmm6, %zmm15, %zmm7
+        vpslld    $16, %zmm5, %zmm6
+        vpslld    $16, %zmm8, %zmm5
+        vpaddd    %zmm7, %zmm6, %zmm4
+        vpaddd    %zmm9, %zmm5, %zmm9
+        vpsrld    $9, %zmm4, %zmm6
+
+/*
+ * We want to incorporate the original sign now too.
+ * Do it here for convenience in getting the right N value,
+ * though we could wait right to the end if we were prepared
+ * to modify the sign of N later too.
+ * So get the appropriate sign mask now (or sooner).
+ */
+        vpandd    .FLT_16(%rip), %zmm11, %zmm0
+        vpandd    .FLT_21(%rip), %zmm9, %zmm13
+        vpslld    $5, %zmm13, %zmm14
+
+/*
+ * Create floating-point high part, implicitly adding integer bit 1
+ * Incorporate overall sign at this stage too.
+ */
+        vpxord    .FLT_17(%rip), %zmm0, %zmm8
+        vpord     %zmm8, %zmm6, %zmm2
+        vaddps    {rn-sae}, %zmm2, %zmm1, %zmm12
+        vsubps    {rn-sae}, %zmm1, %zmm12, %zmm3
+        vsubps    {rn-sae}, %zmm3, %zmm2, %zmm7
+
+/*
+ * Create floating-point low and medium parts, respectively
+ * lo_17, ... lo_0, 0, ..., 0
+ * hi_8, ... hi_0, lo_31, ..., lo_18
+ * then subtract off the implicitly added integer bits,
+ * 2^-46 and 2^-23, respectively.
+ * Put the original sign into all of them at this stage.
+ */
+        vpxord    .FLT_20(%rip), %zmm0, %zmm6
+        vpord     %zmm6, %zmm14, %zmm15
+        vpandd    .FLT_23(%rip), %zmm4, %zmm4
+        vsubps    {rn-sae}, %zmm6, %zmm15, %zmm8
+        vandps    .FLT_26(%rip), %zmm11, %zmm15
+        vpsrld    $18, %zmm9, %zmm6
+
+/*
+ * If the magnitude of the input is <= 2^-20, then
+ * just pass through the input, since no reduction will be needed and
+ * the main path will only work accurately if the reduced argument is
+ * about >= 2^-40 (which it is for all large pi multiples)
+ */
+        vmovups   .FLT_27(%rip), %zmm14
+        vcmpps    $26, {sae}, %zmm14, %zmm15, %k4
+        vcmpps    $22, {sae}, %zmm14, %zmm15, %k5
+        vpxord    .FLT_22(%rip), %zmm0, %zmm1
+        vpslld    $14, %zmm4, %zmm0
+        vpord     %zmm6, %zmm0, %zmm0
+        vpord     %zmm1, %zmm0, %zmm4
+        vsubps    {rn-sae}, %zmm1, %zmm4, %zmm2
+        vpternlogd $255, %zmm6, %zmm6, %zmm6
+
+/* Now add them up into 2 reasonably aligned pieces */
+        vaddps    {rn-sae}, %zmm2, %zmm7, %zmm13
+        vsubps    {rn-sae}, %zmm13, %zmm7, %zmm7
+        vaddps    {rn-sae}, %zmm7, %zmm2, %zmm3
+
+/*
+ * The output is _VRES_R (high) + _VRES_E (low), and the integer part is _VRES_IND
+ * Set sRp2 = _VRES_R^2 and then resume the original code.
+ */
+        vmovups   .FLT_28(%rip), %zmm2
+        vaddps    {rn-sae}, %zmm8, %zmm3, %zmm1
+        vmovups   .FLT_25(%rip), %zmm8
+
+/* Grab our final N value as an integer, appropriately masked mod 2^8 */
+        vpandd    .FLT_19(%rip), %zmm12, %zmm5
+
+/*
+ * Now multiply those numbers all by 2 pi, reasonably accurately.
+ * (RHi + RLo) * (pi_lead + pi_trail) ~=
+ * RHi * pi_lead + (RHi * pi_trail + RLo * pi_lead)
+ */
+        vmovups   .FLT_24(%rip), %zmm12
+        vmulps    {rn-sae}, %zmm12, %zmm13, %zmm0
+        vmovaps   %zmm12, %zmm9
+        vfmsub213ps {rn-sae}, %zmm0, %zmm13, %zmm9
+        vfmadd213ps {rn-sae}, %zmm9, %zmm8, %zmm13
+        vmovaps   %zmm6, %zmm8
+        vfmadd213ps {rn-sae}, %zmm13, %zmm12, %zmm1
+        vpandnd   %zmm15, %zmm15, %zmm8{%k4}
+        vpandnd   %zmm15, %zmm15, %zmm6{%k5}
+        vandps    %zmm11, %zmm6, %zmm14
+        vandps    %zmm0, %zmm8, %zmm15
+        vandps    %zmm1, %zmm8, %zmm12
+        vorps     %zmm15, %zmm14, %zmm6
+        vpsrld    $31, %zmm6, %zmm3
+        vpsubd    %zmm3, %zmm2, %zmm4
+        vpaddd    %zmm4, %zmm5, %zmm7
+        vpsrld    $2, %zmm7, %zmm13
+        vpslld    $2, %zmm13, %zmm9
+
+/*
+ *
+ * End of large arguments path
+ *
+ * Merge results from main and large paths:
+ * lanes flagged in k6 take the large-path values, the other lanes
+ * keep the main-path values (zmm10, and zmm5 reloaded from the stack).
+ */
+        vblendmps %zmm13, %zmm10, %zmm10{%k6}
+        vpsubd    %zmm9, %zmm5, %zmm5
+        vmovups   .FLT_29(%rip), %zmm9
+        vcvtdq2ps {rn-sae}, %zmm5, %zmm0
+        vmovups   .FLT_30(%rip), %zmm5
+        vfmadd231ps {rn-sae}, %zmm0, %zmm5, %zmm12
+        vmovups   (%rsp), %zmm5
+        vaddps    {rn-sae}, %zmm6, %zmm12, %zmm6
+        vfmadd213ps {rn-sae}, %zmm6, %zmm9, %zmm0
+        vblendmps %zmm0, %zmm5, %zmm5{%k6}
+
+/* Return to main vector processing path */
+        jmp       L(AUX_BRANCH_RETURN)
+                                # LOE rbx r12 r13 r14 r15 edx zmm5 zmm10 zmm11
+END(_ZGVeN16v_tanf_skx)
+
+        .section .rodata, "a"
+        .align 64
+
+/* 0x7f800000 in all 16 lanes: the binary32 exponent-field mask (bits 30:23);
+   the same bit pattern encodes +Inf.  */
+.FLT_12:
+        .long	0x7f800000,0x7f800000,0x7f800000,0x7f800000,0x7f800000,0x7f800000,0x7f800000,0x7f800000,0x7f800000,0x7f800000,0x7f800000,0x7f800000,0x7f800000,0x7f800000,0x7f800000,0x7f800000
+        .type	.FLT_12,@object
+        .size	.FLT_12,64
+        .align 64
+
+/* 0x007fffff in all 16 lanes: the binary32 fraction-field mask (bits 22:0).  */
+.FLT_13:
+        .long	0x007fffff,0x007fffff,0x007fffff,0x007fffff,0x007fffff,0x007fffff,0x007fffff,0x007fffff,0x007fffff,0x007fffff,0x007fffff,0x007fffff,0x007fffff,0x007fffff,0x007fffff,0x007fffff
+        .type	.FLT_13,@object
+        .size	.FLT_13,64
+        .align 64
+
+/* 0x00800000 in all 16 lanes: only bit 23 set, i.e. the bit just above the
+   binary32 fraction field (where the implicit leading one would sit).  */
+.FLT_14:
+        .long	0x00800000,0x00800000,0x00800000,0x00800000,0x00800000,0x00800000,0x00800000,0x00800000,0x00800000,0x00800000,0x00800000,0x00800000,0x00800000,0x00800000,0x00800000,0x00800000
+        .type	.FLT_14,@object
+        .size	.FLT_14,64
+        .align 64
+
+/* 0x0000ffff in all 16 lanes: integer mask selecting the low 16 bits.  */
+.FLT_15:
+        .long	0x0000ffff,0x0000ffff,0x0000ffff,0x0000ffff,0x0000ffff,0x0000ffff,0x0000ffff,0x0000ffff,0x0000ffff,0x0000ffff,0x0000ffff,0x0000ffff,0x0000ffff,0x0000ffff,0x0000ffff,0x0000ffff
+        .type	.FLT_15,@object
+        .size	.FLT_15,64
+        .align 64
+
+/* 0x80000000 in all 16 lanes: the binary32 sign-bit mask.  */
+.FLT_16:
+        .long	0x80000000,0x80000000,0x80000000,0x80000000,0x80000000,0x80000000,0x80000000,0x80000000,0x80000000,0x80000000,0x80000000,0x80000000,0x80000000,0x80000000,0x80000000,0x80000000
+        .type	.FLT_16,@object
+        .size	.FLT_16,64
+        .align 64
+
+/* 0x3f800000 in all 16 lanes: 1.0f.  The large-argument path XORs it with
+   zmm0 (presumably the isolated sign bit -- confirm) before ORing in
+   integer bits.  */
+.FLT_17:
+        .long	0x3f800000,0x3f800000,0x3f800000,0x3f800000,0x3f800000,0x3f800000,0x3f800000,0x3f800000,0x3f800000,0x3f800000,0x3f800000,0x3f800000,0x3f800000,0x3f800000,0x3f800000,0x3f800000
+        .type	.FLT_17,@object
+        .size	.FLT_17,64
+        .align 64
+
+/* 0x47400000 in all 16 lanes: 49152.0f, i.e. 1.5 * 2^15.  */
+.FLT_18:
+        .long	0x47400000,0x47400000,0x47400000,0x47400000,0x47400000,0x47400000,0x47400000,0x47400000,0x47400000,0x47400000,0x47400000,0x47400000,0x47400000,0x47400000,0x47400000,0x47400000
+        .type	.FLT_18,@object
+        .size	.FLT_18,64
+        .align 64
+
+/* 0x000000ff in all 16 lanes: integer mask for the low 8 bits; the
+   large-argument path uses it to take the final N value mod 2^8.  */
+.FLT_19:
+        .long	0x000000ff,0x000000ff,0x000000ff,0x000000ff,0x000000ff,0x000000ff,0x000000ff,0x000000ff,0x000000ff,0x000000ff,0x000000ff,0x000000ff,0x000000ff,0x000000ff,0x000000ff,0x000000ff
+        .type	.FLT_19,@object
+        .size	.FLT_19,64
+        .align 64
+
+/* 0x28800000 in all 16 lanes: 2^-46, the implicitly added integer bit that
+   the large-argument path subtracts off when forming the low part.  */
+.FLT_20:
+        .long	0x28800000,0x28800000,0x28800000,0x28800000,0x28800000,0x28800000,0x28800000,0x28800000,0x28800000,0x28800000,0x28800000,0x28800000,0x28800000,0x28800000,0x28800000,0x28800000
+        .type	.FLT_20,@object
+        .size	.FLT_20,64
+        .align 64
+
+/* 0x0003ffff in all 16 lanes: integer mask selecting the low 18 bits.  */
+.FLT_21:
+        .long	0x0003ffff,0x0003ffff,0x0003ffff,0x0003ffff,0x0003ffff,0x0003ffff,0x0003ffff,0x0003ffff,0x0003ffff,0x0003ffff,0x0003ffff,0x0003ffff,0x0003ffff,0x0003ffff,0x0003ffff,0x0003ffff
+        .type	.FLT_21,@object
+        .size	.FLT_21,64
+        .align 64
+
+/* 0x34000000 in all 16 lanes: 2^-23, the implicitly added integer bit that
+   the large-argument path subtracts off when forming the medium part.  */
+.FLT_22:
+        .long	0x34000000,0x34000000,0x34000000,0x34000000,0x34000000,0x34000000,0x34000000,0x34000000,0x34000000,0x34000000,0x34000000,0x34000000,0x34000000,0x34000000,0x34000000,0x34000000
+        .type	.FLT_22,@object
+        .size	.FLT_22,64
+        .align 64
+
+/* 0x000001ff in all 16 lanes: integer mask selecting the low 9 bits
+   (the hi_8 ... hi_0 bits of the medium part).  */
+.FLT_23:
+        .long	0x000001ff,0x000001ff,0x000001ff,0x000001ff,0x000001ff,0x000001ff,0x000001ff,0x000001ff,0x000001ff,0x000001ff,0x000001ff,0x000001ff,0x000001ff,0x000001ff,0x000001ff,0x000001ff
+        .type	.FLT_23,@object
+        .size	.FLT_23,64
+        .align 64
+
+/* 0x40c90fdb in all 16 lanes: 2*pi rounded to binary32; the leading part
+   (pi_lead) of the 2*pi multiplication in the large-argument path.  */
+.FLT_24:
+        .long	0x40c90fdb,0x40c90fdb,0x40c90fdb,0x40c90fdb,0x40c90fdb,0x40c90fdb,0x40c90fdb,0x40c90fdb,0x40c90fdb,0x40c90fdb,0x40c90fdb,0x40c90fdb,0x40c90fdb,0x40c90fdb,0x40c90fdb,0x40c90fdb
+        .type	.FLT_24,@object
+        .size	.FLT_24,64
+        .align 64
+
+/* 0xb43bbd2e in all 16 lanes: about -1.748e-7, the trailing correction
+   (pi_trail) that pairs with .FLT_24 to represent 2*pi more accurately.  */
+.FLT_25:
+        .long	0xb43bbd2e,0xb43bbd2e,0xb43bbd2e,0xb43bbd2e,0xb43bbd2e,0xb43bbd2e,0xb43bbd2e,0xb43bbd2e,0xb43bbd2e,0xb43bbd2e,0xb43bbd2e,0xb43bbd2e,0xb43bbd2e,0xb43bbd2e,0xb43bbd2e,0xb43bbd2e
+        .type	.FLT_25,@object
+        .size	.FLT_25,64
+        .align 64
+
+/* 0x7fffffff in all 16 lanes: mask clearing the sign bit (absolute value);
+   applied to zmm11 before the 2^-20 magnitude test.  */
+.FLT_26:
+        .long	0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff
+        .type	.FLT_26,@object
+        .size	.FLT_26,64
+        .align 64
+
+/* 0x35800000 in all 16 lanes: 2^-20, the magnitude threshold at or below
+   which the large-argument path passes the input through unreduced.  */
+.FLT_27:
+        .long	0x35800000,0x35800000,0x35800000,0x35800000,0x35800000,0x35800000,0x35800000,0x35800000,0x35800000,0x35800000,0x35800000,0x35800000,0x35800000,0x35800000,0x35800000,0x35800000
+        .type	.FLT_27,@object
+        .size	.FLT_27,64
+        .align 64
+
+/* 0x00000002 in all 16 lanes: the 32-bit integer 2 (not a float); used as
+   an integer operand of vpsubd in the large-argument path.  */
+.FLT_28:
+        .long	0x00000002,0x00000002,0x00000002,0x00000002,0x00000002,0x00000002,0x00000002,0x00000002,0x00000002,0x00000002,0x00000002,0x00000002,0x00000002,0x00000002,0x00000002,0x00000002
+        .type	.FLT_28,@object
+        .size	.FLT_28,64
+        .align 64
+
+/* 0x3cc90fdb in all 16 lanes: same significand as .FLT_24 with the exponent
+   lowered by 8, i.e. 2*pi/256 rounded to binary32; multiplied with the
+   integer-converted value at the end of the large-argument path.  */
+.FLT_29:
+        .long	0x3cc90fdb,0x3cc90fdb,0x3cc90fdb,0x3cc90fdb,0x3cc90fdb,0x3cc90fdb,0x3cc90fdb,0x3cc90fdb,0x3cc90fdb,0x3cc90fdb,0x3cc90fdb,0x3cc90fdb,0x3cc90fdb,0x3cc90fdb,0x3cc90fdb,0x3cc90fdb
+        .type	.FLT_29,@object
+        .size	.FLT_29,64
+        .align 64
+
+/* 0xb03bbd2e in all 16 lanes: .FLT_25 scaled by 2^-8, i.e. the trailing
+   correction that pairs with .FLT_29.  */
+.FLT_30:
+        .long	0xb03bbd2e,0xb03bbd2e,0xb03bbd2e,0xb03bbd2e,0xb03bbd2e,0xb03bbd2e,0xb03bbd2e,0xb03bbd2e,0xb03bbd2e,0xb03bbd2e,0xb03bbd2e,0xb03bbd2e,0xb03bbd2e,0xb03bbd2e,0xb03bbd2e,0xb03bbd2e
+        .type	.FLT_30,@object
+        .size	.FLT_30,64
+        .align 64
+
+/*
+ * Constant table __svml_stan_data_internal.
+ * The guarded typedef below sketches the layout; it is only seen by the
+ * preprocessor when __svml_stan_data_internal_typedef is defined.
+ * The data entries follow in the same order as the typedef fields, each one
+ * starting on a 64-byte boundary: sixteen broadcast lanes per field, except
+ * Th_tbl_uisa which holds 32 distinct values.
+ */
+#ifdef __svml_stan_data_internal_typedef
+typedef unsigned int VUINT32;
+    typedef struct
+    {
+        __declspec(align(64)) VUINT32 _sInvPI_uisa[16][1];
+        __declspec(align(64)) VUINT32 _sPI1_uisa[16][1];
+        __declspec(align(64)) VUINT32 _sPI2_uisa[16][1];
+        __declspec(align(64)) VUINT32 _sPI3_uisa[16][1];
+        __declspec(align(64)) VUINT32 Th_tbl_uisa[32][1];
+        __declspec(align(64)) VUINT32 _sPC3_uisa[16][1];
+        __declspec(align(64)) VUINT32 _sPC5_uisa[16][1];
+        __declspec(align(64)) VUINT32 _sRangeReductionVal_uisa[16][1];
+        __declspec(align(64)) VUINT32 _sAbsMask[16][1];
+        __declspec(align(64)) VUINT32 _sRangeVal[16][1];
+        __declspec(align(64)) VUINT32 _sRShifter[16][1];
+        __declspec(align(64)) VUINT32 _sOne[16][1];
+        __declspec(align(64)) VUINT32 _sRangeReductionVal[16][1];
+        __declspec(align(64)) VUINT32 _sPI1[16][1];
+        __declspec(align(64)) VUINT32 _sPI2[16][1];
+        __declspec(align(64)) VUINT32 _sPI3[16][1];
+    } __svml_stan_data_internal;
+#endif
+__svml_stan_data_internal:
+        /* UISA */
+        .long 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983         /* _sInvPI_uisa */
+        .align 64
+        .long 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda         /* _sPI1_uisa */
+        .align 64
+        .long 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168         /* _sPI2_uisa */
+        .align 64
+        .long 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5         /* _sPI3_uisa */
+        /* Th_tbl_uisa for i from 0 to 31 do printsingle(tan(i*Pi/32)); */
+        /* Entry 0 is stored as 0x80000000 (-0.0f).  Entry 16, where
+           tan(Pi/2) has a pole, is stored as 0xff7fffff, the most negative
+           finite binary32 value.  */
+        .align 64
+        .long 0x80000000, 0x3dc9b5dc, 0x3e4bafaf, 0x3e9b5042
+        .long 0x3ed413cd, 0x3f08d5b9, 0x3f2b0dc1, 0x3f521801
+        .long 0x3f800000, 0x3f9bf7ec, 0x3fbf90c7, 0x3fef789e
+        .long 0x401a827a, 0x4052facf, 0x40a0dff7, 0x41227363
+        .long 0xff7fffff, 0xc1227363, 0xc0a0dff7, 0xc052facf
+        .long 0xc01a827a, 0xbfef789e, 0xbfbf90c7, 0xbf9bf7ec
+        .long 0xbf800000, 0xbf521801, 0xbf2b0dc1, 0xbf08d5b9
+        .long 0xbed413cd, 0xbe9b5042, 0xbe4bafaf, 0xbdc9b5dc
+        .align 64
+        .long 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6          /* _sPC3_uisa */
+        .align 64
+        .long 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888          /* _sPC5_uisa */
+        .align 64
+        .long 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000          /* _sRangeReductionVal_uisa */
+        .align 64
+        .long 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF          /* _sAbsMask  */
+        .align 64
+        .long 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000          /* _sRangeVal  */
+        .align 64
+        .long 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000          /* _sRShifter  */
+        .align 64
+        .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000          /* _sOne */
+        .align 64
+        .long 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000          /* _sRangeReductionVal */
+        .align 64
+        .long 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000         /* _sPI1  */
+        .align 64
+        .long 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000         /* _sPI2  */
+        .align 64
+        .long 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000         /* _sPI3  */
+        .align 64
+        .type	__svml_stan_data_internal,@object
+        .size	__svml_stan_data_internal,.-__svml_stan_data_internal
+        .align 64
+
+/*
+ * Table __svml_stan_reduction_data_internal: 256 rows of three 32-bit words
+ * (P_hi, P_med, P_lo), each row read as one 96-bit value.
+ * Rows 0-56 are all zero and row 57 holds 1.  Every later row is the
+ * previous row shifted left by one bit with one new low bit shifted in;
+ * bits shifted out of P_hi are dropped.
+ * NOTE(review): presumably consumed by the large-argument reduction and
+ * indexed by the input's exponent field (256 rows) -- confirm against the
+ * lookup code.
+ * The guarded typedef only sketches the layout; it is seen by the
+ * preprocessor only when __svml_stan_reduction_data_internal_typedef is
+ * defined.
+ */
+#ifdef __svml_stan_reduction_data_internal_typedef
+typedef unsigned int VUINT32;
+typedef struct
+{
+        __declspec(align(64)) VUINT32 _sPtable[256][3][1];
+} __svml_stan_reduction_data_internal;
+#endif
+__svml_stan_reduction_data_internal:
+        /*     P_hi                  P_med               P_lo                */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 0 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 1 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 2 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 3 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 4 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 5 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 6 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 7 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 8 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 9 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 10 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 11 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 12 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 13 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 14 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 15 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 16 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 17 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 18 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 19 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 20 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 21 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 22 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 23 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 24 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 25 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 26 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 27 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 28 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 29 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 30 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 31 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 32 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 33 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 34 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 35 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 36 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 37 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 38 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 39 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 40 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 41 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 42 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 43 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 44 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 45 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 46 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 47 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 48 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 49 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 50 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 51 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 52 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 53 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 54 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 55 */
+        .long 0x00000000, 0x00000000, 0x00000000  /* 56 */
+        .long 0x00000000, 0x00000000, 0x00000001  /* 57 */
+        .long 0x00000000, 0x00000000, 0x00000002  /* 58 */
+        .long 0x00000000, 0x00000000, 0x00000005  /* 59 */
+        .long 0x00000000, 0x00000000, 0x0000000A  /* 60 */
+        .long 0x00000000, 0x00000000, 0x00000014  /* 61 */
+        .long 0x00000000, 0x00000000, 0x00000028  /* 62 */
+        .long 0x00000000, 0x00000000, 0x00000051  /* 63 */
+        .long 0x00000000, 0x00000000, 0x000000A2  /* 64 */
+        .long 0x00000000, 0x00000000, 0x00000145  /* 65 */
+        .long 0x00000000, 0x00000000, 0x0000028B  /* 66 */
+        .long 0x00000000, 0x00000000, 0x00000517  /* 67 */
+        .long 0x00000000, 0x00000000, 0x00000A2F  /* 68 */
+        .long 0x00000000, 0x00000000, 0x0000145F  /* 69 */
+        .long 0x00000000, 0x00000000, 0x000028BE  /* 70 */
+        .long 0x00000000, 0x00000000, 0x0000517C  /* 71 */
+        .long 0x00000000, 0x00000000, 0x0000A2F9  /* 72 */
+        .long 0x00000000, 0x00000000, 0x000145F3  /* 73 */
+        .long 0x00000000, 0x00000000, 0x00028BE6  /* 74 */
+        .long 0x00000000, 0x00000000, 0x000517CC  /* 75 */
+        .long 0x00000000, 0x00000000, 0x000A2F98  /* 76 */
+        .long 0x00000000, 0x00000000, 0x00145F30  /* 77 */
+        .long 0x00000000, 0x00000000, 0x0028BE60  /* 78 */
+        .long 0x00000000, 0x00000000, 0x00517CC1  /* 79 */
+        .long 0x00000000, 0x00000000, 0x00A2F983  /* 80 */
+        .long 0x00000000, 0x00000000, 0x0145F306  /* 81 */
+        .long 0x00000000, 0x00000000, 0x028BE60D  /* 82 */
+        .long 0x00000000, 0x00000000, 0x0517CC1B  /* 83 */
+        .long 0x00000000, 0x00000000, 0x0A2F9836  /* 84 */
+        .long 0x00000000, 0x00000000, 0x145F306D  /* 85 */
+        .long 0x00000000, 0x00000000, 0x28BE60DB  /* 86 */
+        .long 0x00000000, 0x00000000, 0x517CC1B7  /* 87 */
+        .long 0x00000000, 0x00000000, 0xA2F9836E  /* 88 */
+        .long 0x00000000, 0x00000001, 0x45F306DC  /* 89 */
+        .long 0x00000000, 0x00000002, 0x8BE60DB9  /* 90 */
+        .long 0x00000000, 0x00000005, 0x17CC1B72  /* 91 */
+        .long 0x00000000, 0x0000000A, 0x2F9836E4  /* 92 */
+        .long 0x00000000, 0x00000014, 0x5F306DC9  /* 93 */
+        .long 0x00000000, 0x00000028, 0xBE60DB93  /* 94 */
+        .long 0x00000000, 0x00000051, 0x7CC1B727  /* 95 */
+        .long 0x00000000, 0x000000A2, 0xF9836E4E  /* 96 */
+        .long 0x00000000, 0x00000145, 0xF306DC9C  /* 97 */
+        .long 0x00000000, 0x0000028B, 0xE60DB939  /* 98 */
+        .long 0x00000000, 0x00000517, 0xCC1B7272  /* 99 */
+        .long 0x00000000, 0x00000A2F, 0x9836E4E4  /* 100 */
+        .long 0x00000000, 0x0000145F, 0x306DC9C8  /* 101 */
+        .long 0x00000000, 0x000028BE, 0x60DB9391  /* 102 */
+        .long 0x00000000, 0x0000517C, 0xC1B72722  /* 103 */
+        .long 0x00000000, 0x0000A2F9, 0x836E4E44  /* 104 */
+        .long 0x00000000, 0x000145F3, 0x06DC9C88  /* 105 */
+        .long 0x00000000, 0x00028BE6, 0x0DB93910  /* 106 */
+        .long 0x00000000, 0x000517CC, 0x1B727220  /* 107 */
+        .long 0x00000000, 0x000A2F98, 0x36E4E441  /* 108 */
+        .long 0x00000000, 0x00145F30, 0x6DC9C882  /* 109 */
+        .long 0x00000000, 0x0028BE60, 0xDB939105  /* 110 */
+        .long 0x00000000, 0x00517CC1, 0xB727220A  /* 111 */
+        .long 0x00000000, 0x00A2F983, 0x6E4E4415  /* 112 */
+        .long 0x00000000, 0x0145F306, 0xDC9C882A  /* 113 */
+        .long 0x00000000, 0x028BE60D, 0xB9391054  /* 114 */
+        .long 0x00000000, 0x0517CC1B, 0x727220A9  /* 115 */
+        .long 0x00000000, 0x0A2F9836, 0xE4E44152  /* 116 */
+        .long 0x00000000, 0x145F306D, 0xC9C882A5  /* 117 */
+        .long 0x00000000, 0x28BE60DB, 0x9391054A  /* 118 */
+        .long 0x00000000, 0x517CC1B7, 0x27220A94  /* 119 */
+        .long 0x00000000, 0xA2F9836E, 0x4E441529  /* 120 */
+        .long 0x00000001, 0x45F306DC, 0x9C882A53  /* 121 */
+        .long 0x00000002, 0x8BE60DB9, 0x391054A7  /* 122 */
+        .long 0x00000005, 0x17CC1B72, 0x7220A94F  /* 123 */
+        .long 0x0000000A, 0x2F9836E4, 0xE441529F  /* 124 */
+        .long 0x00000014, 0x5F306DC9, 0xC882A53F  /* 125 */
+        .long 0x00000028, 0xBE60DB93, 0x91054A7F  /* 126 */
+        .long 0x00000051, 0x7CC1B727, 0x220A94FE  /* 127 */
+        .long 0x000000A2, 0xF9836E4E, 0x441529FC  /* 128 */
+        .long 0x00000145, 0xF306DC9C, 0x882A53F8  /* 129 */
+        .long 0x0000028B, 0xE60DB939, 0x1054A7F0  /* 130 */
+        .long 0x00000517, 0xCC1B7272, 0x20A94FE1  /* 131 */
+        .long 0x00000A2F, 0x9836E4E4, 0x41529FC2  /* 132 */
+        .long 0x0000145F, 0x306DC9C8, 0x82A53F84  /* 133 */
+        .long 0x000028BE, 0x60DB9391, 0x054A7F09  /* 134 */
+        .long 0x0000517C, 0xC1B72722, 0x0A94FE13  /* 135 */
+        .long 0x0000A2F9, 0x836E4E44, 0x1529FC27  /* 136 */
+        .long 0x000145F3, 0x06DC9C88, 0x2A53F84E  /* 137 */
+        .long 0x00028BE6, 0x0DB93910, 0x54A7F09D  /* 138 */
+        .long 0x000517CC, 0x1B727220, 0xA94FE13A  /* 139 */
+        .long 0x000A2F98, 0x36E4E441, 0x529FC275  /* 140 */
+        .long 0x00145F30, 0x6DC9C882, 0xA53F84EA  /* 141 */
+        .long 0x0028BE60, 0xDB939105, 0x4A7F09D5  /* 142 */
+        .long 0x00517CC1, 0xB727220A, 0x94FE13AB  /* 143 */
+        .long 0x00A2F983, 0x6E4E4415, 0x29FC2757  /* 144 */
+        .long 0x0145F306, 0xDC9C882A, 0x53F84EAF  /* 145 */
+        .long 0x028BE60D, 0xB9391054, 0xA7F09D5F  /* 146 */
+        .long 0x0517CC1B, 0x727220A9, 0x4FE13ABE  /* 147 */
+        .long 0x0A2F9836, 0xE4E44152, 0x9FC2757D  /* 148 */
+        .long 0x145F306D, 0xC9C882A5, 0x3F84EAFA  /* 149 */
+        .long 0x28BE60DB, 0x9391054A, 0x7F09D5F4  /* 150 */
+        .long 0x517CC1B7, 0x27220A94, 0xFE13ABE8  /* 151 */
+        .long 0xA2F9836E, 0x4E441529, 0xFC2757D1  /* 152 */
+        .long 0x45F306DC, 0x9C882A53, 0xF84EAFA3  /* 153 */
+        .long 0x8BE60DB9, 0x391054A7, 0xF09D5F47  /* 154 */
+        .long 0x17CC1B72, 0x7220A94F, 0xE13ABE8F  /* 155 */
+        .long 0x2F9836E4, 0xE441529F, 0xC2757D1F  /* 156 */
+        .long 0x5F306DC9, 0xC882A53F, 0x84EAFA3E  /* 157 */
+        .long 0xBE60DB93, 0x91054A7F, 0x09D5F47D  /* 158 */
+        .long 0x7CC1B727, 0x220A94FE, 0x13ABE8FA  /* 159 */
+        .long 0xF9836E4E, 0x441529FC, 0x2757D1F5  /* 160 */
+        .long 0xF306DC9C, 0x882A53F8, 0x4EAFA3EA  /* 161 */
+        .long 0xE60DB939, 0x1054A7F0, 0x9D5F47D4  /* 162 */
+        .long 0xCC1B7272, 0x20A94FE1, 0x3ABE8FA9  /* 163 */
+        .long 0x9836E4E4, 0x41529FC2, 0x757D1F53  /* 164 */
+        .long 0x306DC9C8, 0x82A53F84, 0xEAFA3EA6  /* 165 */
+        .long 0x60DB9391, 0x054A7F09, 0xD5F47D4D  /* 166 */
+        .long 0xC1B72722, 0x0A94FE13, 0xABE8FA9A  /* 167 */
+        .long 0x836E4E44, 0x1529FC27, 0x57D1F534  /* 168 */
+        .long 0x06DC9C88, 0x2A53F84E, 0xAFA3EA69  /* 169 */
+        .long 0x0DB93910, 0x54A7F09D, 0x5F47D4D3  /* 170 */
+        .long 0x1B727220, 0xA94FE13A, 0xBE8FA9A6  /* 171 */
+        .long 0x36E4E441, 0x529FC275, 0x7D1F534D  /* 172 */
+        .long 0x6DC9C882, 0xA53F84EA, 0xFA3EA69B  /* 173 */
+        .long 0xDB939105, 0x4A7F09D5, 0xF47D4D37  /* 174 */
+        .long 0xB727220A, 0x94FE13AB, 0xE8FA9A6E  /* 175 */
+        .long 0x6E4E4415, 0x29FC2757, 0xD1F534DD  /* 176 */
+        .long 0xDC9C882A, 0x53F84EAF, 0xA3EA69BB  /* 177 */
+        .long 0xB9391054, 0xA7F09D5F, 0x47D4D377  /* 178 */
+        .long 0x727220A9, 0x4FE13ABE, 0x8FA9A6EE  /* 179 */
+        .long 0xE4E44152, 0x9FC2757D, 0x1F534DDC  /* 180 */
+        .long 0xC9C882A5, 0x3F84EAFA, 0x3EA69BB8  /* 181 */
+        .long 0x9391054A, 0x7F09D5F4, 0x7D4D3770  /* 182 */
+        .long 0x27220A94, 0xFE13ABE8, 0xFA9A6EE0  /* 183 */
+        .long 0x4E441529, 0xFC2757D1, 0xF534DDC0  /* 184 */
+        .long 0x9C882A53, 0xF84EAFA3, 0xEA69BB81  /* 185 */
+        .long 0x391054A7, 0xF09D5F47, 0xD4D37703  /* 186 */
+        .long 0x7220A94F, 0xE13ABE8F, 0xA9A6EE06  /* 187 */
+        .long 0xE441529F, 0xC2757D1F, 0x534DDC0D  /* 188 */
+        .long 0xC882A53F, 0x84EAFA3E, 0xA69BB81B  /* 189 */
+        .long 0x91054A7F, 0x09D5F47D, 0x4D377036  /* 190 */
+        .long 0x220A94FE, 0x13ABE8FA, 0x9A6EE06D  /* 191 */
+        .long 0x441529FC, 0x2757D1F5, 0x34DDC0DB  /* 192 */
+        .long 0x882A53F8, 0x4EAFA3EA, 0x69BB81B6  /* 193 */
+        .long 0x1054A7F0, 0x9D5F47D4, 0xD377036D  /* 194 */
+        .long 0x20A94FE1, 0x3ABE8FA9, 0xA6EE06DB  /* 195 */
+        .long 0x41529FC2, 0x757D1F53, 0x4DDC0DB6  /* 196 */
+        .long 0x82A53F84, 0xEAFA3EA6, 0x9BB81B6C  /* 197 */
+        .long 0x054A7F09, 0xD5F47D4D, 0x377036D8  /* 198 */
+        .long 0x0A94FE13, 0xABE8FA9A, 0x6EE06DB1  /* 199 */
+        .long 0x1529FC27, 0x57D1F534, 0xDDC0DB62  /* 200 */
+        .long 0x2A53F84E, 0xAFA3EA69, 0xBB81B6C5  /* 201 */
+        .long 0x54A7F09D, 0x5F47D4D3, 0x77036D8A  /* 202 */
+        .long 0xA94FE13A, 0xBE8FA9A6, 0xEE06DB14  /* 203 */
+        .long 0x529FC275, 0x7D1F534D, 0xDC0DB629  /* 204 */
+        .long 0xA53F84EA, 0xFA3EA69B, 0xB81B6C52  /* 205 */
+        .long 0x4A7F09D5, 0xF47D4D37, 0x7036D8A5  /* 206 */
+        .long 0x94FE13AB, 0xE8FA9A6E, 0xE06DB14A  /* 207 */
+        .long 0x29FC2757, 0xD1F534DD, 0xC0DB6295  /* 208 */
+        .long 0x53F84EAF, 0xA3EA69BB, 0x81B6C52B  /* 209 */
+        .long 0xA7F09D5F, 0x47D4D377, 0x036D8A56  /* 210 */
+        .long 0x4FE13ABE, 0x8FA9A6EE, 0x06DB14AC  /* 211 */
+        .long 0x9FC2757D, 0x1F534DDC, 0x0DB62959  /* 212 */
+        .long 0x3F84EAFA, 0x3EA69BB8, 0x1B6C52B3  /* 213 */
+        .long 0x7F09D5F4, 0x7D4D3770, 0x36D8A566  /* 214 */
+        .long 0xFE13ABE8, 0xFA9A6EE0, 0x6DB14ACC  /* 215 */
+        .long 0xFC2757D1, 0xF534DDC0, 0xDB629599  /* 216 */
+        .long 0xF84EAFA3, 0xEA69BB81, 0xB6C52B32  /* 217 */
+        .long 0xF09D5F47, 0xD4D37703, 0x6D8A5664  /* 218 */
+        .long 0xE13ABE8F, 0xA9A6EE06, 0xDB14ACC9  /* 219 */
+        .long 0xC2757D1F, 0x534DDC0D, 0xB6295993  /* 220 */
+        .long 0x84EAFA3E, 0xA69BB81B, 0x6C52B327  /* 221 */
+        .long 0x09D5F47D, 0x4D377036, 0xD8A5664F  /* 222 */
+        .long 0x13ABE8FA, 0x9A6EE06D, 0xB14ACC9E  /* 223 */
+        .long 0x2757D1F5, 0x34DDC0DB, 0x6295993C  /* 224 */
+        .long 0x4EAFA3EA, 0x69BB81B6, 0xC52B3278  /* 225 */
+        .long 0x9D5F47D4, 0xD377036D, 0x8A5664F1  /* 226 */
+        .long 0x3ABE8FA9, 0xA6EE06DB, 0x14ACC9E2  /* 227 */
+        .long 0x757D1F53, 0x4DDC0DB6, 0x295993C4  /* 228 */
+        .long 0xEAFA3EA6, 0x9BB81B6C, 0x52B32788  /* 229 */
+        .long 0xD5F47D4D, 0x377036D8, 0xA5664F10  /* 230 */
+        .long 0xABE8FA9A, 0x6EE06DB1, 0x4ACC9E21  /* 231 */
+        .long 0x57D1F534, 0xDDC0DB62, 0x95993C43  /* 232 */
+        .long 0xAFA3EA69, 0xBB81B6C5, 0x2B327887  /* 233 */
+        .long 0x5F47D4D3, 0x77036D8A, 0x5664F10E  /* 234 */
+        .long 0xBE8FA9A6, 0xEE06DB14, 0xACC9E21C  /* 235 */
+        .long 0x7D1F534D, 0xDC0DB629, 0x5993C439  /* 236 */
+        .long 0xFA3EA69B, 0xB81B6C52, 0xB3278872  /* 237 */
+        .long 0xF47D4D37, 0x7036D8A5, 0x664F10E4  /* 238 */
+        .long 0xE8FA9A6E, 0xE06DB14A, 0xCC9E21C8  /* 239 */
+        .long 0xD1F534DD, 0xC0DB6295, 0x993C4390  /* 240 */
+        .long 0xA3EA69BB, 0x81B6C52B, 0x32788720  /* 241 */
+        .long 0x47D4D377, 0x036D8A56, 0x64F10E41  /* 242 */
+        .long 0x8FA9A6EE, 0x06DB14AC, 0xC9E21C82  /* 243 */
+        .long 0x1F534DDC, 0x0DB62959, 0x93C43904  /* 244 */
+        .long 0x3EA69BB8, 0x1B6C52B3, 0x27887208  /* 245 */
+        .long 0x7D4D3770, 0x36D8A566, 0x4F10E410  /* 246 */
+        .long 0xFA9A6EE0, 0x6DB14ACC, 0x9E21C820  /* 247 */
+        .long 0xF534DDC0, 0xDB629599, 0x3C439041  /* 248 */
+        .long 0xEA69BB81, 0xB6C52B32, 0x78872083  /* 249 */
+        .long 0xD4D37703, 0x6D8A5664, 0xF10E4107  /* 250 */
+        .long 0xA9A6EE06, 0xDB14ACC9, 0xE21C820F  /* 251 */
+        .long 0x534DDC0D, 0xB6295993, 0xC439041F  /* 252 */
+        .long 0xA69BB81B, 0x6C52B327, 0x8872083F  /* 253 */
+        .long 0x4D377036, 0xD8A5664F, 0x10E4107F  /* 254 */
+        .long 0x9A6EE06D, 0xB14ACC9E, 0x21C820FF /* 255 */
+        .align 64
+        .type	__svml_stan_reduction_data_internal,@object
+        .size	__svml_stan_reduction_data_internal,.-__svml_stan_reduction_data_internal
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_tanf4_core-sse2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_tanf4_core-sse2.S
@ -0,0 +1,20 @@
+/* SSE2 version of vectorized tanf, vector length is 4.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* Rename _ZGVbN4v_tanf to _ZGVbN4v_tanf_sse2 before pulling in the shared
+   source, so every use of that name in ../svml_s_tanf4_core.S is assembled
+   under the _sse2 suffix.  Presumably this is the variant the ifunc
+   selector falls back to -- confirm against the selector header.  */
+#define _ZGVbN4v_tanf _ZGVbN4v_tanf_sse2
+#include "../svml_s_tanf4_core.S"
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_tanf4_core.c
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_tanf4_core.c
@ -0,0 +1,28 @@
+/* Multiple versions of vectorized tanf, vector length is 4.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* Name of the exported vector function handled by this file.
+   REDIRECT_NAME and IFUNC_SELECTOR used below are not defined here;
+   presumably they are supplied by the ifunc header included next.  */
+#define SYMBOL_NAME _ZGVbN4v_tanf
+#include "ifunc-mathvec-sse4_1.h"
+
+/* Set up SYMBOL_NAME through libc_ifunc_redirected, with the
+   implementation picked by IFUNC_SELECTOR ().  */
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+/* Shared builds only: declare the internal name __GI__ZGVbN4v_tanf for
+   _ZGVbN4v_tanf with hidden visibility.  */
+__hidden_ver1 (_ZGVbN4v_tanf, __GI__ZGVbN4v_tanf,
+	       __redirect__ZGVbN4v_tanf)
+  __attribute__ ((visibility ("hidden")));
+#endif
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_tanf4_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_tanf4_core_sse4.S
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_tanf8_core-sse.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_tanf8_core-sse.S
@ -0,0 +1,20 @@
+/* SSE version of vectorized tanf, vector length is 8.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* Rename _ZGVdN8v_tanf to _ZGVdN8v_tanf_sse_wrapper before pulling in the
+   shared source, so every use of that name in ../svml_s_tanf8_core.S is
+   assembled under the _sse_wrapper suffix.  Presumably this is the variant
+   the ifunc selector falls back to -- confirm against the selector
+   header.  */
+#define _ZGVdN8v_tanf _ZGVdN8v_tanf_sse_wrapper
+#include "../svml_s_tanf8_core.S"
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_tanf8_core.c
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_tanf8_core.c
@ -0,0 +1,28 @@
+/* Multiple versions of vectorized tanf, vector length is 8.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVdN8v_tanf  /* Symbol name handed to the ifunc macro below.  */
+#include "ifunc-mathvec-avx2.h"  /* NOTE(review): presumably supplies REDIRECT_NAME and IFUNC_SELECTOR -- confirm.  */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED  /* Shared builds only; the declaration below carries hidden visibility.  */
+__hidden_ver1 (_ZGVdN8v_tanf, __GI__ZGVdN8v_tanf,
+	       __redirect__ZGVdN8v_tanf)
+  __attribute__ ((visibility ("hidden")));
+#endif
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_tanf8_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_tanf8_core_avx2.S
--- a/sysdeps/x86_64/fpu/svml_d_tan2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_tan2_core.S
@ -0,0 +1,29 @@
+/* Function tan vectorized with SSE2.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+	.text
+ENTRY (_ZGVbN2v_tan)
+WRAPPER_IMPL_SSE2 tan  /* Body comes from the SSE2 wrapper macro, given callee tan.  */
+END (_ZGVbN2v_tan)
+
+#ifndef USE_MULTIARCH  /* Hidden definition is emitted here only in non-multiarch builds.  */
+ libmvec_hidden_def (_ZGVbN2v_tan)
+#endif
--- a/sysdeps/x86_64/fpu/svml_d_tan4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_tan4_core.S
@ -0,0 +1,29 @@
+/* Function tan vectorized with AVX2, wrapper version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+	.text
+ENTRY (_ZGVdN4v_tan)
+WRAPPER_IMPL_AVX _ZGVbN2v_tan  /* Callee is the SSE2 entry point from svml_d_tan2_core.S.  */
+END (_ZGVdN4v_tan)
+
+#ifndef USE_MULTIARCH  /* Hidden definition is emitted here only in non-multiarch builds.  */
+ libmvec_hidden_def (_ZGVdN4v_tan)
+#endif
--- a/sysdeps/x86_64/fpu/svml_d_tan4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_tan4_core_avx.S
@ -0,0 +1,25 @@
+/* Function tan vectorized in AVX ISA as wrapper to SSE4 ISA version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+	.text
+ENTRY (_ZGVcN4v_tan)
+WRAPPER_IMPL_AVX _ZGVbN2v_tan  /* Callee is the entry point _ZGVbN2v_tan, as in svml_d_tan4_core.S.  */
+END (_ZGVcN4v_tan)
--- a/sysdeps/x86_64/fpu/svml_d_tan8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_tan8_core.S
@ -0,0 +1,25 @@
+/* Function tan vectorized with AVX-512, wrapper to AVX2.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+	.text
+ENTRY (_ZGVeN8v_tan)
+WRAPPER_IMPL_AVX512 _ZGVdN4v_tan  /* Callee is the AVX2 entry point from svml_d_tan4_core.S.  */
+END (_ZGVeN8v_tan)
--- a/sysdeps/x86_64/fpu/svml_s_tanf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_tanf16_core.S
@ -0,0 +1,25 @@
+/* Function tanf vectorized with AVX-512. Wrapper to AVX2 version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
+
+	.text
+ENTRY (_ZGVeN16v_tanf)
+WRAPPER_IMPL_AVX512 _ZGVdN8v_tanf  /* Callee is the AVX2 entry point from svml_s_tanf8_core.S.  */
+END (_ZGVeN16v_tanf)
--- a/sysdeps/x86_64/fpu/svml_s_tanf4_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_tanf4_core.S
@ -0,0 +1,29 @@
+/* Function tanf vectorized with SSE2, wrapper version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
+
+	.text
+ENTRY (_ZGVbN4v_tanf)
+WRAPPER_IMPL_SSE2 tanf  /* Body comes from the SSE2 wrapper macro, given callee tanf.  */
+END (_ZGVbN4v_tanf)
+
+#ifndef USE_MULTIARCH  /* Non-multiarch only; multiarch/svml_s_tanf4_core.c has its own hidden declaration.  */
+ libmvec_hidden_def (_ZGVbN4v_tanf)
+#endif
--- a/sysdeps/x86_64/fpu/svml_s_tanf8_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_tanf8_core.S
@ -0,0 +1,29 @@
+/* Function tanf vectorized with AVX2, wrapper version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
+
+	.text
+ENTRY (_ZGVdN8v_tanf)  /* multiarch/svml_s_tanf8_core-sse.S re-includes this file with the name redefined.  */
+WRAPPER_IMPL_AVX _ZGVbN4v_tanf  /* Callee is the SSE2 entry point from svml_s_tanf4_core.S.  */
+END (_ZGVdN8v_tanf)
+
+#ifndef USE_MULTIARCH  /* Non-multiarch only; multiarch/svml_s_tanf8_core.c has its own hidden declaration.  */
+ libmvec_hidden_def (_ZGVdN8v_tanf)
+#endif
--- a/sysdeps/x86_64/fpu/svml_s_tanf8_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_s_tanf8_core_avx.S
@ -0,0 +1,25 @@
+/* Function tanf vectorized in AVX ISA as wrapper to SSE4 ISA version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
+
+	.text
+ENTRY (_ZGVcN8v_tanf)
+WRAPPER_IMPL_AVX _ZGVbN4v_tanf  /* Callee is the entry point _ZGVbN4v_tanf, as in svml_s_tanf8_core.S.  */
+END (_ZGVcN8v_tanf)
--- a/sysdeps/x86_64/fpu/test-double-libmvec-tan-avx.c
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-tan-avx.c
@ -0,0 +1 @@
+#include "test-double-libmvec-tan.c"
--- a/sysdeps/x86_64/fpu/test-double-libmvec-tan-avx2.c
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-tan-avx2.c
@ -0,0 +1 @@
+#include "test-double-libmvec-tan.c"
--- a/sysdeps/x86_64/fpu/test-double-libmvec-tan-avx512f.c
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-tan-avx512f.c
@ -0,0 +1 @@
+#include "test-double-libmvec-tan.c"
--- a/sysdeps/x86_64/fpu/test-double-libmvec-tan.c
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-tan.c
@ -0,0 +1,3 @@
+#define LIBMVEC_TYPE double  /* Element type paired with LIBMVEC_FUNC below.  */
+#define LIBMVEC_FUNC tan  /* Function name under test.  */
+#include "test-vector-abi-arg1.h"  /* NOTE(review): presumably the one-argument ABI test template reading both macros -- confirm.  */
--- a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
@ -47,6 +47,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (erf), _ZGVbN2v_erf)
 VECTOR_WRAPPER (WRAPPER_NAME (tanh), _ZGVbN2v_tanh)
 VECTOR_WRAPPER (WRAPPER_NAME (asinh), _ZGVbN2v_asinh)
 VECTOR_WRAPPER (WRAPPER_NAME (erfc), _ZGVbN2v_erfc)
+VECTOR_WRAPPER (WRAPPER_NAME (tan), _ZGVbN2v_tan)

 #define VEC_INT_TYPE __m128i

--- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
@ -50,6 +50,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (erf), _ZGVdN4v_erf)
 VECTOR_WRAPPER (WRAPPER_NAME (tanh), _ZGVdN4v_tanh)
 VECTOR_WRAPPER (WRAPPER_NAME (asinh), _ZGVdN4v_asinh)
 VECTOR_WRAPPER (WRAPPER_NAME (erfc), _ZGVdN4v_erfc)
+VECTOR_WRAPPER (WRAPPER_NAME (tan), _ZGVdN4v_tan)

 #ifndef __ILP32__
 # define VEC_INT_TYPE __m256i
--- a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
@ -47,6 +47,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (erf), _ZGVcN4v_erf)
 VECTOR_WRAPPER (WRAPPER_NAME (tanh), _ZGVcN4v_tanh)
 VECTOR_WRAPPER (WRAPPER_NAME (asinh), _ZGVcN4v_asinh)
 VECTOR_WRAPPER (WRAPPER_NAME (erfc), _ZGVcN4v_erfc)
+VECTOR_WRAPPER (WRAPPER_NAME (tan), _ZGVcN4v_tan)

 #define VEC_INT_TYPE __m128i

--- a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
@ -47,6 +47,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (erf), _ZGVeN8v_erf)
 VECTOR_WRAPPER (WRAPPER_NAME (tanh), _ZGVeN8v_tanh)
 VECTOR_WRAPPER (WRAPPER_NAME (asinh), _ZGVeN8v_asinh)
 VECTOR_WRAPPER (WRAPPER_NAME (erfc), _ZGVeN8v_erfc)
+VECTOR_WRAPPER (WRAPPER_NAME (tan), _ZGVeN8v_tan)

 #ifndef __ILP32__
 # define VEC_INT_TYPE __m512i
--- a/sysdeps/x86_64/fpu/test-float-libmvec-tanf-avx.c
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-tanf-avx.c
@ -0,0 +1 @@
+#include "test-float-libmvec-tanf.c"
--- a/sysdeps/x86_64/fpu/test-float-libmvec-tanf-avx2.c
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-tanf-avx2.c
@ -0,0 +1 @@
+#include "test-float-libmvec-tanf.c"
--- a/sysdeps/x86_64/fpu/test-float-libmvec-tanf-avx512f.c
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-tanf-avx512f.c
@ -0,0 +1 @@
+#include "test-float-libmvec-tanf.c"
--- a/sysdeps/x86_64/fpu/test-float-libmvec-tanf.c
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-tanf.c
@ -0,0 +1,3 @@
+#define LIBMVEC_TYPE float  /* Element type paired with LIBMVEC_FUNC below.  */
+#define LIBMVEC_FUNC tanf  /* Function name under test.  */
+#include "test-vector-abi-arg1.h"  /* NOTE(review): presumably the one-argument ABI test template reading both macros -- confirm.  */
--- a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
@ -47,6 +47,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (erff), _ZGVeN16v_erff)
 VECTOR_WRAPPER (WRAPPER_NAME (tanhf), _ZGVeN16v_tanhf)
 VECTOR_WRAPPER (WRAPPER_NAME (asinhf), _ZGVeN16v_asinhf)
 VECTOR_WRAPPER (WRAPPER_NAME (erfcf), _ZGVeN16v_erfcf)
+VECTOR_WRAPPER (WRAPPER_NAME (tanf), _ZGVeN16v_tanf)

 #define VEC_INT_TYPE __m512i

--- a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
@ -47,6 +47,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (erff), _ZGVbN4v_erff)
 VECTOR_WRAPPER (WRAPPER_NAME (tanhf), _ZGVbN4v_tanhf)
 VECTOR_WRAPPER (WRAPPER_NAME (asinhf), _ZGVbN4v_asinhf)
 VECTOR_WRAPPER (WRAPPER_NAME (erfcf), _ZGVbN4v_erfcf)
+VECTOR_WRAPPER (WRAPPER_NAME (tanf), _ZGVbN4v_tanf)

 #define VEC_INT_TYPE __m128i

--- a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
@ -50,6 +50,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (erff), _ZGVdN8v_erff)
 VECTOR_WRAPPER (WRAPPER_NAME (tanhf), _ZGVdN8v_tanhf)
 VECTOR_WRAPPER (WRAPPER_NAME (asinhf), _ZGVdN8v_asinhf)
 VECTOR_WRAPPER (WRAPPER_NAME (erfcf), _ZGVdN8v_erfcf)
+VECTOR_WRAPPER (WRAPPER_NAME (tanf), _ZGVdN8v_tanf)

 /* Redefinition of wrapper to be compatible with _ZGVdN8vvv_sincosf.  */
 #undef VECTOR_WRAPPER_fFF
--- a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
@ -47,6 +47,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (erff), _ZGVcN8v_erff)
 VECTOR_WRAPPER (WRAPPER_NAME (tanhf), _ZGVcN8v_tanhf)
 VECTOR_WRAPPER (WRAPPER_NAME (asinhf), _ZGVcN8v_asinhf)
 VECTOR_WRAPPER (WRAPPER_NAME (erfcf), _ZGVcN8v_erfcf)
+VECTOR_WRAPPER (WRAPPER_NAME (tanf), _ZGVcN8v_tanf)

 #define VEC_INT_TYPE __m128i