x86-64: Add vector log10/log10f implementation to libmvec

Implement vectorized log10/log10f containing SSE, AVX, AVX2 and
AVX512 versions for libmvec as per vector ABI.  It also contains
accuracy and ABI tests for vector log10/log10f with regenerated ulps.

Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
This commit is contained in:
Sunil K Pandey 2021-12-29 09:23:37 -08:00
parent 2941a24f8c
commit 8f8566026d
50 changed files with 3758 additions and 1 deletions

View File

@ -219,4 +219,15 @@
#define __DECL_SIMD_atan2f32x
#define __DECL_SIMD_atan2f64x
#define __DECL_SIMD_atan2f128x
#define __DECL_SIMD_log10
#define __DECL_SIMD_log10f
#define __DECL_SIMD_log10l
#define __DECL_SIMD_log10f16
#define __DECL_SIMD_log10f32
#define __DECL_SIMD_log10f64
#define __DECL_SIMD_log10f128
#define __DECL_SIMD_log10f32x
#define __DECL_SIMD_log10f64x
#define __DECL_SIMD_log10f128x
#endif

View File

@ -104,7 +104,7 @@ __MATHCALL (ldexp,, (_Mdouble_ __x, int __exponent));
__MATHCALL_VEC (log,, (_Mdouble_ __x));
/* Base-ten logarithm of X. */
__MATHCALL (log10,, (_Mdouble_ __x));
__MATHCALL_VEC (log10,, (_Mdouble_ __x));
/* Break VALUE into integral and fractional parts. */
__MATHCALL (modf,, (_Mdouble_ __x, _Mdouble_ *__iptr)) __nonnull ((2));

View File

@ -54,6 +54,7 @@ GLIBC_2.35 _ZGVbN2v_cosh F
GLIBC_2.35 _ZGVbN2v_exp10 F
GLIBC_2.35 _ZGVbN2v_exp2 F
GLIBC_2.35 _ZGVbN2v_expm1 F
GLIBC_2.35 _ZGVbN2v_log10 F
GLIBC_2.35 _ZGVbN2v_sinh F
GLIBC_2.35 _ZGVbN2vv_atan2 F
GLIBC_2.35 _ZGVbN2vv_hypot F
@ -65,6 +66,7 @@ GLIBC_2.35 _ZGVbN4v_coshf F
GLIBC_2.35 _ZGVbN4v_exp10f F
GLIBC_2.35 _ZGVbN4v_exp2f F
GLIBC_2.35 _ZGVbN4v_expm1f F
GLIBC_2.35 _ZGVbN4v_log10f F
GLIBC_2.35 _ZGVbN4v_sinhf F
GLIBC_2.35 _ZGVbN4vv_atan2f F
GLIBC_2.35 _ZGVbN4vv_hypotf F
@ -76,6 +78,7 @@ GLIBC_2.35 _ZGVcN4v_cosh F
GLIBC_2.35 _ZGVcN4v_exp10 F
GLIBC_2.35 _ZGVcN4v_exp2 F
GLIBC_2.35 _ZGVcN4v_expm1 F
GLIBC_2.35 _ZGVcN4v_log10 F
GLIBC_2.35 _ZGVcN4v_sinh F
GLIBC_2.35 _ZGVcN4vv_atan2 F
GLIBC_2.35 _ZGVcN4vv_hypot F
@ -87,6 +90,7 @@ GLIBC_2.35 _ZGVcN8v_coshf F
GLIBC_2.35 _ZGVcN8v_exp10f F
GLIBC_2.35 _ZGVcN8v_exp2f F
GLIBC_2.35 _ZGVcN8v_expm1f F
GLIBC_2.35 _ZGVcN8v_log10f F
GLIBC_2.35 _ZGVcN8v_sinhf F
GLIBC_2.35 _ZGVcN8vv_atan2f F
GLIBC_2.35 _ZGVcN8vv_hypotf F
@ -98,6 +102,7 @@ GLIBC_2.35 _ZGVdN4v_cosh F
GLIBC_2.35 _ZGVdN4v_exp10 F
GLIBC_2.35 _ZGVdN4v_exp2 F
GLIBC_2.35 _ZGVdN4v_expm1 F
GLIBC_2.35 _ZGVdN4v_log10 F
GLIBC_2.35 _ZGVdN4v_sinh F
GLIBC_2.35 _ZGVdN4vv_atan2 F
GLIBC_2.35 _ZGVdN4vv_hypot F
@ -109,6 +114,7 @@ GLIBC_2.35 _ZGVdN8v_coshf F
GLIBC_2.35 _ZGVdN8v_exp10f F
GLIBC_2.35 _ZGVdN8v_exp2f F
GLIBC_2.35 _ZGVdN8v_expm1f F
GLIBC_2.35 _ZGVdN8v_log10f F
GLIBC_2.35 _ZGVdN8v_sinhf F
GLIBC_2.35 _ZGVdN8vv_atan2f F
GLIBC_2.35 _ZGVdN8vv_hypotf F
@ -120,6 +126,7 @@ GLIBC_2.35 _ZGVeN16v_coshf F
GLIBC_2.35 _ZGVeN16v_exp10f F
GLIBC_2.35 _ZGVeN16v_exp2f F
GLIBC_2.35 _ZGVeN16v_expm1f F
GLIBC_2.35 _ZGVeN16v_log10f F
GLIBC_2.35 _ZGVeN16v_sinhf F
GLIBC_2.35 _ZGVeN16vv_atan2f F
GLIBC_2.35 _ZGVeN16vv_hypotf F
@ -131,6 +138,7 @@ GLIBC_2.35 _ZGVeN8v_cosh F
GLIBC_2.35 _ZGVeN8v_exp10 F
GLIBC_2.35 _ZGVeN8v_exp2 F
GLIBC_2.35 _ZGVeN8v_expm1 F
GLIBC_2.35 _ZGVeN8v_log10 F
GLIBC_2.35 _ZGVeN8v_sinh F
GLIBC_2.35 _ZGVeN8vv_atan2 F
GLIBC_2.35 _ZGVeN8vv_hypot F

View File

@ -102,6 +102,10 @@
# define __DECL_SIMD_atan2 __DECL_SIMD_x86_64
# undef __DECL_SIMD_atan2f
# define __DECL_SIMD_atan2f __DECL_SIMD_x86_64
# undef __DECL_SIMD_log10
# define __DECL_SIMD_log10 __DECL_SIMD_x86_64
# undef __DECL_SIMD_log10f
# define __DECL_SIMD_log10f __DECL_SIMD_x86_64
# endif
#endif

View File

@ -50,6 +50,8 @@
!GCC$ builtin (cbrtf) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (atan2) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (atan2f) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (log10) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (log10f) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (cos) attributes simd (notinbranch) if('x32')
!GCC$ builtin (cosf) attributes simd (notinbranch) if('x32')
@ -85,3 +87,5 @@
!GCC$ builtin (cbrtf) attributes simd (notinbranch) if('x32')
!GCC$ builtin (atan2) attributes simd (notinbranch) if('x32')
!GCC$ builtin (atan2f) attributes simd (notinbranch) if('x32')
!GCC$ builtin (log10) attributes simd (notinbranch) if('x32')
!GCC$ builtin (log10f) attributes simd (notinbranch) if('x32')

View File

@ -35,6 +35,7 @@ libmvec-funcs = \
expm1 \
hypot \
log \
log10 \
pow \
sin \
sincos \

View File

@ -22,6 +22,7 @@ libmvec {
_ZGVbN2v_exp10; _ZGVcN4v_exp10; _ZGVdN4v_exp10; _ZGVeN8v_exp10;
_ZGVbN2v_exp2; _ZGVcN4v_exp2; _ZGVdN4v_exp2; _ZGVeN8v_exp2;
_ZGVbN2v_expm1; _ZGVcN4v_expm1; _ZGVdN4v_expm1; _ZGVeN8v_expm1;
_ZGVbN2v_log10; _ZGVcN4v_log10; _ZGVdN4v_log10; _ZGVeN8v_log10;
_ZGVbN2v_sinh; _ZGVcN4v_sinh; _ZGVdN4v_sinh; _ZGVeN8v_sinh;
_ZGVbN2vv_atan2; _ZGVcN4vv_atan2; _ZGVdN4vv_atan2; _ZGVeN8vv_atan2;
_ZGVbN2vv_hypot; _ZGVcN4vv_hypot; _ZGVdN4vv_hypot; _ZGVeN8vv_hypot;
@ -33,6 +34,7 @@ libmvec {
_ZGVbN4v_exp10f; _ZGVcN8v_exp10f; _ZGVdN8v_exp10f; _ZGVeN16v_exp10f;
_ZGVbN4v_exp2f; _ZGVcN8v_exp2f; _ZGVdN8v_exp2f; _ZGVeN16v_exp2f;
_ZGVbN4v_expm1f; _ZGVcN8v_expm1f; _ZGVdN8v_expm1f; _ZGVeN16v_expm1f;
_ZGVbN4v_log10f; _ZGVcN8v_log10f; _ZGVdN8v_log10f; _ZGVeN16v_log10f;
_ZGVbN4v_sinhf; _ZGVcN8v_sinhf; _ZGVdN8v_sinhf; _ZGVeN16v_sinhf;
_ZGVbN4vv_atan2f; _ZGVcN8vv_atan2f; _ZGVdN8vv_atan2f; _ZGVeN16vv_atan2f;
_ZGVbN4vv_hypotf; _ZGVcN8vv_hypotf; _ZGVdN8vv_hypotf; _ZGVeN16vv_hypotf;

View File

@ -1641,6 +1641,26 @@ float: 2
float128: 1
ldouble: 1
Function: "log10_vlen16":
float: 1
Function: "log10_vlen2":
double: 1
Function: "log10_vlen4":
double: 1
float: 1
Function: "log10_vlen4_avx2":
double: 1
Function: "log10_vlen8":
double: 1
float: 1
Function: "log10_vlen8_avx2":
float: 1
Function: "log1p":
double: 1
float: 1

View File

@ -0,0 +1,20 @@
/* SSE2 version of vectorized log10, vector length is 2.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* Rename the entry symbol before including the generic core file, so the
   code it defines is emitted as _ZGVbN2v_log10_sse2 instead of the public
   name (presumably so the ifunc selector can refer to it as the baseline
   variant -- confirm against ifunc-mathvec-sse4_1.h).  */
#define _ZGVbN2v_log10 _ZGVbN2v_log10_sse2
#include "../svml_d_log102_core.S"

View File

@ -0,0 +1,27 @@
/* Multiple versions of vectorized log10, vector length is 2.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* SYMBOL_NAME must be defined before the selector header is included;
   REDIRECT_NAME and IFUNC_SELECTOR are not defined in this file and
   presumably come from that header.  */
#define SYMBOL_NAME _ZGVbN2v_log10
#include "ifunc-mathvec-sse4_1.h"
/* Define _ZGVbN2v_log10 as an indirect function whose implementation is
   chosen by IFUNC_SELECTOR ().  */
libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
#ifdef SHARED
/* Shared builds only: declare the hidden-visibility __GI_ alias of the
   public symbol.  */
__hidden_ver1 (_ZGVbN2v_log10, __GI__ZGVbN2v_log10, __redirect__ZGVbN2v_log10)
__attribute__ ((visibility ("hidden")));
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,20 @@
/* SSE version of vectorized log10, vector length is 4.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* Rename the entry symbol before including the generic core file, so the
   code it defines is emitted as _ZGVdN4v_log10_sse_wrapper instead of the
   public name (presumably the fallback used by the ifunc selector when
   AVX2 is unavailable -- confirm against ifunc-mathvec-avx2.h).  */
#define _ZGVdN4v_log10 _ZGVdN4v_log10_sse_wrapper
#include "../svml_d_log104_core.S"

View File

@ -0,0 +1,27 @@
/* Multiple versions of vectorized log10, vector length is 4.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* SYMBOL_NAME must be defined before the selector header is included;
   REDIRECT_NAME and IFUNC_SELECTOR are not defined in this file and
   presumably come from that header.  */
#define SYMBOL_NAME _ZGVdN4v_log10
#include "ifunc-mathvec-avx2.h"
/* Define _ZGVdN4v_log10 as an indirect function whose implementation is
   chosen by IFUNC_SELECTOR ().  */
libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
#ifdef SHARED
/* Shared builds only: declare the hidden-visibility __GI_ alias of the
   public symbol.  */
__hidden_ver1 (_ZGVdN4v_log10, __GI__ZGVdN4v_log10, __redirect__ZGVdN4v_log10)
__attribute__ ((visibility ("hidden")));
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,20 @@
/* AVX2 version of vectorized log10, vector length is 8.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* Rename the entry symbol before including the generic core file, so the
   code it defines is emitted as _ZGVeN8v_log10_avx2_wrapper instead of the
   public name (presumably the fallback used by the ifunc selector when
   the AVX-512 variant cannot be used -- confirm against
   ifunc-mathvec-avx512-skx.h).  */
#define _ZGVeN8v_log10 _ZGVeN8v_log10_avx2_wrapper
#include "../svml_d_log108_core.S"

View File

@ -0,0 +1,27 @@
/* Multiple versions of vectorized log10, vector length is 8.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* SYMBOL_NAME must be defined before the selector header is included;
   REDIRECT_NAME and IFUNC_SELECTOR are not defined in this file and
   presumably come from that header.  */
#define SYMBOL_NAME _ZGVeN8v_log10
#include "ifunc-mathvec-avx512-skx.h"
/* Define _ZGVeN8v_log10 as an indirect function whose implementation is
   chosen by IFUNC_SELECTOR ().  */
libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
#ifdef SHARED
/* Shared builds only: declare the hidden-visibility __GI_ alias of the
   public symbol.  */
__hidden_ver1 (_ZGVeN8v_log10, __GI__ZGVeN8v_log10, __redirect__ZGVeN8v_log10)
__attribute__ ((visibility ("hidden")));
#endif

View File

@ -0,0 +1,299 @@
/* Function log10 vectorized with AVX-512.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/*
* ALGORITHM DESCRIPTION:
*
* Get short reciprocal approximation Rcp ~ 1/mantissa(x)
* R = Rcp*x - 1.0
* log10(x) = k*log10(2.0) - log10(Rcp) + poly_approximation(R)
* log10(Rcp) is tabulated
*
*
*/
/* Byte offsets of each field inside the data table
 * __svml_dlog10_data_internal_avx512 (defined at the end of this file).
 * Log_tbl holds 16 doubles (128 bytes); every other field is one 64-byte
 * vector of 8 doubles, so consecutive offsets after Log_tbl differ by 64.
 * These values must stay in sync with the order of the .quad data below.
 */
#define Log_tbl 0
#define One 128
#define C075 192
#define poly_coeff9 256
#define poly_coeff8 320
#define poly_coeff7 384
#define poly_coeff6 448
#define poly_coeff5 512
#define poly_coeff4 576
#define poly_coeff3 640
#define poly_coeff2 704
#define poly_coeff1 768
#define L2 832
#include <sysdep.h>
.text
.section .text.evex512,"ax",@progbits
/* _ZGVeN8v_log10_skx -- log10 of 8 packed doubles using AVX-512.
 *   In:   zmm0 = x (kept in zmm7 for the special-input path)
 *   Out:  zmm0 = log10(x)
 * All lanes are computed inline; lanes flagged by the vfpclasspd test are
 * then recomputed one at a time by calling scalar log10.
 * Stack frame (rsp aligned to 64, 192 bytes reserved), special path only:
 *   (%rsp), 8(%rsp), 16(%rsp) = saved r14, r13, r12
 *   64(%rsp)  = copy of the input vector
 *   128(%rsp) = result vector, patched lane by lane
 */
ENTRY(_ZGVeN8v_log10_skx)
/* Keep the caller's rsp in rbp, then align rsp to 64 bytes and reserve
   192 bytes of scratch space.  */
pushq %rbp
cfi_def_cfa_offset(16)
movq %rsp, %rbp
cfi_def_cfa(6, 16)
cfi_offset(6, -16)
andq $-64, %rsp
subq $192, %rsp
/* zmm7 = x; zmm0 is reused below for the exponent and the result.  */
vmovaps %zmm0, %zmm7
/* zmm6 = Mantissa(x) */
vgetmantpd $8, {sae}, %zmm7, %zmm6
/* zmm3 = 1.0 */
vmovups One+__svml_dlog10_data_internal_avx512(%rip), %zmm3
vmovups poly_coeff5+__svml_dlog10_data_internal_avx512(%rip), %zmm12
vmovups poly_coeff3+__svml_dlog10_data_internal_avx512(%rip), %zmm13
/* Start polynomial evaluation */
vmovups poly_coeff9+__svml_dlog10_data_internal_avx512(%rip), %zmm10
vmovups poly_coeff8+__svml_dlog10_data_internal_avx512(%rip), %zmm1
vmovups poly_coeff7+__svml_dlog10_data_internal_avx512(%rip), %zmm11
vmovups poly_coeff6+__svml_dlog10_data_internal_avx512(%rip), %zmm14
/* Prepare exponent correction: DblRcp<0.75? */
vmovups C075+__svml_dlog10_data_internal_avx512(%rip), %zmm2
/* Table lookup */
/* zmm5 = first 8 entries of Log_tbl; the second 8 are supplied as the
   memory operand of vpermt2pd below.  */
vmovups __svml_dlog10_data_internal_avx512(%rip), %zmm5
/* GetExp(x) */
vgetexppd {sae}, %zmm7, %zmm0
/* DblRcp ~ 1/Mantissa */
vrcp14pd %zmm6, %zmm8
/* x<=0? */
/* Class mask 94 (0x5e) selects +0, -0, +Inf, -Inf and negative finite
   inputs; k0 gets one bit per flagged lane.  */
vfpclasspd $94, %zmm7, %k0
/* round DblRcp to 4 fractional bits (RN mode, no Precision exception) */
vrndscalepd $88, {sae}, %zmm8, %zmm4
vmovups poly_coeff4+__svml_dlog10_data_internal_avx512(%rip), %zmm8
/* edx = bitmask of lanes that need the scalar fallback */
kmovw %k0, %edx
/* Reduced argument: R = DblRcp*Mantissa - 1 */
vfmsub213pd {rn-sae}, %zmm3, %zmm4, %zmm6
/* k1 = lanes where DblRcp < 0.75 */
vcmppd $17, {sae}, %zmm2, %zmm4, %k1
/* zmm8 = c5*R + c4 */
vfmadd231pd {rn-sae}, %zmm6, %zmm12, %zmm8
vmovups poly_coeff2+__svml_dlog10_data_internal_avx512(%rip), %zmm12
/* zmm1 = c9*R + c8 */
vfmadd231pd {rn-sae}, %zmm6, %zmm10, %zmm1
/* zmm14 = c7*R + c6 */
vfmadd231pd {rn-sae}, %zmm6, %zmm11, %zmm14
vmovups poly_coeff1+__svml_dlog10_data_internal_avx512(%rip), %zmm2
/* R^2 */
vmulpd {rn-sae}, %zmm6, %zmm6, %zmm15
/* zmm12 = c3*R + c2 */
vfmadd231pd {rn-sae}, %zmm6, %zmm13, %zmm12
/* Prepare table index */
vpsrlq $48, %zmm4, %zmm9
/* add 1 to Expon if DblRcp<0.75 */
vaddpd {rn-sae}, %zmm3, %zmm0, %zmm0{%k1}
/* zmm13 = R^4 */
vmulpd {rn-sae}, %zmm15, %zmm15, %zmm13
/* zmm1 = R^2*(c9*R + c8) + (c7*R + c6) */
vfmadd213pd {rn-sae}, %zmm14, %zmm15, %zmm1
/* zmm8 = R^2*(c5*R + c4) + (c3*R + c2) */
vfmadd213pd {rn-sae}, %zmm12, %zmm15, %zmm8
/* zmm5 = Log_tbl[index], selected from the two 8-entry halves */
vpermt2pd Log_tbl+64+__svml_dlog10_data_internal_avx512(%rip), %zmm9, %zmm5
/* polynomial */
vfmadd213pd {rn-sae}, %zmm8, %zmm13, %zmm1
/* zmm1 = zmm1*R + c1 */
vfmadd213pd {rn-sae}, %zmm2, %zmm6, %zmm1
/* zmm6 = zmm1*R + table value */
vfmadd213pd {rn-sae}, %zmm5, %zmm1, %zmm6
vmovups L2+__svml_dlog10_data_internal_avx512(%rip), %zmm1
/* result: zmm0 = Expon*L2 + zmm6 */
vfmadd213pd {rn-sae}, %zmm6, %zmm1, %zmm0
/* Any lane flagged by vfpclasspd?  */
testl %edx, %edx
/* Go to special inputs processing branch */
jne L(SPECIAL_VALUES_BRANCH)
# LOE rbx r12 r13 r14 r15 edx zmm0 zmm7
/* Restore registers
* and exit the function
*/
L(EXIT):
movq %rbp, %rsp
popq %rbp
cfi_def_cfa(7, 8)
cfi_restore(6)
ret
cfi_def_cfa(6, 16)
cfi_offset(6, -16)
/* Branch to process
* special inputs
*/
L(SPECIAL_VALUES_BRANCH):
/* Spill the input and the vector result so individual lanes can be read
   and overwritten through memory.  */
vmovups %zmm7, 64(%rsp)
vmovups %zmm0, 128(%rsp)
# LOE rbx r12 r13 r14 r15 edx zmm0
/* eax = 0: initial lane index */
xorl %eax, %eax
# LOE rbx r12 r13 r14 r15 eax edx
vzeroupper
/* Save r12-r14 in the frame; they carry the loop state across the
   scalar calls (r12d = lane index, r13d = lane mask, r14 = lane offset).  */
movq %r12, 16(%rsp)
/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
movl %eax, %r12d
movq %r13, 8(%rsp)
/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
movl %edx, %r13d
movq %r14, (%rsp)
/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
# LOE rbx r15 r12d r13d
/* Range mask
* bits check
*/
L(RANGEMASK_CHECK):
/* CF = bit r12d of the lane mask in r13d */
btl %r12d, %r13d
/* Call scalar math function */
jc L(SCALAR_MATH_CALL)
# LOE rbx r15 r12d r13d
/* Special inputs
* processing loop
*/
L(SPECIAL_VALUES_LOOP):
/* Advance to the next lane; 8 lanes in total.  */
incl %r12d
cmpl $8, %r12d
/* Check bits in range mask */
jl L(RANGEMASK_CHECK)
# LOE rbx r15 r12d r13d
/* All lanes visited: restore the saved registers and reload the patched
   result vector.  */
movq 16(%rsp), %r12
cfi_restore(12)
movq 8(%rsp), %r13
cfi_restore(13)
movq (%rsp), %r14
cfi_restore(14)
vmovups 128(%rsp), %zmm0
/* Go to exit */
jmp L(EXIT)
/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
# LOE rbx r12 r13 r14 r15 zmm0
/* Scalar math function call
* to process special input
*/
L(SCALAR_MATH_CALL):
/* r14 = lane index (zero-extended) used to address the spilled vectors */
movl %r12d, %r14d
/* xmm0 = input lane r14 */
movsd 64(%rsp,%r14,8), %xmm0
call log10@PLT
# LOE rbx r14 r15 r12d r13d xmm0
/* Overwrite result lane r14 with the scalar result */
movsd %xmm0, 128(%rsp,%r14,8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)
# LOE rbx r15 r12d r13d
END(_ZGVeN8v_log10_skx)
/* Read-only constants for _ZGVeN8v_log10_skx.  The table is 64-byte
   aligned and its field order must match the offset macros (Log_tbl,
   One, C075, poly_coeff9..poly_coeff1, L2) defined at the top of this
   file.  */
.section .rodata, "a"
.align 64
/* Layout description of the table below.  It is only seen by the
   compiler/assembler if __svml_dlog10_data_internal_avx512_typedef is
   defined, which the visible source never does.  */
#ifdef __svml_dlog10_data_internal_avx512_typedef
typedef unsigned int VUINT32;
typedef struct {
__declspec(align(64)) VUINT32 Log_tbl[16][2];
__declspec(align(64)) VUINT32 One[8][2];
__declspec(align(64)) VUINT32 C075[8][2];
__declspec(align(64)) VUINT32 poly_coeff9[8][2];
__declspec(align(64)) VUINT32 poly_coeff8[8][2];
__declspec(align(64)) VUINT32 poly_coeff7[8][2];
__declspec(align(64)) VUINT32 poly_coeff6[8][2];
__declspec(align(64)) VUINT32 poly_coeff5[8][2];
__declspec(align(64)) VUINT32 poly_coeff4[8][2];
__declspec(align(64)) VUINT32 poly_coeff3[8][2];
__declspec(align(64)) VUINT32 poly_coeff2[8][2];
__declspec(align(64)) VUINT32 poly_coeff1[8][2];
__declspec(align(64)) VUINT32 L2[8][2];
} __svml_dlog10_data_internal_avx512;
#endif
__svml_dlog10_data_internal_avx512:
/*== Log_tbl ==*/
/* 16 doubles, looked up with vpermt2pd using an index derived from the
   rounded reciprocal: first 8 entries at offset 0, last 8 at offset 64.  */
.quad 0x0000000000000000
.quad 0xbf9af5f92b00e610
.quad 0xbfaa30a9d609efea
.quad 0xbfb31b3055c47118
.quad 0xbfb8cf183886480d
.quad 0xbfbe3bc1ab0e19fe
.quad 0xbfc1b3e71ec94f7b
.quad 0xbfc42c7e7fe3fc02
.quad 0x3fbffbfc2bbc7803
.quad 0x3fbb721cd17157e3
.quad 0x3fb715d0ce367afc
.quad 0x3fb2e3a740b7800f
.quad 0x3fadb11ed766abf4
.quad 0x3fa5e3966b7e9295
.quad 0x3f9cb38fccd8bfdb
.quad 0x3f8c3d0837784c41
/*== One ==*/
/* 1.0 broadcast to 8 lanes */
.align 64
.quad 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000
/*== 0.75 ==*/
/* threshold compared against the rounded reciprocal (field C075) */
.align 64
.quad 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000
/*== poly_coeff9 ==*/
.align 64
.quad 0x3fa8c2d828480370, 0x3fa8c2d828480370, 0x3fa8c2d828480370, 0x3fa8c2d828480370, 0x3fa8c2d828480370, 0x3fa8c2d828480370, 0x3fa8c2d828480370, 0x3fa8c2d828480370
/*== poly_coeff8 ==*/
.align 64
.quad 0xbfabd80d96029814, 0xbfabd80d96029814, 0xbfabd80d96029814, 0xbfabd80d96029814, 0xbfabd80d96029814, 0xbfabd80d96029814, 0xbfabd80d96029814, 0xbfabd80d96029814
/*== poly_coeff7 ==*/
.align 64
.quad 0x3fafc3f6f38b58a2, 0x3fafc3f6f38b58a2, 0x3fafc3f6f38b58a2, 0x3fafc3f6f38b58a2, 0x3fafc3f6f38b58a2, 0x3fafc3f6f38b58a2, 0x3fafc3f6f38b58a2, 0x3fafc3f6f38b58a2
/*== poly_coeff6 ==*/
.align 64
.quad 0xbfb287a63464dc80, 0xbfb287a63464dc80, 0xbfb287a63464dc80, 0xbfb287a63464dc80, 0xbfb287a63464dc80, 0xbfb287a63464dc80, 0xbfb287a63464dc80, 0xbfb287a63464dc80
/*== poly_coeff5 ==*/
.align 64
.quad 0x3fb63c62777f27d9, 0x3fb63c62777f27d9, 0x3fb63c62777f27d9, 0x3fb63c62777f27d9, 0x3fb63c62777f27d9, 0x3fb63c62777f27d9, 0x3fb63c62777f27d9, 0x3fb63c62777f27d9
/*== poly_coeff4 ==*/
.align 64
.quad 0xbfbbcb7b153c06a3, 0xbfbbcb7b153c06a3, 0xbfbbcb7b153c06a3, 0xbfbbcb7b153c06a3, 0xbfbbcb7b153c06a3, 0xbfbbcb7b153c06a3, 0xbfbbcb7b153c06a3, 0xbfbbcb7b153c06a3
/*== poly_coeff3 ==*/
.align 64
.quad 0x3fc287a7636f428c, 0x3fc287a7636f428c, 0x3fc287a7636f428c, 0x3fc287a7636f428c, 0x3fc287a7636f428c, 0x3fc287a7636f428c, 0x3fc287a7636f428c, 0x3fc287a7636f428c
/*== poly_coeff2 ==*/
.align 64
.quad 0xbfcbcb7b1526e4db, 0xbfcbcb7b1526e4db, 0xbfcbcb7b1526e4db, 0xbfcbcb7b1526e4db, 0xbfcbcb7b1526e4db, 0xbfcbcb7b1526e4db, 0xbfcbcb7b1526e4db, 0xbfcbcb7b1526e4db
/*== poly_coeff1 ==*/
/* approximately log10(e) = 0.43429... */
.align 64
.quad 0x3fdbcb7b1526e50e, 0x3fdbcb7b1526e50e, 0x3fdbcb7b1526e50e, 0x3fdbcb7b1526e50e, 0x3fdbcb7b1526e50e, 0x3fdbcb7b1526e50e, 0x3fdbcb7b1526e50e, 0x3fdbcb7b1526e50e
/*== L2 ==*/
/* approximately log10(2) = 0.30103..., multiplied by the exponent */
.align 64
.quad 0x3fd34413509f79ff, 0x3fd34413509f79ff, 0x3fd34413509f79ff, 0x3fd34413509f79ff, 0x3fd34413509f79ff, 0x3fd34413509f79ff, 0x3fd34413509f79ff, 0x3fd34413509f79ff
.align 64
/* Record the symbol type and size for the table object.  */
.type __svml_dlog10_data_internal_avx512,@object
.size __svml_dlog10_data_internal_avx512,.-__svml_dlog10_data_internal_avx512

View File

@ -0,0 +1,20 @@
/* AVX2 version of vectorized log10f, vector length is 16.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* Rename the entry symbol before including the generic core file, so the
   code it defines is emitted as _ZGVeN16v_log10f_avx2_wrapper instead of
   the public name (presumably the fallback used by the ifunc selector
   when the AVX-512 variant cannot be used -- confirm against
   ifunc-mathvec-avx512-skx.h).  */
#define _ZGVeN16v_log10f _ZGVeN16v_log10f_avx2_wrapper
#include "../svml_s_log10f16_core.S"

View File

@ -0,0 +1,28 @@
/* Multiple versions of vectorized log10f, vector length is 16.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* SYMBOL_NAME must be defined before the selector header is included;
   REDIRECT_NAME and IFUNC_SELECTOR are not defined in this file and
   presumably come from that header.  */
#define SYMBOL_NAME _ZGVeN16v_log10f
#include "ifunc-mathvec-avx512-skx.h"
/* Define _ZGVeN16v_log10f as an indirect function whose implementation is
   chosen by IFUNC_SELECTOR ().  */
libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
#ifdef SHARED
/* Shared builds only: declare the hidden-visibility __GI_ alias of the
   public symbol.  */
__hidden_ver1 (_ZGVeN16v_log10f, __GI__ZGVeN16v_log10f,
__redirect__ZGVeN16v_log10f)
__attribute__ ((visibility ("hidden")));
#endif

View File

@ -0,0 +1,238 @@
/* Function log10f vectorized with AVX-512.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
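/*
* ALGORITHM DESCRIPTION:
*
* Get short reciprocal approximation Rcp ~ 1/mantissa(x)
* R = Rcp*x - 1.0
* log10(x) = k*log10(2.0) - log10(Rcp) + poly_approximation(R)
* log10(Rcp) is tabulated
*
* NOTE: the AVX-512 code in this file does not compute a reciprocal.
* It uses R = mantissa(x) - 1.0 and evaluates
* log10(x) = k*log10(2.0) + R*(c1 + R*(c2 + R*(c3 + R*c4))),
* where c1..c4 are read from 16-entry tables indexed by the leading
* mantissa bits.
*
*/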
/* Byte offsets of each field inside the data table
 * __svml_slog10_data_internal_avx512 (defined at the end of this file).
 * Every field is one 64-byte vector of 16 single-precision values, so
 * consecutive offsets differ by 64.  These values must stay in sync with
 * the order of the .long data below.
 */
#define One 0
#define coeff4 64
#define coeff3 128
#define coeff2 192
#define coeff1 256
#define L2 320
#include <sysdep.h>
.text
/* AVX-512 (EVEX-encoded) kernels live in .text.evex512, the same section
   name used by the double-precision log10 kernel; the previous spelling
   ".text.exex512" was a typo that put this function in a stray section.  */
.section .text.evex512,"ax",@progbits
/* _ZGVeN16v_log10f_skx -- log10f of 16 packed floats using AVX-512.
 *   In:   zmm0 = x (left untouched until the result is copied in at EXIT)
 *   Out:  zmm0 = log10f(x)
 * All lanes are computed inline into zmm1; lanes flagged by the
 * vfpclassps test are then recomputed one at a time by calling scalar
 * log10f.
 * Stack frame (rsp aligned to 64, 192 bytes reserved), special path only:
 *   (%rsp), 8(%rsp), 16(%rsp) = saved r14, r13, r12
 *   64(%rsp)  = copy of the input vector
 *   128(%rsp) = result vector, patched lane by lane
 */
ENTRY(_ZGVeN16v_log10f_skx)
/* Keep the caller's rsp in rbp, then align rsp to 64 bytes and reserve
   192 bytes of scratch space.  */
pushq %rbp
cfi_def_cfa_offset(16)
movq %rsp, %rbp
cfi_def_cfa(6, 16)
cfi_offset(6, -16)
andq $-64, %rsp
subq $192, %rsp
/* zmm3 = mantissa of x, normalized to [0.75, 1.5) */
vgetmantps $11, {sae}, %zmm0, %zmm3
/* zmm1 = 1.0 (field One is at offset 0) */
vmovups __svml_slog10_data_internal_avx512(%rip), %zmm1
/* zmm5 = GetExp(x) */
vgetexpps {sae}, %zmm0, %zmm5
vmovups L2+__svml_slog10_data_internal_avx512(%rip), %zmm10
/* zmm7 = table index: leading mantissa bits moved to the low bits of
   each element (vpermps uses the low 4 bits to pick 1 of 16 entries) */
vpsrld $19, %zmm3, %zmm7
/* zmm6 = GetExp(mantissa) */
vgetexpps {sae}, %zmm3, %zmm6
/* R = mantissa - 1.0 */
vsubps {rn-sae}, %zmm1, %zmm3, %zmm11
/* Per-lane coefficient lookup: zmm1 = c4, zmm2 = c3, zmm4 = c2, zmm8 = c1 */
vpermps coeff4+__svml_slog10_data_internal_avx512(%rip), %zmm7, %zmm1
vpermps coeff3+__svml_slog10_data_internal_avx512(%rip), %zmm7, %zmm2
/* k = GetExp(x) - GetExp(mantissa) */
vsubps {rn-sae}, %zmm6, %zmm5, %zmm9
vpermps coeff2+__svml_slog10_data_internal_avx512(%rip), %zmm7, %zmm4
vpermps coeff1+__svml_slog10_data_internal_avx512(%rip), %zmm7, %zmm8
/* x<=0? */
/* Class mask 94 (0x5e) selects +0, -0, +Inf, -Inf and negative finite
   inputs; k0 gets one bit per flagged lane.  */
vfpclassps $94, %zmm0, %k0
/* zmm1 = c4*R + c3 */
vfmadd213ps {rn-sae}, %zmm2, %zmm11, %zmm1
/* zmm12 = k*L2 */
vmulps {rn-sae}, %zmm10, %zmm9, %zmm12
/* zmm1 = zmm1*R + c2 */
vfmadd213ps {rn-sae}, %zmm4, %zmm11, %zmm1
/* edx = bitmask of lanes that need the scalar fallback */
kmovw %k0, %edx
/* zmm1 = zmm1*R + c1 */
vfmadd213ps {rn-sae}, %zmm8, %zmm11, %zmm1
/* result: zmm1 = zmm1*R + k*L2 */
vfmadd213ps {rn-sae}, %zmm12, %zmm11, %zmm1
/* Any lane flagged by vfpclassps?  */
testl %edx, %edx
/* Go to special inputs processing branch */
jne L(SPECIAL_VALUES_BRANCH)
# LOE rbx r12 r13 r14 r15 edx zmm0 zmm1
/* Restore registers
* and exit the function
*/
L(EXIT):
/* The result is built in zmm1; move it to the return register.  */
vmovaps %zmm1, %zmm0
movq %rbp, %rsp
popq %rbp
cfi_def_cfa(7, 8)
cfi_restore(6)
ret
cfi_def_cfa(6, 16)
cfi_offset(6, -16)
/* Branch to process
* special inputs
*/
L(SPECIAL_VALUES_BRANCH):
/* Spill the input and the vector result so individual lanes can be read
   and overwritten through memory.  */
vmovups %zmm0, 64(%rsp)
vmovups %zmm1, 128(%rsp)
# LOE rbx r12 r13 r14 r15 edx zmm1
/* eax = 0: initial lane index */
xorl %eax, %eax
# LOE rbx r12 r13 r14 r15 eax edx
vzeroupper
/* Save r12-r14 in the frame; they carry the loop state across the
   scalar calls (r12d = lane index, r13d = lane mask, r14 = lane offset).  */
movq %r12, 16(%rsp)
/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
movl %eax, %r12d
movq %r13, 8(%rsp)
/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
movl %edx, %r13d
movq %r14, (%rsp)
/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
# LOE rbx r15 r12d r13d
/* Range mask
* bits check
*/
L(RANGEMASK_CHECK):
/* CF = bit r12d of the lane mask in r13d */
btl %r12d, %r13d
/* Call scalar math function */
jc L(SCALAR_MATH_CALL)
# LOE rbx r15 r12d r13d
/* Special inputs
* processing loop
*/
L(SPECIAL_VALUES_LOOP):
/* Advance to the next lane; 16 lanes in total.  */
incl %r12d
cmpl $16, %r12d
/* Check bits in range mask */
jl L(RANGEMASK_CHECK)
# LOE rbx r15 r12d r13d
/* All lanes visited: restore the saved registers and reload the patched
   result vector.  */
movq 16(%rsp), %r12
cfi_restore(12)
movq 8(%rsp), %r13
cfi_restore(13)
movq (%rsp), %r14
cfi_restore(14)
vmovups 128(%rsp), %zmm1
/* Go to exit */
jmp L(EXIT)
/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
# LOE rbx r12 r13 r14 r15 zmm1
/* Scalar math function call
* to process special input
*/
L(SCALAR_MATH_CALL):
/* r14 = lane index (zero-extended) used to address the spilled vectors */
movl %r12d, %r14d
/* xmm0 = input lane r14 */
movss 64(%rsp,%r14,4), %xmm0
call log10f@PLT
# LOE rbx r14 r15 r12d r13d xmm0
/* Overwrite result lane r14 with the scalar result */
movss %xmm0, 128(%rsp,%r14,4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)
# LOE rbx r15 r12d r13d
END(_ZGVeN16v_log10f_skx)
/* Read-only constants for _ZGVeN16v_log10f_skx.  The table is 64-byte
   aligned and its field order must match the offset macros (One, coeff4,
   coeff3, coeff2, coeff1, L2) defined at the top of this file.  */
.section .rodata, "a"
.align 64
/* Layout description of the table below.  It is only seen by the
   compiler/assembler if __svml_slog10_data_internal_avx512_typedef is
   defined, which the visible source never does.  */
#ifdef __svml_slog10_data_internal_avx512_typedef
typedef unsigned int VUINT32;
typedef struct {
__declspec(align(64)) VUINT32 One[16][1];
__declspec(align(64)) VUINT32 coeff4[16][1];
__declspec(align(64)) VUINT32 coeff3[16][1];
__declspec(align(64)) VUINT32 coeff2[16][1];
__declspec(align(64)) VUINT32 coeff1[16][1];
__declspec(align(64)) VUINT32 L2[16][1];
} __svml_slog10_data_internal_avx512;
#endif
__svml_slog10_data_internal_avx512:
/*== One ==*/
/* 1.0f broadcast to 16 lanes */
.long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
/*== coeff4: 16 values of c4, one selected per lane by vpermps ==*/
.align 64
.long 0xbdc9ae9b, 0xbda6fcf4
.long 0xbd8bac76, 0xbd6bca30
.long 0xbd48a99b, 0xbd2c0a9f
.long 0xbd1480db, 0xbd00faf2
.long 0xbe823aa9, 0xbe656348
.long 0xbe4afbb9, 0xbe346895
.long 0xbe20ffff, 0xbe103a0b
.long 0xbe01a91c, 0xbde9e84e
/*== coeff3: 16 values of c3, one selected per lane by vpermps ==*/
.align 64
.long 0x3e13d888, 0x3e10a87c
.long 0x3e0b95c3, 0x3e057f0b
.long 0x3dfde038, 0x3df080d9
.long 0x3de34c1e, 0x3dd68333
.long 0x3dac6e8e, 0x3dd54a51
.long 0x3df30f40, 0x3e04235d
.long 0x3e0b7033, 0x3e102c90
.long 0x3e12ebad, 0x3e141ff8
/*== coeff2: 16 values of c2, one selected per lane by vpermps ==*/
.align 64
.long 0xbe5e5a9b, 0xbe5e2677
.long 0xbe5d83f5, 0xbe5c6016
.long 0xbe5abd0b, 0xbe58a6fd
.long 0xbe562e02, 0xbe5362f8
.long 0xbe68e27c, 0xbe646747
.long 0xbe619a73, 0xbe5ff05a
.long 0xbe5f0570, 0xbe5e92d0
.long 0xbe5e662b, 0xbe5e5c08
/*== coeff1: 16 values of c1, one selected per lane by vpermps ==*/
.align 64
.long 0x3ede5bd8, 0x3ede5b45
.long 0x3ede57d8, 0x3ede4eb1
.long 0x3ede3d37, 0x3ede2166
.long 0x3eddf9d9, 0x3eddc5bb
.long 0x3ede08ed, 0x3ede32e7
.long 0x3ede4967, 0x3ede5490
.long 0x3ede597f, 0x3ede5b50
.long 0x3ede5bca, 0x3ede5bd9
/*== L2 ==*/
/* approximately log10(2) = 0.30103..., multiplied by the exponent k */
.align 64
.long 0x3e9a209b, 0x3e9a209b, 0x3e9a209b, 0x3e9a209b, 0x3e9a209b, 0x3e9a209b, 0x3e9a209b, 0x3e9a209b, 0x3e9a209b, 0x3e9a209b, 0x3e9a209b, 0x3e9a209b, 0x3e9a209b, 0x3e9a209b, 0x3e9a209b, 0x3e9a209b
.align 64
/* Record the symbol type and size for the table object.  */
.type __svml_slog10_data_internal_avx512,@object
.size __svml_slog10_data_internal_avx512,.-__svml_slog10_data_internal_avx512

View File

@ -0,0 +1,20 @@
/* SSE2 version of vectorized log10f, vector length is 4.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* Assemble the included core file under the name _ZGVbN4v_log10f_sse2:
   the macro renames every occurrence of _ZGVbN4v_log10f in that file.
   NOTE(review): presumably this is the baseline variant chosen by the
   ifunc selector when SSE4.1 is unavailable -- confirm against
   ifunc-mathvec-sse4_1.h.  */
#define _ZGVbN4v_log10f _ZGVbN4v_log10f_sse2
#include "../svml_s_log10f4_core.S"

View File

@ -0,0 +1,28 @@
/* Multiple versions of vectorized log10f, vector length is 4.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* Public vector-ABI symbol this file provides.  SYMBOL_NAME is defined
   before including the selector header, which presumably uses it to
   declare the per-ISA variants and to supply REDIRECT_NAME and
   IFUNC_SELECTOR (neither is defined in this file) -- confirm against
   the header.  */
#define SYMBOL_NAME _ZGVbN4v_log10f
#include "ifunc-mathvec-sse4_1.h"
/* Define SYMBOL_NAME through the ifunc machinery, with the value of
   IFUNC_SELECTOR () as the chosen implementation.  */
libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
/* Shared builds only: tie the internal name __GI__ZGVbN4v_log10f to the
   redirected symbol and give it hidden visibility.
   NOTE(review): exact aliasing semantics come from __hidden_ver1, which
   is not shown here.  */
#ifdef SHARED
__hidden_ver1 (_ZGVbN4v_log10f, __GI__ZGVbN4v_log10f,
__redirect__ZGVbN4v_log10f)
__attribute__ ((visibility ("hidden")));
#endif

View File

@ -0,0 +1,243 @@
/* Function log10f vectorized with SSE4.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
https://www.gnu.org/licenses/. */
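/*
* ALGORITHM DESCRIPTION:
*
* Reduction, done with integer arithmetic on the bit pattern of x:
* d = bits(x) - iBrkValue (iBrkValue is the bit pattern of SP 2/3)
* n = d >> 23 (arithmetic shift), converted to float
* m = (d & iOffExpoMask) + iBrkValue
* r = m - 1.0
* log10(x) = n*L2H + (n*L2L + r*P(r))
* P(r) is the polynomial whose coefficients are sPoly[] (coeff9 ... coeff1).
* This variant uses no reciprocal approximation and no lookup table.
* Lanes with x < MinNorm or !(x <= MaxNorm) are recomputed with scalar log10f.
*
*/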
/* Byte offsets of the fields of the data table __svml_slog10_data_internal.
   Each field is one 16-byte vector (four 32-bit lanes holding the same
   value); sPoly is nine such vectors stored coeff9 first and coeff1 last,
   so sPoly+16*k addresses coeff(9-k) and L2 starts at 112 + 9*16 = 256.
   L2 is defined for completeness; the code in this file does not use it.
*/
#define MinNorm 0
#define MaxNorm 16
#define L2H 32
#define L2L 48
#define iBrkValue 64
#define iOffExpoMask 80
#define One 96
#define sPoly 112
#define L2 256
#include <sysdep.h>
.text
.section .text.sse4,"ax",@progbits
/* _ZGVbN4v_log10f_sse4: log10 of four packed single-precision values.
   In:  xmm0 = four float inputs.
   Out: xmm0 = four float results.
   Fast path: every lane is computed as n*L2H + (n*L2L + r*P(r)), where n
   and r come from an integer split of the input bit pattern around
   iBrkValue and P is the polynomial stored in sPoly.
   Slow path: lanes with x < MinNorm or !(x <= MaxNorm) are recomputed one
   at a time by calling scalar log10f.
   Stack frame (72 bytes): 0..23 saved r14/r13/r12 (slow path only),
   32..47 copy of the input vector, 48..63 copy of the result vector.
   Scratch registers used: xmm1-xmm12, eax, edx.  */
ENTRY(_ZGVbN4v_log10f_sse4)
subq $72, %rsp
cfi_def_cfa_offset(80)
movaps %xmm0, %xmm1
/* reduction: compute r,n */
movdqu iBrkValue+__svml_slog10_data_internal(%rip), %xmm2
movaps %xmm0, %xmm4
movdqu iOffExpoMask+__svml_slog10_data_internal(%rip), %xmm10
/* xmm1 = bits(x) - iBrkValue, lane-wise 32-bit integer subtract */
psubd %xmm2, %xmm1
/* xmm10 = low 23 bits of that difference */
pand %xmm1, %xmm10
/* xmm1 = difference shifted right arithmetically by 23: integer n */
psrad $23, %xmm1
/* xmm10 = masked bits + iBrkValue: the reduced argument m */
paddd %xmm2, %xmm10
movaps %xmm0, %xmm3
/* Preload coeff9, coeff7, coeff5, coeff3 (every other sPoly entry) */
movups sPoly+__svml_slog10_data_internal(%rip), %xmm5
movups sPoly+32+__svml_slog10_data_internal(%rip), %xmm6
movups sPoly+64+__svml_slog10_data_internal(%rip), %xmm7
movups sPoly+96+__svml_slog10_data_internal(%rip), %xmm9
/* xmm12 = (float) n */
cvtdq2ps %xmm1, %xmm12
/* xmm4 = lane mask of x < MinNorm; xmm3 = lane mask of !(x <= MaxNorm) */
cmpltps MinNorm+__svml_slog10_data_internal(%rip), %xmm4
cmpnleps MaxNorm+__svml_slog10_data_internal(%rip), %xmm3
/* xmm10 = r = m - 1.0 */
subps One+__svml_slog10_data_internal(%rip), %xmm10
/* Polynomial evaluation.  xmm8 holds r*r; the pairs coeff9*r+coeff8,
   coeff7*r+coeff6, coeff5*r+coeff4 and coeff3*r+coeff2 are formed
   independently and then chained together with multiplications by r*r.  */
mulps %xmm10, %xmm5
movaps %xmm10, %xmm8
mulps %xmm10, %xmm6
mulps %xmm10, %xmm8
addps sPoly+16+__svml_slog10_data_internal(%rip), %xmm5
mulps %xmm10, %xmm7
addps sPoly+48+__svml_slog10_data_internal(%rip), %xmm6
mulps %xmm10, %xmm9
mulps %xmm8, %xmm5
addps sPoly+80+__svml_slog10_data_internal(%rip), %xmm7
addps sPoly+112+__svml_slog10_data_internal(%rip), %xmm9
addps %xmm5, %xmm6
mulps %xmm8, %xmm6
orps %xmm3, %xmm4
/* combine and get argument value range mask:
   edx bit i is set when lane i needs the scalar fallback */
movmskps %xmm4, %edx
movups L2L+__svml_slog10_data_internal(%rip), %xmm1
addps %xmm6, %xmm7
/* xmm1 = n*L2L */
mulps %xmm12, %xmm1
mulps %xmm7, %xmm8
movups L2H+__svml_slog10_data_internal(%rip), %xmm11
addps %xmm8, %xmm9
/* xmm12 = n*L2H */
mulps %xmm11, %xmm12
mulps %xmm10, %xmm9
/* add coeff1, then multiply by r once more: xmm10 = r*P(r) */
addps sPoly+128+__svml_slog10_data_internal(%rip), %xmm9
mulps %xmm9, %xmm10
/* xmm1 = n*L2H + (n*L2L + r*P(r)): the fast-path result */
addps %xmm10, %xmm1
addps %xmm12, %xmm1
testl %edx, %edx
/* Go to special inputs processing branch */
jne L(SPECIAL_VALUES_BRANCH)
# LOE rbx rbp r12 r13 r14 r15 edx xmm0 xmm1
/* Restore registers
* and exit the function
*/
L(EXIT):
movaps %xmm1, %xmm0
addq $72, %rsp
cfi_def_cfa_offset(8)
ret
/* Unwind info for the code after ret: the 72-byte frame is still live */
cfi_def_cfa_offset(80)
/* Branch to process
* special inputs
*/
L(SPECIAL_VALUES_BRANCH):
/* Spill the input and the fast-path result so single lanes can be
   read and patched through memory */
movups %xmm0, 32(%rsp)
movups %xmm1, 48(%rsp)
# LOE rbx rbp r12 r13 r14 r15 edx
xorl %eax, %eax
/* r12, r13 and r14 are saved because they carry the lane index, the
   lane mask and the lane offset across the log10f calls */
movq %r12, 16(%rsp)
cfi_offset(12, -64)
/* r12d = lane index, starting at 0 */
movl %eax, %r12d
movq %r13, 8(%rsp)
cfi_offset(13, -72)
/* r13d = special-lane mask from movmskps */
movl %edx, %r13d
movq %r14, (%rsp)
cfi_offset(14, -80)
# LOE rbx rbp r15 r12d r13d
/* Range mask
* bits check
*/
L(RANGEMASK_CHECK):
/* CF = bit r12d of the mask */
btl %r12d, %r13d
/* Call scalar math function */
jc L(SCALAR_MATH_CALL)
# LOE rbx rbp r15 r12d r13d
/* Special inputs
* processing loop
*/
L(SPECIAL_VALUES_LOOP):
incl %r12d
/* four lanes in total */
cmpl $4, %r12d
/* Check bits in range mask */
jl L(RANGEMASK_CHECK)
# LOE rbx rbp r15 r12d r13d
movq 16(%rsp), %r12
cfi_restore(12)
movq 8(%rsp), %r13
cfi_restore(13)
movq (%rsp), %r14
cfi_restore(14)
/* Reload the result vector, now including the patched lanes */
movups 48(%rsp), %xmm1
/* Go to exit */
jmp L(EXIT)
/* Unwind info for the code below, where r12-r14 are still on the stack */
cfi_offset(12, -64)
cfi_offset(13, -72)
cfi_offset(14, -80)
# LOE rbx rbp r12 r13 r14 r15 xmm1
/* Scalar math function call
* to process special input
*/
L(SCALAR_MATH_CALL):
/* r14 = lane index, zero-extended, used to address the spilled lanes */
movl %r12d, %r14d
movss 32(%rsp,%r14,4), %xmm0
call log10f@PLT
# LOE rbx rbp r14 r15 r12d r13d xmm0
/* Overwrite this lane of the spilled result with the scalar answer */
movss %xmm0, 48(%rsp,%r14,4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)
# LOE rbx rbp r15 r12d r13d
END(_ZGVbN4v_log10f_sse4)
/* Constant table read by _ZGVbN4v_log10f_sse4.  Every entry is a 16-byte
   vector with the same 32-bit value in all four lanes, and the order must
   match the byte offsets #defined at the top of this file.  The table is
   16-byte aligned; the code uses several entries directly as memory
   operands of legacy SSE arithmetic instructions.  */
.section .rodata, "a"
.align 16
/* C view of the table layout.  It is only seen by the preprocessor when
   __svml_slog10_data_internal_typedef is defined; nothing in this file
   defines it.  */
#ifdef __svml_slog10_data_internal_typedef
typedef unsigned int VUINT32;
typedef struct {
__declspec(align(16)) VUINT32 MinNorm[4][1];
__declspec(align(16)) VUINT32 MaxNorm[4][1];
__declspec(align(16)) VUINT32 L2H[4][1];
__declspec(align(16)) VUINT32 L2L[4][1];
__declspec(align(16)) VUINT32 iBrkValue[4][1];
__declspec(align(16)) VUINT32 iOffExpoMask[4][1];
__declspec(align(16)) VUINT32 One[4][1];
__declspec(align(16)) VUINT32 sPoly[9][4][1];
__declspec(align(16)) VUINT32 L2[4][1];
} __svml_slog10_data_internal;
#endif
__svml_slog10_data_internal:
/*== MinNorm: inputs that compare below this take the scalar path ==*/
.long 0x00800000, 0x00800000, 0x00800000, 0x00800000
/*== MaxNorm: inputs not less-or-equal to this take the scalar path ==*/
.align 16
.long 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff
/*== L2H: multiplied by n (presumably the high part of log10(2)) ==*/
.align 16
.long 0x3e9a2100, 0x3e9a2100, 0x3e9a2100, 0x3e9a2100
/*== L2L: multiplied by n (presumably the low part of log10(2)) ==*/
.align 16
.long 0xb64AF600, 0xb64AF600, 0xb64AF600, 0xb64AF600
/*== iBrkValue = SP 2/3 ==*/
.align 16
.long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab
/*== iOffExpoMask = SP significand mask ==*/
.align 16
.long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff
/*== sOne = SP 1.0 ==*/
.align 16
.long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
/*== spoly[9]: polynomial coefficients, highest order first ==*/
.align 16
.long 0x3d8063B4, 0x3d8063B4, 0x3d8063B4, 0x3d8063B4 /* coeff9 */
.long 0xbd890073, 0xbd890073, 0xbd890073, 0xbd890073 /* coeff8 */
.long 0x3d775317, 0x3d775317, 0x3d775317, 0x3d775317 /* coeff7 */
.long 0xbd91FB27, 0xbd91FB27, 0xbd91FB27, 0xbd91FB27 /* coeff6 */
.long 0x3dB20B96, 0x3dB20B96, 0x3dB20B96, 0x3dB20B96 /* coeff5 */
.long 0xbdDE6E20, 0xbdDE6E20, 0xbdDE6E20, 0xbdDE6E20 /* coeff4 */
.long 0x3e143CE5, 0x3e143CE5, 0x3e143CE5, 0x3e143CE5 /* coeff3 */
.long 0xbe5E5BC5, 0xbe5E5BC5, 0xbe5E5BC5, 0xbe5E5BC5 /* coeff2 */
.long 0x3eDE5BD9, 0x3eDE5BD9, 0x3eDE5BD9, 0x3eDE5BD9 /* coeff1 */
/*== L2: not referenced by the code in this file ==*/
.align 16
.long 0x3e9a209b, 0x3e9a209b, 0x3e9a209b, 0x3e9a209b
.align 16
.type __svml_slog10_data_internal,@object
.size __svml_slog10_data_internal,.-__svml_slog10_data_internal

View File

@ -0,0 +1,20 @@
/* SSE version of vectorized log10f, vector length is 8.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* Assemble the included core file under the name
   _ZGVdN8v_log10f_sse_wrapper: the macro renames every occurrence of
   _ZGVdN8v_log10f in that file.
   NOTE(review): presumably this is the fallback the ifunc selector picks
   when AVX2 is unavailable -- confirm against ifunc-mathvec-avx2.h.  */
#define _ZGVdN8v_log10f _ZGVdN8v_log10f_sse_wrapper
#include "../svml_s_log10f8_core.S"

View File

@ -0,0 +1,28 @@
/* Multiple versions of vectorized log10f, vector length is 8.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* Public vector-ABI symbol this file provides.  SYMBOL_NAME is defined
   before including the selector header, which presumably uses it to
   declare the per-ISA variants and to supply REDIRECT_NAME and
   IFUNC_SELECTOR (neither is defined in this file) -- confirm against
   the header.  */
#define SYMBOL_NAME _ZGVdN8v_log10f
#include "ifunc-mathvec-avx2.h"
/* Define SYMBOL_NAME through the ifunc machinery, with the value of
   IFUNC_SELECTOR () as the chosen implementation.  */
libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
/* Shared builds only: tie the internal name __GI__ZGVdN8v_log10f to the
   redirected symbol and give it hidden visibility.
   NOTE(review): exact aliasing semantics come from __hidden_ver1, which
   is not shown here.  */
#ifdef SHARED
__hidden_ver1 (_ZGVdN8v_log10f, __GI__ZGVdN8v_log10f,
__redirect__ZGVdN8v_log10f)
__attribute__ ((visibility ("hidden")));
#endif

View File

@ -0,0 +1,243 @@
/* Function log10f vectorized with AVX2.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
https://www.gnu.org/licenses/. */
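/*
* ALGORITHM DESCRIPTION:
*
* Reduction, done with integer arithmetic on the bit pattern of x:
* d = bits(x) - iBrkValue (iBrkValue is the bit pattern of SP 2/3)
* n = d >> 23 (arithmetic shift), converted to float
* m = (d & iOffExpoMask) + iBrkValue
* r = m - 1.0
* log10(x) = n*L2H + (n*L2L + r*P(r)), evaluated with fused multiply-adds
* P(r) is the polynomial whose coefficients are sPoly[] (coeff9 ... coeff1).
* This variant uses no reciprocal approximation and no lookup table.
* Lanes with x < MinNorm or !(x <= MaxNorm) are recomputed with scalar log10f.
*
*/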
/* Byte offsets of the fields of the data table __svml_slog10_data_internal.
   Each field is one 32-byte vector (eight 32-bit lanes holding the same
   value); sPoly is nine such vectors stored coeff9 first and coeff1 last,
   so sPoly+32*k addresses coeff(9-k) and L2 starts at 224 + 9*32 = 512.
   L2 is defined for completeness; the code in this file does not use it.
*/
#define MinNorm 0
#define MaxNorm 32
#define L2H 64
#define L2L 96
#define iBrkValue 128
#define iOffExpoMask 160
#define One 192
#define sPoly 224
#define L2 512
#include <sysdep.h>
.text
.section .text.avx2,"ax",@progbits
/* _ZGVdN8v_log10f_avx2: log10 of eight packed single-precision values.
   In:  ymm0 = eight float inputs.
   Out: ymm0 = eight float results.
   Fast path: every lane is computed as n*L2H + (n*L2L + r*P(r)) with
   fused multiply-adds, where n and r come from an integer split of the
   input bit pattern around iBrkValue and P is the polynomial in sPoly.
   Slow path: lanes with x < MinNorm or !(x <= MaxNorm) are recomputed one
   at a time by calling scalar log10f.
   Stack frame (rbp-based, rsp rounded down to a multiple of 32, then 96
   bytes): 0..23 saved r14/r13/r12 (slow path only), 32..63 copy of the
   input vector, 64..95 copy of the result vector.
   Scratch registers used: ymm1-ymm15, eax, edx.  */
ENTRY(_ZGVdN8v_log10f_avx2)
pushq %rbp
cfi_def_cfa_offset(16)
movq %rsp, %rbp
cfi_def_cfa(6, 16)
cfi_offset(6, -16)
andq $-32, %rsp
subq $96, %rsp
/* reduction: compute r,n */
vmovups iBrkValue+__svml_slog10_data_internal(%rip), %ymm4
/* Preload coeff9, coeff7, coeff5, coeff3 (every other sPoly entry) */
vmovups sPoly+__svml_slog10_data_internal(%rip), %ymm15
vmovups sPoly+64+__svml_slog10_data_internal(%rip), %ymm9
vmovups sPoly+128+__svml_slog10_data_internal(%rip), %ymm10
vmovups sPoly+192+__svml_slog10_data_internal(%rip), %ymm12
/* ymm1 = bits(x) - iBrkValue, lane-wise 32-bit integer subtract */
vpsubd %ymm4, %ymm0, %ymm1
/* ymm5 = lane mask of x < MinNorm; ymm6 = lane mask of !(x <= MaxNorm),
   which is also set when the comparison is unordered */
vcmplt_oqps MinNorm+__svml_slog10_data_internal(%rip), %ymm0, %ymm5
vcmpnle_uqps MaxNorm+__svml_slog10_data_internal(%rip), %ymm0, %ymm6
/* ymm3 = low 23 bits of the difference; ymm2 = difference >> 23 (n) */
vpand iOffExpoMask+__svml_slog10_data_internal(%rip), %ymm1, %ymm3
vpsrad $23, %ymm1, %ymm2
/* ymm8 = masked bits + iBrkValue: the reduced argument m */
vpaddd %ymm4, %ymm3, %ymm8
/* ymm1 = (float) n */
vcvtdq2ps %ymm2, %ymm1
/* ymm13 = r = m - 1.0 */
vsubps One+__svml_slog10_data_internal(%rip), %ymm8, %ymm13
/* ymm14 = n*L2L */
vmulps L2L+__svml_slog10_data_internal(%rip), %ymm1, %ymm14
/* Polynomial evaluation.  The pairs coeff9*r+coeff8 (ymm15),
   coeff7*r+coeff6 (ymm9), coeff5*r+coeff4 (ymm10) and coeff3*r+coeff2
   (ymm12) are formed first; ymm11 = r*r then chains them into ymm15.  */
vfmadd213ps sPoly+32+__svml_slog10_data_internal(%rip), %ymm13, %ymm15
vfmadd213ps sPoly+96+__svml_slog10_data_internal(%rip), %ymm13, %ymm9
vmulps %ymm13, %ymm13, %ymm11
vfmadd213ps sPoly+160+__svml_slog10_data_internal(%rip), %ymm13, %ymm10
vfmadd213ps sPoly+224+__svml_slog10_data_internal(%rip), %ymm13, %ymm12
vfmadd213ps %ymm9, %ymm11, %ymm15
vfmadd213ps %ymm10, %ymm11, %ymm15
vfmadd213ps %ymm12, %ymm11, %ymm15
/* ymm15 = ymm15*r + coeff1, i.e. P(r) */
vfmadd213ps sPoly+256+__svml_slog10_data_internal(%rip), %ymm13, %ymm15
/* ymm15 = r*P(r) + n*L2L */
vfmadd213ps %ymm14, %ymm13, %ymm15
vorps %ymm6, %ymm5, %ymm7
/* combine and get argument value range mask:
   edx bit i is set when lane i needs the scalar fallback */
vmovmskps %ymm7, %edx
/* ymm1 = n*L2H + ymm15: the fast-path result */
vfmadd132ps L2H+__svml_slog10_data_internal(%rip), %ymm15, %ymm1
testl %edx, %edx
/* Go to special inputs processing branch */
jne L(SPECIAL_VALUES_BRANCH)
# LOE rbx r12 r13 r14 r15 edx ymm0 ymm1
/* Restore registers
* and exit the function
*/
L(EXIT):
vmovaps %ymm1, %ymm0
movq %rbp, %rsp
popq %rbp
cfi_def_cfa(7, 8)
cfi_restore(6)
ret
/* Unwind info for the code after ret: the rbp-based frame is still live */
cfi_def_cfa(6, 16)
cfi_offset(6, -16)
/* Branch to process
* special inputs
*/
L(SPECIAL_VALUES_BRANCH):
/* Spill the input and the fast-path result so single lanes can be
   read and patched through memory */
vmovups %ymm0, 32(%rsp)
vmovups %ymm1, 64(%rsp)
# LOE rbx r12 r13 r14 r15 edx ymm1
xorl %eax, %eax
# LOE rbx r12 r13 r14 r15 eax edx
/* Both vectors are already in memory; clear the upper ymm halves before
   the legacy-SSE movss instructions and the log10f calls below */
vzeroupper
/* r12, r13 and r14 are saved because they carry the lane index, the
   lane mask and the lane offset across the log10f calls */
movq %r12, 16(%rsp)
/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
/* r12d = lane index, starting at 0 */
movl %eax, %r12d
movq %r13, 8(%rsp)
/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
/* r13d = special-lane mask from vmovmskps */
movl %edx, %r13d
movq %r14, (%rsp)
/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
# LOE rbx r15 r12d r13d
/* Range mask
* bits check
*/
L(RANGEMASK_CHECK):
/* CF = bit r12d of the mask */
btl %r12d, %r13d
/* Call scalar math function */
jc L(SCALAR_MATH_CALL)
# LOE rbx r15 r12d r13d
/* Special inputs
* processing loop
*/
L(SPECIAL_VALUES_LOOP):
incl %r12d
/* eight lanes in total */
cmpl $8, %r12d
/* Check bits in range mask */
jl L(RANGEMASK_CHECK)
# LOE rbx r15 r12d r13d
movq 16(%rsp), %r12
cfi_restore(12)
movq 8(%rsp), %r13
cfi_restore(13)
movq (%rsp), %r14
cfi_restore(14)
/* Reload the result vector, now including the patched lanes */
vmovups 64(%rsp), %ymm1
/* Go to exit */
jmp L(EXIT)
/* Unwind info for the code below, where r12-r14 are still on the stack */
/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
# LOE rbx r12 r13 r14 r15 ymm1
/* Scalar math function call
* to process special input
*/
L(SCALAR_MATH_CALL):
/* r14 = lane index, zero-extended, used to address the spilled lanes */
movl %r12d, %r14d
movss 32(%rsp,%r14,4), %xmm0
call log10f@PLT
# LOE rbx r14 r15 r12d r13d xmm0
/* Overwrite this lane of the spilled result with the scalar answer */
movss %xmm0, 64(%rsp,%r14,4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)
# LOE rbx r15 r12d r13d
END(_ZGVdN8v_log10f_avx2)
/* Constant table read by _ZGVdN8v_log10f_avx2.  Every entry is a 32-byte
   vector with the same 32-bit value in all eight lanes, and the order must
   match the byte offsets #defined at the top of this file.  */
.section .rodata, "a"
.align 32
/* C view of the table layout.  It is only seen by the preprocessor when
   __svml_slog10_data_internal_typedef is defined; nothing in this file
   defines it.  */
#ifdef __svml_slog10_data_internal_typedef
typedef unsigned int VUINT32;
typedef struct {
__declspec(align(32)) VUINT32 MinNorm[8][1];
__declspec(align(32)) VUINT32 MaxNorm[8][1];
__declspec(align(32)) VUINT32 L2H[8][1];
__declspec(align(32)) VUINT32 L2L[8][1];
__declspec(align(32)) VUINT32 iBrkValue[8][1];
__declspec(align(32)) VUINT32 iOffExpoMask[8][1];
__declspec(align(32)) VUINT32 One[8][1];
__declspec(align(32)) VUINT32 sPoly[9][8][1];
__declspec(align(32)) VUINT32 L2[8][1];
} __svml_slog10_data_internal;
#endif
__svml_slog10_data_internal:
/*== MinNorm: inputs that compare below this take the scalar path ==*/
.long 0x00800000, 0x00800000, 0x00800000, 0x00800000, 0x00800000, 0x00800000, 0x00800000, 0x00800000
/*== MaxNorm: inputs not less-or-equal to this take the scalar path ==*/
.align 32
.long 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff
/*== L2H: multiplied by n (presumably the high part of log10(2)) ==*/
.align 32
.long 0x3e9a2100, 0x3e9a2100, 0x3e9a2100, 0x3e9a2100, 0x3e9a2100, 0x3e9a2100, 0x3e9a2100, 0x3e9a2100
/*== L2L: multiplied by n (presumably the low part of log10(2)) ==*/
.align 32
.long 0xb64AF600, 0xb64AF600, 0xb64AF600, 0xb64AF600, 0xb64AF600, 0xb64AF600, 0xb64AF600, 0xb64AF600
/*== iBrkValue = SP 2/3 ==*/
.align 32
.long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab
/*== iOffExpoMask = SP significand mask ==*/
.align 32
.long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff
/*== sOne = SP 1.0 ==*/
.align 32
.long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
/*== spoly[9]: polynomial coefficients, highest order first ==*/
.align 32
.long 0x3d8063B4, 0x3d8063B4, 0x3d8063B4, 0x3d8063B4, 0x3d8063B4, 0x3d8063B4, 0x3d8063B4, 0x3d8063B4 /* coeff9 */
.long 0xbd890073, 0xbd890073, 0xbd890073, 0xbd890073, 0xbd890073, 0xbd890073, 0xbd890073, 0xbd890073 /* coeff8 */
.long 0x3d775317, 0x3d775317, 0x3d775317, 0x3d775317, 0x3d775317, 0x3d775317, 0x3d775317, 0x3d775317 /* coeff7 */
.long 0xbd91FB27, 0xbd91FB27, 0xbd91FB27, 0xbd91FB27, 0xbd91FB27, 0xbd91FB27, 0xbd91FB27, 0xbd91FB27 /* coeff6 */
.long 0x3dB20B96, 0x3dB20B96, 0x3dB20B96, 0x3dB20B96, 0x3dB20B96, 0x3dB20B96, 0x3dB20B96, 0x3dB20B96 /* coeff5 */
.long 0xbdDE6E20, 0xbdDE6E20, 0xbdDE6E20, 0xbdDE6E20, 0xbdDE6E20, 0xbdDE6E20, 0xbdDE6E20, 0xbdDE6E20 /* coeff4 */
.long 0x3e143CE5, 0x3e143CE5, 0x3e143CE5, 0x3e143CE5, 0x3e143CE5, 0x3e143CE5, 0x3e143CE5, 0x3e143CE5 /* coeff3 */
.long 0xbe5E5BC5, 0xbe5E5BC5, 0xbe5E5BC5, 0xbe5E5BC5, 0xbe5E5BC5, 0xbe5E5BC5, 0xbe5E5BC5, 0xbe5E5BC5 /* coeff2 */
.long 0x3eDE5BD9, 0x3eDE5BD9, 0x3eDE5BD9, 0x3eDE5BD9, 0x3eDE5BD9, 0x3eDE5BD9, 0x3eDE5BD9, 0x3eDE5BD9 /* coeff1 */
/*== L2: not referenced by the code in this file ==*/
.align 32
.long 0x3e9a209b, 0x3e9a209b, 0x3e9a209b, 0x3e9a209b, 0x3e9a209b, 0x3e9a209b, 0x3e9a209b, 0x3e9a209b
.align 32
.type __svml_slog10_data_internal,@object
.size __svml_slog10_data_internal,.-__svml_slog10_data_internal

View File

@ -0,0 +1,29 @@
/* Function log10 vectorized with SSE2.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include "svml_d_wrapper_impl.h"
.text
/* _ZGVbN2v_log10: entry point for the two-lane double-precision log10.
   The whole body comes from the WRAPPER_IMPL_SSE2 macro (from
   svml_d_wrapper_impl.h, not shown here), which is given the scalar
   routine name log10.
   NOTE(review): presumably the macro calls log10 once per lane --
   confirm against the header.  */
ENTRY (_ZGVbN2v_log10)
WRAPPER_IMPL_SSE2 log10
END (_ZGVbN2v_log10)
/* The hidden definition is emitted only when USE_MULTIARCH is not defined.
   NOTE(review): presumably multiarch builds get it from the ifunc file
   instead -- confirm.  */
#ifndef USE_MULTIARCH
libmvec_hidden_def (_ZGVbN2v_log10)
#endif

View File

@ -0,0 +1,29 @@
/* Function log10 vectorized with AVX2, wrapper version.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include "svml_d_wrapper_impl.h"
.text
/* _ZGVdN4v_log10: entry point for the four-lane double-precision log10.
   The whole body comes from the WRAPPER_IMPL_AVX macro (from
   svml_d_wrapper_impl.h, not shown here), which is given the two-lane
   routine _ZGVbN2v_log10.
   NOTE(review): presumably the macro splits the input into two halves
   and calls the two-lane routine on each -- confirm against the header.  */
ENTRY (_ZGVdN4v_log10)
WRAPPER_IMPL_AVX _ZGVbN2v_log10
END (_ZGVdN4v_log10)
/* The hidden definition is emitted only when USE_MULTIARCH is not defined.
   NOTE(review): presumably multiarch builds get it from the ifunc file
   instead -- confirm.  */
#ifndef USE_MULTIARCH
libmvec_hidden_def (_ZGVdN4v_log10)
#endif

View File

@ -0,0 +1,25 @@
/* Function log10 vectorized in AVX ISA as wrapper to SSE4 ISA version.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include "svml_d_wrapper_impl.h"
.text
/* _ZGVcN4v_log10: entry point for the four-lane double-precision log10 in
   the AVX ("c") variant of the vector ABI.  The whole body comes from the
   WRAPPER_IMPL_AVX macro (from svml_d_wrapper_impl.h, not shown here),
   which is given the two-lane routine _ZGVbN2v_log10.
   NOTE(review): presumably the macro splits the input into two halves
   and calls the two-lane routine on each -- confirm against the header.  */
ENTRY (_ZGVcN4v_log10)
WRAPPER_IMPL_AVX _ZGVbN2v_log10
END (_ZGVcN4v_log10)

View File

@ -0,0 +1,25 @@
/* Function log10 vectorized with AVX-512, wrapper to AVX2.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include "svml_d_wrapper_impl.h"
.text
/* _ZGVeN8v_log10: entry point for the eight-lane double-precision log10.
   The whole body comes from the WRAPPER_IMPL_AVX512 macro (from
   svml_d_wrapper_impl.h, not shown here), which is given the four-lane
   routine _ZGVdN4v_log10.
   NOTE(review): presumably the macro splits the input into two halves
   and calls the four-lane routine on each -- confirm against the header.  */
ENTRY (_ZGVeN8v_log10)
WRAPPER_IMPL_AVX512 _ZGVdN4v_log10
END (_ZGVeN8v_log10)

View File

@ -0,0 +1,25 @@
/* Function log10f vectorized with AVX-512. Wrapper to AVX2 version.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include "svml_s_wrapper_impl.h"
.text
/* _ZGVeN16v_log10f: entry point for the sixteen-lane single-precision
   log10.  The whole body comes from the WRAPPER_IMPL_AVX512 macro (from
   svml_s_wrapper_impl.h, not shown here), which is given the eight-lane
   routine _ZGVdN8v_log10f.
   NOTE(review): presumably the macro splits the input into two halves
   and calls the eight-lane routine on each -- confirm against the
   header.  */
ENTRY (_ZGVeN16v_log10f)
WRAPPER_IMPL_AVX512 _ZGVdN8v_log10f
END (_ZGVeN16v_log10f)

View File

@ -0,0 +1,29 @@
/* Function log10f vectorized with SSE2, wrapper version.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include "svml_s_wrapper_impl.h"
.text
/* _ZGVbN4v_log10f: entry point for the four-lane single-precision log10.
   The whole body comes from the WRAPPER_IMPL_SSE2 macro (from
   svml_s_wrapper_impl.h, not shown here), which is given the scalar
   routine name log10f.
   NOTE(review): presumably the macro calls log10f once per lane --
   confirm against the header.  */
ENTRY (_ZGVbN4v_log10f)
WRAPPER_IMPL_SSE2 log10f
END (_ZGVbN4v_log10f)
/* The hidden definition is emitted only when USE_MULTIARCH is not defined.
   NOTE(review): presumably multiarch builds get it from the ifunc file
   instead -- confirm.  */
#ifndef USE_MULTIARCH
libmvec_hidden_def (_ZGVbN4v_log10f)
#endif

View File

@ -0,0 +1,29 @@
/* Function log10f vectorized with AVX2, wrapper version.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include "svml_s_wrapper_impl.h"
.text
/* _ZGVdN8v_log10f: entry point for the eight-lane single-precision log10.
   The whole body comes from the WRAPPER_IMPL_AVX macro (from
   svml_s_wrapper_impl.h, not shown here), which is given the four-lane
   routine _ZGVbN4v_log10f.
   NOTE(review): presumably the macro splits the input into two halves
   and calls the four-lane routine on each -- confirm against the header.  */
ENTRY (_ZGVdN8v_log10f)
WRAPPER_IMPL_AVX _ZGVbN4v_log10f
END (_ZGVdN8v_log10f)
/* The hidden definition is emitted only when USE_MULTIARCH is not defined.
   NOTE(review): presumably multiarch builds get it from the ifunc file
   instead -- confirm.  */
#ifndef USE_MULTIARCH
libmvec_hidden_def (_ZGVdN8v_log10f)
#endif

View File

@ -0,0 +1,25 @@
/* Function log10f vectorized in AVX ISA as wrapper to SSE4 ISA version.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include "svml_s_wrapper_impl.h"
.text
/* _ZGVcN8v_log10f: entry point for the eight-lane single-precision log10
   in the AVX ("c") variant of the vector ABI.  The whole body comes from
   the WRAPPER_IMPL_AVX macro (from svml_s_wrapper_impl.h, not shown
   here), which is given the four-lane routine _ZGVbN4v_log10f.
   NOTE(review): presumably the macro splits the input into two halves
   and calls the four-lane routine on each -- confirm against the header.  */
ENTRY (_ZGVcN8v_log10f)
WRAPPER_IMPL_AVX _ZGVbN4v_log10f
END (_ZGVcN8v_log10f)

View File

@ -0,0 +1 @@
#include "test-double-libmvec-log10.c"

View File

@ -0,0 +1 @@
#include "test-double-libmvec-log10.c"

View File

@ -0,0 +1 @@
#include "test-double-libmvec-log10.c"

View File

@ -0,0 +1,3 @@
/* Parameters for the shared one-argument vector-ABI test template:
   the scalar element type and the scalar function name to exercise.
   The test body itself comes from test-vector-abi-arg1.h (not shown
   here).  */
#define LIBMVEC_TYPE double
#define LIBMVEC_FUNC log10
#include "test-vector-abi-arg1.h"

View File

@ -38,6 +38,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (expm1), _ZGVbN2v_expm1)
VECTOR_WRAPPER (WRAPPER_NAME (sinh), _ZGVbN2v_sinh)
VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVbN2v_cbrt)
VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVbN2vv_atan2)
VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVbN2v_log10)
#define VEC_INT_TYPE __m128i

View File

@ -41,6 +41,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (expm1), _ZGVdN4v_expm1)
VECTOR_WRAPPER (WRAPPER_NAME (sinh), _ZGVdN4v_sinh)
VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVdN4v_cbrt)
VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVdN4vv_atan2)
VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVdN4v_log10)
#ifndef __ILP32__
# define VEC_INT_TYPE __m256i

View File

@ -38,6 +38,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (expm1), _ZGVcN4v_expm1)
VECTOR_WRAPPER (WRAPPER_NAME (sinh), _ZGVcN4v_sinh)
VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVcN4v_cbrt)
VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVcN4vv_atan2)
VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVcN4v_log10)
#define VEC_INT_TYPE __m128i

View File

@ -38,6 +38,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (expm1), _ZGVeN8v_expm1)
VECTOR_WRAPPER (WRAPPER_NAME (sinh), _ZGVeN8v_sinh)
VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVeN8v_cbrt)
VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVeN8vv_atan2)
VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVeN8v_log10)
#ifndef __ILP32__
# define VEC_INT_TYPE __m512i

View File

@ -0,0 +1 @@
#include "test-float-libmvec-log10f.c"

View File

@ -0,0 +1 @@
#include "test-float-libmvec-log10f.c"

View File

@ -0,0 +1 @@
#include "test-float-libmvec-log10f.c"

View File

@ -0,0 +1,3 @@
/* Parameters for the shared one-argument vector-ABI test template:
   the scalar element type and the scalar function name to exercise.
   The test body itself comes from test-vector-abi-arg1.h (not shown
   here).  */
#define LIBMVEC_TYPE float
#define LIBMVEC_FUNC log10f
#include "test-vector-abi-arg1.h"

View File

@ -38,6 +38,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (expm1f), _ZGVeN16v_expm1f)
VECTOR_WRAPPER (WRAPPER_NAME (sinhf), _ZGVeN16v_sinhf)
VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVeN16v_cbrtf)
VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVeN16vv_atan2f)
VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVeN16v_log10f)
#define VEC_INT_TYPE __m512i

View File

@ -38,6 +38,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (expm1f), _ZGVbN4v_expm1f)
VECTOR_WRAPPER (WRAPPER_NAME (sinhf), _ZGVbN4v_sinhf)
VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVbN4v_cbrtf)
VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVbN4vv_atan2f)
VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVbN4v_log10f)
#define VEC_INT_TYPE __m128i

View File

@ -41,6 +41,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (expm1f), _ZGVdN8v_expm1f)
VECTOR_WRAPPER (WRAPPER_NAME (sinhf), _ZGVdN8v_sinhf)
VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVdN8v_cbrtf)
VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVdN8vv_atan2f)
VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVdN8v_log10f)
/* Redefinition of wrapper to be compatible with _ZGVdN8vvv_sincosf. */
#undef VECTOR_WRAPPER_fFF

View File

@ -38,6 +38,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (expm1f), _ZGVcN8v_expm1f)
VECTOR_WRAPPER (WRAPPER_NAME (sinhf), _ZGVcN8v_sinhf)
VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVcN8v_cbrtf)
VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVcN8vv_atan2f)
VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVcN8v_log10f)
#define VEC_INT_TYPE __m128i