x86-64: Add vector log2/log2f implementation to libmvec

Implement vectorized log2/log2f containing SSE, AVX, AVX2 and
AVX512 versions for libmvec as per vector ABI.  It also contains
accuracy and ABI tests for vector log2/log2f with regenerated ulps.

Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
This commit is contained in:
Sunil K Pandey 2021-12-29 09:29:44 -08:00
parent 8f8566026d
commit 7e1722fec8
50 changed files with 4208 additions and 1 deletions

View File

@ -230,4 +230,15 @@
#define __DECL_SIMD_log10f32x
#define __DECL_SIMD_log10f64x
#define __DECL_SIMD_log10f128x
#define __DECL_SIMD_log2
#define __DECL_SIMD_log2f
#define __DECL_SIMD_log2l
#define __DECL_SIMD_log2f16
#define __DECL_SIMD_log2f32
#define __DECL_SIMD_log2f64
#define __DECL_SIMD_log2f128
#define __DECL_SIMD_log2f32x
#define __DECL_SIMD_log2f64x
#define __DECL_SIMD_log2f128x
#endif

View File

@ -130,7 +130,7 @@ __MATHCALL (logb,, (_Mdouble_ __x));
__MATHCALL_VEC (exp2,, (_Mdouble_ __x));
/* Compute base-2 logarithm of X. */
__MATHCALL (log2,, (_Mdouble_ __x));
__MATHCALL_VEC (log2,, (_Mdouble_ __x));
#endif

View File

@ -55,6 +55,7 @@ GLIBC_2.35 _ZGVbN2v_exp10 F
GLIBC_2.35 _ZGVbN2v_exp2 F
GLIBC_2.35 _ZGVbN2v_expm1 F
GLIBC_2.35 _ZGVbN2v_log10 F
GLIBC_2.35 _ZGVbN2v_log2 F
GLIBC_2.35 _ZGVbN2v_sinh F
GLIBC_2.35 _ZGVbN2vv_atan2 F
GLIBC_2.35 _ZGVbN2vv_hypot F
@ -67,6 +68,7 @@ GLIBC_2.35 _ZGVbN4v_exp10f F
GLIBC_2.35 _ZGVbN4v_exp2f F
GLIBC_2.35 _ZGVbN4v_expm1f F
GLIBC_2.35 _ZGVbN4v_log10f F
GLIBC_2.35 _ZGVbN4v_log2f F
GLIBC_2.35 _ZGVbN4v_sinhf F
GLIBC_2.35 _ZGVbN4vv_atan2f F
GLIBC_2.35 _ZGVbN4vv_hypotf F
@ -79,6 +81,7 @@ GLIBC_2.35 _ZGVcN4v_exp10 F
GLIBC_2.35 _ZGVcN4v_exp2 F
GLIBC_2.35 _ZGVcN4v_expm1 F
GLIBC_2.35 _ZGVcN4v_log10 F
GLIBC_2.35 _ZGVcN4v_log2 F
GLIBC_2.35 _ZGVcN4v_sinh F
GLIBC_2.35 _ZGVcN4vv_atan2 F
GLIBC_2.35 _ZGVcN4vv_hypot F
@ -91,6 +94,7 @@ GLIBC_2.35 _ZGVcN8v_exp10f F
GLIBC_2.35 _ZGVcN8v_exp2f F
GLIBC_2.35 _ZGVcN8v_expm1f F
GLIBC_2.35 _ZGVcN8v_log10f F
GLIBC_2.35 _ZGVcN8v_log2f F
GLIBC_2.35 _ZGVcN8v_sinhf F
GLIBC_2.35 _ZGVcN8vv_atan2f F
GLIBC_2.35 _ZGVcN8vv_hypotf F
@ -103,6 +107,7 @@ GLIBC_2.35 _ZGVdN4v_exp10 F
GLIBC_2.35 _ZGVdN4v_exp2 F
GLIBC_2.35 _ZGVdN4v_expm1 F
GLIBC_2.35 _ZGVdN4v_log10 F
GLIBC_2.35 _ZGVdN4v_log2 F
GLIBC_2.35 _ZGVdN4v_sinh F
GLIBC_2.35 _ZGVdN4vv_atan2 F
GLIBC_2.35 _ZGVdN4vv_hypot F
@ -115,6 +120,7 @@ GLIBC_2.35 _ZGVdN8v_exp10f F
GLIBC_2.35 _ZGVdN8v_exp2f F
GLIBC_2.35 _ZGVdN8v_expm1f F
GLIBC_2.35 _ZGVdN8v_log10f F
GLIBC_2.35 _ZGVdN8v_log2f F
GLIBC_2.35 _ZGVdN8v_sinhf F
GLIBC_2.35 _ZGVdN8vv_atan2f F
GLIBC_2.35 _ZGVdN8vv_hypotf F
@ -127,6 +133,7 @@ GLIBC_2.35 _ZGVeN16v_exp10f F
GLIBC_2.35 _ZGVeN16v_exp2f F
GLIBC_2.35 _ZGVeN16v_expm1f F
GLIBC_2.35 _ZGVeN16v_log10f F
GLIBC_2.35 _ZGVeN16v_log2f F
GLIBC_2.35 _ZGVeN16v_sinhf F
GLIBC_2.35 _ZGVeN16vv_atan2f F
GLIBC_2.35 _ZGVeN16vv_hypotf F
@ -139,6 +146,7 @@ GLIBC_2.35 _ZGVeN8v_exp10 F
GLIBC_2.35 _ZGVeN8v_exp2 F
GLIBC_2.35 _ZGVeN8v_expm1 F
GLIBC_2.35 _ZGVeN8v_log10 F
GLIBC_2.35 _ZGVeN8v_log2 F
GLIBC_2.35 _ZGVeN8v_sinh F
GLIBC_2.35 _ZGVeN8vv_atan2 F
GLIBC_2.35 _ZGVeN8vv_hypot F

View File

@ -106,6 +106,10 @@
# define __DECL_SIMD_log10 __DECL_SIMD_x86_64
# undef __DECL_SIMD_log10f
# define __DECL_SIMD_log10f __DECL_SIMD_x86_64
# undef __DECL_SIMD_log2
# define __DECL_SIMD_log2 __DECL_SIMD_x86_64
# undef __DECL_SIMD_log2f
# define __DECL_SIMD_log2f __DECL_SIMD_x86_64
# endif
#endif

View File

@ -52,6 +52,8 @@
!GCC$ builtin (atan2f) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (log10) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (log10f) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (log2) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (log2f) attributes simd (notinbranch) if('x86_64')
!GCC$ builtin (cos) attributes simd (notinbranch) if('x32')
!GCC$ builtin (cosf) attributes simd (notinbranch) if('x32')
@ -89,3 +91,5 @@
!GCC$ builtin (atan2f) attributes simd (notinbranch) if('x32')
!GCC$ builtin (log10) attributes simd (notinbranch) if('x32')
!GCC$ builtin (log10f) attributes simd (notinbranch) if('x32')
!GCC$ builtin (log2) attributes simd (notinbranch) if('x32')
!GCC$ builtin (log2f) attributes simd (notinbranch) if('x32')

View File

@ -36,6 +36,7 @@ libmvec-funcs = \
hypot \
log \
log10 \
log2 \
pow \
sin \
sincos \

View File

@ -23,6 +23,7 @@ libmvec {
_ZGVbN2v_exp2; _ZGVcN4v_exp2; _ZGVdN4v_exp2; _ZGVeN8v_exp2;
_ZGVbN2v_expm1; _ZGVcN4v_expm1; _ZGVdN4v_expm1; _ZGVeN8v_expm1;
_ZGVbN2v_log10; _ZGVcN4v_log10; _ZGVdN4v_log10; _ZGVeN8v_log10;
_ZGVbN2v_log2; _ZGVcN4v_log2; _ZGVdN4v_log2; _ZGVeN8v_log2;
_ZGVbN2v_sinh; _ZGVcN4v_sinh; _ZGVdN4v_sinh; _ZGVeN8v_sinh;
_ZGVbN2vv_atan2; _ZGVcN4vv_atan2; _ZGVdN4vv_atan2; _ZGVeN8vv_atan2;
_ZGVbN2vv_hypot; _ZGVcN4vv_hypot; _ZGVdN4vv_hypot; _ZGVeN8vv_hypot;
@ -35,6 +36,7 @@ libmvec {
_ZGVbN4v_exp2f; _ZGVcN8v_exp2f; _ZGVdN8v_exp2f; _ZGVeN16v_exp2f;
_ZGVbN4v_expm1f; _ZGVcN8v_expm1f; _ZGVdN8v_expm1f; _ZGVeN16v_expm1f;
_ZGVbN4v_log10f; _ZGVcN8v_log10f; _ZGVdN8v_log10f; _ZGVeN16v_log10f;
_ZGVbN4v_log2f; _ZGVcN8v_log2f; _ZGVdN8v_log2f; _ZGVeN16v_log2f;
_ZGVbN4v_sinhf; _ZGVcN8v_sinhf; _ZGVdN8v_sinhf; _ZGVeN16v_sinhf;
_ZGVbN4vv_atan2f; _ZGVcN8vv_atan2f; _ZGVdN8vv_atan2f; _ZGVeN16vv_atan2f;
_ZGVbN4vv_hypotf; _ZGVcN8vv_hypotf; _ZGVdN8vv_hypotf; _ZGVeN16vv_hypotf;

View File

@ -1709,6 +1709,26 @@ float: 3
float128: 1
ldouble: 1
Function: "log2_vlen16":
float: 1
Function: "log2_vlen2":
double: 1
Function: "log2_vlen4":
double: 1
float: 1
Function: "log2_vlen4_avx2":
double: 1
Function: "log2_vlen8":
double: 1
float: 1
Function: "log2_vlen8_avx2":
float: 1
Function: "log_downward":
float: 2
float128: 1

View File

@ -0,0 +1,20 @@
/* SSE2 version of vectorized log2, vector length is 2.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define _ZGVbN2v_log2 _ZGVbN2v_log2_sse2
#include "../svml_d_log22_core.S"

View File

@ -0,0 +1,27 @@
/* Multiple versions of vectorized log2, vector length is 2.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define SYMBOL_NAME _ZGVbN2v_log2
#include "ifunc-mathvec-sse4_1.h"
libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
#ifdef SHARED
__hidden_ver1 (_ZGVbN2v_log2, __GI__ZGVbN2v_log2, __redirect__ZGVbN2v_log2)
__attribute__ ((visibility ("hidden")));
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,20 @@
/* SSE version of vectorized log2, vector length is 4.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define _ZGVdN4v_log2 _ZGVdN4v_log2_sse_wrapper
#include "../svml_d_log24_core.S"

View File

@ -0,0 +1,27 @@
/* Multiple versions of vectorized log2, vector length is 4.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define SYMBOL_NAME _ZGVdN4v_log2
#include "ifunc-mathvec-avx2.h"
libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
#ifdef SHARED
__hidden_ver1 (_ZGVdN4v_log2, __GI__ZGVdN4v_log2, __redirect__ZGVdN4v_log2)
__attribute__ ((visibility ("hidden")));
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,20 @@
/* AVX2 version of vectorized log2, vector length is 8.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define _ZGVeN8v_log2 _ZGVeN8v_log2_avx2_wrapper
#include "../svml_d_log28_core.S"

View File

@ -0,0 +1,27 @@
/* Multiple versions of vectorized log2, vector length is 8.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define SYMBOL_NAME _ZGVeN8v_log2
#include "ifunc-mathvec-avx512-skx.h"
libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
#ifdef SHARED
__hidden_ver1 (_ZGVeN8v_log2, __GI__ZGVeN8v_log2, __redirect__ZGVeN8v_log2)
__attribute__ ((visibility ("hidden")));
#endif

View File

@ -0,0 +1,293 @@
/* Function log2 vectorized with AVX-512.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
https://www.gnu.org/licenses/. */
/*
* ALGORITHM DESCRIPTION:
*
* Get short reciprocal approximation Rcp ~ 1/mantissa(x)
* R = Rcp*x - 1.0
* log2(x) = k - log2(Rcp) + poly_approximation(R)
* log2(Rcp) is tabulated
*
*
*/
/* Offsets for data table __svml_dlog2_data_internal_avx512
*/
#define Log_tbl 0
#define One 128
#define C075 192
#define poly_coeff9 256
#define poly_coeff8 320
#define poly_coeff7 384
#define poly_coeff6 448
#define poly_coeff5 512
#define poly_coeff4 576
#define poly_coeff3 640
#define poly_coeff2 704
#define poly_coeff1 768
#include <sysdep.h>
.text
.section .text.evex512,"ax",@progbits
ENTRY(_ZGVeN8v_log2_skx)
pushq %rbp
cfi_def_cfa_offset(16)
movq %rsp, %rbp
cfi_def_cfa(6, 16)
cfi_offset(6, -16)
andq $-64, %rsp
subq $192, %rsp
vmovaps %zmm0, %zmm7
vgetmantpd $8, {sae}, %zmm7, %zmm6
vmovups One+__svml_dlog2_data_internal_avx512(%rip), %zmm2
vmovups poly_coeff5+__svml_dlog2_data_internal_avx512(%rip), %zmm12
vmovups poly_coeff3+__svml_dlog2_data_internal_avx512(%rip), %zmm13
/* Start polynomial evaluation */
vmovups poly_coeff9+__svml_dlog2_data_internal_avx512(%rip), %zmm10
vmovups poly_coeff8+__svml_dlog2_data_internal_avx512(%rip), %zmm0
vmovups poly_coeff7+__svml_dlog2_data_internal_avx512(%rip), %zmm11
vmovups poly_coeff6+__svml_dlog2_data_internal_avx512(%rip), %zmm14
/* Prepare exponent correction: DblRcp<0.75? */
vmovups C075+__svml_dlog2_data_internal_avx512(%rip), %zmm1
/* Table lookup */
vmovups __svml_dlog2_data_internal_avx512(%rip), %zmm4
/* GetExp(x) */
vgetexppd {sae}, %zmm7, %zmm5
/* DblRcp ~ 1/Mantissa */
vrcp14pd %zmm6, %zmm8
/* x<=0? */
vfpclasspd $94, %zmm7, %k0
/* round DblRcp to 4 fractional bits (RN mode, no Precision exception) */
vrndscalepd $88, {sae}, %zmm8, %zmm3
vmovups poly_coeff4+__svml_dlog2_data_internal_avx512(%rip), %zmm8
kmovw %k0, %edx
/* Reduced argument: R = DblRcp*Mantissa - 1 */
vfmsub213pd {rn-sae}, %zmm2, %zmm3, %zmm6
vcmppd $17, {sae}, %zmm1, %zmm3, %k1
vfmadd231pd {rn-sae}, %zmm6, %zmm12, %zmm8
vmovups poly_coeff2+__svml_dlog2_data_internal_avx512(%rip), %zmm12
vfmadd231pd {rn-sae}, %zmm6, %zmm10, %zmm0
vfmadd231pd {rn-sae}, %zmm6, %zmm11, %zmm14
vmovups poly_coeff1+__svml_dlog2_data_internal_avx512(%rip), %zmm1
/* R^2 */
vmulpd {rn-sae}, %zmm6, %zmm6, %zmm15
vfmadd231pd {rn-sae}, %zmm6, %zmm13, %zmm12
/* Prepare table index */
vpsrlq $48, %zmm3, %zmm9
/* add 1 to Expon if DblRcp<0.75 */
vaddpd {rn-sae}, %zmm2, %zmm5, %zmm5{%k1}
vmulpd {rn-sae}, %zmm15, %zmm15, %zmm13
vfmadd213pd {rn-sae}, %zmm14, %zmm15, %zmm0
vfmadd213pd {rn-sae}, %zmm12, %zmm15, %zmm8
vpermt2pd Log_tbl+64+__svml_dlog2_data_internal_avx512(%rip), %zmm9, %zmm4
/* polynomial */
vfmadd213pd {rn-sae}, %zmm8, %zmm13, %zmm0
vfmadd213pd {rn-sae}, %zmm1, %zmm6, %zmm0
vfmadd213pd {rn-sae}, %zmm4, %zmm0, %zmm6
vaddpd {rn-sae}, %zmm6, %zmm5, %zmm0
testl %edx, %edx
/* Go to special inputs processing branch */
jne L(SPECIAL_VALUES_BRANCH)
# LOE rbx r12 r13 r14 r15 edx zmm0 zmm7
/* Restore registers
* and exit the function
*/
L(EXIT):
movq %rbp, %rsp
popq %rbp
cfi_def_cfa(7, 8)
cfi_restore(6)
ret
cfi_def_cfa(6, 16)
cfi_offset(6, -16)
/* Branch to process
* special inputs
*/
L(SPECIAL_VALUES_BRANCH):
vmovups %zmm7, 64(%rsp)
vmovups %zmm0, 128(%rsp)
# LOE rbx r12 r13 r14 r15 edx zmm0
xorl %eax, %eax
# LOE rbx r12 r13 r14 r15 eax edx
vzeroupper
movq %r12, 16(%rsp)
/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
movl %eax, %r12d
movq %r13, 8(%rsp)
/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
movl %edx, %r13d
movq %r14, (%rsp)
/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
# LOE rbx r15 r12d r13d
/* Range mask
* bits check
*/
L(RANGEMASK_CHECK):
btl %r12d, %r13d
/* Call scalar math function */
jc L(SCALAR_MATH_CALL)
# LOE rbx r15 r12d r13d
/* Special inputs
* processing loop
*/
L(SPECIAL_VALUES_LOOP):
incl %r12d
cmpl $8, %r12d
/* Check bits in range mask */
jl L(RANGEMASK_CHECK)
# LOE rbx r15 r12d r13d
movq 16(%rsp), %r12
cfi_restore(12)
movq 8(%rsp), %r13
cfi_restore(13)
movq (%rsp), %r14
cfi_restore(14)
vmovups 128(%rsp), %zmm0
/* Go to exit */
jmp L(EXIT)
/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
# LOE rbx r12 r13 r14 r15 zmm0
/* Scalar math fucntion call
* to process special input
*/
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movsd 64(%rsp,%r14,8), %xmm0
call log2@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movsd %xmm0, 128(%rsp,%r14,8)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)
# LOE rbx r15 r12d r13d
END(_ZGVeN8v_log2_skx)
.section .rodata, "a"
.align 64
#ifdef __svml_dlog2_data_internal_avx512_typedef
typedef unsigned int VUINT32;
typedef struct {
__declspec(align(64)) VUINT32 Log_tbl[16][2];
__declspec(align(64)) VUINT32 One[8][2];
__declspec(align(64)) VUINT32 C075[8][2];
__declspec(align(64)) VUINT32 poly_coeff9[8][2];
__declspec(align(64)) VUINT32 poly_coeff8[8][2];
__declspec(align(64)) VUINT32 poly_coeff7[8][2];
__declspec(align(64)) VUINT32 poly_coeff6[8][2];
__declspec(align(64)) VUINT32 poly_coeff5[8][2];
__declspec(align(64)) VUINT32 poly_coeff4[8][2];
__declspec(align(64)) VUINT32 poly_coeff3[8][2];
__declspec(align(64)) VUINT32 poly_coeff2[8][2];
__declspec(align(64)) VUINT32 poly_coeff1[8][2];
} __svml_dlog2_data_internal_avx512;
#endif
__svml_dlog2_data_internal_avx512:
/*== Log_tbl ==*/
.quad 0x0000000000000000
.quad 0xbfb663f6fac91316
.quad 0xbfc5c01a39fbd688
.quad 0xbfcfbc16b902680a
.quad 0xbfd49a784bcd1b8b
.quad 0xbfd91bba891f1709
.quad 0xbfdd6753e032ea0f
.quad 0xbfe0c10500d63aa6
.quad 0x3fda8ff971810a5e
.quad 0x3fd6cb0f6865c8ea
.quad 0x3fd32bfee370ee68
.quad 0x3fcf5fd8a9063e35
.quad 0x3fc8a8980abfbd32
.quad 0x3fc22dadc2ab3497
.quad 0x3fb7d60496cfbb4c
.quad 0x3fa77394c9d958d5
/*== One ==*/
.align 64
.quad 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000
/*== C075 0.75 ==*/
.align 64
.quad 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000, 0x3fe8000000000000
/*== poly_coeff9 ==*/
.align 64
.quad 0x3fc4904bda0e1d12, 0x3fc4904bda0e1d12, 0x3fc4904bda0e1d12, 0x3fc4904bda0e1d12, 0x3fc4904bda0e1d12, 0x3fc4904bda0e1d12, 0x3fc4904bda0e1d12, 0x3fc4904bda0e1d12
/*== poly_coeff8 ==*/
.align 64
.quad 0xbfc71fb84deb5cce, 0xbfc71fb84deb5cce, 0xbfc71fb84deb5cce, 0xbfc71fb84deb5cce, 0xbfc71fb84deb5cce, 0xbfc71fb84deb5cce, 0xbfc71fb84deb5cce, 0xbfc71fb84deb5cce
/*== poly_coeff7 ==*/
.align 64
.quad 0x3fca617351818613, 0x3fca617351818613, 0x3fca617351818613, 0x3fca617351818613, 0x3fca617351818613, 0x3fca617351818613, 0x3fca617351818613, 0x3fca617351818613
/*== poly_coeff6 ==*/
.align 64
.quad 0xbfcec707e4e3144c, 0xbfcec707e4e3144c, 0xbfcec707e4e3144c, 0xbfcec707e4e3144c, 0xbfcec707e4e3144c, 0xbfcec707e4e3144c, 0xbfcec707e4e3144c, 0xbfcec707e4e3144c
/*== poly_coeff5 ==*/
.align 64
.quad 0x3fd2776c5114d91a, 0x3fd2776c5114d91a, 0x3fd2776c5114d91a, 0x3fd2776c5114d91a, 0x3fd2776c5114d91a, 0x3fd2776c5114d91a, 0x3fd2776c5114d91a, 0x3fd2776c5114d91a
/*== poly_coeff4 ==*/
.align 64
.quad 0xbfd71547653d0f8d, 0xbfd71547653d0f8d, 0xbfd71547653d0f8d, 0xbfd71547653d0f8d, 0xbfd71547653d0f8d, 0xbfd71547653d0f8d, 0xbfd71547653d0f8d, 0xbfd71547653d0f8d
/*== poly_coeff3 ==*/
.align 64
.quad 0x3fdec709dc3a029f, 0x3fdec709dc3a029f, 0x3fdec709dc3a029f, 0x3fdec709dc3a029f, 0x3fdec709dc3a029f, 0x3fdec709dc3a029f, 0x3fdec709dc3a029f, 0x3fdec709dc3a029f
/*== poly_coeff2 ==*/
.align 64
.quad 0xbfe71547652b82d4, 0xbfe71547652b82d4, 0xbfe71547652b82d4, 0xbfe71547652b82d4, 0xbfe71547652b82d4, 0xbfe71547652b82d4, 0xbfe71547652b82d4, 0xbfe71547652b82d4
/*== poly_coeff1 ==*/
.align 64
.quad 0x3ff71547652b82fe, 0x3ff71547652b82fe, 0x3ff71547652b82fe, 0x3ff71547652b82fe, 0x3ff71547652b82fe, 0x3ff71547652b82fe, 0x3ff71547652b82fe, 0x3ff71547652b82fe
.align 64
.type __svml_dlog2_data_internal_avx512,@object
.size __svml_dlog2_data_internal_avx512,.-__svml_dlog2_data_internal_avx512

View File

@ -0,0 +1,20 @@
/* AVX2 version of vectorized log2f.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define _ZGVeN16v_log2f _ZGVeN16v_log2f_avx2_wrapper
#include "../svml_s_log2f16_core.S"

View File

@ -0,0 +1,28 @@
/* Multiple versions of vectorized log2f, vector length is 16.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define SYMBOL_NAME _ZGVeN16v_log2f
#include "ifunc-mathvec-avx512-skx.h"
libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
#ifdef SHARED
__hidden_ver1 (_ZGVeN16v_log2f, __GI__ZGVeN16v_log2f,
__redirect__ZGVeN16v_log2f)
__attribute__ ((visibility ("hidden")));
#endif

View File

@ -0,0 +1,231 @@
/* Function log2f vectorized with AVX-512.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
https://www.gnu.org/licenses/. */
/*
* ALGORITHM DESCRIPTION:
*
* Get short reciprocal approximation Rcp ~ 1/mantissa(x)
* R = Rcp*x - 1.0
* log2(x) = k - log2(Rcp) + poly_approximation(R)
* log2(Rcp) is tabulated
*
*
*/
/* Offsets for data table __svml_slog2_data_internal_avx512
*/
#define One 0
#define coeff4 64
#define coeff3 128
#define coeff2 192
#define coeff1 256
#include <sysdep.h>
.text
.section .text.exex512,"ax",@progbits
ENTRY(_ZGVeN16v_log2f_skx)
pushq %rbp
cfi_def_cfa_offset(16)
movq %rsp, %rbp
cfi_def_cfa(6, 16)
cfi_offset(6, -16)
andq $-64, %rsp
subq $192, %rsp
vgetmantps $11, {sae}, %zmm0, %zmm3
vmovups __svml_slog2_data_internal_avx512(%rip), %zmm1
vgetexpps {sae}, %zmm0, %zmm5
/* x<=0? */
vfpclassps $94, %zmm0, %k0
vsubps {rn-sae}, %zmm1, %zmm3, %zmm9
vpsrld $19, %zmm3, %zmm7
vgetexpps {sae}, %zmm3, %zmm6
vpermps coeff4+__svml_slog2_data_internal_avx512(%rip), %zmm7, %zmm1
vpermps coeff3+__svml_slog2_data_internal_avx512(%rip), %zmm7, %zmm2
vpermps coeff2+__svml_slog2_data_internal_avx512(%rip), %zmm7, %zmm4
vpermps coeff1+__svml_slog2_data_internal_avx512(%rip), %zmm7, %zmm8
vsubps {rn-sae}, %zmm6, %zmm5, %zmm10
vfmadd213ps {rn-sae}, %zmm2, %zmm9, %zmm1
kmovw %k0, %edx
vfmadd213ps {rn-sae}, %zmm4, %zmm9, %zmm1
vfmadd213ps {rn-sae}, %zmm8, %zmm9, %zmm1
vfmadd213ps {rn-sae}, %zmm10, %zmm9, %zmm1
testl %edx, %edx
/* Go to special inputs processing branch */
jne L(SPECIAL_VALUES_BRANCH)
# LOE rbx r12 r13 r14 r15 edx zmm0 zmm1
/* Restore registers
* and exit the function
*/
L(EXIT):
vmovaps %zmm1, %zmm0
movq %rbp, %rsp
popq %rbp
cfi_def_cfa(7, 8)
cfi_restore(6)
ret
cfi_def_cfa(6, 16)
cfi_offset(6, -16)
/* Branch to process
* special inputs
*/
L(SPECIAL_VALUES_BRANCH):
vmovups %zmm0, 64(%rsp)
vmovups %zmm1, 128(%rsp)
# LOE rbx r12 r13 r14 r15 edx zmm1
xorl %eax, %eax
# LOE rbx r12 r13 r14 r15 eax edx
vzeroupper
movq %r12, 16(%rsp)
/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
movl %eax, %r12d
movq %r13, 8(%rsp)
/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
movl %edx, %r13d
movq %r14, (%rsp)
/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
# LOE rbx r15 r12d r13d
/* Range mask
* bits check
*/
L(RANGEMASK_CHECK):
btl %r12d, %r13d
/* Call scalar math function */
jc L(SCALAR_MATH_CALL)
# LOE rbx r15 r12d r13d
/* Special inputs
* processing loop
*/
L(SPECIAL_VALUES_LOOP):
incl %r12d
cmpl $16, %r12d
/* Check bits in range mask */
jl L(RANGEMASK_CHECK)
# LOE rbx r15 r12d r13d
movq 16(%rsp), %r12
cfi_restore(12)
movq 8(%rsp), %r13
cfi_restore(13)
movq (%rsp), %r14
cfi_restore(14)
vmovups 128(%rsp), %zmm1
/* Go to exit */
jmp L(EXIT)
/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
# LOE rbx r12 r13 r14 r15 zmm1
/* Scalar math fucntion call
* to process special input
*/
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 64(%rsp,%r14,4), %xmm0
call log2f@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 128(%rsp,%r14,4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)
# LOE rbx r15 r12d r13d
END(_ZGVeN16v_log2f_skx)
.section .rodata, "a"
.align 64
#ifdef __svml_slog2_data_internal_avx512_typedef
typedef unsigned int VUINT32;
typedef struct {
__declspec(align(64)) VUINT32 One[16][1];
__declspec(align(64)) VUINT32 coeff4[16][1];
__declspec(align(64)) VUINT32 coeff3[16][1];
__declspec(align(64)) VUINT32 coeff2[16][1];
__declspec(align(64)) VUINT32 coeff1[16][1];
} __svml_slog2_data_internal_avx512;
#endif
__svml_slog2_data_internal_avx512:
/*== One ==*/
.long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
// c4
.align 64
.long 0xbea77e4a, 0xbe8aae3d
.long 0xbe67fe32, 0xbe43d1b6
.long 0xbe26a589, 0xbe0ee09b
.long 0xbdf6a8a1, 0xbdd63b49
.long 0xbf584e51, 0xbf3e80a1
.long 0xbf2892f0, 0xbf15d377
.long 0xbf05b525, 0xbeef8e30
.long 0xbed75c8f, 0xbec24184
// c3
.align 64
.long 0x3ef5910c, 0x3ef045a1
.long 0x3ee7d87e, 0x3eddbb84
.long 0x3ed2d6df, 0x3ec7bbd2
.long 0x3ebcc42f, 0x3eb22616
.long 0x3e8f3399, 0x3eb1223e
.long 0x3ec9db4a, 0x3edb7a09
.long 0x3ee79a1a, 0x3eef77cb
.long 0x3ef407a4, 0x3ef607b4
// c2
.align 64
.long 0xbf38a934, 0xbf387de6
.long 0xbf37f6f0, 0xbf37048b
.long 0xbf35a88a, 0xbf33ed04
.long 0xbf31df56, 0xbf2f8d82
.long 0xbf416814, 0xbf3daf58
.long 0xbf3b5c08, 0xbf39fa2a
.long 0xbf393713, 0xbf38d7e1
.long 0xbf38b2cd, 0xbf38aa62
// c1
.align 64
.long 0x3fb8aa3b, 0x3fb8a9c0
.long 0x3fb8a6e8, 0x3fb89f4e
.long 0x3fb890cb, 0x3fb879b1
.long 0x3fb858d8, 0x3fb82d90
.long 0x3fb8655e, 0x3fb8883a
.long 0x3fb89aea, 0x3fb8a42f
.long 0x3fb8a848, 0x3fb8a9c9
.long 0x3fb8aa2f, 0x3fb8aa3b
.align 64
.type __svml_slog2_data_internal_avx512,@object
.size __svml_slog2_data_internal_avx512,.-__svml_slog2_data_internal_avx512

View File

@ -0,0 +1,20 @@
/* SSE2 version of vectorized log2f, vector length is 4.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define _ZGVbN4v_log2f _ZGVbN4v_log2f_sse2
#include "../svml_s_log2f4_core.S"

View File

@ -0,0 +1,28 @@
/* Multiple versions of vectorized log2f, vector length is 4.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define SYMBOL_NAME _ZGVbN4v_log2f
#include "ifunc-mathvec-sse4_1.h"
libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
#ifdef SHARED
__hidden_ver1 (_ZGVbN4v_log2f, __GI__ZGVbN4v_log2f,
__redirect__ZGVbN4v_log2f)
__attribute__ ((visibility ("hidden")));
#endif

View File

@ -0,0 +1,223 @@
/* Function log2f vectorized with SSE4.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
https://www.gnu.org/licenses/. */
/*
* ALGORITHM DESCRIPTION:
*
* Get short reciprocal approximation Rcp ~ 1/mantissa(x)
* R = Rcp*x - 1.0
* log2(x) = k - log2(Rcp) + poly_approximation(R)
* log2(Rcp) is tabulated
*
*
*/
/* Offsets for data table __svml_slog2_data_internal
*/
#define MinNorm 0
#define MaxNorm 16
#define iBrkValue 32
#define iOffExpoMask 48
#define One 64
#define sPoly 80
#include <sysdep.h>
.text
.section .text.sse4,"ax",@progbits
ENTRY(_ZGVbN4v_log2f_sse4)
subq $72, %rsp
cfi_def_cfa_offset(80)
movaps %xmm0, %xmm1
/* reduction: compute r,n */
movdqu iBrkValue+__svml_slog2_data_internal(%rip), %xmm2
movaps %xmm0, %xmm4
movdqu iOffExpoMask+__svml_slog2_data_internal(%rip), %xmm10
psubd %xmm2, %xmm1
pand %xmm1, %xmm10
movaps %xmm0, %xmm3
paddd %xmm2, %xmm10
psrad $23, %xmm1
movups sPoly+__svml_slog2_data_internal(%rip), %xmm5
movups sPoly+32+__svml_slog2_data_internal(%rip), %xmm6
movups sPoly+64+__svml_slog2_data_internal(%rip), %xmm7
movups sPoly+96+__svml_slog2_data_internal(%rip), %xmm9
cmpltps MinNorm+__svml_slog2_data_internal(%rip), %xmm4
cmpnleps MaxNorm+__svml_slog2_data_internal(%rip), %xmm3
cvtdq2ps %xmm1, %xmm1
subps One+__svml_slog2_data_internal(%rip), %xmm10
mulps %xmm10, %xmm5
movaps %xmm10, %xmm8
mulps %xmm10, %xmm6
mulps %xmm10, %xmm8
addps sPoly+16+__svml_slog2_data_internal(%rip), %xmm5
mulps %xmm10, %xmm7
addps sPoly+48+__svml_slog2_data_internal(%rip), %xmm6
mulps %xmm10, %xmm9
mulps %xmm8, %xmm5
addps sPoly+80+__svml_slog2_data_internal(%rip), %xmm7
addps sPoly+112+__svml_slog2_data_internal(%rip), %xmm9
addps %xmm5, %xmm6
mulps %xmm8, %xmm6
orps %xmm3, %xmm4
/* combine and get argument value range mask */
movmskps %xmm4, %edx
addps %xmm6, %xmm7
mulps %xmm7, %xmm8
addps %xmm8, %xmm9
mulps %xmm10, %xmm9
addps sPoly+128+__svml_slog2_data_internal(%rip), %xmm9
mulps %xmm9, %xmm10
addps %xmm10, %xmm1
testl %edx, %edx
/* Go to special inputs processing branch */
jne L(SPECIAL_VALUES_BRANCH)
# LOE rbx rbp r12 r13 r14 r15 edx xmm0 xmm1
/* Restore registers
* and exit the function
*/
L(EXIT):
movaps %xmm1, %xmm0
addq $72, %rsp
cfi_def_cfa_offset(8)
ret
cfi_def_cfa_offset(80)
/* Branch to process
* special inputs
*/
L(SPECIAL_VALUES_BRANCH):
movups %xmm0, 32(%rsp)
movups %xmm1, 48(%rsp)
# LOE rbx rbp r12 r13 r14 r15 edx
xorl %eax, %eax
movq %r12, 16(%rsp)
cfi_offset(12, -64)
movl %eax, %r12d
movq %r13, 8(%rsp)
cfi_offset(13, -72)
movl %edx, %r13d
movq %r14, (%rsp)
cfi_offset(14, -80)
# LOE rbx rbp r15 r12d r13d
/* Range mask
* bits check
*/
L(RANGEMASK_CHECK):
btl %r12d, %r13d
/* Call scalar math function */
jc L(SCALAR_MATH_CALL)
# LOE rbx rbp r15 r12d r13d
/* Special inputs
* processing loop
*/
L(SPECIAL_VALUES_LOOP):
incl %r12d
cmpl $4, %r12d
/* Check bits in range mask */
jl L(RANGEMASK_CHECK)
# LOE rbx rbp r15 r12d r13d
movq 16(%rsp), %r12
cfi_restore(12)
movq 8(%rsp), %r13
cfi_restore(13)
movq (%rsp), %r14
cfi_restore(14)
movups 48(%rsp), %xmm1
/* Go to exit */
jmp L(EXIT)
cfi_offset(12, -64)
cfi_offset(13, -72)
cfi_offset(14, -80)
# LOE rbx rbp r12 r13 r14 r15 xmm1
/* Scalar math fucntion call
* to process special input
*/
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 32(%rsp,%r14,4), %xmm0
call log2f@PLT
# LOE rbx rbp r14 r15 r12d r13d xmm0
movss %xmm0, 48(%rsp,%r14,4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)
# LOE rbx rbp r15 r12d r13d
END(_ZGVbN4v_log2f_sse4)
.section .rodata, "a"
.align 16
#ifdef __svml_slog2_data_internal_typedef
typedef unsigned int VUINT32;
typedef struct {
__declspec(align(16)) VUINT32 MinNorm[4][1];
__declspec(align(16)) VUINT32 MaxNorm[4][1];
__declspec(align(16)) VUINT32 iBrkValue[4][1];
__declspec(align(16)) VUINT32 iOffExpoMask[4][1];
__declspec(align(16)) VUINT32 One[4][1];
__declspec(align(16)) VUINT32 sPoly[9][4][1];
} __svml_slog2_data_internal;
#endif
__svml_slog2_data_internal:
/*== MinNorm ==*/
.long 0x00800000, 0x00800000, 0x00800000, 0x00800000
/*== MaxNorm ==*/
.align 16
.long 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff
/*== iBrkValue = SP 2/3 ==*/
.align 16
.long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab
/*== iOffExpoMask = SP significand mask ==*/
.align 16
.long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff
/*== sOne = SP 1.0 ==*/
.align 16
.long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
/*== spoly[9] ==*/
.align 16
.long 0x3e554012, 0x3e554012, 0x3e554012, 0x3e554012 /* coeff9 */
.long 0xbe638E14, 0xbe638E14, 0xbe638E14, 0xbe638E14 /* coeff8 */
.long 0x3e4D660B, 0x3e4D660B, 0x3e4D660B, 0x3e4D660B /* coeff7 */
.long 0xbe727824, 0xbe727824, 0xbe727824, 0xbe727824 /* coeff6 */
.long 0x3e93DD07, 0x3e93DD07, 0x3e93DD07, 0x3e93DD07 /* coeff5 */
.long 0xbeB8B969, 0xbeB8B969, 0xbeB8B969, 0xbeB8B969 /* coeff4 */
.long 0x3eF637C0, 0x3eF637C0, 0x3eF637C0, 0x3eF637C0 /* coeff3 */
.long 0xbf38AA2B, 0xbf38AA2B, 0xbf38AA2B, 0xbf38AA2B /* coeff2 */
.long 0x3fB8AA3B, 0x3fB8AA3B, 0x3fB8AA3B, 0x3fB8AA3B /* coeff1 */
.align 16
.type __svml_slog2_data_internal,@object
.size __svml_slog2_data_internal,.-__svml_slog2_data_internal

View File

@ -0,0 +1,20 @@
/* SSE version of vectorized log2f, vector length is 8.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define _ZGVdN8v_log2f _ZGVdN8v_log2f_sse_wrapper
#include "../svml_s_log2f8_core.S"

View File

@ -0,0 +1,28 @@
/* Multiple versions of vectorized log2f, vector length is 8.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define SYMBOL_NAME _ZGVdN8v_log2f
#include "ifunc-mathvec-avx2.h"
libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
#ifdef SHARED
__hidden_ver1 (_ZGVdN8v_log2f, __GI__ZGVdN8v_log2f,
__redirect__ZGVdN8v_log2f)
__attribute__ ((visibility ("hidden")));
#endif

View File

@ -0,0 +1,226 @@
/* Function log2f vectorized with AVX2.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
https://www.gnu.org/licenses/. */
/*
* ALGORITHM DESCRIPTION:
*
* Get short reciprocal approximation Rcp ~ 1/mantissa(x)
* R = Rcp*x - 1.0
* log2(x) = k - log2(Rcp) + poly_approximation(R)
* log2(Rcp) is tabulated
*
*
*/
/* Offsets for data table __svml_slog2_data_internal
*/
#define MinNorm 0
#define MaxNorm 32
#define iBrkValue 64
#define iOffExpoMask 96
#define One 128
#define sPoly 160
#include <sysdep.h>
.text
.section .text.avx2,"ax",@progbits
ENTRY(_ZGVdN8v_log2f_avx2)
pushq %rbp
cfi_def_cfa_offset(16)
movq %rsp, %rbp
cfi_def_cfa(6, 16)
cfi_offset(6, -16)
andq $-32, %rsp
subq $96, %rsp
/* reduction: compute r,n */
vmovups iBrkValue+__svml_slog2_data_internal(%rip), %ymm4
vmovups sPoly+64+__svml_slog2_data_internal(%rip), %ymm9
vmovups sPoly+128+__svml_slog2_data_internal(%rip), %ymm10
vmovups sPoly+192+__svml_slog2_data_internal(%rip), %ymm12
vpsubd %ymm4, %ymm0, %ymm1
vcmplt_oqps MinNorm+__svml_slog2_data_internal(%rip), %ymm0, %ymm5
vcmpnle_uqps MaxNorm+__svml_slog2_data_internal(%rip), %ymm0, %ymm6
vpand iOffExpoMask+__svml_slog2_data_internal(%rip), %ymm1, %ymm3
vpsrad $23, %ymm1, %ymm2
vmovups sPoly+__svml_slog2_data_internal(%rip), %ymm1
vpaddd %ymm4, %ymm3, %ymm8
vcvtdq2ps %ymm2, %ymm14
vsubps One+__svml_slog2_data_internal(%rip), %ymm8, %ymm13
vfmadd213ps sPoly+32+__svml_slog2_data_internal(%rip), %ymm13, %ymm1
vfmadd213ps sPoly+96+__svml_slog2_data_internal(%rip), %ymm13, %ymm9
vmulps %ymm13, %ymm13, %ymm11
vfmadd213ps sPoly+160+__svml_slog2_data_internal(%rip), %ymm13, %ymm10
vfmadd213ps sPoly+224+__svml_slog2_data_internal(%rip), %ymm13, %ymm12
vfmadd213ps %ymm9, %ymm11, %ymm1
vfmadd213ps %ymm10, %ymm11, %ymm1
vfmadd213ps %ymm12, %ymm11, %ymm1
vfmadd213ps sPoly+256+__svml_slog2_data_internal(%rip), %ymm13, %ymm1
vorps %ymm6, %ymm5, %ymm7
/* combine and get argument value range mask */
vmovmskps %ymm7, %edx
vfmadd213ps %ymm14, %ymm13, %ymm1
testl %edx, %edx
/* Go to special inputs processing branch */
jne L(SPECIAL_VALUES_BRANCH)
# LOE rbx r12 r13 r14 r15 edx ymm0 ymm1
/* Restore registers
* and exit the function
*/
L(EXIT):
vmovaps %ymm1, %ymm0
movq %rbp, %rsp
popq %rbp
cfi_def_cfa(7, 8)
cfi_restore(6)
ret
cfi_def_cfa(6, 16)
cfi_offset(6, -16)
/* Branch to process
* special inputs
*/
L(SPECIAL_VALUES_BRANCH):
vmovups %ymm0, 32(%rsp)
vmovups %ymm1, 64(%rsp)
# LOE rbx r12 r13 r14 r15 edx ymm1
xorl %eax, %eax
# LOE rbx r12 r13 r14 r15 eax edx
vzeroupper
movq %r12, 16(%rsp)
/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
movl %eax, %r12d
movq %r13, 8(%rsp)
/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
movl %edx, %r13d
movq %r14, (%rsp)
/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
# LOE rbx r15 r12d r13d
/* Range mask
* bits check
*/
L(RANGEMASK_CHECK):
btl %r12d, %r13d
/* Call scalar math function */
jc L(SCALAR_MATH_CALL)
# LOE rbx r15 r12d r13d
/* Special inputs
* processing loop
*/
L(SPECIAL_VALUES_LOOP):
incl %r12d
cmpl $8, %r12d
/* Check bits in range mask */
jl L(RANGEMASK_CHECK)
# LOE rbx r15 r12d r13d
movq 16(%rsp), %r12
cfi_restore(12)
movq 8(%rsp), %r13
cfi_restore(13)
movq (%rsp), %r14
cfi_restore(14)
vmovups 64(%rsp), %ymm1
/* Go to exit */
jmp L(EXIT)
/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */
.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */
.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */
.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
# LOE rbx r12 r13 r14 r15 ymm1
/* Scalar math fucntion call
* to process special input
*/
L(SCALAR_MATH_CALL):
movl %r12d, %r14d
movss 32(%rsp,%r14,4), %xmm0
call log2f@PLT
# LOE rbx r14 r15 r12d r13d xmm0
movss %xmm0, 64(%rsp,%r14,4)
/* Process special inputs in loop */
jmp L(SPECIAL_VALUES_LOOP)
# LOE rbx r15 r12d r13d
END(_ZGVdN8v_log2f_avx2)
.section .rodata, "a"
.align 32
#ifdef __svml_slog2_data_internal_typedef
typedef unsigned int VUINT32;
typedef struct {
__declspec(align(32)) VUINT32 MinNorm[8][1];
__declspec(align(32)) VUINT32 MaxNorm[8][1];
__declspec(align(32)) VUINT32 iBrkValue[8][1];
__declspec(align(32)) VUINT32 iOffExpoMask[8][1];
__declspec(align(32)) VUINT32 One[8][1];
__declspec(align(32)) VUINT32 sPoly[9][8][1];
} __svml_slog2_data_internal;
#endif
__svml_slog2_data_internal:
/*== MinNorm ==*/
.long 0x00800000, 0x00800000, 0x00800000, 0x00800000, 0x00800000, 0x00800000, 0x00800000, 0x00800000
/*== MaxNorm ==*/
.align 32
.long 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff
/*== iBrkValue = SP 2/3 ==*/
.align 32
.long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab
/*== iOffExpoMask = SP significand mask ==*/
.align 32
.long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff
/*== sOne = SP 1.0 ==*/
.align 32
.long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
/*== spoly[9] ==*/
.align 32
.long 0x3e554012, 0x3e554012, 0x3e554012, 0x3e554012, 0x3e554012, 0x3e554012, 0x3e554012, 0x3e554012 /* coeff9 */
.long 0xbe638E14, 0xbe638E14, 0xbe638E14, 0xbe638E14, 0xbe638E14, 0xbe638E14, 0xbe638E14, 0xbe638E14 /* coeff8 */
.long 0x3e4D660B, 0x3e4D660B, 0x3e4D660B, 0x3e4D660B, 0x3e4D660B, 0x3e4D660B, 0x3e4D660B, 0x3e4D660B /* coeff7 */
.long 0xbe727824, 0xbe727824, 0xbe727824, 0xbe727824, 0xbe727824, 0xbe727824, 0xbe727824, 0xbe727824 /* coeff6 */
.long 0x3e93DD07, 0x3e93DD07, 0x3e93DD07, 0x3e93DD07, 0x3e93DD07, 0x3e93DD07, 0x3e93DD07, 0x3e93DD07 /* coeff5 */
.long 0xbeB8B969, 0xbeB8B969, 0xbeB8B969, 0xbeB8B969, 0xbeB8B969, 0xbeB8B969, 0xbeB8B969, 0xbeB8B969 /* coeff4 */
.long 0x3eF637C0, 0x3eF637C0, 0x3eF637C0, 0x3eF637C0, 0x3eF637C0, 0x3eF637C0, 0x3eF637C0, 0x3eF637C0 /* coeff3 */
.long 0xbf38AA2B, 0xbf38AA2B, 0xbf38AA2B, 0xbf38AA2B, 0xbf38AA2B, 0xbf38AA2B, 0xbf38AA2B, 0xbf38AA2B /* coeff2 */
.long 0x3fB8AA3B, 0x3fB8AA3B, 0x3fB8AA3B, 0x3fB8AA3B, 0x3fB8AA3B, 0x3fB8AA3B, 0x3fB8AA3B, 0x3fB8AA3B /* coeff1 */
.align 32
.type __svml_slog2_data_internal,@object
.size __svml_slog2_data_internal,.-__svml_slog2_data_internal

View File

@ -0,0 +1,29 @@
/* Function log2 vectorized with SSE2.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include "svml_d_wrapper_impl.h"
.text
ENTRY (_ZGVbN2v_log2)
WRAPPER_IMPL_SSE2 log2
END (_ZGVbN2v_log2)
#ifndef USE_MULTIARCH
libmvec_hidden_def (_ZGVbN2v_log2)
#endif

View File

@ -0,0 +1,29 @@
/* Function log2 vectorized with AVX2, wrapper version.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include "svml_d_wrapper_impl.h"
.text
ENTRY (_ZGVdN4v_log2)
WRAPPER_IMPL_AVX _ZGVbN2v_log2
END (_ZGVdN4v_log2)
#ifndef USE_MULTIARCH
libmvec_hidden_def (_ZGVdN4v_log2)
#endif

View File

@ -0,0 +1,25 @@
/* Function log2 vectorized in AVX ISA as wrapper to SSE4 ISA version.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include "svml_d_wrapper_impl.h"
.text
ENTRY (_ZGVcN4v_log2)
WRAPPER_IMPL_AVX _ZGVbN2v_log2
END (_ZGVcN4v_log2)

View File

@ -0,0 +1,25 @@
/* Function log2 vectorized with AVX-512, wrapper to AVX2.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include "svml_d_wrapper_impl.h"
.text
ENTRY (_ZGVeN8v_log2)
WRAPPER_IMPL_AVX512 _ZGVdN4v_log2
END (_ZGVeN8v_log2)

View File

@ -0,0 +1,25 @@
/* Function log2f vectorized with AVX-512. Wrapper to AVX2 version.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include "svml_s_wrapper_impl.h"
.text
ENTRY (_ZGVeN16v_log2f)
WRAPPER_IMPL_AVX512 _ZGVdN8v_log2f
END (_ZGVeN16v_log2f)

View File

@ -0,0 +1,29 @@
/* Function log2f vectorized with SSE2, wrapper version.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include "svml_s_wrapper_impl.h"
.text
ENTRY (_ZGVbN4v_log2f)
WRAPPER_IMPL_SSE2 log2f
END (_ZGVbN4v_log2f)
#ifndef USE_MULTIARCH
libmvec_hidden_def (_ZGVbN4v_log2f)
#endif

View File

@ -0,0 +1,29 @@
/* Function log2f vectorized with AVX2, wrapper version.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include "svml_s_wrapper_impl.h"
.text
ENTRY (_ZGVdN8v_log2f)
WRAPPER_IMPL_AVX _ZGVbN4v_log2f
END (_ZGVdN8v_log2f)
#ifndef USE_MULTIARCH
libmvec_hidden_def (_ZGVdN8v_log2f)
#endif

View File

@ -0,0 +1,25 @@
/* Function log2f vectorized in AVX ISA as wrapper to SSE4 ISA version.
Copyright (C) 2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include "svml_s_wrapper_impl.h"
.text
ENTRY (_ZGVcN8v_log2f)
WRAPPER_IMPL_AVX _ZGVbN4v_log2f
END (_ZGVcN8v_log2f)

View File

@ -0,0 +1 @@
#include "test-double-libmvec-log2.c"

View File

@ -0,0 +1 @@
#include "test-double-libmvec-log2.c"

View File

@ -0,0 +1 @@
#include "test-double-libmvec-log2.c"

View File

@ -0,0 +1,3 @@
#define LIBMVEC_TYPE double
#define LIBMVEC_FUNC log2
#include "test-vector-abi-arg1.h"

View File

@ -39,6 +39,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (sinh), _ZGVbN2v_sinh)
VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVbN2v_cbrt)
VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVbN2vv_atan2)
VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVbN2v_log10)
VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVbN2v_log2)
#define VEC_INT_TYPE __m128i

View File

@ -42,6 +42,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (sinh), _ZGVdN4v_sinh)
VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVdN4v_cbrt)
VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVdN4vv_atan2)
VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVdN4v_log10)
VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVdN4v_log2)
#ifndef __ILP32__
# define VEC_INT_TYPE __m256i

View File

@ -39,6 +39,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (sinh), _ZGVcN4v_sinh)
VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVcN4v_cbrt)
VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVcN4vv_atan2)
VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVcN4v_log10)
VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVcN4v_log2)
#define VEC_INT_TYPE __m128i

View File

@ -39,6 +39,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (sinh), _ZGVeN8v_sinh)
VECTOR_WRAPPER (WRAPPER_NAME (cbrt), _ZGVeN8v_cbrt)
VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2), _ZGVeN8vv_atan2)
VECTOR_WRAPPER (WRAPPER_NAME (log10), _ZGVeN8v_log10)
VECTOR_WRAPPER (WRAPPER_NAME (log2), _ZGVeN8v_log2)
#ifndef __ILP32__
# define VEC_INT_TYPE __m512i

View File

@ -0,0 +1 @@
#include "test-float-libmvec-log2f.c"

View File

@ -0,0 +1 @@
#include "test-float-libmvec-log2f.c"

View File

@ -0,0 +1 @@
#include "test-float-libmvec-log2f.c"

View File

@ -0,0 +1,3 @@
#define LIBMVEC_TYPE float
#define LIBMVEC_FUNC log2f
#include "test-vector-abi-arg1.h"

View File

@ -39,6 +39,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (sinhf), _ZGVeN16v_sinhf)
VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVeN16v_cbrtf)
VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVeN16vv_atan2f)
VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVeN16v_log10f)
VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVeN16v_log2f)
#define VEC_INT_TYPE __m512i

View File

@ -39,6 +39,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (sinhf), _ZGVbN4v_sinhf)
VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVbN4v_cbrtf)
VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVbN4vv_atan2f)
VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVbN4v_log10f)
VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVbN4v_log2f)
#define VEC_INT_TYPE __m128i

View File

@ -42,6 +42,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (sinhf), _ZGVdN8v_sinhf)
VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVdN8v_cbrtf)
VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVdN8vv_atan2f)
VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVdN8v_log10f)
VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVdN8v_log2f)
/* Redefinition of wrapper to be compatible with _ZGVdN8vvv_sincosf. */
#undef VECTOR_WRAPPER_fFF

View File

@ -39,6 +39,7 @@ VECTOR_WRAPPER (WRAPPER_NAME (sinhf), _ZGVcN8v_sinhf)
VECTOR_WRAPPER (WRAPPER_NAME (cbrtf), _ZGVcN8v_cbrtf)
VECTOR_WRAPPER_ff (WRAPPER_NAME (atan2f), _ZGVcN8vv_atan2f)
VECTOR_WRAPPER (WRAPPER_NAME (log10f), _ZGVcN8v_log10f)
VECTOR_WRAPPER (WRAPPER_NAME (log2f), _ZGVcN8v_log2f)
#define VEC_INT_TYPE __m128i