aarch64: Add vector implementations of log routines

Optimised implementations for single and double precision, Advanced
SIMD and SVE, copied from Arm Optimized Routines. Log lookup table
added as HIDDEN symbol to allow it to be shared between AdvSIMD and
SVE variants.

As previously, data tables are used via a barrier to prevent
overly aggressive constant inlining. Special-case handlers are
marked NOINLINE to avoid incurring the penalty of switching call
standards unnecessarily.

Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
This commit is contained in:
Joe Ramsay 2023-06-28 12:19:38 +01:00 committed by Szabolcs Nagy
parent 3bb1af2051
commit 78c01a5cbe
15 changed files with 563 additions and 1 deletions

View File

@ -1,4 +1,5 @@
libmvec-supported-funcs = cos \
log \
sin
float-advsimd-funcs = $(libmvec-supported-funcs)
@ -10,7 +11,8 @@ ifeq ($(subdir),mathvec)
libmvec-support = $(addsuffix f_advsimd,$(float-advsimd-funcs)) \
$(addsuffix _advsimd,$(double-advsimd-funcs)) \
$(addsuffix f_sve,$(float-sve-funcs)) \
$(addsuffix _sve,$(double-sve-funcs))
$(addsuffix _sve,$(double-sve-funcs)) \
v_log_data
endif
sve-cflags = -march=armv8-a+sve

View File

@ -1,11 +1,15 @@
libmvec {
GLIBC_2.38 {
_ZGVnN2v_cos;
_ZGVnN2v_log;
_ZGVnN2v_sin;
_ZGVnN4v_cosf;
_ZGVnN4v_logf;
_ZGVnN4v_sinf;
_ZGVsMxv_cos;
_ZGVsMxv_cosf;
_ZGVsMxv_log;
_ZGVsMxv_logf;
_ZGVsMxv_sin;
_ZGVsMxv_sinf;
}

View File

@ -50,9 +50,11 @@ typedef __SVBool_t __sv_bool_t;
# define __vpcs __attribute__ ((__aarch64_vector_pcs__))
__vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
__vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
# undef __ADVSIMD_VEC_MATH_SUPPORTED
@ -61,9 +63,11 @@ __vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
#ifdef __SVE_VEC_MATH_SUPPORTED
__sv_f32_t _ZGVsMxv_cosf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_logf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_sinf (__sv_f32_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_cos (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_log (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_sin (__sv_f64_t, __sv_bool_t);
# undef __SVE_VEC_MATH_SUPPORTED

View File

@ -0,0 +1,105 @@
/* Double-precision vector (Advanced SIMD) log function.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include "v_math.h"
static const struct data
{
float64x2_t poly[5];
float64x2_t ln2;
uint64x2_t min_norm, special_bound, sign_exp_mask;
} data = {
/* Worst-case error: 1.17 + 0.5 ulp.
Rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ]. */
.poly = { V2 (-0x1.ffffffffffff7p-2), V2 (0x1.55555555170d4p-2),
V2 (-0x1.0000000399c27p-2), V2 (0x1.999b2e90e94cap-3),
V2 (-0x1.554e550bd501ep-3) },
.ln2 = V2 (0x1.62e42fefa39efp-1),
.min_norm = V2 (0x0010000000000000),
.special_bound = V2 (0x7fe0000000000000), /* asuint64(inf) - min_norm. */
.sign_exp_mask = V2 (0xfff0000000000000)
};
#define A(i) d->poly[i]
#define N (1 << V_LOG_TABLE_BITS)
#define IndexMask (N - 1)
#define Off v_u64 (0x3fe6900900000000)
struct entry
{
float64x2_t invc;
float64x2_t logc;
};
static inline struct entry
lookup (uint64x2_t i)
{
/* Since N is a power of 2, n % N = n & (N - 1). */
struct entry e;
e.invc[0] = __v_log_data.invc[i[0] & IndexMask];
e.logc[0] = __v_log_data.logc[i[0] & IndexMask];
e.invc[1] = __v_log_data.invc[i[1] & IndexMask];
e.logc[1] = __v_log_data.logc[i[1] & IndexMask];
return e;
}
static float64x2_t VPCS_ATTR NOINLINE
special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp)
{
return v_call_f64 (log, x, y, cmp);
}
float64x2_t VPCS_ATTR V_NAME_D1 (log) (float64x2_t x)
{
const struct data *d = ptr_barrier (&data);
float64x2_t z, r, r2, p, y, kd, hi;
uint64x2_t ix, iz, tmp, cmp;
int64x2_t k;
struct entry e;
ix = vreinterpretq_u64_f64 (x);
cmp = vcgeq_u64 (vsubq_u64 (ix, d->min_norm), d->special_bound);
/* x = 2^k z; where z is in range [Off,2*Off) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = vsubq_u64 (ix, Off);
k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); /* arithmetic shift. */
iz = vsubq_u64 (ix, vandq_u64 (tmp, d->sign_exp_mask));
z = vreinterpretq_f64_u64 (iz);
e = lookup (vshrq_n_u64 (tmp, 52 - V_LOG_TABLE_BITS));
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
r = vfmaq_f64 (v_f64 (-1.0), z, e.invc);
kd = vcvtq_f64_s64 (k);
/* hi = r + log(c) + k*Ln2. */
hi = vfmaq_f64 (vaddq_f64 (e.logc, r), kd, d->ln2);
/* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */
r2 = vmulq_f64 (r, r);
y = vfmaq_f64 (A (2), A (3), r);
p = vfmaq_f64 (A (0), A (1), r);
y = vfmaq_f64 (y, A (4), r2);
y = vfmaq_f64 (p, y, r2);
y = vfmaq_f64 (hi, y, r2);
if (__glibc_unlikely (v_any_u64 (cmp)))
return special_case (x, y, cmp);
return y;
}

View File

@ -0,0 +1,80 @@
/* Double-precision vector (SVE) log function.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include "sv_math.h"
#define P(i) sv_f64 (__v_log_data.poly[i])
#define N (1 << V_LOG_TABLE_BITS)
#define Off (0x3fe6900900000000)
#define MaxTop (0x7ff)
#define MinTop (0x001)
#define ThreshTop (0x7fe) /* MaxTop - MinTop. */
static svfloat64_t NOINLINE
special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp)
{
return sv_call_f64 (log, x, y, cmp);
}
/* SVE port of AdvSIMD log algorithm.
Maximum measured error is 2.17 ulp:
SV_NAME_D1 (log)(0x1.a6129884398a3p+0) got 0x1.ffffff1cca043p-2
want 0x1.ffffff1cca045p-2. */
svfloat64_t SV_NAME_D1 (log) (svfloat64_t x, const svbool_t pg)
{
svuint64_t ix = svreinterpret_u64_f64 (x);
svuint64_t top = svlsr_n_u64_x (pg, ix, 52);
svbool_t cmp
= svcmpge_u64 (pg, svsub_n_u64_x (pg, top, MinTop), sv_u64 (ThreshTop));
/* x = 2^k z; where z is in range [Off,2*Off) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
svuint64_t tmp = svsub_n_u64_x (pg, ix, Off);
/* Equivalent to (tmp >> (52 - V_LOG_TABLE_BITS)) % N, since N is a power
of 2. */
svuint64_t i = svand_n_u64_x (
pg, svlsr_n_u64_x (pg, tmp, (52 - V_LOG_TABLE_BITS)), N - 1);
svint64_t k = svasr_n_s64_x (pg, svreinterpret_s64_u64 (tmp),
52); /* Arithmetic shift. */
svuint64_t iz
= svsub_u64_x (pg, ix, svand_n_u64_x (pg, tmp, 0xfffULL << 52));
svfloat64_t z = svreinterpret_f64_u64 (iz);
/* Lookup in 2 global lists (length N). */
svfloat64_t invc = svld1_gather_u64index_f64 (pg, __v_log_data.invc, i);
svfloat64_t logc = svld1_gather_u64index_f64 (pg, __v_log_data.logc, i);
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
svfloat64_t r = svmad_n_f64_x (pg, invc, z, -1);
svfloat64_t kd = svcvt_f64_s64_x (pg, k);
/* hi = r + log(c) + k*Ln2. */
svfloat64_t hi
= svmla_n_f64_x (pg, svadd_f64_x (pg, logc, r), kd, __v_log_data.ln2);
/* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */
svfloat64_t r2 = svmul_f64_x (pg, r, r);
svfloat64_t y = svmla_f64_x (pg, P (2), r, P (3));
svfloat64_t p = svmla_f64_x (pg, P (0), r, P (1));
y = svmla_f64_x (pg, y, r2, P (4));
y = svmla_f64_x (pg, p, r2, y);
y = svmla_f64_x (pg, hi, r2, y);
if (__glibc_unlikely (svptest_any (pg, cmp)))
return special_case (x, y, cmp);
return y;
}

View File

@ -0,0 +1,81 @@
/* Single-precision vector (Advanced SIMD) log function.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include "v_math.h"
static const struct data
{
float32x4_t poly[7];
float32x4_t ln2, tiny_bound;
uint32x4_t min_norm, special_bound, off, mantissa_mask;
} data = {
/* 3.34 ulp error. */
.poly = { V4 (-0x1.3e737cp-3f), V4 (0x1.5a9aa2p-3f), V4 (-0x1.4f9934p-3f),
V4 (0x1.961348p-3f), V4 (-0x1.00187cp-2f), V4 (0x1.555d7cp-2f),
V4 (-0x1.ffffc8p-2f) },
.ln2 = V4 (0x1.62e43p-1f),
.tiny_bound = V4 (0x1p-126),
.min_norm = V4 (0x00800000),
.special_bound = V4 (0x7f000000), /* asuint32(inf) - min_norm. */
.off = V4 (0x3f2aaaab), /* 0.666667. */
.mantissa_mask = V4 (0x007fffff)
};
#define P(i) d->poly[7 - i]
static float32x4_t VPCS_ATTR NOINLINE
special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp)
{
/* Fall back to scalar code. */
return v_call_f32 (logf, x, y, cmp);
}
float32x4_t VPCS_ATTR V_NAME_F1 (log) (float32x4_t x)
{
const struct data *d = ptr_barrier (&data);
float32x4_t n, p, q, r, r2, y;
uint32x4_t u, cmp;
u = vreinterpretq_u32_f32 (x);
cmp = vcgeq_u32 (vsubq_u32 (u, d->min_norm), d->special_bound);
/* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */
u = vsubq_u32 (u, d->off);
n = vcvtq_f32_s32 (
vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend. */
u = vandq_u32 (u, d->mantissa_mask);
u = vaddq_u32 (u, d->off);
r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f));
/* y = log(1+r) + n*ln2. */
r2 = vmulq_f32 (r, r);
/* n*ln2 + r + r2*(P1 + r*P2 + r2*(P3 + r*P4 + r2*(P5 + r*P6 + r2*P7))). */
p = vfmaq_f32 (P (5), P (6), r);
q = vfmaq_f32 (P (3), P (4), r);
y = vfmaq_f32 (P (1), P (2), r);
p = vfmaq_f32 (p, P (7), r2);
q = vfmaq_f32 (q, p, r2);
y = vfmaq_f32 (y, q, r2);
p = vfmaq_f32 (r, d->ln2, n);
y = vfmaq_f32 (p, y, r2);
if (__glibc_unlikely (v_any_u32 (cmp)))
return special_case (x, y, cmp);
return y;
}

View File

@ -0,0 +1,87 @@
/* Single-precision vector (SVE) log function.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include "sv_math.h"
static const struct data
{
float poly_0135[4];
float poly_246[3];
float ln2;
} data = {
.poly_0135 = {
/* Coefficients copied from the AdvSIMD routine in math/, then rearranged so
that coeffs 0, 1, 3 and 5 can be loaded as a single quad-word, hence used
with _lane variant of MLA intrinsic. */
-0x1.3e737cp-3f, 0x1.5a9aa2p-3f, 0x1.961348p-3f, 0x1.555d7cp-2f
},
.poly_246 = { -0x1.4f9934p-3f, -0x1.00187cp-2f, -0x1.ffffc8p-2f },
.ln2 = 0x1.62e43p-1f
};
#define Min (0x00800000)
#define Max (0x7f800000)
#define Thresh (0x7f000000) /* Max - Min. */
#define Mask (0x007fffff)
#define Off (0x3f2aaaab) /* 0.666667. */
static svfloat32_t NOINLINE
special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
{
return sv_call_f32 (logf, x, y, cmp);
}
/* Optimised implementation of SVE logf, using the same algorithm and
polynomial as the AdvSIMD routine. Maximum error is 3.34 ULPs:
SV_NAME_F1 (log)(0x1.557298p+0) got 0x1.26edecp-2
want 0x1.26ede6p-2. */
svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
svuint32_t u = svreinterpret_u32_f32 (x);
svbool_t cmp = svcmpge_n_u32 (pg, svsub_n_u32_x (pg, u, Min), Thresh);
/* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */
u = svsub_n_u32_x (pg, u, Off);
svfloat32_t n
= svcvt_f32_s32_x (pg, svasr_n_s32_x (pg, svreinterpret_s32_u32 (u),
23)); /* Sign-extend. */
u = svand_n_u32_x (pg, u, Mask);
u = svadd_n_u32_x (pg, u, Off);
svfloat32_t r = svsub_n_f32_x (pg, svreinterpret_f32_u32 (u), 1.0f);
/* y = log(1+r) + n*ln2. */
svfloat32_t r2 = svmul_f32_x (pg, r, r);
/* n*ln2 + r + r2*(P6 + r*P5 + r2*(P4 + r*P3 + r2*(P2 + r*P1 + r2*P0))). */
svfloat32_t p_0135 = svld1rq_f32 (svptrue_b32 (), &d->poly_0135[0]);
svfloat32_t p = svmla_lane_f32 (sv_f32 (d->poly_246[0]), r, p_0135, 1);
svfloat32_t q = svmla_lane_f32 (sv_f32 (d->poly_246[1]), r, p_0135, 2);
svfloat32_t y = svmla_lane_f32 (sv_f32 (d->poly_246[2]), r, p_0135, 3);
p = svmla_lane_f32 (p, r2, p_0135, 0);
q = svmla_f32_x (pg, q, r2, p);
y = svmla_f32_x (pg, y, r2, q);
p = svmla_n_f32_x (pg, r, n, d->ln2);
y = svmla_f32_x (pg, p, r2, y);
if (__glibc_unlikely (svptest_any (pg, cmp)))
return special_case (x, y, cmp);
return y;
}

View File

@ -24,4 +24,5 @@
#define VEC_TYPE float64x2_t
VPCS_VECTOR_WRAPPER (cos_advsimd, _ZGVnN2v_cos)
VPCS_VECTOR_WRAPPER (log_advsimd, _ZGVnN2v_log)
VPCS_VECTOR_WRAPPER (sin_advsimd, _ZGVnN2v_sin)

View File

@ -33,4 +33,5 @@
}
SVE_VECTOR_WRAPPER (cos_sve, _ZGVsMxv_cos)
SVE_VECTOR_WRAPPER (log_sve, _ZGVsMxv_log)
SVE_VECTOR_WRAPPER (sin_sve, _ZGVsMxv_sin)

View File

@ -24,4 +24,5 @@
#define VEC_TYPE float32x4_t
VPCS_VECTOR_WRAPPER (cosf_advsimd, _ZGVnN4v_cosf)
VPCS_VECTOR_WRAPPER (logf_advsimd, _ZGVnN4v_logf)
VPCS_VECTOR_WRAPPER (sinf_advsimd, _ZGVnN4v_sinf)

View File

@ -33,4 +33,5 @@
}
SVE_VECTOR_WRAPPER (cosf_sve, _ZGVsMxv_cosf)
SVE_VECTOR_WRAPPER (logf_sve, _ZGVsMxv_logf)
SVE_VECTOR_WRAPPER (sinf_sve, _ZGVsMxv_sinf)

View File

@ -0,0 +1,173 @@
/* Lookup table for double-precision log(x) vector function.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include "vecmath_config.h"
const struct v_log_data __v_log_data = {
/* Worst-case error: 1.17 + 0.5 ulp.
Rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ]. */
.poly = { -0x1.ffffffffffff7p-2, 0x1.55555555170d4p-2, -0x1.0000000399c27p-2,
0x1.999b2e90e94cap-3, -0x1.554e550bd501ep-3 },
.ln2 = 0x1.62e42fefa39efp-1,
/* Algorithm:
x = 2^k z
log(x) = k ln2 + log(c) + poly(z/c - 1)
where z is in [a;2a) which is split into N subintervals (a=0x1.69009p-1,
N=128) and log(c) and 1/c for the ith subinterval comes from two lookup
tables:
invc[i] = 1/c
logc[i] = (double)log(c)
where c is near the center of the subinterval and is chosen by trying
several floating point invc candidates around 1/center and selecting one
for which the error in (double)log(c) is minimized (< 0x1p-74), except the
subinterval that contains 1 and the previous one got tweaked to avoid
cancellation. */
.invc = { 0x1.6a133d0dec120p+0, 0x1.6815f2f3e42edp+0,
0x1.661e39be1ac9ep+0, 0x1.642bfa30ac371p+0,
0x1.623f1d916f323p+0, 0x1.60578da220f65p+0,
0x1.5e75349dea571p+0, 0x1.5c97fd387a75ap+0,
0x1.5abfd2981f200p+0, 0x1.58eca051dc99cp+0,
0x1.571e526d9df12p+0, 0x1.5554d555b3fcbp+0,
0x1.539015e2a20cdp+0, 0x1.51d0014ee0164p+0,
0x1.50148538cd9eep+0, 0x1.4e5d8f9f698a1p+0,
0x1.4cab0edca66bep+0, 0x1.4afcf1a9db874p+0,
0x1.495327136e16fp+0, 0x1.47ad9e84af28fp+0,
0x1.460c47b39ae15p+0, 0x1.446f12b278001p+0,
0x1.42d5efdd720ecp+0, 0x1.4140cfe001a0fp+0,
0x1.3fafa3b421f69p+0, 0x1.3e225c9c8ece5p+0,
0x1.3c98ec29a211ap+0, 0x1.3b13442a413fep+0,
0x1.399156baa3c54p+0, 0x1.38131639b4cdbp+0,
0x1.36987540fbf53p+0, 0x1.352166b648f61p+0,
0x1.33adddb3eb575p+0, 0x1.323dcd99fc1d3p+0,
0x1.30d129fefc7d2p+0, 0x1.2f67e6b72fe7dp+0,
0x1.2e01f7cf8b187p+0, 0x1.2c9f518ddc86ep+0,
0x1.2b3fe86e5f413p+0, 0x1.29e3b1211b25cp+0,
0x1.288aa08b373cfp+0, 0x1.2734abcaa8467p+0,
0x1.25e1c82459b81p+0, 0x1.2491eb1ad59c5p+0,
0x1.23450a54048b5p+0, 0x1.21fb1bb09e578p+0,
0x1.20b415346d8f7p+0, 0x1.1f6fed179a1acp+0,
0x1.1e2e99b93c7b3p+0, 0x1.1cf011a7a882ap+0,
0x1.1bb44b97dba5ap+0, 0x1.1a7b3e66cdd4fp+0,
0x1.1944e11dc56cdp+0, 0x1.18112aebb1a6ep+0,
0x1.16e013231b7e9p+0, 0x1.15b1913f156cfp+0,
0x1.14859cdedde13p+0, 0x1.135c2dc68cfa4p+0,
0x1.12353bdb01684p+0, 0x1.1110bf25b85b4p+0,
0x1.0feeafd2f8577p+0, 0x1.0ecf062c51c3bp+0,
0x1.0db1baa076c8bp+0, 0x1.0c96c5bb3048ep+0,
0x1.0b7e20263e070p+0, 0x1.0a67c2acd0ce3p+0,
0x1.0953a6391e982p+0, 0x1.0841c3caea380p+0,
0x1.07321489b13eap+0, 0x1.062491aee9904p+0,
0x1.05193497a7cc5p+0, 0x1.040ff6b5f5e9fp+0,
0x1.0308d19aa6127p+0, 0x1.0203beedb0c67p+0,
0x1.010037d38bcc2p+0, 1.0,
0x1.fc06d493cca10p-1, 0x1.f81e6ac3b918fp-1,
0x1.f44546ef18996p-1, 0x1.f07b10382c84bp-1,
0x1.ecbf7070e59d4p-1, 0x1.e91213f715939p-1,
0x1.e572a9a75f7b7p-1, 0x1.e1e0e2c530207p-1,
0x1.de5c72d8a8be3p-1, 0x1.dae50fa5658ccp-1,
0x1.d77a71145a2dap-1, 0x1.d41c51166623ep-1,
0x1.d0ca6ba0bb29fp-1, 0x1.cd847e8e59681p-1,
0x1.ca4a499693e00p-1, 0x1.c71b8e399e821p-1,
0x1.c3f80faf19077p-1, 0x1.c0df92dc2b0ecp-1,
0x1.bdd1de3cbb542p-1, 0x1.baceb9e1007a3p-1,
0x1.b7d5ef543e55ep-1, 0x1.b4e749977d953p-1,
0x1.b20295155478ep-1, 0x1.af279f8e82be2p-1,
0x1.ac5638197fdf3p-1, 0x1.a98e2f102e087p-1,
0x1.a6cf5606d05c1p-1, 0x1.a4197fc04d746p-1,
0x1.a16c80293dc01p-1, 0x1.9ec82c4dc5bc9p-1,
0x1.9c2c5a491f534p-1, 0x1.9998e1480b618p-1,
0x1.970d9977c6c2dp-1, 0x1.948a5c023d212p-1,
0x1.920f0303d6809p-1, 0x1.8f9b698a98b45p-1,
0x1.8d2f6b81726f6p-1, 0x1.8acae5bb55badp-1,
0x1.886db5d9275b8p-1, 0x1.8617ba567c13cp-1,
0x1.83c8d27487800p-1, 0x1.8180de3c5dbe7p-1,
0x1.7f3fbe71cdb71p-1, 0x1.7d055498071c1p-1,
0x1.7ad182e54f65ap-1, 0x1.78a42c3c90125p-1,
0x1.767d342f76944p-1, 0x1.745c7ef26b00ap-1,
0x1.7241f15769d0fp-1, 0x1.702d70d396e41p-1,
0x1.6e1ee3700cd11p-1, 0x1.6c162fc9cbe02p-1 },
.logc = { -0x1.62fe995eb963ap-2, -0x1.5d5a48dad6b67p-2,
-0x1.57bde257d2769p-2, -0x1.52294fbf2af55p-2,
-0x1.4c9c7b598aa38p-2, -0x1.47174fc5ff560p-2,
-0x1.4199b7fa7b5cap-2, -0x1.3c239f48cfb99p-2,
-0x1.36b4f154d2aebp-2, -0x1.314d9a0ff32fbp-2,
-0x1.2bed85cca3cffp-2, -0x1.2694a11421af9p-2,
-0x1.2142d8d014fb2p-2, -0x1.1bf81a2c77776p-2,
-0x1.16b452a39c6a4p-2, -0x1.11776ffa6c67ep-2,
-0x1.0c416035020e0p-2, -0x1.071211aa10fdap-2,
-0x1.01e972e293b1bp-2, -0x1.f98ee587fd434p-3,
-0x1.ef5800ad716fbp-3, -0x1.e52e160484698p-3,
-0x1.db1104b19352ep-3, -0x1.d100ac59e0bd6p-3,
-0x1.c6fced287c3bdp-3, -0x1.bd05a7b317c29p-3,
-0x1.b31abd229164fp-3, -0x1.a93c0edadb0a3p-3,
-0x1.9f697ee30d7ddp-3, -0x1.95a2efa9aa40ap-3,
-0x1.8be843d796044p-3, -0x1.82395ecc477edp-3,
-0x1.7896240966422p-3, -0x1.6efe77aca8c55p-3,
-0x1.65723e117ec5cp-3, -0x1.5bf15c0955706p-3,
-0x1.527bb6c111da1p-3, -0x1.491133c939f8fp-3,
-0x1.3fb1b90c7fc58p-3, -0x1.365d2cc485f8dp-3,
-0x1.2d13758970de7p-3, -0x1.23d47a721fd47p-3,
-0x1.1aa0229f25ec2p-3, -0x1.117655ddebc3bp-3,
-0x1.0856fbf83ab6bp-3, -0x1.fe83fabbaa106p-4,
-0x1.ec6e8507a56cdp-4, -0x1.da6d68c7cc2eap-4,
-0x1.c88078462be0cp-4, -0x1.b6a786a423565p-4,
-0x1.a4e2676ac7f85p-4, -0x1.9330eea777e76p-4,
-0x1.8192f134d5ad9p-4, -0x1.70084464f0538p-4,
-0x1.5e90bdec5cb1fp-4, -0x1.4d2c3433c5536p-4,
-0x1.3bda7e219879ap-4, -0x1.2a9b732d27194p-4,
-0x1.196eeb2b10807p-4, -0x1.0854be8ef8a7ep-4,
-0x1.ee998cb277432p-5, -0x1.ccadb79919fb9p-5,
-0x1.aae5b1d8618b0p-5, -0x1.89413015d7442p-5,
-0x1.67bfe7bf158dep-5, -0x1.46618f83941bep-5,
-0x1.2525df1b0618ap-5, -0x1.040c8e2f77c6ap-5,
-0x1.c62aad39f738ap-6, -0x1.847fe3bdead9cp-6,
-0x1.43183683400acp-6, -0x1.01f31c4e1d544p-6,
-0x1.82201d1e6b69ap-7, -0x1.00dd0f3e1bfd6p-7,
-0x1.ff6fe1feb4e53p-9, 0.0,
0x1.fe91885ec8e20p-8, 0x1.fc516f716296dp-7,
0x1.7bb4dd70a015bp-6, 0x1.f84c99b34b674p-6,
0x1.39f9ce4fb2d71p-5, 0x1.7756c0fd22e78p-5,
0x1.b43ee82db8f3ap-5, 0x1.f0b3fced60034p-5,
0x1.165bd78d4878ep-4, 0x1.3425d2715ebe6p-4,
0x1.51b8bd91b7915p-4, 0x1.6f15632c76a47p-4,
0x1.8c3c88ecbe503p-4, 0x1.a92ef077625dap-4,
0x1.c5ed5745fa006p-4, 0x1.e27876de1c993p-4,
0x1.fed104fce4cdcp-4, 0x1.0d7bd9c17d78bp-3,
0x1.1b76986cef97bp-3, 0x1.295913d24f750p-3,
0x1.37239fa295d17p-3, 0x1.44d68dd78714bp-3,
0x1.52722ebe5d780p-3, 0x1.5ff6d12671f98p-3,
0x1.6d64c2389484bp-3, 0x1.7abc4da40fddap-3,
0x1.87fdbda1e8452p-3, 0x1.95295b06a5f37p-3,
0x1.a23f6d34abbc5p-3, 0x1.af403a28e04f2p-3,
0x1.bc2c06a85721ap-3, 0x1.c903161240163p-3,
0x1.d5c5aa93287ebp-3, 0x1.e274051823fa9p-3,
0x1.ef0e656300c16p-3, 0x1.fb9509f05aa2ap-3,
0x1.04041821f37afp-2, 0x1.0a340a49b3029p-2,
0x1.105a7918a126dp-2, 0x1.1677819812b84p-2,
0x1.1c8b405b40c0ep-2, 0x1.2295d16cfa6b1p-2,
0x1.28975066318a2p-2, 0x1.2e8fd855d86fcp-2,
0x1.347f83d605e59p-2, 0x1.3a666d1244588p-2,
0x1.4044adb6f8ec4p-2, 0x1.461a5f077558cp-2,
0x1.4be799e20b9c8p-2, 0x1.51ac76a6b79dfp-2,
0x1.57690d5744a45p-2, 0x1.5d1d758e45217p-2 }
};

View File

@ -35,4 +35,14 @@
__ptr; \
})
#define V_LOG_POLY_ORDER 6
#define V_LOG_TABLE_BITS 7
extern const struct v_log_data
{
/* Shared data for vector log and log-derived routines (e.g. asinh). */
double poly[V_LOG_POLY_ORDER - 1];
double ln2;
double invc[1 << V_LOG_TABLE_BITS];
double logc[1 << V_LOG_TABLE_BITS];
} __v_log_data attribute_hidden;
#endif

View File

@ -1219,10 +1219,18 @@ double: 3
float: 3
ldouble: 1
Function: "log_advsimd":
double: 1
float: 3
Function: "log_downward":
float: 2
ldouble: 1
Function: "log_sve":
double: 1
float: 3
Function: "log_towardzero":
float: 2
ldouble: 2

View File

@ -1,8 +1,12 @@
GLIBC_2.38 _ZGVnN2v_cos F
GLIBC_2.38 _ZGVnN2v_log F
GLIBC_2.38 _ZGVnN2v_sin F
GLIBC_2.38 _ZGVnN4v_cosf F
GLIBC_2.38 _ZGVnN4v_logf F
GLIBC_2.38 _ZGVnN4v_sinf F
GLIBC_2.38 _ZGVsMxv_cos F
GLIBC_2.38 _ZGVsMxv_cosf F
GLIBC_2.38 _ZGVsMxv_log F
GLIBC_2.38 _ZGVsMxv_logf F
GLIBC_2.38 _ZGVsMxv_sin F
GLIBC_2.38 _ZGVsMxv_sinf F