Enable libmvec support for AArch64

This patch enables libmvec on AArch64. The proposed change is mainly
implementing build infrastructure to add the new routines to ABI,
tests and benchmarks. I have demonstrated how this all fits together
by adding implementations for vector cos, in both single and double
precision, targeting both Advanced SIMD and SVE.

The implementations of the routines themselves are just loops over the
scalar routine from libm for now, as we are more concerned with
getting the plumbing right at this point. We plan to contribute vector
routines from the Arm Optimized Routines repo that are compliant with
requirements described in the libmvec wiki.

Building libmvec requires minimum GCC 10 for SVE ACLE. To avoid raising
the minimum GCC by such a big jump, we allow users to disable libmvec
if their compiler is too old.

Note that at this point users have to manually call the vector math
functions. This seems to be acceptable to some downstream users.

Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
This commit is contained in:
Joe Ramsay 2023-04-12 14:37:49 +01:00 committed by Szabolcs Nagy
parent cd87e36843
commit cd94326a13
32 changed files with 983 additions and 19 deletions

View File

@ -493,6 +493,9 @@ build the GNU C Library:
For s390x architecture builds, GCC 7.1 or higher is needed (See gcc
Bug 98269).
For AArch64 architecture builds with mathvec enabled, GCC 10 or
higher is needed due to dependency on arm_sve.h.
For multi-arch support it is recommended to use a GCC which has
been built with support for GNU indirect functions. This ensures
that correct debugging information is generated for functions

6
NEWS
View File

@ -29,6 +29,12 @@ Major new features:
- x86_64-gnu
* Added libmvec vector math library support to AArch64. It requires
GCC version >= 10.1.0. It can be disabled via --disable-mathvec,
however that is not a supported configuration as it changes the ABI.
The symbol names follow the AArch64 vector ABI, they are declared
in math.h and have to be called manually at this point.
Deprecated and removed features, and other changes affecting compatibility:
* In the Linux kernel for the hppa/parisc architecture some of the

View File

@ -26,6 +26,7 @@
#include <json-lib.h>
#include <bench-util.h>
#include <math-tests-arch.h>
#include <bench-libmvec-arch.h>
#include <bench-util.c>
#define D_ITERS 10000
@ -37,24 +38,8 @@ main (int argc, char **argv)
timing_t start, end;
json_ctx_t json_ctx;
#if defined REQUIRE_AVX
if (!CPU_FEATURE_ACTIVE (AVX))
{
printf ("AVX not supported.");
return 77;
}
#elif defined REQUIRE_AVX2
if (!CPU_FEATURE_ACTIVE (AVX2))
{
printf ("AVX2 not supported.");
return 77;
}
#elif defined REQUIRE_AVX512F
if (!CPU_FEATURE_ACTIVE (AVX512F))
{
printf ("AVX512F not supported.");
return 77;
}
#ifdef INIT_ARCH
INIT_ARCH ();
#endif
bench_start ();

View File

@ -536,6 +536,9 @@ For ARC architecture builds, GCC 8.3 or higher is needed.
For s390x architecture builds, GCC 7.1 or higher is needed (See gcc Bug 98269).
For AArch64 architecture builds with mathvec enabled, GCC 10 or higher is needed
due to dependency on arm_sve.h.
For multi-arch support it is recommended to use a GCC which has been built with
support for GNU indirect functions. This ensures that correct debugging
information is generated for functions selected by IFUNC resolvers. This

View File

@ -327,3 +327,26 @@ if test $libc_cv_aarch64_sve_asm = yes; then
$as_echo "#define HAVE_AARCH64_SVE_ASM 1" >>confdefs.h
fi
if test x"$build_mathvec" = xnotset; then
build_mathvec=yes
fi
# Check if compiler is sufficient to build mathvec (needs SVE ACLE)
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for availability of SVE ACLE" >&5
$as_echo_n "checking for availability of SVE ACLE... " >&6; }
if ${libc_cv_has_sve_acle+:} false; then :
$as_echo_n "(cached) " >&6
else
if test $build_mathvec = yes; then
cat > conftest.c <<EOF
#include <arm_sve.h>
EOF
if ! ${CC-cc} conftest.c -fsyntax-only; then
as_fn_error 1 "mathvec is enabled but compiler does not have SVE ACLE. Either use a compatible compiler or configure with --disable-mathvec (this results in incomplete ABI)."
fi
rm conftest.c
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_has_sve_acle" >&5
$as_echo "$libc_cv_has_sve_acle" >&6; }

View File

@ -101,3 +101,19 @@ rm -f conftest*])
if test $libc_cv_aarch64_sve_asm = yes; then
AC_DEFINE(HAVE_AARCH64_SVE_ASM)
fi
if test x"$build_mathvec" = xnotset; then
build_mathvec=yes
fi
# Check if compiler is sufficient to build mathvec (needs SVE ACLE)
AC_CACHE_CHECK(for availability of SVE ACLE, libc_cv_has_sve_acle, [dnl
if test $build_mathvec = yes; then
cat > conftest.c <<EOF
#include <arm_sve.h>
EOF
if ! ${CC-cc} conftest.c -fsyntax-only; then
as_fn_error 1 "mathvec is enabled but compiler does not have SVE ACLE. Either use a compatible compiler or configure with --disable-mathvec (this results in incomplete ABI)."
fi
rm conftest.c
fi])

View File

@ -0,0 +1,61 @@
float-advsimd-funcs = cos
double-advsimd-funcs = cos
float-sve-funcs = cos
double-sve-funcs = cos
ifeq ($(subdir),mathvec)
libmvec-support = $(addsuffix f_advsimd,$(float-advsimd-funcs)) \
$(addsuffix _advsimd,$(double-advsimd-funcs)) \
$(addsuffix f_sve,$(float-sve-funcs)) \
$(addsuffix _sve,$(double-sve-funcs))
endif
sve-cflags = -march=armv8-a+sve
ifeq ($(build-mathvec),yes)
bench-libmvec = $(addprefix float-advsimd-,$(float-advsimd-funcs)) \
$(addprefix double-advsimd-,$(double-advsimd-funcs)) \
$(addprefix float-sve-,$(float-sve-funcs)) \
$(addprefix double-sve-,$(double-sve-funcs))
endif
$(objpfx)bench-float-advsimd-%.c:
$(PYTHON) $(..)sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py $(basename $(@F)) > $@
$(objpfx)bench-double-advsimd-%.c:
$(PYTHON) $(..)sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py $(basename $(@F)) > $@
$(objpfx)bench-float-sve-%.c:
$(PYTHON) $(..)sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py $(basename $(@F)) > $@
$(objpfx)bench-double-sve-%.c:
$(PYTHON) $(..)sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py $(basename $(@F)) > $@
ifeq (${STATIC-BENCHTESTS},yes)
libmvec-benchtests = $(common-objpfx)mathvec/libmvec.a $(common-objpfx)math/libm.a
else
libmvec-benchtests = $(libmvec) $(libm)
endif
$(addprefix $(objpfx)bench-,$(bench-libmvec)): $(libmvec-benchtests)
ifeq ($(build-mathvec),yes)
libmvec-tests += float-advsimd double-advsimd float-sve double-sve
endif
define sve-float-cflags-template
CFLAGS-$(1)f_sve.c += $(sve-cflags)
CFLAGS-bench-float-sve-$(1).c += $(sve-cflags)
endef
define sve-double-cflags-template
CFLAGS-$(1)_sve.c += $(sve-cflags)
CFLAGS-bench-double-sve-$(1).c += $(sve-cflags)
endef
$(foreach f,$(float-sve-funcs), $(eval $(call sve-float-cflags-template,$(f))))
$(foreach f,$(double-sve-funcs), $(eval $(call sve-double-cflags-template,$(f))))
CFLAGS-test-float-sve-wrappers.c = $(sve-cflags)
CFLAGS-test-double-sve-wrappers.c = $(sve-cflags)

View File

@ -0,0 +1,8 @@
libmvec {
GLIBC_2.38 {
_ZGVnN2v_cos;
_ZGVnN4v_cosf;
_ZGVsMxv_cos;
_ZGVsMxv_cosf;
}
}

View File

@ -0,0 +1,39 @@
/* Helpers for Advanced SIMD vector math funtions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <arm_neon.h>
#define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))
#define V_NAME_F1(fun) _ZGVnN4v_##fun##f
#define V_NAME_D1(fun) _ZGVnN2v_##fun
#define V_NAME_F2(fun) _ZGVnN4vv_##fun##f
#define V_NAME_D2(fun) _ZGVnN2vv_##fun
static __always_inline float32x4_t
v_call_f32 (float (*f) (float), float32x4_t x)
{
return (float32x4_t){ f (x[0]), f (x[1]), f (x[2]), f (x[3]) };
}
static __always_inline float64x2_t
v_call_f64 (double (*f) (double), float64x2_t x)
{
return (float64x2_t){ f (x[0]), f (x[1]) };
}

View File

@ -0,0 +1,41 @@
/* Runtime architecture check for libmvec benchtests. aarch64 version.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <stdio.h>
#include <sys/auxv.h>
#define INIT_ARCH() \
do \
{ \
if (!supported ()) \
return 77; \
} \
while (0)
static bool
supported (void)
{
#if defined REQUIRE_SVE
if (!(getauxval (AT_HWCAP) & HWCAP_SVE))
{
printf ("SVE not supported.");
return false;
}
#endif
return true;
}

View File

@ -0,0 +1,64 @@
/* Platform-specific SIMD declarations of math functions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#ifndef _MATH_H
# error "Never include <bits/math-vector.h> directly;\
include <math.h> instead."
#endif
/* Get default empty definitions for simd declarations. */
#include <bits/libm-simd-decl-stubs.h>
#if __GNUC_PREREQ(9, 0)
# define __ADVSIMD_VEC_MATH_SUPPORTED
typedef __Float32x4_t __f32x4_t;
typedef __Float64x2_t __f64x2_t;
#elif __glibc_clang_prereq(8, 0)
# define __ADVSIMD_VEC_MATH_SUPPORTED
typedef __attribute__ ((__neon_vector_type__ (4))) float __f32x4_t;
typedef __attribute__ ((__neon_vector_type__ (2))) double __f64x2_t;
#endif
#if __GNUC_PREREQ(10, 0) || __glibc_clang_prereq(11, 0)
# define __SVE_VEC_MATH_SUPPORTED
typedef __SVFloat32_t __sv_f32_t;
typedef __SVFloat64_t __sv_f64_t;
typedef __SVBool_t __sv_bool_t;
#endif
/* If vector types and vector PCS are unsupported in the working
compiler, no choice but to omit vector math declarations. */
#ifdef __ADVSIMD_VEC_MATH_SUPPORTED
# define __vpcs __attribute__ ((__aarch64_vector_pcs__))
__vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
__vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
# undef __ADVSIMD_VEC_MATH_SUPPORTED
#endif /* __ADVSIMD_VEC_MATH_SUPPORTED */
#ifdef __SVE_VEC_MATH_SUPPORTED
__sv_f32_t _ZGVsMxv_cosf (__sv_f32_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_cos (__sv_f64_t, __sv_bool_t);
# undef __SVE_VEC_MATH_SUPPORTED
#endif /* __SVE_VEC_MATH_SUPPORTED */

View File

@ -0,0 +1,29 @@
/* Double-precision vector (Advanced SIMD) cos function.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <math.h>
#include "advsimd_utils.h"
VPCS_ATTR
float64x2_t
V_NAME_D1 (cos) (float64x2_t x)
{
return v_call_f64 (cos, x);
}

View File

@ -0,0 +1,28 @@
/* Double-precision vector (SVE) cos function.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <math.h>
#include "sve_utils.h"
svfloat64_t
SV_NAME_D1 (cos) (svfloat64_t x, svbool_t pg)
{
return sv_call_f64 (cos, x, svdup_n_f64 (0), pg);
}

View File

@ -0,0 +1,29 @@
/* Single-precision vector (Advanced SIMD) cos function.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <math.h>
#include "advsimd_utils.h"
VPCS_ATTR
float32x4_t
V_NAME_F1 (cos) (float32x4_t x)
{
return v_call_f32 (cosf, x);
}

View File

@ -0,0 +1,28 @@
/* Single-precision vector (SVE) cos function.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <math.h>
#include "sve_utils.h"
svfloat32_t
SV_NAME_F1 (cos) (svfloat32_t x, svbool_t pg)
{
return sv_call_f32 (cosf, x, svdup_n_f32 (0), pg);
}

View File

@ -0,0 +1,34 @@
/* Runtime architecture check for math tests. AArch64 version.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#ifdef REQUIRE_SVE
# include <sys/auxv.h>
# define INIT_ARCH_EXT
# define CHECK_ARCH_EXT \
do \
{ \
if (!(getauxval (AT_HWCAP) & HWCAP_SVE)) \
return; \
} \
while (0)
#else
# include <sysdeps/generic/math-tests-arch.h>
#endif

View File

@ -0,0 +1,90 @@
#!/usr/bin/python3
# Copyright (C) 2023 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <https://www.gnu.org/licenses/>.
import sys
TEMPLATE = """
#include <math.h>
#include <arm_neon.h>
#define STRIDE {stride}
#define CALL_BENCH_FUNC(v, i) (__extension__ ({{ \\
{rtype} mx0 = {fname}(vld1q_f{prec_short} (variants[v].in[i].arg0)); \\
mx0; }}))
struct args
{{
{stype} arg0[STRIDE];
double timing;
}};
struct _variants
{{
const char *name;
int count;
const struct args *in;
}};
static const struct args in0[{rowcount}] = {{
{in_data}
}};
static const struct _variants variants[1] = {{
{{"", {rowcount}, in0}},
}};
#define NUM_VARIANTS 1
#define NUM_SAMPLES(i) (variants[i].count)
#define VARIANT(i) (variants[i].name)
static {rtype} volatile ret;
#define BENCH_FUNC(i, j) ({{ ret = CALL_BENCH_FUNC(i, j); }})
#define FUNCNAME "{fname}"
#include <bench-libmvec-skeleton.c>
"""
def main(name):
_, prec, _, func = name.split("-")
scalar_to_advsimd_type = {"double": "float64x2_t", "float": "float32x4_t"}
stride = {"double": 2, "float": 4}[prec]
rtype = scalar_to_advsimd_type[prec]
atype = scalar_to_advsimd_type[prec]
fname = f"_ZGVnN{stride}v_{func}{'f' if prec == 'float' else ''}"
prec_short = {"double": 64, "float": 32}[prec]
with open(f"../benchtests/libmvec/{func}-inputs") as f:
in_vals = [l.strip() for l in f.readlines() if l and not l.startswith("#")]
in_vals = [in_vals[i:i+stride] for i in range(0, len(in_vals), stride)]
rowcount= len(in_vals)
in_data = ",\n".join("{{" + ", ".join(row) + "}, 0}" for row in in_vals)
print(TEMPLATE.format(stride=stride,
rtype=rtype,
atype=atype,
fname=fname,
prec_short=prec_short,
in_data=in_data,
rowcount=rowcount,
stype=prec))
if __name__ == "__main__":
main(sys.argv[1])

View File

@ -0,0 +1,102 @@
#!/usr/bin/python3
# Copyright (C) 2023 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <https://www.gnu.org/licenses/>.
import sys
TEMPLATE = """
#include <math.h>
#include <arm_sve.h>
#define MAX_STRIDE {max_stride}
#define STRIDE {stride}
#define PTRUE svptrue_b{prec_short}
#define SV_LOAD svld1_f{prec_short}
#define SV_STORE svst1_f{prec_short}
#define REQUIRE_SVE
#define CALL_BENCH_FUNC(v, i) (__extension__ ({{ \\
{rtype} mx0 = {fname}(SV_LOAD (PTRUE(), variants[v].in[i].arg0), PTRUE()); \\
mx0; }}))
struct args
{{
{stype} arg0[MAX_STRIDE];
double timing;
}};
struct _variants
{{
const char *name;
int count;
const struct args *in;
}};
static const struct args in0[{rowcount}] = {{
{in_data}
}};
static const struct _variants variants[1] = {{
{{"", {rowcount}, in0}},
}};
#define NUM_VARIANTS 1
#define NUM_SAMPLES(i) (variants[i].count)
#define VARIANT(i) (variants[i].name)
// Cannot pass volatile pointer to svst1. This still does not appear to get optimised out.
static {stype} /*volatile*/ ret[MAX_STRIDE];
#define BENCH_FUNC(i, j) ({{ SV_STORE(PTRUE(), ret, CALL_BENCH_FUNC(i, j)); }})
#define FUNCNAME "{fname}"
#include <bench-libmvec-skeleton.c>
"""
def main(name):
_, prec, _, func = name.split("-")
scalar_to_sve_type = {"double": "svfloat64_t", "float": "svfloat32_t"}
stride = {"double": "svcntd()", "float": "svcntw()"}[prec]
rtype = scalar_to_sve_type[prec]
atype = scalar_to_sve_type[prec]
fname = f"_ZGVsMxv_{func}{'f' if prec == 'float' else ''}"
prec_short = {"double": 64, "float": 32}[prec]
# Max SVE vector length is 2048 bits. To ensure benchmarks are
# vector-length-agnostic, but still use as wide vectors as
# possible on any given target, divide input data into 2048-bit
# rows, then load/store as many elements as the target will allow.
max_stride = 2048 // prec_short
with open(f"../benchtests/libmvec/{func}-inputs") as f:
in_vals = [l.strip() for l in f.readlines() if l and not l.startswith("#")]
in_vals = [in_vals[i:i+max_stride] for i in range(0, len(in_vals), max_stride)]
rowcount= len(in_vals)
in_data = ",\n".join("{{" + ", ".join(row) + "}, 0}" for row in in_vals)
print(TEMPLATE.format(stride=stride,
rtype=rtype,
atype=atype,
fname=fname,
prec_short=prec_short,
in_data=in_data,
rowcount=rowcount,
stype=prec,
max_stride=max_stride))
if __name__ == "__main__":
main(sys.argv[1])

View File

@ -0,0 +1,55 @@
/* Helpers for SVE vector math functions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <arm_sve.h>
#define SV_NAME_F1(fun) _ZGVsMxv_##fun##f
#define SV_NAME_D1(fun) _ZGVsMxv_##fun
#define SV_NAME_F2(fun) _ZGVsMxvv_##fun##f
#define SV_NAME_D2(fun) _ZGVsMxvv_##fun
static __always_inline svfloat32_t
sv_call_f32 (float (*f) (float), svfloat32_t x, svfloat32_t y, svbool_t cmp)
{
svbool_t p = svpfirst (cmp, svpfalse ());
while (svptest_any (cmp, p))
{
float elem = svclastb_n_f32 (p, 0, x);
elem = (*f) (elem);
svfloat32_t y2 = svdup_n_f32 (elem);
y = svsel_f32 (p, y2, y);
p = svpnext_b32 (cmp, p);
}
return y;
}
static __always_inline svfloat64_t
sv_call_f64 (double (*f) (double), svfloat64_t x, svfloat64_t y, svbool_t cmp)
{
svbool_t p = svpfirst (cmp, svpfalse ());
while (svptest_any (cmp, p))
{
double elem = svclastb_n_f64 (p, 0, x);
elem = (*f) (elem);
svfloat64_t y2 = svdup_n_f64 (elem);
y = svsel_f64 (p, y2, y);
p = svpnext_b64 (cmp, p);
}
return y;
}

View File

@ -0,0 +1,26 @@
/* Scalar wrappers for double-precision Advanced SIMD vector math functions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <arm_neon.h>
#include "test-double-advsimd.h"
#define VEC_TYPE float64x2_t
VPCS_VECTOR_WRAPPER (cos_advsimd, _ZGVnN2v_cos)

View File

@ -0,0 +1,25 @@
/* Test declarations for double-precision Advanced SIMD vector math functions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include "test-double.h"
#include "test-math-vector.h"
#include "test-vpcs-vector-wrapper.h"
#define VEC_SUFF _advsimd
#define VEC_LEN 2

View File

@ -0,0 +1,35 @@
/* Scalar wrappers for double-precision SVE vector math functions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <arm_sve.h>
#include "test-double-sve.h"
/* Wrapper from scalar to SVE function. Cannot just use VECTOR_WRAPPER due to
predication. */
#define SVE_VECTOR_WRAPPER(scalar_func, vector_func) \
extern VEC_TYPE vector_func (VEC_TYPE, svbool_t); \
FLOAT scalar_func (FLOAT x) \
{ \
VEC_TYPE mx = svdup_n_f64 (x); \
VEC_TYPE mr = vector_func (mx, svptrue_b64 ()); \
return svlastb_f64 (svptrue_b64 (), mr); \
}
SVE_VECTOR_WRAPPER (cos_sve, _ZGVsMxv_cos)

View File

@ -0,0 +1,26 @@
/* Test declarations for double-precision SVE vector math functions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include "test-double.h"
#include "test-math-vector.h"
#define REQUIRE_SVE
#define VEC_SUFF _sve
#define VEC_LEN svcntd ()
#define VEC_TYPE svfloat64_t

View File

@ -0,0 +1,26 @@
/* Scalar wrappers for single-precision Advanced SIMD vector math functions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <arm_neon.h>
#include "test-float-advsimd.h"
#define VEC_TYPE float32x4_t
VPCS_VECTOR_WRAPPER (cosf_advsimd, _ZGVnN4v_cosf)

View File

@ -0,0 +1,25 @@
/* Test declarations for singlex-precision Advanced SIMD vector math functions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include "test-float.h"
#include "test-math-vector.h"
#include "test-vpcs-vector-wrapper.h"
#define VEC_SUFF _advsimd
#define VEC_LEN 4

View File

@ -0,0 +1,35 @@
/* Scalar wrappers for single-precision SVE vector math functions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <arm_sve.h>
#include "test-float-sve.h"
/* Wrapper from scalar to SVE function. Cannot just use VECTOR_WRAPPER due to
predication. */
#define SVE_VECTOR_WRAPPER(scalar_func, vector_func) \
extern VEC_TYPE vector_func (VEC_TYPE, svbool_t); \
FLOAT scalar_func (FLOAT x) \
{ \
VEC_TYPE mx = svdup_n_f32 (x); \
VEC_TYPE mr = vector_func (mx, svptrue_b32 ()); \
return svlastb_f32 (svptrue_b32 (), mr); \
}
SVE_VECTOR_WRAPPER (cosf_sve, _ZGVsMxv_cosf)

View File

@ -0,0 +1,26 @@
/* Test declarations for single-precision SVE vector math functions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include "test-float.h"
#include "test-math-vector.h"
#define REQUIRE_SVE
#define VEC_SUFF _sve
#define VEC_LEN svcntw ()
#define VEC_TYPE svfloat32_t

View File

@ -0,0 +1,31 @@
/* Scalar wrapper for vpcs-enabled Advanced SIMD vector math functions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#define VPCS_VECTOR_WRAPPER(scalar_func, vector_func) \
extern __attribute__ ((aarch64_vector_pcs)) \
VEC_TYPE vector_func (VEC_TYPE); \
FLOAT scalar_func (FLOAT x) \
{ \
int i; \
VEC_TYPE mx; \
INIT_VEC_LOOP (mx, x, VEC_LEN); \
VEC_TYPE mr = vector_func (mx); \
TEST_VEC_LOOP (mr, VEC_LEN); \
return ((FLOAT) mr[0]); \
}

View File

@ -641,11 +641,19 @@ double: 1
float: 1
ldouble: 2
Function: "cos_advsimd":
double: 1
float: 1
Function: "cos_downward":
double: 1
float: 1
ldouble: 3
Function: "cos_sve":
double: 1
float: 1
Function: "cos_towardzero":
double: 1
float: 1

View File

@ -0,0 +1,4 @@
GLIBC_2.38 _ZGVnN2v_cos F
GLIBC_2.38 _ZGVnN4v_cosf F
GLIBC_2.38 _ZGVsMxv_cos F
GLIBC_2.38 _ZGVsMxv_cosf F

View File

@ -94,7 +94,7 @@ endif
$(addprefix $(objpfx)bench-,$(bench-libmvec-double)): $(libmvec-benchtests)
$(addprefix $(objpfx)bench-,$(bench-libmvec-float)): $(libmvec-benchtests)
bench-libmvec-deps = $(..)sysdeps/x86_64/fpu/bench-libmvec-skeleton.c bench-timing.h Makefile
bench-libmvec-deps = $(..)benchtests/bench-libmvec-skeleton.c $(..)sysdeps/x86_64/fpu/bench-libmvec-arch.h bench-timing.h Makefile
$(objpfx)bench-float-%.c: $(bench-libmvec-deps)
{ if [ -n "$($*-INCLUDE)" ]; then \

View File

@ -0,0 +1,53 @@
/* Runtime architecture check for libmvec benchtests. x86_64 version.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <stdio.h>
#include <sys/platform/x86.h>
#define INIT_ARCH() \
do \
{ \
if (!supported ()) \
return 77; \
} \
while (0)
static bool
supported (void)
{
#if defined REQUIRE_AVX
if (!CPU_FEATURE_ACTIVE (AVX))
{
printf ("AVX not supported.");
return false;
}
#elif defined REQUIRE_AVX2
if (!CPU_FEATURE_ACTIVE (AVX2))
{
printf ("AVX2 not supported.");
return false;
}
#elif defined REQUIRE_AVX512F
if (!CPU_FEATURE_ACTIVE (AVX512F))
{
printf ("AVX512F not supported.");
return false;
}
#endif
return true;
}