mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-02 01:40:07 +00:00
cd94326a13
This patch enables libmvec on AArch64. The proposed change mainly implements the build infrastructure needed to add the new routines to the ABI, tests and benchmarks. I have demonstrated how this all fits together by adding implementations for vector cos, in both single and double precision, targeting both Advanced SIMD and SVE. The implementations of the routines themselves are just loops over the scalar routine from libm for now, as we are more concerned with getting the plumbing right at this point. We plan to contribute vector routines from the Arm Optimized Routines repo that are compliant with the requirements described in the libmvec wiki. Building libmvec requires at least GCC 10 for SVE ACLE. To avoid raising the minimum GCC version by such a big jump, we allow users to disable libmvec if their compiler is too old. Note that at this point users have to manually call the vector math functions. This seems to be acceptable to some downstream users. Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
103 lines
3.3 KiB
Python
Executable File
103 lines
3.3 KiB
Python
Executable File
#!/usr/bin/python3
# Copyright (C) 2023 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <https://www.gnu.org/licenses/>.
|
|
|
|
import sys
|
|
|
|
# C source template for one SVE libmvec benchmark, filled in by str.format.
# Doubled braces ({{ and }}) come out as literal C braces.  Placeholders:
#   {max_stride}  length of the arg0 array in each input row and of ret
#   {stride}      expression that STRIDE expands to
#   {prec_short}  suffix appended to svptrue_b, svld1_f and svst1_f
#   {rtype}       type of the value returned by the benchmarked call
#   {fname}       name of the vector function being benchmarked
#   {stype}       scalar element type of the input rows and of ret
#   {rowcount}    number of rows in in0
#   {in_data}     initialisers for the rows of in0
# The macro body relies on backslash line continuations, so no stray
# lines may appear between the lines of CALL_BENCH_FUNC.
TEMPLATE = """
#include <math.h>
#include <arm_sve.h>

#define MAX_STRIDE {max_stride}
#define STRIDE {stride}
#define PTRUE svptrue_b{prec_short}
#define SV_LOAD svld1_f{prec_short}
#define SV_STORE svst1_f{prec_short}
#define REQUIRE_SVE

#define CALL_BENCH_FUNC(v, i) (__extension__ ({{ \\
   {rtype} mx0 = {fname}(SV_LOAD (PTRUE(), variants[v].in[i].arg0), PTRUE()); \\
   mx0; }}))

struct args
{{
  {stype} arg0[MAX_STRIDE];
  double timing;
}};

struct _variants
{{
  const char *name;
  int count;
  const struct args *in;
}};

static const struct args in0[{rowcount}] = {{
{in_data}
}};

static const struct _variants variants[1] = {{
  {{"", {rowcount}, in0}},
}};

#define NUM_VARIANTS 1
#define NUM_SAMPLES(i) (variants[i].count)
#define VARIANT(i) (variants[i].name)

// Cannot pass volatile pointer to svst1. This still does not appear to get optimised out.
static {stype} /*volatile*/ ret[MAX_STRIDE];

#define BENCH_FUNC(i, j) ({{ SV_STORE(PTRUE(), ret, CALL_BENCH_FUNC(i, j)); }})
#define FUNCNAME "{fname}"
#include <bench-libmvec-skeleton.c>
"""
|
|
|
|
def main(name):
    """Print the C source of one SVE libmvec benchmark to standard output.

    NAME must consist of exactly four "-"-separated fields.  The second
    field selects the precision ("double" or "float") and the fourth is
    the name of the math function; the first and third are ignored.

    Input values are read, one per line, from
    ../benchtests/libmvec/<function>-inputs relative to the current
    working directory.  Blank lines and lines whose first non-blank
    character is "#" are skipped.

    Raises ValueError if NAME does not have four fields, and KeyError if
    the precision is neither "double" nor "float".
    """
    _, prec, _, func = name.split("-")
    scalar_to_sve_type = {"double": "svfloat64_t", "float": "svfloat32_t"}

    stride = {"double": "svcntd()", "float": "svcntw()"}[prec]
    rtype = scalar_to_sve_type[prec]
    atype = scalar_to_sve_type[prec]
    fname = f"_ZGVsMxv_{func}{'f' if prec == 'float' else ''}"
    prec_short = {"double": 64, "float": 32}[prec]
    # Max SVE vector length is 2048 bits. To ensure benchmarks are
    # vector-length-agnostic, but still use as wide vectors as
    # possible on any given target, divide input data into 2048-bit
    # rows, then load/store as many elements as the target will allow.
    max_stride = 2048 // prec_short

    with open(f"../benchtests/libmvec/{func}-inputs") as f:
        # Strip before filtering: a raw line always carries its newline,
        # so testing it for truthiness would never reject a blank line
        # and an empty initialiser would end up in the generated C.
        stripped = (line.strip() for line in f)
        in_vals = [val for val in stripped if val and not val.startswith("#")]
    in_vals = [in_vals[i:i + max_stride]
               for i in range(0, len(in_vals), max_stride)]
    rowcount = len(in_vals)
    # One "{{v0, v1, ...}, 0}" initialiser per row; the trailing 0 fills
    # the "timing" member of struct args.
    in_data = ",\n".join("{{" + ", ".join(row) + "}, 0}" for row in in_vals)

    print(TEMPLATE.format(stride=stride,
                          rtype=rtype,
                          atype=atype,
                          fname=fname,
                          prec_short=prec_short,
                          in_data=in_data,
                          rowcount=rowcount,
                          stype=prec,
                          max_stride=max_stride))
|
|
|
|
|
|
if __name__ == "__main__":
    # The first command-line argument is the benchmark name handed to
    # main(); any further arguments are ignored.  Report a usage error
    # instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <prefix>-<double|float>-<tag>-<function>")
    main(sys.argv[1])
|