Add a Python script to generate libmvec microbenchmarks from the input values of each libmvec function, using a skeleton benchmark template. Creates double and float benchmarks with vector lengths 1, 2, 4, 8, and 16 for each libmvec function. Vector length 1 corresponds to the scalar version of the function and is included so the vector functions' performance can be compared against it.
Co-authored-by: Haochen Jiang <haochen.jiang@intel.com>
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
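
(The generated benchmark sources are produced at build time by the bench-float-%.c and bench-double-%.c pattern rules at the bottom of this Makefile. For a purely illustrative benchmark name such as float-cosf-vlen4, the real names come from $(bench-libmvec-float) and $(bench-libmvec-double), which are defined elsewhere, the rule runs roughly:

    $(PYTHON) $(..)sysdeps/x86_64/fpu/scripts/bench_libmvec.py bench-float-cosf-vlen4 > bench-float-cosf-vlen4.c

prepending the file named by the matching *-INCLUDE variable, if one is set.)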
Makefile
ifeq ($(subdir),mathvec)
libmvec-double-func-list = \
  2_core \
  4_core \
  4_core_avx \
  8_core
libmvec-float-func-list = \
  f4_core \
  f8_core \
  f8_core_avx \
  f16_core
libmvec-support += \
  svml_d_exp_data \
  svml_d_log_data \
  svml_d_pow_data \
  svml_d_trig_data \
  svml_s_expf_data \
  svml_s_logf_data \
  svml_s_powf_data \
  svml_s_trig_data \
  $(foreach l,$(libmvec-double-func-list), \
    $(addprefix svml_d_,$(addsuffix $(l),$(libmvec-funcs)))) \
  $(foreach l,$(libmvec-float-func-list), \
    $(addprefix svml_s_,$(addsuffix $(l),$(libmvec-funcs))))
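# For reference: assuming cos is among $(libmvec-funcs) (the list is defined
# elsewhere in this Makefile), the two foreach calls above expand to entries
# such as svml_d_cos2_core, svml_d_cos4_core, svml_d_cos4_core_avx,
# svml_d_cos8_core and svml_s_cosf4_core, svml_s_cosf8_core,
# svml_s_cosf8_core_avx, svml_s_cosf16_core.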
endif

# Variables for libmvec tests.
ifeq ($(subdir)$(build-mathvec),mathyes)
libmvec-tests += double-vlen2 double-vlen4 double-vlen4-avx2 \
  float-vlen4 float-vlen8 float-vlen8-avx2 \
  double-vlen8 float-vlen16
tests += \
  $(libmvec-abi-func-tests) \
  $(libmvec-abi-func-avx-tests) \
  $(libmvec-abi-func-avx2-tests) \
  $(libmvec-abi-func-avx512f-tests)

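# Each vector-length test variant exercises the same scalar function list;
# only the vector width (and the ISA extension flags below) differs.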
double-vlen2-funcs = $(libmvec-funcs)
double-vlen4-funcs = $(libmvec-funcs)
double-vlen4-avx2-funcs = $(libmvec-funcs)
double-vlen8-funcs = $(libmvec-funcs)
float-vlen4-funcs = $(libmvec-funcs)
float-vlen8-funcs = $(libmvec-funcs)
float-vlen8-avx2-funcs = $(libmvec-funcs)
float-vlen16-funcs = $(libmvec-funcs)

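# ISA requirements per vector width: 4 doubles or 8 floats per vector need AVX
# (AVX2 for the -avx2 wrapper tests); 8 doubles or 16 floats need AVX512F.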
double-vlen4-arch-ext-cflags = -mavx
double-vlen4-arch-ext2-cflags = -mavx2
double-vlen8-arch-ext-cflags = -mavx512f

float-vlen8-arch-ext-cflags = -mavx
float-vlen8-arch-ext2-cflags = -mavx2
float-vlen16-arch-ext-cflags = -mavx512f

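# Flags for the libmvec ABI tests.  -fopenmp is needed because the tests use
# OpenMP SIMD pragmas so the compiler vectorizes scalar math calls into libmvec
# entry points; -Wno-unknown-pragmas silences warnings about those pragmas on
# compilers that do not support them.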
libmvec-abi-test-cflags = \
  $(libm-test-fast-math-cflags) \
  -fno-inline -fopenmp -Wno-unknown-pragmas

CFLAGS-test-double-vlen4-avx2-wrappers.c = $(double-vlen4-arch-ext2-cflags)

CFLAGS-test-float-vlen8-avx2-wrappers.c = $(float-vlen8-arch-ext2-cflags)
endif

ifeq ($(subdir)$(config-cflags-mprefer-vector-width),mathyes)
# When compiled with -O3 -march=skylake, GCC 8 and 9 optimize some loops
# in branred.c with 256-bit vector instructions, which leads to a
# store-forwarding stall:
#
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90579
#
# Limit vector width to 128 bits to work around this issue.  It improves
# performance of sin and cos by more than 40% on Skylake.
CFLAGS-branred.c = -mprefer-vector-width=128
endif

ifeq ($(subdir),benchtests)
double-vlen4-arch-ext-cflags = -mavx
double-vlen4-arch-ext2-cflags = -mavx2
double-vlen8-arch-ext-cflags = -mavx512f

float-vlen8-arch-ext-cflags = -mavx
float-vlen8-arch-ext2-cflags = -mavx2
float-vlen16-arch-ext-cflags = -mavx512f

bench-libmvec := $(bench-libmvec-double) $(bench-libmvec-float)

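# Only add the libmvec benchmarks to the default benchmark run when the user
# has not restricted the set of benchmarks with BENCHSET.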
ifeq (${BENCHSET},)
bench += $(bench-libmvec)
endif

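# Link the benchmarks against the static archives when static benchtests are
# requested, otherwise against the shared libmvec and libm.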
ifeq (${STATIC-BENCHTESTS},yes)
libmvec-benchtests = $(common-objpfx)mathvec/libmvec.a $(common-objpfx)math/libm.a
else
libmvec-benchtests = $(libmvec) $(libm)
endif

$(addprefix $(objpfx)bench-,$(bench-libmvec-double)): $(libmvec-benchtests)
$(addprefix $(objpfx)bench-,$(bench-libmvec-float)): $(libmvec-benchtests)
bench-libmvec-deps = $(..)sysdeps/x86_64/fpu/bench-libmvec-skeleton.c bench-timing.h Makefile

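# Generate the benchmark C sources: optionally prepend the file named by the
# per-benchmark *-INCLUDE variable, then have bench_libmvec.py emit the
# benchmark body from the skeleton template into a temporary file that is
# moved into place only on success.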
$(objpfx)bench-float-%.c: $(bench-libmvec-deps)
	{ if [ -n "$($*-INCLUDE)" ]; then \
	  cat $($*-INCLUDE); \
	  fi; \
	  $(PYTHON) $(..)sysdeps/x86_64/fpu/scripts/bench_libmvec.py $(basename $(@F)); } > $@-tmp
	mv -f $@-tmp $@

$(objpfx)bench-double-%.c: $(bench-libmvec-deps)
	{ if [ -n "$($*-INCLUDE)" ]; then \
	  cat $($*-INCLUDE); \
	  fi; \
	  $(PYTHON) $(..)sysdeps/x86_64/fpu/scripts/bench_libmvec.py $(basename $(@F)); } > $@-tmp
	mv -f $@-tmp $@
endif