glibc/sysdeps/x86_64/fpu/multiarch/Makefile
H.J. Lu 1b214630ce x86_64: Add expm1 with FMA
On Skylake, it improves expm1 bench performance by:

        Before       After     Improvement
max     70.204       68.054       3%
min     20.709       16.2         22%
mean    22.1221      16.7367      24%

NB: Add

extern long double __expm1l (long double);
extern long double __expm1f128 (long double);

for __typeof (__expm1l) and __typeof (__expm1f128) when __expm1 is
defined since __expm1 may be expanded in their declarations which
causes the build failure.
2023-08-14 08:14:19 -07:00

147 lines
3.1 KiB
Makefile

ifeq ($(subdir),math)
# $(call libm-multiarch-cflags,ROUTINES,FLAGS)
# Define CFLAGS-<routine>.c = FLAGS for every word of ROUTINES, so that a
# routine list and its per-file compiler flags are written down only once.
libm-multiarch-cflags = $(foreach r,$(1),$(eval CFLAGS-$(r).c = $(2)))

# Rounding routines, "-c" objects.  One function per line: the unsuffixed
# name followed by its "f" counterpart.
# NOTE(review): presumably these are the plain C builds that pair with the
# -sse4_1 objects below; confirm against the selector sources.
libm-sysdep_routines += \
  s_ceil-c s_ceilf-c \
  s_floor-c s_floorf-c \
  s_rint-c s_rintf-c \
  s_nearbyint-c s_nearbyintf-c \
  s_roundeven-c s_roundevenf-c \
  s_trunc-c s_truncf-c

# The same twelve rounding routines as "-sse4_1" objects.  No per-file
# CFLAGS are assigned to them in this file.
libm-sysdep_routines += \
  s_ceil-sse4_1 s_ceilf-sse4_1 \
  s_floor-sse4_1 s_floorf-sse4_1 \
  s_nearbyint-sse4_1 s_nearbyintf-sse4_1 \
  s_roundeven-sse4_1 s_roundevenf-sse4_1 \
  s_rint-sse4_1 s_rintf-sse4_1 \
  s_trunc-sse4_1 s_truncf-sse4_1

# "-fma" objects (first group); every one is compiled with -mfma -mavx2.
libm-multiarch-fma-routines := \
  e_asin-fma e_atan2-fma e_exp-fma e_log-fma e_log2-fma e_pow-fma \
  s_atan-fma s_expm1-fma s_sin-fma s_sincos-fma s_tan-fma
libm-sysdep_routines += $(libm-multiarch-fma-routines)
$(call libm-multiarch-cflags,$(libm-multiarch-fma-routines),-mfma -mavx2)

# "-sse2" objects for the float sine/cosine routines.  No per-file CFLAGS
# are assigned to them in this file.
libm-sysdep_routines += s_cosf-sse2 s_sincosf-sse2 s_sinf-sse2

# "-fma" objects (second group, the "f"-suffixed routines); also compiled
# with -mfma -mavx2.
libm-multiarch-fma-float-routines := \
  e_exp2f-fma e_expf-fma e_log2f-fma e_logf-fma e_powf-fma \
  s_cosf-fma s_sincosf-fma s_sinf-fma
libm-sysdep_routines += $(libm-multiarch-fma-float-routines)
$(call libm-multiarch-cflags,$(libm-multiarch-fma-float-routines),-mfma -mavx2)

# "-fma4" objects; every one is compiled with -mfma4.
libm-multiarch-fma4-routines := \
  e_exp-fma4 e_log-fma4 e_pow-fma4 e_asin-fma4 s_atan-fma4 e_atan2-fma4 \
  s_sin-fma4 s_sincos-fma4 s_tan-fma4
libm-sysdep_routines += $(libm-multiarch-fma4-routines)
$(call libm-multiarch-cflags,$(libm-multiarch-fma4-routines),-mfma4)

# "-avx" objects; every one is compiled with -msse2avx and with the
# SSE2AVX macro defined.  Per the GCC x86 option documentation, -msse2avx
# tells the assembler to encode SSE instructions with the VEX prefix.
libm-multiarch-avx-routines := \
  e_exp-avx e_log-avx s_atan-avx e_atan2-avx \
  s_sin-avx s_sincos-avx s_tan-avx
libm-sysdep_routines += $(libm-multiarch-avx-routines)
$(call libm-multiarch-cflags,$(libm-multiarch-avx-routines),-msse2avx -DSSE2AVX)
endif
ifeq ($(subdir),mathvec)
# Object-name suffixes for the multiarch variants of each libmvec function.
# Every word below is appended to each name in $(libmvec-funcs).
# NOTE(review): the leading number looks like the vector lane count and the
# trailing token like the ISA level -- confirm against the svml_* sources.

# Suffixes used with the svml_d_ prefix.
libmvec-multiarch-double-func-list = \
  2_core-sse2 2_core_sse4 \
  4_core-sse 4_core_avx2 \
  8_core-avx2 8_core_avx512

# Suffixes used with the svml_s_ prefix.
libmvec-multiarch-float-func-list = \
  f4_core-sse2 f4_core_sse4 \
  f8_core-sse f8_core_avx2 \
  f16_core-avx2 f16_core_avx512

# For every suffix, emit svml_d_<func><suffix> (respectively
# svml_s_<func><suffix>) for each <func> in $(libmvec-funcs).
libmvec-sysdep_routines += \
  $(foreach v,$(libmvec-multiarch-double-func-list), \
    $(patsubst %,svml_d_%$(v),$(libmvec-funcs))) \
  $(foreach v,$(libmvec-multiarch-float-func-list), \
    $(patsubst %,svml_s_%$(v),$(libmvec-funcs)))
endif