mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-22 10:50:07 +00:00
Use -msse2avx option for x86-64 libm functions
This commit is contained in:
parent
73139a7628
commit
56f6f6a240
17
ChangeLog
17
ChangeLog
@ -1,5 +1,22 @@
|
||||
2012-01-28 Ulrich Drepper <drepper@gmail.com>
|
||||
|
||||
* config.h.in: Define HAVE_SSE2AVX_SUPPORT.
|
||||
* math/math_private.h: Remove libc_fegetround* and
|
||||
libc_fesetround*.
|
||||
* sysdeps/i386/configure.in: Check for -msse2avx.
|
||||
* sysdeps/x86_64/fpu/math_private.h: Use VEX-encoded instructions
|
||||
also if SSE2AVX is defined.
|
||||
Remove libc_fegetround* and libc_fesetround*.
|
||||
* sysdeps/x86_64/fpu/multiarch/Makefile: Compile *-avx functions
|
||||
if config-cflags-sse2avx is yes. Also add -DSSE2AVX to defines.
|
||||
* sysdeps/x86_64/fpu/multiarch/e_atan2.c: Use HAS_AVX again instead
|
||||
of HAS_YMM_USABLE.
|
||||
* sysdeps/x86_64/fpu/multiarch/e_exp.c: Likewise.
|
||||
* sysdeps/x86_64/fpu/multiarch/e_log.c: Likewise.
|
||||
* sysdeps/x86_64/fpu/multiarch/s_atan.c: Likewise.
|
||||
* sysdeps/x86_64/fpu/multiarch/s_sin.c: Likewise.
|
||||
* sysdeps/x86_64/fpu/multiarch/s_tan.c: Likewise.
|
||||
|
||||
* sysdeps/x86_64/fpu/math_private.h: Simplify use of AVX instructions.
|
||||
|
||||
2012-01-19 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
|
||||
|
@ -90,7 +90,7 @@
|
||||
certain registers (CR0, MQ, CTR, LR) in asm statements. */
|
||||
#undef BROKEN_PPC_ASM_CR0
|
||||
|
||||
/* Defined on SPARC if ld doesn't handle R_SPARC_WDISP22 against .hidden
|
||||
/* Defined on SPARC if ld does not handle R_SPARC_WDISP22 against .hidden
|
||||
symbol. sysdeps/sparc/sparc32/elf/configure. */
|
||||
#undef BROKEN_SPARC_WDISP22
|
||||
|
||||
@ -106,17 +106,20 @@
|
||||
/* Define if gcc supports AVX. */
|
||||
#undef HAVE_AVX_SUPPORT
|
||||
|
||||
/* Define if gcc supports VEX encoding. */
|
||||
#undef HAVE_SSE2AVX_SUPPORT
|
||||
|
||||
/* Define if gcc supports FMA4. */
|
||||
#undef HAVE_FMA4_SUPPORT
|
||||
|
||||
/* Define if the compiler's exception support is based on libunwind. */
|
||||
/* Define if the compiler\'s exception support is based on libunwind. */
|
||||
#undef HAVE_CC_WITH_LIBUNWIND
|
||||
|
||||
/* Define if the access to static and hidden variables is position independent
|
||||
and does not need relocations. */
|
||||
#undef PI_STATIC_AND_HIDDEN
|
||||
|
||||
/* Define this to disable the `hidden_proto' et al macros in
|
||||
/* Define this to disable the 'hidden_proto' et al macros in
|
||||
include/libc-symbols.h that avoid PLT slots in the shared objects. */
|
||||
#undef NO_HIDDEN
|
||||
|
||||
|
@ -365,14 +365,6 @@ extern void __docos (double __x, double __dx, double __v[]);
|
||||
know what operations are going to be performed. Therefore we
|
||||
define additional interfaces. By default they refer to the normal
|
||||
interfaces. */
|
||||
#define libc_fegetround() fegetround ()
|
||||
#define libc_fegetroundf() fegetround ()
|
||||
#define libc_fegetroundl() fegetround ()
|
||||
|
||||
#define libc_fesetround(r) (void) fesetround (r)
|
||||
#define libc_fesetroundf(r) (void) fesetround (r)
|
||||
#define libc_fesetroundl(r) (void) fesetround (r)
|
||||
|
||||
#define libc_feholdexcept(e) (void) feholdexcept (e)
|
||||
#define libc_feholdexceptf(e) (void) feholdexcept (e)
|
||||
#define libc_feholdexceptl(e) (void) feholdexcept (e)
|
||||
|
23
sysdeps/i386/configure
vendored
23
sysdeps/i386/configure
vendored
@ -756,6 +756,29 @@ if test $libc_cv_cc_avx = yes; then
|
||||
|
||||
fi
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX encoding of SSE instructions" >&5
|
||||
$as_echo_n "checking for AVX encoding of SSE instructions... " >&6; }
|
||||
if ${libc_cv_cc_sse2avx+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
if { ac_try='${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null'
|
||||
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
||||
(eval $ac_try) 2>&5
|
||||
ac_status=$?
|
||||
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||||
test $ac_status = 0; }; }; then
|
||||
libc_cv_cc_sse2avx=yes
|
||||
else
|
||||
libc_cv_cc_sse2avx=no
|
||||
fi
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_sse2avx" >&5
|
||||
$as_echo "$libc_cv_cc_sse2avx" >&6; }
|
||||
if test $libc_cv_cc_sse2avx = yes; then
|
||||
$as_echo "#define HAVE_SSE2AVX_SUPPORT 1" >>confdefs.h
|
||||
|
||||
fi
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for FMA4 support" >&5
|
||||
$as_echo_n "checking for FMA4 support... " >&6; }
|
||||
if ${libc_cv_cc_fma4+:} false; then :
|
||||
|
@ -67,6 +67,17 @@ if test $libc_cv_cc_avx = yes; then
|
||||
AC_DEFINE(HAVE_AVX_SUPPORT)
|
||||
fi
|
||||
|
||||
dnl Check if -msse2avx works.
|
||||
AC_CACHE_CHECK(for AVX encoding of SSE instructions, libc_cv_cc_sse2avx, [dnl
|
||||
if AC_TRY_COMMAND([${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null]); then
|
||||
libc_cv_cc_sse2avx=yes
|
||||
else
|
||||
libc_cv_cc_sse2avx=no
|
||||
fi])
|
||||
if test $libc_cv_cc_sse2avx = yes; then
|
||||
AC_DEFINE(HAVE_SSE2AVX_SUPPORT)
|
||||
fi
|
||||
|
||||
dnl Check if -mfma4 works.
|
||||
AC_CACHE_CHECK(for FMA4 support, libc_cv_cc_fma4, [dnl
|
||||
if AC_TRY_COMMAND([${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null]); then
|
||||
|
@ -19,7 +19,7 @@
|
||||
|
||||
/* We can do a few things better on x86-64. */
|
||||
|
||||
#ifdef __AVX__
|
||||
#if defined __AVX__ || defined SSE2AVX
|
||||
# define MOVD "vmovd"
|
||||
# define STMXCSR "vstmxcsr"
|
||||
# define LDMXCSR "vldmxcsr"
|
||||
@ -90,7 +90,7 @@
|
||||
({ int __di; GET_FLOAT_WORD (__di, (float) d); \
|
||||
(__di & 0x7fffffff) < 0x7f800000; })
|
||||
|
||||
#ifdef __AVX__
|
||||
#if defined __AVX__ || defined SSE2AVX
|
||||
# define __ieee754_sqrt(d) \
|
||||
({ double __res; \
|
||||
asm ("vsqrtsd %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
|
||||
@ -116,7 +116,7 @@
|
||||
|
||||
#ifdef __SSE4_1__
|
||||
# ifndef __rint
|
||||
# ifdef __AVX__
|
||||
# if defined __AVX__ || defined SSE2AVX
|
||||
# define __rint(d) \
|
||||
({ double __res; \
|
||||
asm ("vroundsd $4, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
|
||||
@ -129,7 +129,7 @@
|
||||
# endif
|
||||
# endif
|
||||
# ifndef __rintf
|
||||
# ifdef __AVX__
|
||||
# if defined __AVX__ || defined SSE2AVX
|
||||
# define __rintf(d) \
|
||||
({ float __res; \
|
||||
asm ("vroundss $4, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \
|
||||
@ -143,7 +143,7 @@
|
||||
# endif
|
||||
|
||||
# ifndef __floor
|
||||
# ifdef __AVX__
|
||||
# if defined __AVX__ || defined SSE2AVX
|
||||
# define __floor(d) \
|
||||
({ double __res; \
|
||||
asm ("vroundsd $1, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
|
||||
@ -156,7 +156,7 @@
|
||||
# endif
|
||||
# endif
|
||||
# ifndef __floorf
|
||||
# ifdef __AVX__
|
||||
# if defined __AVX__ || defined SSE2AVX
|
||||
# define __floorf(d) \
|
||||
({ float __res; \
|
||||
asm ("vroundss $1, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \
|
||||
@ -173,29 +173,6 @@
|
||||
|
||||
/* Specialized variants of the <fenv.h> interfaces which only handle
|
||||
either the FPU or the SSE unit. */
|
||||
#undef libc_fegetround
|
||||
#define libc_fegetround() \
|
||||
({ \
|
||||
unsigned int mxcsr; \
|
||||
asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
|
||||
(mxcsr & 0x6000) >> 3; \
|
||||
})
|
||||
#undef libc_fegetroundf
|
||||
#define libc_fegetroundf() libc_fegetround ()
|
||||
// #define libc_fegetroundl() fegetround ()
|
||||
|
||||
#undef libc_fesetround
|
||||
#define libc_fesetround(r) \
|
||||
do { \
|
||||
unsigned int mxcsr; \
|
||||
asm (STMXCSR " %0" : "=m" (*&mxcsr)); \
|
||||
mxcsr = (mxcsr & ~0x6000) | ((r) << 3); \
|
||||
asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \
|
||||
} while (0)
|
||||
#undef libc_fesetroundf
|
||||
#define libc_fesetroundf(r) libc_fesetround (r)
|
||||
// #define libc_fesetroundl(r) (void) fesetround (r)
|
||||
|
||||
#undef libc_feholdexcept
|
||||
#define libc_feholdexcept(e) \
|
||||
do { \
|
||||
@ -224,7 +201,8 @@
|
||||
|
||||
#undef libc_fetestexcept
|
||||
#define libc_fetestexcept(e) \
|
||||
({ unsigned int mxcsr; asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
|
||||
({ unsigned int mxcsr; \
|
||||
asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
|
||||
mxcsr & (e) & FE_ALL_EXCEPT; })
|
||||
#undef libc_fetestexceptf
|
||||
#define libc_fetestexceptf(e) libc_fetestexcept (e)
|
||||
|
@ -34,21 +34,21 @@ CFLAGS-s_sin-fma4.c = -mfma4
|
||||
CFLAGS-s_tan-fma4.c = -mfma4
|
||||
endif
|
||||
|
||||
ifeq ($(config-cflags-avx),yes)
|
||||
ifeq ($(config-cflags-sse2avx),yes)
|
||||
libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \
|
||||
e_atan2-avx s_sin-avx s_tan-avx \
|
||||
mplog-avx mpa-avx slowexp-avx \
|
||||
mpexp-avx
|
||||
|
||||
CFLAGS-e_atan2-avx.c = -mavx
|
||||
CFLAGS-e_exp-avx.c = -mavx
|
||||
CFLAGS-e_log-avx.c = -mavx
|
||||
CFLAGS-mpa-avx.c = -mavx
|
||||
CFLAGS-mpexp-avx.c = -mavx
|
||||
CFLAGS-mplog-avx.c = -mavx
|
||||
CFLAGS-s_atan-avx.c = -mavx
|
||||
CFLAGS-s_sin-avx.c = -mavx
|
||||
CFLAGS-slowexp-avx.c = -mavx
|
||||
CFLAGS-s_tan-avx.c = -mavx
|
||||
CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX
|
||||
CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX
|
||||
CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX
|
||||
CFLAGS-mpa-avx.c = -msse2avx -DSSE2AVX
|
||||
CFLAGS-mpexp-avx.c = -msse2avx -DSSE2AVX
|
||||
CFLAGS-mplog-avx.c = -msse2avx -DSSE2AVX
|
||||
CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX
|
||||
CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX
|
||||
CFLAGS-slowexp-avx.c = -msse2avx -DSSE2AVX
|
||||
CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX
|
||||
endif
|
||||
endif
|
||||
|
@ -14,7 +14,7 @@ extern double __ieee754_atan2_fma4 (double, double);
|
||||
|
||||
libm_ifunc (__ieee754_atan2,
|
||||
HAS_FMA4 ? __ieee754_atan2_fma4
|
||||
: (HAS_YMM_USABLE ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
|
||||
: (HAS_AVX ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
|
||||
strong_alias (__ieee754_atan2, __atan2_finite)
|
||||
|
||||
# define __ieee754_atan2 __ieee754_atan2_sse2
|
||||
|
@ -14,7 +14,7 @@ extern double __ieee754_exp_fma4 (double);
|
||||
|
||||
libm_ifunc (__ieee754_exp,
|
||||
HAS_FMA4 ? __ieee754_exp_fma4
|
||||
: (HAS_YMM_USABLE ? __ieee754_exp_avx : __ieee754_exp_sse2));
|
||||
: (HAS_AVX ? __ieee754_exp_avx : __ieee754_exp_sse2));
|
||||
strong_alias (__ieee754_exp, __exp_finite)
|
||||
|
||||
# define __ieee754_exp __ieee754_exp_sse2
|
||||
|
@ -14,8 +14,7 @@ extern double __ieee754_log_fma4 (double);
|
||||
|
||||
libm_ifunc (__ieee754_log,
|
||||
HAS_FMA4 ? __ieee754_log_fma4
|
||||
: (HAS_YMM_USABLE ? __ieee754_log_avx
|
||||
: __ieee754_log_sse2));
|
||||
: (HAS_AVX ? __ieee754_log_avx : __ieee754_log_sse2));
|
||||
strong_alias (__ieee754_log, __log_finite)
|
||||
|
||||
# define __ieee754_log __ieee754_log_sse2
|
||||
|
@ -13,7 +13,7 @@ extern double __atan_fma4 (double);
|
||||
# endif
|
||||
|
||||
libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 :
|
||||
HAS_YMM_USABLE ? __atan_avx : __atan_sse2));
|
||||
HAS_AVX ? __atan_avx : __atan_sse2));
|
||||
|
||||
# define atan __atan_sse2
|
||||
#endif
|
||||
|
@ -18,11 +18,11 @@ extern double __sin_fma4 (double);
|
||||
# endif
|
||||
|
||||
libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 :
|
||||
HAS_YMM_USABLE ? __cos_avx : __cos_sse2));
|
||||
HAS_AVX ? __cos_avx : __cos_sse2));
|
||||
weak_alias (__cos, cos)
|
||||
|
||||
libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 :
|
||||
HAS_YMM_USABLE ? __sin_avx : __sin_sse2));
|
||||
HAS_AVX ? __sin_avx : __sin_sse2));
|
||||
weak_alias (__sin, sin)
|
||||
|
||||
# define __cos __cos_sse2
|
||||
|
@ -13,7 +13,7 @@ extern double __tan_fma4 (double);
|
||||
# endif
|
||||
|
||||
libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 :
|
||||
HAS_YMM_USABLE ? __tan_avx : __tan_sse2));
|
||||
HAS_AVX ? __tan_avx : __tan_sse2));
|
||||
|
||||
# define tan __tan_sse2
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user