Use -msse2avx option for x86-64 libm functions

This commit is contained in:
Ulrich Drepper 2012-01-28 14:48:46 -05:00
parent 73139a7628
commit 56f6f6a240
13 changed files with 83 additions and 60 deletions

View File

@ -1,5 +1,22 @@
2012-01-28 Ulrich Drepper <drepper@gmail.com> 2012-01-28 Ulrich Drepper <drepper@gmail.com>
* config.h.in: Define HAVE_SSE2AVX_SUPPORT.
* math/math_private.h: Remove libc_fegetround* and
libc_fesetround*.
* sysdeps/i386/configure.in: Check for -msse2avx.
* sysdeps/x86_64/fpu/math_private.h: Use VEX-encoded instructions
also if SSE2AVX is defined.
Remove libc_fegetround* and libc_fesetround*.
* sysdeps/x86_64/fpu/multiarch/Makefile: Compile *-avx functions
if config-cflags-sse2avx is yes. Also add -DSSE2AVX to defines.
* sysdeps/x86_64/fpu/multiarch/e_atan2.c: Use HAS_AVX again instead
of HAS_YMM_USABLE.
* sysdeps/x86_64/fpu/multiarch/e_exp.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/e_log.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/s_atan.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/s_sin.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/s_tan.c: Likewise.
* sysdeps/x86_64/fpu/math_private.h: Simplify use of AVX instructions. * sysdeps/x86_64/fpu/math_private.h: Simplify use of AVX instructions.
2012-01-19 Adhemerval Zanella <azanella@linux.vnet.ibm.com> 2012-01-19 Adhemerval Zanella <azanella@linux.vnet.ibm.com>

View File

@ -90,7 +90,7 @@
certain registers (CR0, MQ, CTR, LR) in asm statements. */ certain registers (CR0, MQ, CTR, LR) in asm statements. */
#undef BROKEN_PPC_ASM_CR0 #undef BROKEN_PPC_ASM_CR0
/* Defined on SPARC if ld doesn't handle R_SPARC_WDISP22 against .hidden /* Defined on SPARC if ld does not handle R_SPARC_WDISP22 against .hidden
symbol. sysdeps/sparc/sparc32/elf/configure. */ symbol. sysdeps/sparc/sparc32/elf/configure. */
#undef BROKEN_SPARC_WDISP22 #undef BROKEN_SPARC_WDISP22
@ -106,17 +106,20 @@
/* Define if gcc supports AVX. */ /* Define if gcc supports AVX. */
#undef HAVE_AVX_SUPPORT #undef HAVE_AVX_SUPPORT
/* Define if gcc supports VEX encoding. */
#undef HAVE_SSE2AVX_SUPPORT
/* Define if gcc supports FMA4. */ /* Define if gcc supports FMA4. */
#undef HAVE_FMA4_SUPPORT #undef HAVE_FMA4_SUPPORT
/* Define if the compiler's exception support is based on libunwind. */ /* Define if the compiler\'s exception support is based on libunwind. */
#undef HAVE_CC_WITH_LIBUNWIND #undef HAVE_CC_WITH_LIBUNWIND
/* Define if the access to static and hidden variables is position independent /* Define if the access to static and hidden variables is position independent
and does not need relocations. */ and does not need relocations. */
#undef PI_STATIC_AND_HIDDEN #undef PI_STATIC_AND_HIDDEN
/* Define this to disable the `hidden_proto' et al macros in /* Define this to disable the 'hidden_proto' et al macros in
include/libc-symbols.h that avoid PLT slots in the shared objects. */ include/libc-symbols.h that avoid PLT slots in the shared objects. */
#undef NO_HIDDEN #undef NO_HIDDEN

View File

@ -365,14 +365,6 @@ extern void __docos (double __x, double __dx, double __v[]);
know what operations are going to be performed. Therefore we know what operations are going to be performed. Therefore we
define additional interfaces. By default they refer to the normal define additional interfaces. By default they refer to the normal
interfaces. */ interfaces. */
#define libc_fegetround() fegetround ()
#define libc_fegetroundf() fegetround ()
#define libc_fegetroundl() fegetround ()
#define libc_fesetround(r) (void) fesetround (r)
#define libc_fesetroundf(r) (void) fesetround (r)
#define libc_fesetroundl(r) (void) fesetround (r)
#define libc_feholdexcept(e) (void) feholdexcept (e) #define libc_feholdexcept(e) (void) feholdexcept (e)
#define libc_feholdexceptf(e) (void) feholdexcept (e) #define libc_feholdexceptf(e) (void) feholdexcept (e)
#define libc_feholdexceptl(e) (void) feholdexcept (e) #define libc_feholdexceptl(e) (void) feholdexcept (e)

View File

@ -756,6 +756,29 @@ if test $libc_cv_cc_avx = yes; then
fi fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX encoding of SSE instructions" >&5
$as_echo_n "checking for AVX encoding of SSE instructions... " >&6; }
if ${libc_cv_cc_sse2avx+:} false; then :
$as_echo_n "(cached) " >&6
else
if { ac_try='${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null'
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
(eval $ac_try) 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then
libc_cv_cc_sse2avx=yes
else
libc_cv_cc_sse2avx=no
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_sse2avx" >&5
$as_echo "$libc_cv_cc_sse2avx" >&6; }
if test $libc_cv_cc_sse2avx = yes; then
$as_echo "#define HAVE_SSE2AVX_SUPPORT 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for FMA4 support" >&5 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for FMA4 support" >&5
$as_echo_n "checking for FMA4 support... " >&6; } $as_echo_n "checking for FMA4 support... " >&6; }
if ${libc_cv_cc_fma4+:} false; then : if ${libc_cv_cc_fma4+:} false; then :

View File

@ -67,6 +67,17 @@ if test $libc_cv_cc_avx = yes; then
AC_DEFINE(HAVE_AVX_SUPPORT) AC_DEFINE(HAVE_AVX_SUPPORT)
fi fi
dnl Check if -msse2avx works.
AC_CACHE_CHECK(for AVX encoding of SSE instructions, libc_cv_cc_sse2avx, [dnl
if AC_TRY_COMMAND([${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null]); then
libc_cv_cc_sse2avx=yes
else
libc_cv_cc_sse2avx=no
fi])
if test $libc_cv_cc_sse2avx = yes; then
AC_DEFINE(HAVE_SSE2AVX_SUPPORT)
fi
dnl Check if -mfma4 works. dnl Check if -mfma4 works.
AC_CACHE_CHECK(for FMA4 support, libc_cv_cc_fma4, [dnl AC_CACHE_CHECK(for FMA4 support, libc_cv_cc_fma4, [dnl
if AC_TRY_COMMAND([${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null]); then if AC_TRY_COMMAND([${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null]); then

View File

@ -19,7 +19,7 @@
/* We can do a few things better on x86-64. */ /* We can do a few things better on x86-64. */
#ifdef __AVX__ #if defined __AVX__ || defined SSE2AVX
# define MOVD "vmovd" # define MOVD "vmovd"
# define STMXCSR "vstmxcsr" # define STMXCSR "vstmxcsr"
# define LDMXCSR "vldmxcsr" # define LDMXCSR "vldmxcsr"
@ -90,7 +90,7 @@
({ int __di; GET_FLOAT_WORD (__di, (float) d); \ ({ int __di; GET_FLOAT_WORD (__di, (float) d); \
(__di & 0x7fffffff) < 0x7f800000; }) (__di & 0x7fffffff) < 0x7f800000; })
#ifdef __AVX__ #if defined __AVX__ || defined SSE2AVX
# define __ieee754_sqrt(d) \ # define __ieee754_sqrt(d) \
({ double __res; \ ({ double __res; \
asm ("vsqrtsd %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \ asm ("vsqrtsd %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
@ -116,7 +116,7 @@
#ifdef __SSE4_1__ #ifdef __SSE4_1__
# ifndef __rint # ifndef __rint
# ifdef __AVX__ # if defined __AVX__ || defined SSE2AVX
# define __rint(d) \ # define __rint(d) \
({ double __res; \ ({ double __res; \
asm ("vroundsd $4, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \ asm ("vroundsd $4, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
@ -129,7 +129,7 @@
# endif # endif
# endif # endif
# ifndef __rintf # ifndef __rintf
# ifdef __AVX__ # if defined __AVX__ || defined SSE2AVX
# define __rintf(d) \ # define __rintf(d) \
({ float __res; \ ({ float __res; \
asm ("vroundss $4, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \ asm ("vroundss $4, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \
@ -143,7 +143,7 @@
# endif # endif
# ifndef __floor # ifndef __floor
# ifdef __AVX__ # if defined __AVX__ || defined SSE2AVX
# define __floor(d) \ # define __floor(d) \
({ double __res; \ ({ double __res; \
asm ("vroundsd $1, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \ asm ("vroundsd $1, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
@ -156,7 +156,7 @@
# endif # endif
# endif # endif
# ifndef __floorf # ifndef __floorf
# ifdef __AVX__ # if defined __AVX__ || defined SSE2AVX
# define __floorf(d) \ # define __floorf(d) \
({ float __res; \ ({ float __res; \
asm ("vroundss $1, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \ asm ("vroundss $1, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \
@ -173,29 +173,6 @@
/* Specialized variants of the <fenv.h> interfaces which only handle /* Specialized variants of the <fenv.h> interfaces which only handle
either the FPU or the SSE unit. */ either the FPU or the SSE unit. */
#undef libc_fegetround
#define libc_fegetround() \
({ \
unsigned int mxcsr; \
asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
(mxcsr & 0x6000) >> 3; \
})
#undef libc_fegetroundf
#define libc_fegetroundf() libc_fegetround ()
// #define libc_fegetroundl() fegetround ()
#undef libc_fesetround
#define libc_fesetround(r) \
do { \
unsigned int mxcsr; \
asm (STMXCSR " %0" : "=m" (*&mxcsr)); \
mxcsr = (mxcsr & ~0x6000) | ((r) << 3); \
asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \
} while (0)
#undef libc_fesetroundf
#define libc_fesetroundf(r) libc_fesetround (r)
// #define libc_fesetroundl(r) (void) fesetround (r)
#undef libc_feholdexcept #undef libc_feholdexcept
#define libc_feholdexcept(e) \ #define libc_feholdexcept(e) \
do { \ do { \
@ -224,7 +201,8 @@
#undef libc_fetestexcept #undef libc_fetestexcept
#define libc_fetestexcept(e) \ #define libc_fetestexcept(e) \
({ unsigned int mxcsr; asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \ ({ unsigned int mxcsr; \
asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
mxcsr & (e) & FE_ALL_EXCEPT; }) mxcsr & (e) & FE_ALL_EXCEPT; })
#undef libc_fetestexceptf #undef libc_fetestexceptf
#define libc_fetestexceptf(e) libc_fetestexcept (e) #define libc_fetestexceptf(e) libc_fetestexcept (e)

View File

@ -34,21 +34,21 @@ CFLAGS-s_sin-fma4.c = -mfma4
CFLAGS-s_tan-fma4.c = -mfma4 CFLAGS-s_tan-fma4.c = -mfma4
endif endif
ifeq ($(config-cflags-avx),yes) ifeq ($(config-cflags-sse2avx),yes)
libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \ libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \
e_atan2-avx s_sin-avx s_tan-avx \ e_atan2-avx s_sin-avx s_tan-avx \
mplog-avx mpa-avx slowexp-avx \ mplog-avx mpa-avx slowexp-avx \
mpexp-avx mpexp-avx
CFLAGS-e_atan2-avx.c = -mavx CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX
CFLAGS-e_exp-avx.c = -mavx CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX
CFLAGS-e_log-avx.c = -mavx CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX
CFLAGS-mpa-avx.c = -mavx CFLAGS-mpa-avx.c = -msse2avx -DSSE2AVX
CFLAGS-mpexp-avx.c = -mavx CFLAGS-mpexp-avx.c = -msse2avx -DSSE2AVX
CFLAGS-mplog-avx.c = -mavx CFLAGS-mplog-avx.c = -msse2avx -DSSE2AVX
CFLAGS-s_atan-avx.c = -mavx CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX
CFLAGS-s_sin-avx.c = -mavx CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX
CFLAGS-slowexp-avx.c = -mavx CFLAGS-slowexp-avx.c = -msse2avx -DSSE2AVX
CFLAGS-s_tan-avx.c = -mavx CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX
endif endif
endif endif

View File

@ -14,7 +14,7 @@ extern double __ieee754_atan2_fma4 (double, double);
libm_ifunc (__ieee754_atan2, libm_ifunc (__ieee754_atan2,
HAS_FMA4 ? __ieee754_atan2_fma4 HAS_FMA4 ? __ieee754_atan2_fma4
: (HAS_YMM_USABLE ? __ieee754_atan2_avx : __ieee754_atan2_sse2)); : (HAS_AVX ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
strong_alias (__ieee754_atan2, __atan2_finite) strong_alias (__ieee754_atan2, __atan2_finite)
# define __ieee754_atan2 __ieee754_atan2_sse2 # define __ieee754_atan2 __ieee754_atan2_sse2

View File

@ -14,7 +14,7 @@ extern double __ieee754_exp_fma4 (double);
libm_ifunc (__ieee754_exp, libm_ifunc (__ieee754_exp,
HAS_FMA4 ? __ieee754_exp_fma4 HAS_FMA4 ? __ieee754_exp_fma4
: (HAS_YMM_USABLE ? __ieee754_exp_avx : __ieee754_exp_sse2)); : (HAS_AVX ? __ieee754_exp_avx : __ieee754_exp_sse2));
strong_alias (__ieee754_exp, __exp_finite) strong_alias (__ieee754_exp, __exp_finite)
# define __ieee754_exp __ieee754_exp_sse2 # define __ieee754_exp __ieee754_exp_sse2

View File

@ -14,8 +14,7 @@ extern double __ieee754_log_fma4 (double);
libm_ifunc (__ieee754_log, libm_ifunc (__ieee754_log,
HAS_FMA4 ? __ieee754_log_fma4 HAS_FMA4 ? __ieee754_log_fma4
: (HAS_YMM_USABLE ? __ieee754_log_avx : (HAS_AVX ? __ieee754_log_avx : __ieee754_log_sse2));
: __ieee754_log_sse2));
strong_alias (__ieee754_log, __log_finite) strong_alias (__ieee754_log, __log_finite)
# define __ieee754_log __ieee754_log_sse2 # define __ieee754_log __ieee754_log_sse2

View File

@ -13,7 +13,7 @@ extern double __atan_fma4 (double);
# endif # endif
libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 : libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 :
HAS_YMM_USABLE ? __atan_avx : __atan_sse2)); HAS_AVX ? __atan_avx : __atan_sse2));
# define atan __atan_sse2 # define atan __atan_sse2
#endif #endif

View File

@ -18,11 +18,11 @@ extern double __sin_fma4 (double);
# endif # endif
libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 : libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 :
HAS_YMM_USABLE ? __cos_avx : __cos_sse2)); HAS_AVX ? __cos_avx : __cos_sse2));
weak_alias (__cos, cos) weak_alias (__cos, cos)
libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 : libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 :
HAS_YMM_USABLE ? __sin_avx : __sin_sse2)); HAS_AVX ? __sin_avx : __sin_sse2));
weak_alias (__sin, sin) weak_alias (__sin, sin)
# define __cos __cos_sse2 # define __cos __cos_sse2

View File

@ -13,7 +13,7 @@ extern double __tan_fma4 (double);
# endif # endif
libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 : libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 :
HAS_YMM_USABLE ? __tan_avx : __tan_sse2)); HAS_AVX ? __tan_avx : __tan_sse2));
# define tan __tan_sse2 # define tan __tan_sse2
#endif #endif