mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-22 19:00:07 +00:00
Use -msse2avx option for x86-64 libm functions
This commit is contained in:
parent
73139a7628
commit
56f6f6a240
17
ChangeLog
17
ChangeLog
@ -1,5 +1,22 @@
|
|||||||
2012-01-28 Ulrich Drepper <drepper@gmail.com>
|
2012-01-28 Ulrich Drepper <drepper@gmail.com>
|
||||||
|
|
||||||
|
* config.h.in: Define HAVE_SSE2AVX_SUPPORT.
|
||||||
|
* math/math_private.h: Remove libc_fegetround* and
|
||||||
|
libc_fesetround*.
|
||||||
|
* sysdeps/i386/configure.in: Check for -msse2avx.
|
||||||
|
* sysdeps/x86_64/fpu/math_private.h: Use VEX-encoded instructions
|
||||||
|
also if SSE2AVX is defined.
|
||||||
|
Remove libc_fegetround* and libc_fesetround*.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/Makefile: Compile *-avx functions
|
||||||
|
if config-cflags-sse2avx is yes. Also add -DSSE2AVX to defines.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/e_atan2.c: Use HAS_AVX again instead
|
||||||
|
of HAS_YMM_USABLE.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/e_exp.c: Likewise.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/e_log.c: Likewise.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/s_atan.c: Likewise.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/s_sin.c: Likewise.
|
||||||
|
* sysdeps/x86_64/fpu/multiarch/s_tan.c: Likewise.
|
||||||
|
|
||||||
* sysdeps/x86_64/fpu/math_private.h: Simplify use of AVX instructions.
|
* sysdeps/x86_64/fpu/math_private.h: Simplify use of AVX instructions.
|
||||||
|
|
||||||
2012-01-19 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
|
2012-01-19 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
|
||||||
|
@ -90,7 +90,7 @@
|
|||||||
certain registers (CR0, MQ, CTR, LR) in asm statements. */
|
certain registers (CR0, MQ, CTR, LR) in asm statements. */
|
||||||
#undef BROKEN_PPC_ASM_CR0
|
#undef BROKEN_PPC_ASM_CR0
|
||||||
|
|
||||||
/* Defined on SPARC if ld doesn't handle R_SPARC_WDISP22 against .hidden
|
/* Defined on SPARC if ld does not handle R_SPARC_WDISP22 against .hidden
|
||||||
symbol. sysdeps/sparc/sparc32/elf/configure. */
|
symbol. sysdeps/sparc/sparc32/elf/configure. */
|
||||||
#undef BROKEN_SPARC_WDISP22
|
#undef BROKEN_SPARC_WDISP22
|
||||||
|
|
||||||
@ -106,17 +106,20 @@
|
|||||||
/* Define if gcc supports AVX. */
|
/* Define if gcc supports AVX. */
|
||||||
#undef HAVE_AVX_SUPPORT
|
#undef HAVE_AVX_SUPPORT
|
||||||
|
|
||||||
|
/* Define if gcc supports VEX encoding. */
|
||||||
|
#undef HAVE_SSE2AVX_SUPPORT
|
||||||
|
|
||||||
/* Define if gcc supports FMA4. */
|
/* Define if gcc supports FMA4. */
|
||||||
#undef HAVE_FMA4_SUPPORT
|
#undef HAVE_FMA4_SUPPORT
|
||||||
|
|
||||||
/* Define if the compiler's exception support is based on libunwind. */
|
/* Define if the exception support of the compiler is based on libunwind. */
|
||||||
#undef HAVE_CC_WITH_LIBUNWIND
|
#undef HAVE_CC_WITH_LIBUNWIND
|
||||||
|
|
||||||
/* Define if the access to static and hidden variables is position independent
|
/* Define if the access to static and hidden variables is position independent
|
||||||
and does not need relocations. */
|
and does not need relocations. */
|
||||||
#undef PI_STATIC_AND_HIDDEN
|
#undef PI_STATIC_AND_HIDDEN
|
||||||
|
|
||||||
/* Define this to disable the `hidden_proto' et al macros in
|
/* Define this to disable the 'hidden_proto' et al macros in
|
||||||
include/libc-symbols.h that avoid PLT slots in the shared objects. */
|
include/libc-symbols.h that avoid PLT slots in the shared objects. */
|
||||||
#undef NO_HIDDEN
|
#undef NO_HIDDEN
|
||||||
|
|
||||||
|
@ -365,14 +365,6 @@ extern void __docos (double __x, double __dx, double __v[]);
|
|||||||
know what operations are going to be performed. Therefore we
|
know what operations are going to be performed. Therefore we
|
||||||
define additional interfaces. By default they refer to the normal
|
define additional interfaces. By default they refer to the normal
|
||||||
interfaces. */
|
interfaces. */
|
||||||
#define libc_fegetround() fegetround ()
|
|
||||||
#define libc_fegetroundf() fegetround ()
|
|
||||||
#define libc_fegetroundl() fegetround ()
|
|
||||||
|
|
||||||
#define libc_fesetround(r) (void) fesetround (r)
|
|
||||||
#define libc_fesetroundf(r) (void) fesetround (r)
|
|
||||||
#define libc_fesetroundl(r) (void) fesetround (r)
|
|
||||||
|
|
||||||
#define libc_feholdexcept(e) (void) feholdexcept (e)
|
#define libc_feholdexcept(e) (void) feholdexcept (e)
|
||||||
#define libc_feholdexceptf(e) (void) feholdexcept (e)
|
#define libc_feholdexceptf(e) (void) feholdexcept (e)
|
||||||
#define libc_feholdexceptl(e) (void) feholdexcept (e)
|
#define libc_feholdexceptl(e) (void) feholdexcept (e)
|
||||||
|
23
sysdeps/i386/configure
vendored
23
sysdeps/i386/configure
vendored
@ -756,6 +756,29 @@ if test $libc_cv_cc_avx = yes; then
|
|||||||
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX encoding of SSE instructions" >&5
|
||||||
|
$as_echo_n "checking for AVX encoding of SSE instructions... " >&6; }
|
||||||
|
if ${libc_cv_cc_sse2avx+:} false; then :
|
||||||
|
$as_echo_n "(cached) " >&6
|
||||||
|
else
|
||||||
|
if { ac_try='${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null'
|
||||||
|
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
||||||
|
(eval $ac_try) 2>&5
|
||||||
|
ac_status=$?
|
||||||
|
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||||||
|
test $ac_status = 0; }; }; then
|
||||||
|
libc_cv_cc_sse2avx=yes
|
||||||
|
else
|
||||||
|
libc_cv_cc_sse2avx=no
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_sse2avx" >&5
|
||||||
|
$as_echo "$libc_cv_cc_sse2avx" >&6; }
|
||||||
|
if test $libc_cv_cc_sse2avx = yes; then
|
||||||
|
$as_echo "#define HAVE_SSE2AVX_SUPPORT 1" >>confdefs.h
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for FMA4 support" >&5
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for FMA4 support" >&5
|
||||||
$as_echo_n "checking for FMA4 support... " >&6; }
|
$as_echo_n "checking for FMA4 support... " >&6; }
|
||||||
if ${libc_cv_cc_fma4+:} false; then :
|
if ${libc_cv_cc_fma4+:} false; then :
|
||||||
|
@ -67,6 +67,17 @@ if test $libc_cv_cc_avx = yes; then
|
|||||||
AC_DEFINE(HAVE_AVX_SUPPORT)
|
AC_DEFINE(HAVE_AVX_SUPPORT)
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
dnl Check if -msse2avx works.
|
||||||
|
AC_CACHE_CHECK(for AVX encoding of SSE instructions, libc_cv_cc_sse2avx, [dnl
|
||||||
|
if AC_TRY_COMMAND([${CC-cc} -msse2avx -xc /dev/null -S -o /dev/null]); then
|
||||||
|
libc_cv_cc_sse2avx=yes
|
||||||
|
else
|
||||||
|
libc_cv_cc_sse2avx=no
|
||||||
|
fi])
|
||||||
|
if test $libc_cv_cc_sse2avx = yes; then
|
||||||
|
AC_DEFINE(HAVE_SSE2AVX_SUPPORT)
|
||||||
|
fi
|
||||||
|
|
||||||
dnl Check if -mfma4 works.
|
dnl Check if -mfma4 works.
|
||||||
AC_CACHE_CHECK(for FMA4 support, libc_cv_cc_fma4, [dnl
|
AC_CACHE_CHECK(for FMA4 support, libc_cv_cc_fma4, [dnl
|
||||||
if AC_TRY_COMMAND([${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null]); then
|
if AC_TRY_COMMAND([${CC-cc} -mfma4 -xc /dev/null -S -o /dev/null]); then
|
||||||
|
@ -19,7 +19,7 @@
|
|||||||
|
|
||||||
/* We can do a few things better on x86-64. */
|
/* We can do a few things better on x86-64. */
|
||||||
|
|
||||||
#ifdef __AVX__
|
#if defined __AVX__ || defined SSE2AVX
|
||||||
# define MOVD "vmovd"
|
# define MOVD "vmovd"
|
||||||
# define STMXCSR "vstmxcsr"
|
# define STMXCSR "vstmxcsr"
|
||||||
# define LDMXCSR "vldmxcsr"
|
# define LDMXCSR "vldmxcsr"
|
||||||
@ -90,7 +90,7 @@
|
|||||||
({ int __di; GET_FLOAT_WORD (__di, (float) d); \
|
({ int __di; GET_FLOAT_WORD (__di, (float) d); \
|
||||||
(__di & 0x7fffffff) < 0x7f800000; })
|
(__di & 0x7fffffff) < 0x7f800000; })
|
||||||
|
|
||||||
#ifdef __AVX__
|
#if defined __AVX__ || defined SSE2AVX
|
||||||
# define __ieee754_sqrt(d) \
|
# define __ieee754_sqrt(d) \
|
||||||
({ double __res; \
|
({ double __res; \
|
||||||
asm ("vsqrtsd %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
|
asm ("vsqrtsd %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
|
||||||
@ -116,7 +116,7 @@
|
|||||||
|
|
||||||
#ifdef __SSE4_1__
|
#ifdef __SSE4_1__
|
||||||
# ifndef __rint
|
# ifndef __rint
|
||||||
# ifdef __AVX__
|
# if defined __AVX__ || defined SSE2AVX
|
||||||
# define __rint(d) \
|
# define __rint(d) \
|
||||||
({ double __res; \
|
({ double __res; \
|
||||||
asm ("vroundsd $4, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
|
asm ("vroundsd $4, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
|
||||||
@ -129,7 +129,7 @@
|
|||||||
# endif
|
# endif
|
||||||
# endif
|
# endif
|
||||||
# ifndef __rintf
|
# ifndef __rintf
|
||||||
# ifdef __AVX__
|
# if defined __AVX__ || defined SSE2AVX
|
||||||
# define __rintf(d) \
|
# define __rintf(d) \
|
||||||
({ float __res; \
|
({ float __res; \
|
||||||
asm ("vroundss $4, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \
|
asm ("vroundss $4, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \
|
||||||
@ -143,7 +143,7 @@
|
|||||||
# endif
|
# endif
|
||||||
|
|
||||||
# ifndef __floor
|
# ifndef __floor
|
||||||
# ifdef __AVX__
|
# if defined __AVX__ || defined SSE2AVX
|
||||||
# define __floor(d) \
|
# define __floor(d) \
|
||||||
({ double __res; \
|
({ double __res; \
|
||||||
asm ("vroundsd $1, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
|
asm ("vroundsd $1, %1, %0, %0" : "=x" (__res) : "xm" ((double) (d))); \
|
||||||
@ -156,7 +156,7 @@
|
|||||||
# endif
|
# endif
|
||||||
# endif
|
# endif
|
||||||
# ifndef __floorf
|
# ifndef __floorf
|
||||||
# ifdef __AVX__
|
# if defined __AVX__ || defined SSE2AVX
|
||||||
# define __floorf(d) \
|
# define __floorf(d) \
|
||||||
({ float __res; \
|
({ float __res; \
|
||||||
asm ("vroundss $1, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \
|
asm ("vroundss $1, %1, %0, %0" : "=x" (__res) : "xm" ((float) (d))); \
|
||||||
@ -173,29 +173,6 @@
|
|||||||
|
|
||||||
/* Specialized variants of the <fenv.h> interfaces which only handle
|
/* Specialized variants of the <fenv.h> interfaces which only handle
|
||||||
either the FPU or the SSE unit. */
|
either the FPU or the SSE unit. */
|
||||||
#undef libc_fegetround
|
|
||||||
#define libc_fegetround() \
|
|
||||||
({ \
|
|
||||||
unsigned int mxcsr; \
|
|
||||||
asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
|
|
||||||
(mxcsr & 0x6000) >> 3; \
|
|
||||||
})
|
|
||||||
#undef libc_fegetroundf
|
|
||||||
#define libc_fegetroundf() libc_fegetround ()
|
|
||||||
// #define libc_fegetroundl() fegetround ()
|
|
||||||
|
|
||||||
#undef libc_fesetround
|
|
||||||
#define libc_fesetround(r) \
|
|
||||||
do { \
|
|
||||||
unsigned int mxcsr; \
|
|
||||||
asm (STMXCSR " %0" : "=m" (*&mxcsr)); \
|
|
||||||
mxcsr = (mxcsr & ~0x6000) | ((r) << 3); \
|
|
||||||
asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \
|
|
||||||
} while (0)
|
|
||||||
#undef libc_fesetroundf
|
|
||||||
#define libc_fesetroundf(r) libc_fesetround (r)
|
|
||||||
// #define libc_fesetroundl(r) (void) fesetround (r)
|
|
||||||
|
|
||||||
#undef libc_feholdexcept
|
#undef libc_feholdexcept
|
||||||
#define libc_feholdexcept(e) \
|
#define libc_feholdexcept(e) \
|
||||||
do { \
|
do { \
|
||||||
@ -224,7 +201,8 @@
|
|||||||
|
|
||||||
#undef libc_fetestexcept
|
#undef libc_fetestexcept
|
||||||
#define libc_fetestexcept(e) \
|
#define libc_fetestexcept(e) \
|
||||||
({ unsigned int mxcsr; asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
|
({ unsigned int mxcsr; \
|
||||||
|
asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
|
||||||
mxcsr & (e) & FE_ALL_EXCEPT; })
|
mxcsr & (e) & FE_ALL_EXCEPT; })
|
||||||
#undef libc_fetestexceptf
|
#undef libc_fetestexceptf
|
||||||
#define libc_fetestexceptf(e) libc_fetestexcept (e)
|
#define libc_fetestexceptf(e) libc_fetestexcept (e)
|
||||||
|
@ -34,21 +34,21 @@ CFLAGS-s_sin-fma4.c = -mfma4
|
|||||||
CFLAGS-s_tan-fma4.c = -mfma4
|
CFLAGS-s_tan-fma4.c = -mfma4
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(config-cflags-avx),yes)
|
ifeq ($(config-cflags-sse2avx),yes)
|
||||||
libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \
|
libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \
|
||||||
e_atan2-avx s_sin-avx s_tan-avx \
|
e_atan2-avx s_sin-avx s_tan-avx \
|
||||||
mplog-avx mpa-avx slowexp-avx \
|
mplog-avx mpa-avx slowexp-avx \
|
||||||
mpexp-avx
|
mpexp-avx
|
||||||
|
|
||||||
CFLAGS-e_atan2-avx.c = -mavx
|
CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX
|
||||||
CFLAGS-e_exp-avx.c = -mavx
|
CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX
|
||||||
CFLAGS-e_log-avx.c = -mavx
|
CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX
|
||||||
CFLAGS-mpa-avx.c = -mavx
|
CFLAGS-mpa-avx.c = -msse2avx -DSSE2AVX
|
||||||
CFLAGS-mpexp-avx.c = -mavx
|
CFLAGS-mpexp-avx.c = -msse2avx -DSSE2AVX
|
||||||
CFLAGS-mplog-avx.c = -mavx
|
CFLAGS-mplog-avx.c = -msse2avx -DSSE2AVX
|
||||||
CFLAGS-s_atan-avx.c = -mavx
|
CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX
|
||||||
CFLAGS-s_sin-avx.c = -mavx
|
CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX
|
||||||
CFLAGS-slowexp-avx.c = -mavx
|
CFLAGS-slowexp-avx.c = -msse2avx -DSSE2AVX
|
||||||
CFLAGS-s_tan-avx.c = -mavx
|
CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
@ -14,7 +14,7 @@ extern double __ieee754_atan2_fma4 (double, double);
|
|||||||
|
|
||||||
libm_ifunc (__ieee754_atan2,
|
libm_ifunc (__ieee754_atan2,
|
||||||
HAS_FMA4 ? __ieee754_atan2_fma4
|
HAS_FMA4 ? __ieee754_atan2_fma4
|
||||||
: (HAS_YMM_USABLE ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
|
: (HAS_AVX ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
|
||||||
strong_alias (__ieee754_atan2, __atan2_finite)
|
strong_alias (__ieee754_atan2, __atan2_finite)
|
||||||
|
|
||||||
# define __ieee754_atan2 __ieee754_atan2_sse2
|
# define __ieee754_atan2 __ieee754_atan2_sse2
|
||||||
|
@ -14,7 +14,7 @@ extern double __ieee754_exp_fma4 (double);
|
|||||||
|
|
||||||
libm_ifunc (__ieee754_exp,
|
libm_ifunc (__ieee754_exp,
|
||||||
HAS_FMA4 ? __ieee754_exp_fma4
|
HAS_FMA4 ? __ieee754_exp_fma4
|
||||||
: (HAS_YMM_USABLE ? __ieee754_exp_avx : __ieee754_exp_sse2));
|
: (HAS_AVX ? __ieee754_exp_avx : __ieee754_exp_sse2));
|
||||||
strong_alias (__ieee754_exp, __exp_finite)
|
strong_alias (__ieee754_exp, __exp_finite)
|
||||||
|
|
||||||
# define __ieee754_exp __ieee754_exp_sse2
|
# define __ieee754_exp __ieee754_exp_sse2
|
||||||
|
@ -14,8 +14,7 @@ extern double __ieee754_log_fma4 (double);
|
|||||||
|
|
||||||
libm_ifunc (__ieee754_log,
|
libm_ifunc (__ieee754_log,
|
||||||
HAS_FMA4 ? __ieee754_log_fma4
|
HAS_FMA4 ? __ieee754_log_fma4
|
||||||
: (HAS_YMM_USABLE ? __ieee754_log_avx
|
: (HAS_AVX ? __ieee754_log_avx : __ieee754_log_sse2));
|
||||||
: __ieee754_log_sse2));
|
|
||||||
strong_alias (__ieee754_log, __log_finite)
|
strong_alias (__ieee754_log, __log_finite)
|
||||||
|
|
||||||
# define __ieee754_log __ieee754_log_sse2
|
# define __ieee754_log __ieee754_log_sse2
|
||||||
|
@ -13,7 +13,7 @@ extern double __atan_fma4 (double);
|
|||||||
# endif
|
# endif
|
||||||
|
|
||||||
libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 :
|
libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 :
|
||||||
HAS_YMM_USABLE ? __atan_avx : __atan_sse2));
|
HAS_AVX ? __atan_avx : __atan_sse2));
|
||||||
|
|
||||||
# define atan __atan_sse2
|
# define atan __atan_sse2
|
||||||
#endif
|
#endif
|
||||||
|
@ -18,11 +18,11 @@ extern double __sin_fma4 (double);
|
|||||||
# endif
|
# endif
|
||||||
|
|
||||||
libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 :
|
libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 :
|
||||||
HAS_YMM_USABLE ? __cos_avx : __cos_sse2));
|
HAS_AVX ? __cos_avx : __cos_sse2));
|
||||||
weak_alias (__cos, cos)
|
weak_alias (__cos, cos)
|
||||||
|
|
||||||
libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 :
|
libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 :
|
||||||
HAS_YMM_USABLE ? __sin_avx : __sin_sse2));
|
HAS_AVX ? __sin_avx : __sin_sse2));
|
||||||
weak_alias (__sin, sin)
|
weak_alias (__sin, sin)
|
||||||
|
|
||||||
# define __cos __cos_sse2
|
# define __cos __cos_sse2
|
||||||
|
@ -13,7 +13,7 @@ extern double __tan_fma4 (double);
|
|||||||
# endif
|
# endif
|
||||||
|
|
||||||
libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 :
|
libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 :
|
||||||
HAS_YMM_USABLE ? __tan_avx : __tan_sse2));
|
HAS_AVX ? __tan_avx : __tan_sse2));
|
||||||
|
|
||||||
# define tan __tan_sse2
|
# define tan __tan_sse2
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user