Provide internal optimizations on x86-64 with SSE4.1

Provide macros so that the internal users can, if possible, directly use
the new instructions.

Also fix up the mathinline.h header when compiling with SSE4.1 enabled.
This commit is contained in:
Ulrich Drepper 2011-10-17 11:23:40 -04:00
parent b171c13768
commit ed22dcf691
7 changed files with 61 additions and 12 deletions

View File

@ -1,5 +1,15 @@
2011-10-17 Ulrich Drepper <drepper@gmail.com>
* sysdeps/x86_64/fpu/bits/mathinline.h: Don't define inlines if
__NO_MATH_INLINES is defined. Cleanups.
* sysdeps/x86_64/fpu/math_private.h: Define __rint, __rintf, __floor,
and __floorf if target has SSE4.1.
* sysdeps/x86_64/fpu/multiarch/s_floor-c.c: Undef first.
* sysdeps/x86_64/fpu/multiarch/s_floorf-c.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/s_rint-c.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/s_rintf-c.c: Likewise.
* sysdeps/x86_64/fpu/bits/mathinline.h (floor): Use correct function
name.
(floorf): Likewise.

View File

@ -30,34 +30,35 @@
#endif
#if defined __GNUC__ && __GNUC__ >= 2
# ifdef __USE_ISOC99
/* The gcc, version 2.7 or below, has problems with all this inlining
code. So disable it for this version of the compiler. */
#if __GNUC_PREREQ (2, 8) && defined __USE_ISOC99
__BEGIN_NAMESPACE_C99
/* Test for negative number. Used in the signbit() macro. */
/* Return nonzero if the sign bit of the float __x is set, zero
   otherwise.  */
__MATH_INLINE int
__NTH (__signbitf (float __x))
{
# if __WORDSIZE == 32
# if __WORDSIZE == 32
/* Reinterpret the float's bits as an int through a union; assuming int
   and float have the same size, the int is negative exactly when the
   top bit is set.  */
__extension__ union { float __f; int __i; } __u = { __f: __x };
return __u.__i < 0;
# else
# else
/* pmovmskb collects the most significant bit of each byte of the SSE
   register into __m.  Mask 0x8 selects the bit that came from byte 3,
   the highest byte of the 4-byte float.  */
int __m;
__asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x));
return __m & 0x8;
# endif
# endif
}
/* Return nonzero if the sign bit of the double __x is set, zero
   otherwise.  */
__MATH_INLINE int
__NTH (__signbit (double __x))
{
# if __WORDSIZE == 32
# if __WORDSIZE == 32
/* View the double as two ints through a union and test whether the
   second one is negative, i.e. whether its top bit is set.
   NOTE(review): this relies on __i[1] being the high-order half of the
   double (little-endian layout) -- confirm for the targets using this
   header.  */
__extension__ union { double __d; int __i[2]; } __u = { __d: __x };
return __u.__i[1] < 0;
# else
# else
/* pmovmskb collects the most significant bit of each byte of the SSE
   register into __m.  Mask 0x80 selects the bit that came from byte 7,
   the highest byte of the 8-byte double.  */
int __m;
__asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x));
return __m & 0x80;
# endif
# endif
}
__MATH_INLINE int
__NTH (__signbitl (long double __x))
@ -66,6 +67,16 @@ __NTH (__signbitl (long double __x))
return (__u.__i[2] & 0x8000) != 0;
}
__END_NAMESPACE_C99
#endif
#if (__GNUC_PREREQ (2, 8) && !defined __NO_MATH_INLINES \
&& defined __OPTIMIZE__)
# ifdef __USE_ISOC99
__BEGIN_NAMESPACE_C99
/* Round to nearest integer. */
# if __WORDSIZE == 64 || defined __SSE_MATH__
__MATH_INLINE long int
@ -100,14 +111,10 @@ __NTH (llrint (double __x))
__asm ("cvtsd2si %1, %0" : "=r" (__res) : "xm" (__x));
return __res;
}
__END_NAMESPACE_C99
# endif
# if defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0 \
&& (__WORDSIZE == 64 || defined __SSE2_MATH__)
__BEGIN_NAMESPACE_C99
/* Determine maximum of two values. */
__MATH_INLINE float
__NTH (fmaxf (float __x, float __y))

View File

@ -90,3 +90,31 @@ do { \
({ long double __res; \
asm ("fsqrt" : "=t" (__res) : "0" ((long double) d)); \
__res; })
#ifdef __SSE4_1__
/* When the compiler targets SSE4.1, expand the internal rounding helpers
   to a single ROUNDSD/ROUNDSS instruction.  Each macro is defined only
   if its name is not already a macro, so a file can provide its own
   definition beforehand.

   The argument is evaluated exactly once.  It is parenthesized before
   the cast so that a compound expression such as `a + b' is converted
   as a whole; without the parentheses only the first operand would be
   cast and the asm operand could end up with the wrong type.

   Immediate operand of the instruction:
     $4  round according to the current MXCSR rounding mode (rint)
     $1  round toward negative infinity (floor)  */
# ifndef __rint
#  define __rint(d) \
  ({ double __res; \
     asm ("roundsd $4, %1, %0" : "=x" (__res) : "x" ((double) (d))); \
     __res; })
# endif
# ifndef __rintf
#  define __rintf(d) \
  ({ float __res; \
     asm ("roundss $4, %1, %0" : "=x" (__res) : "x" ((float) (d))); \
     __res; })
# endif
# ifndef __floor
#  define __floor(d) \
  ({ double __res; \
     asm ("roundsd $1, %1, %0" : "=x" (__res) : "x" ((double) (d))); \
     __res; })
# endif
# ifndef __floorf
#  define __floorf(d) \
  ({ float __res; \
     asm ("roundss $1, %1, %0" : "=x" (__res) : "x" ((float) (d))); \
     __res; })
# endif
#endif

View File

@ -1,2 +1,3 @@
/* Compile the s_floor.c implementation included below with every use of
   the identifier __floor spelled __floor_c instead.  Any earlier macro
   definition of __floor is discarded first so that the #define below
   is not a conflicting redefinition.  */
#undef __floor
#define __floor __floor_c
#include <sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c>

View File

@ -1,2 +1,3 @@
/* Compile the s_floorf.c implementation included below with every use
   of the identifier __floorf spelled __floorf_c instead.  Any earlier
   macro definition of __floorf is discarded first so that the #define
   below is not a conflicting redefinition.  */
#undef __floorf
#define __floorf __floorf_c
#include <sysdeps/ieee754/flt-32/s_floorf.c>

View File

@ -1,2 +1,3 @@
/* Compile the s_rint.c implementation included below with every use of
   the identifier __rint spelled __rint_c instead.  Any earlier macro
   definition of __rint is discarded first so that the #define below is
   not a conflicting redefinition.  */
#undef __rint
#define __rint __rint_c
#include <sysdeps/ieee754/dbl-64/wordsize-64/s_rint.c>

View File

@ -1,2 +1,3 @@
/* Compile the s_rintf.c implementation included below with every use of
   the identifier __rintf spelled __rintf_c instead.  Any earlier macro
   definition of __rintf is discarded first so that the #define below
   is not a conflicting redefinition.  */
#undef __rintf
#define __rintf __rintf_c
#include <sysdeps/ieee754/flt-32/s_rintf.c>