mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-22 13:00:06 +00:00
Provide internal optimizations on x86-64 with SSE4.1
Provide macros so that the internal users can, if possible, directly use the new instructions. Also fix up the mathinline.h header when compiling with SSE4.1 enabled.
This commit is contained in:
parent
b171c13768
commit
ed22dcf691
10
ChangeLog
10
ChangeLog
@ -1,5 +1,15 @@
|
||||
2011-10-17 Ulrich Drepper <drepper@gmail.com>
|
||||
|
||||
* sysdeps/x86_64/fpu/bits/mathinline.h: Don't define inlines if
|
||||
__NO_MATH_INLINES is defined. Cleanups.
|
||||
|
||||
* sysdeps/x86_64/fpu/math_private.h: Define __rint, __rintf, __floor,
|
||||
and __floorf if target has SSE4.1.
|
||||
* sysdeps/x86_64/fpu/multiarch/s_floor-c.c: Undef first.
|
||||
* sysdeps/x86_64/fpu/multiarch/s_floorf-c.c: Likewise.
|
||||
* sysdeps/x86_64/fpu/multiarch/s_rint-c.c: Likewise.
|
||||
* sysdeps/x86_64/fpu/multiarch/s_rintf-c.c: Likewise.
|
||||
|
||||
* sysdeps/x86_64/fpu/bits/mathinline.h (floor): Use correct function
|
||||
name.
|
||||
(floorf): Likewise.
|
||||
|
@ -30,34 +30,35 @@
|
||||
#endif
|
||||
|
||||
|
||||
#if defined __GNUC__ && __GNUC__ >= 2
|
||||
# ifdef __USE_ISOC99
|
||||
/* The gcc, version 2.7 or below, has problems with all this inlining
|
||||
code. So disable it for this version of the compiler. */
|
||||
#if __GNUC_PREREQ (2, 8) && defined __USE_ISOC99
|
||||
__BEGIN_NAMESPACE_C99
|
||||
|
||||
/* Test for negative number. Used in the signbit() macro. */
|
||||
__MATH_INLINE int
|
||||
__NTH (__signbitf (float __x))
|
||||
{
|
||||
# if __WORDSIZE == 32
|
||||
# if __WORDSIZE == 32
|
||||
__extension__ union { float __f; int __i; } __u = { __f: __x };
|
||||
return __u.__i < 0;
|
||||
# else
|
||||
# else
|
||||
int __m;
|
||||
__asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x));
|
||||
return __m & 0x8;
|
||||
# endif
|
||||
# endif
|
||||
}
|
||||
__MATH_INLINE int
|
||||
__NTH (__signbit (double __x))
|
||||
{
|
||||
# if __WORDSIZE == 32
|
||||
# if __WORDSIZE == 32
|
||||
__extension__ union { double __d; int __i[2]; } __u = { __d: __x };
|
||||
return __u.__i[1] < 0;
|
||||
# else
|
||||
# else
|
||||
int __m;
|
||||
__asm ("pmovmskb %1, %0" : "=r" (__m) : "x" (__x));
|
||||
return __m & 0x80;
|
||||
# endif
|
||||
# endif
|
||||
}
|
||||
__MATH_INLINE int
|
||||
__NTH (__signbitl (long double __x))
|
||||
@ -66,6 +67,16 @@ __NTH (__signbitl (long double __x))
|
||||
return (__u.__i[2] & 0x8000) != 0;
|
||||
}
|
||||
|
||||
__END_NAMESPACE_C99
|
||||
#endif
|
||||
|
||||
|
||||
#if (__GNUC_PREREQ (2, 8) && !defined __NO_MATH_INLINES \
|
||||
&& defined __OPTIMIZE__)
|
||||
|
||||
# ifdef __USE_ISOC99
|
||||
__BEGIN_NAMESPACE_C99
|
||||
|
||||
/* Round to nearest integer. */
|
||||
# if __WORDSIZE == 64 || defined __SSE_MATH__
|
||||
__MATH_INLINE long int
|
||||
@ -100,14 +111,10 @@ __NTH (llrint (double __x))
|
||||
__asm ("cvtsd2si %1, %0" : "=r" (__res) : "xm" (__x));
|
||||
return __res;
|
||||
}
|
||||
|
||||
__END_NAMESPACE_C99
|
||||
# endif
|
||||
|
||||
# if defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0 \
|
||||
&& (__WORDSIZE == 64 || defined __SSE2_MATH__)
|
||||
__BEGIN_NAMESPACE_C99
|
||||
|
||||
/* Determine maximum of two values. */
|
||||
__MATH_INLINE float
|
||||
__NTH (fmaxf (float __x, float __y))
|
||||
|
@ -90,3 +90,31 @@ do { \
|
||||
({ long double __res; \
|
||||
asm ("fsqrt" : "=t" (__res) : "0" ((long double) d)); \
|
||||
__res; })
|
||||
|
||||
#ifdef __SSE4_1__
|
||||
# ifndef __rint
|
||||
# define __rint(d) \
|
||||
({ double __res; \
|
||||
asm ("roundsd $4, %1, %0" : "=x" (__res) : "x" ((double) d)); \
|
||||
__res; })
|
||||
# endif
|
||||
# ifndef __rintf
|
||||
# define __rintf(d) \
|
||||
({ float __res; \
|
||||
asm ("roundss $4, %1, %0" : "=x" (__res) : "x" ((float) d)); \
|
||||
__res; })
|
||||
# endif
|
||||
|
||||
# ifndef __floor
|
||||
# define __floor(d) \
|
||||
({ double __res; \
|
||||
asm ("roundsd $1, %1, %0" : "=x" (__res) : "x" ((double) d)); \
|
||||
__res; })
|
||||
# endif
|
||||
# ifndef __floorf
|
||||
# define __floorf(d) \
|
||||
({ float __res; \
|
||||
asm ("roundss $1, %1, %0" : "=x" (__res) : "x" ((float) d)); \
|
||||
__res; })
|
||||
# endif
|
||||
#endif
|
||||
|
@ -1,2 +1,3 @@
|
||||
#undef __floor
|
||||
#define __floor __floor_c
|
||||
#include <sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c>
|
||||
|
@ -1,2 +1,3 @@
|
||||
#undef __floorf
|
||||
#define __floorf __floorf_c
|
||||
#include <sysdeps/ieee754/flt-32/s_floorf.c>
|
||||
|
@ -1,2 +1,3 @@
|
||||
#undef __rint
|
||||
#define __rint __rint_c
|
||||
#include <sysdeps/ieee754/dbl-64/wordsize-64/s_rint.c>
|
||||
|
@ -1,2 +1,3 @@
|
||||
#undef __rintf
|
||||
#define __rintf __rintf_c
|
||||
#include <sysdeps/ieee754/flt-32/s_rintf.c>
|
||||
|
Loading…
Reference in New Issue
Block a user