glibc/sysdeps/x86_64/fpu/math_private.h
Peter Collingbourne 1deff3dca1 Use movq for 64-bit operations
The EXTRACT_WORDS64 and INSERT_WORDS64 macros use movd for a 64-bit
operation.  Somehow gcc manages to turn this into movq, but LLVM won't.

2013-05-15  Peter Collingbourne  <pcc@google.com>

	* sysdeps/x86_64/fpu/math_private.h (MOVQ): New macro.
	(EXTRACT_WORDS64): Use where appropriate.
	(INSERT_WORDS64) Likewise.
2013-05-15 20:33:45 +02:00

134 lines
3.0 KiB
C

#ifndef X86_64_MATH_PRIVATE_H
#define X86_64_MATH_PRIVATE_H 1
/* We can do a few things better on x86-64.  */
/* Select the mnemonics used by the inline-asm type-punning macros below.
   When AVX is enabled (or SSE2AVX requests VEX-encoded forms of SSE
   instructions) use the VEX-prefixed vmovd/vmovq to avoid SSE/AVX
   transition penalties; otherwise use the legacy encodings.  MOVD moves
   32 bits and MOVQ moves 64 bits between an XMM register and a general
   register or memory.  */
#if defined __AVX__ || defined SSE2AVX
# define MOVD "vmovd"
# define MOVQ "vmovq"
#else
# define MOVD "movd"
# define MOVQ "movq"
#endif
/* Move the 64-bit bit pattern of the double D directly into the integer
   I with a single MOVQ from an XMM register to a GPR or memory,
   avoiding a store/load round trip through memory for the type pun.
   The "=rm" output lets the compiler pick a register or memory slot.  */
#define EXTRACT_WORDS64(i, d) \
do { \
int64_t i_; \
asm (MOVQ " %1, %0" : "=rm" (i_) : "x" ((double) (d))); \
(i) = i_; \
} while (0)
/* The reverse of EXTRACT_WORDS64: build the double D from the 64-bit
   integer bit pattern I by moving it straight into an XMM register.  */
#define INSERT_WORDS64(d, i) \
do { \
int64_t i_ = i; \
double d__; \
asm (MOVQ " %1, %0" : "=x" (d__) : "rm" (i_)); \
d = d__; \
} while (0)
/* Move the 32-bit bit pattern of the float D directly into the integer
   I with a single MOVD from an XMM register to a GPR or memory.  */
#define GET_FLOAT_WORD(i, d) \
do { \
int i_; \
asm (MOVD " %1, %0" : "=rm" (i_) : "x" ((float) (d))); \
(i) = i_; \
} while (0)
/* The reverse of GET_FLOAT_WORD: build the float F from the 32-bit
   integer bit pattern I by moving it straight into an XMM register.  */
#define SET_FLOAT_WORD(f, i) \
do { \
int i_ = i; \
float f__; \
asm (MOVD " %1, %0" : "=x" (f__) : "rm" (i_)); \
f = f__; \
} while (0)
#include <sysdeps/i386/fpu/fenv_private.h>
#include_next <math_private.h>
/* Inline double-precision square root using the hardware sqrtsd
   instruction; with AVX (or SSE2AVX) use the three-operand VEX
   encoding, which additionally names a merge source register.  */
extern __always_inline double
__ieee754_sqrt (double d)
{
double res;
#if defined __AVX__ || defined SSE2AVX
asm ("vsqrtsd %1, %0, %0" : "=x" (res) : "xm" (d));
#else
asm ("sqrtsd %1, %0" : "=x" (res) : "xm" (d));
#endif
return res;
}
/* Inline single-precision square root using the hardware sqrtss
   instruction; VEX-encoded variant when AVX (or SSE2AVX) is in use.  */
extern __always_inline float
__ieee754_sqrtf (float d)
{
float res;
#if defined __AVX__ || defined SSE2AVX
asm ("vsqrtss %1, %0, %0" : "=x" (res) : "xm" (d));
#else
asm ("sqrtss %1, %0" : "=x" (res) : "xm" (d));
#endif
return res;
}
/* Inline extended-precision square root on the x87 stack: the "0"
   constraint places D in st(0), fsqrt replaces it with the result,
   which the "=t" output reads back from st(0).  */
extern __always_inline long double
__ieee754_sqrtl (long double d)
{
long double res;
asm ("fsqrt" : "=t" (res) : "0" (d));
return res;
}
#ifdef __SSE4_1__
/* Round D to an integral value using the current rounding mode:
   roundsd immediate $4 sets bit 2, selecting the rounding mode from
   MXCSR.RC, so this matches rint semantics (including raising the
   inexact exception when the result differs from D).  */
extern __always_inline double
__rint (double d)
{
double res;
# if defined __AVX__ || defined SSE2AVX
asm ("vroundsd $4, %1, %0, %0" : "=x" (res) : "xm" (d));
# else
asm ("roundsd $4, %1, %0" : "=x" (res) : "xm" (d));
# endif
return res;
}
/* Single-precision __rint: roundss with immediate $4 rounds per the
   current MXCSR rounding mode (rint semantics).  */
extern __always_inline float
__rintf (float d)
{
float res;
# if defined __AVX__ || defined SSE2AVX
asm ("vroundss $4, %1, %0, %0" : "=x" (res) : "xm" (d));
# else
asm ("roundss $4, %1, %0" : "=x" (res) : "xm" (d));
# endif
return res;
}
/* Round D toward negative infinity: roundsd immediate $1 selects
   round-down with an explicit mode (bit 2 clear, MXCSR ignored).
   NOTE(review): bit 3 of the immediate (inexact suppression) is clear,
   so a non-integral D raises the inexact exception here — confirm
   whether callers require floor to be inexact-clean before relying on
   this for exception-sensitive code.  */
extern __always_inline double
__floor (double d)
{
double res;
# if defined __AVX__ || defined SSE2AVX
asm ("vroundsd $1, %1, %0, %0" : "=x" (res) : "xm" (d));
# else
asm ("roundsd $1, %1, %0" : "=x" (res) : "xm" (d));
# endif
return res;
}
/* Single-precision __floor: roundss immediate $1 rounds toward
   negative infinity.  NOTE(review): as with __floor, the inexact
   exception is not suppressed (immediate bit 3 clear) — verify that is
   acceptable for exception-sensitive callers.  */
extern __always_inline float
__floorf (float d)
{
float res;
# if defined __AVX__ || defined SSE2AVX
asm ("vroundss $1, %1, %0, %0" : "=x" (res) : "xm" (d));
# else
asm ("roundss $1, %1, %0" : "=x" (res) : "xm" (d));
# endif
return res;
}
#endif /* __SSE4_1__ */
#endif /* X86_64_MATH_PRIVATE_H */