glibc/sysdeps/x86_64/fpu/math_private.h

#ifndef X86_64_MATH_PRIVATE_H
#define X86_64_MATH_PRIVATE_H 1

/* Optimization barrier: evaluates to the value of X after routing it
   through an empty asm whose output is tied to the same register as
   its input, so the compiler has to treat the result as opaque.
   Operands wider than double (e.g. long double) use the top of the
   x87 stack ("t"); everything else uses an SSE register ("x").  */
#define math_opt_barrier(x) \
  ({ __typeof (x) __x; \
     if (sizeof (x) > sizeof (double)) \
       __asm__ ("" : "=t" (__x) : "0" (x)); \
     else \
       __asm__ ("" : "=x" (__x) : "0" (x)); \
     __x; })
/* Force X to be computed even though its value is discarded: X is fed
   as an input to an empty volatile asm.  Operands wider than double
   (e.g. long double) are placed in an x87 register ("f"); everything
   else goes in an SSE register ("x").  */
#define math_force_eval(x) \
  do { \
    if (sizeof (x) > sizeof (double)) \
      __asm__ __volatile__ ("" : : "f" (x)); \
    else \
      __asm__ __volatile__ ("" : : "x" (x)); \
  } while (0)

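/* On x86-64 the word-access macros can move values directly between
   SSE and integer registers.  They are defined before the generic
   header is included so that the generic code picks them up.  */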

/* Instruction mnemonics used by the inline asm in this file.  When the
   compiler generates AVX code (__AVX__) or the build defines SSE2AVX,
   the VEX-encoded spellings are used; otherwise the legacy SSE ones.
   MOVD transfers 32 bits, MOVQ transfers 64 bits.  */
#if defined __AVX__ || defined SSE2AVX
# define MOVD "vmovd"
# define MOVQ "vmovq"
# define STMXCSR "vstmxcsr"
# define LDMXCSR "vldmxcsr"
#else
# define MOVD "movd"
# define MOVQ "movq"
# define STMXCSR "stmxcsr"
# define LDMXCSR "ldmxcsr"
#endif

/* Store in I the 64-bit bit pattern of D converted to double.

   MOVQ is required rather than MOVD: the "rm" constraint lets the
   compiler choose a memory destination, and with a memory operand
   MOVD stores only the low 32 bits.  The temporary is long long int
   so that it is 64 bits wide even where long int is 32 bits.  */
#define EXTRACT_WORDS64(i, d) \
  do { \
    long long int i_; \
    __asm__ (MOVQ " %1, %0" : "=rm" (i_) : "x" ((double) (d))); \
    (i) = i_; \
  } while (0)

/* The reverse: set D to the double whose 64-bit bit pattern is the
   integer I.  MOVQ and a long long int temporary are used for the same
   reasons as in EXTRACT_WORDS64: a MOVD load from memory would bring
   in only 32 bits.  */
#define INSERT_WORDS64(d, i) \
  do { \
    long long int i_ = (i); \
    double d__; \
    __asm__ (MOVQ " %1, %0" : "=x" (d__) : "rm" (i_)); \
    (d) = d__; \
  } while (0)

/* Store in I the 32-bit bit pattern of D converted to float.  The
   value is moved with MOVD straight from an SSE register ("x") into a
   general register or memory operand ("rm") holding an int, and that
   int is then assigned to I.  */
#define GET_FLOAT_WORD(i, d) \
  do {									      \
    int i_;								      \
    asm (MOVD " %1, %0" : "=rm" (i_) : "x" ((float) (d)));		      \
    (i) = i_;								      \
  } while (0)

/* The reverse of GET_FLOAT_WORD: set F to the float whose 32-bit bit
   pattern is the integer I.  I is first copied into an int, which MOVD
   then moves from a general register or memory operand ("rm") into an
   SSE register ("x").  */
#define SET_FLOAT_WORD(f, i) \
  do {									      \
    int i_ = i;								      \
    float f__;								      \
    asm (MOVD " %1, %0" : "=x" (f__) : "rm" (i_));			      \
    f = f__;								      \
  } while (0)

#include_next <math_private.h>

/* Inline square root of the double D, computed by one scalar
   double-precision square-root instruction.  D may be in an SSE
   register or in memory; the result is produced in an SSE register.  */
extern __always_inline double
__ieee754_sqrt (double d)
{
  double root;
#if !defined __AVX__ && !defined SSE2AVX
  asm ("sqrtsd %1, %0" : "=x" (root) : "xm" (d));
#else
  /* VEX three-operand form; the destination register doubles as the
     first source operand.  */
  asm ("vsqrtsd %1, %0, %0" : "=x" (root) : "xm" (d));
#endif
  return root;
}

/* Inline square root of the float D, computed by one scalar
   single-precision square-root instruction.  D may be in an SSE
   register or in memory; the result is produced in an SSE register.  */
extern __always_inline float
__ieee754_sqrtf (float d)
{
  float root;
#if !defined __AVX__ && !defined SSE2AVX
  asm ("sqrtss %1, %0" : "=x" (root) : "xm" (d));
#else
  /* VEX three-operand form; the destination register doubles as the
     first source operand.  */
  asm ("vsqrtss %1, %0, %0" : "=x" (root) : "xm" (d));
#endif
  return root;
}

/* Inline square root of the long double D using the x87 FSQRT
   instruction.  The "0" input constraint ties D to the output operand,
   which lives at the top of the x87 register stack ("t"), so FSQRT
   replaces the value in place.  */
extern __always_inline long double
__ieee754_sqrtl (long double d)
{
  long double root;

  asm ("fsqrt" : "=t" (root) : "0" (d));
  return root;
}

#ifdef __SSE4_1__
/* Inline rint for double, built on the SSE4.1 ROUNDSD instruction.
   The immediate 4 sets bit 2 of the rounding-control byte, which makes
   the instruction round according to the rounding mode currently held
   in MXCSR.  */
extern __always_inline double
__rint (double d)
{
  double rounded;
# if !defined __AVX__ && !defined SSE2AVX
  asm ("roundsd $4, %1, %0" : "=x" (rounded) : "xm" (d));
# else
  asm ("vroundsd $4, %1, %0, %0" : "=x" (rounded) : "xm" (d));
# endif
  return rounded;
}

/* Inline rint for float, built on the SSE4.1 ROUNDSS instruction.
   The immediate 4 sets bit 2 of the rounding-control byte, which makes
   the instruction round according to the rounding mode currently held
   in MXCSR.  */
extern __always_inline float
__rintf (float d)
{
  float rounded;
# if !defined __AVX__ && !defined SSE2AVX
  asm ("roundss $4, %1, %0" : "=x" (rounded) : "xm" (d));
# else
  asm ("vroundss $4, %1, %0, %0" : "=x" (rounded) : "xm" (d));
# endif
  return rounded;
}

/* Inline floor for double, built on the SSE4.1 ROUNDSD instruction.
   The immediate 1 selects rounding toward negative infinity,
   independently of the rounding mode held in MXCSR.  */
extern __always_inline double
__floor (double d)
{
  double rounded;
# if !defined __AVX__ && !defined SSE2AVX
  asm ("roundsd $1, %1, %0" : "=x" (rounded) : "xm" (d));
# else
  asm ("vroundsd $1, %1, %0, %0" : "=x" (rounded) : "xm" (d));
# endif
  return rounded;
}

/* Inline floor for float, built on the SSE4.1 ROUNDSS instruction.
   The immediate 1 selects rounding toward negative infinity,
   independently of the rounding mode held in MXCSR.  */
extern __always_inline float
__floorf (float d)
{
  float rounded;
# if !defined __AVX__ && !defined SSE2AVX
  asm ("roundss $1, %1, %0" : "=x" (rounded) : "xm" (d));
# else
  asm ("vroundss $1, %1, %0, %0" : "=x" (rounded) : "xm" (d));
# endif
  return rounded;
}
#endif /* __SSE4_1__ */


/* Specialized variants of the <fenv.h> interfaces.  The inline asm in
   these macros reads and writes only the SSE control/status register
   (MXCSR); it does not access the x87 FPU state.  */
/* Save the current MXCSR value in E->__mxcsr, then load a new MXCSR
   with bits 0x1f80 set (all SSE exception mask bits) and bits 0x3f
   cleared (all pending SSE exception flags).

   Both asm statements are volatile.  The store of MXCSR has no input
   operands, so a non-volatile asm could be moved by the compiler
   across the volatile LDMXCSR statements in this file or merged with
   another identical read, capturing a stale value.  */
#undef libc_feholdexcept
#define libc_feholdexcept(e) \
  do { \
     unsigned int mxcsr; \
     asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
     (e)->__mxcsr = mxcsr; \
     mxcsr = (mxcsr | 0x1f80) & ~0x3f; \
     asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \
  } while (0)
/* The float variant is the same operation.  */
#undef libc_feholdexceptf
#define libc_feholdexceptf(e) libc_feholdexcept (e)
// #define libc_feholdexceptl(e) (void) feholdexcept (e)

/* Combined hold-exceptions and set-rounding-mode operation.  Saves the
   current MXCSR value in E->__mxcsr, then loads a new MXCSR with bits
   0x1f80 set (all SSE exception mask bits), bits 0x603f cleared (the
   rounding-control field 0x6000 and the exception flags 0x3f), and
   R << 3 or-ed in as the new rounding-control field.  R is expected to
   be a rounding-mode value that lands in that field once shifted left
   by three bits.

   Both asm statements are volatile.  The store of MXCSR has no input
   operands, so a non-volatile asm could be moved by the compiler
   across the volatile LDMXCSR statements in this file or merged with
   another identical read, capturing a stale value.  */
#undef libc_feholdexcept_setround
#define libc_feholdexcept_setround(e, r) \
  do { \
     unsigned int mxcsr; \
     asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \
     (e)->__mxcsr = mxcsr; \
     mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | ((r) << 3); \
     asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \
  } while (0)
/* The float variant is the same operation.  */
#undef libc_feholdexcept_setroundf
#define libc_feholdexcept_setroundf(e, r) libc_feholdexcept_setround (e, r)
// #define libc_feholdexcept_setroundl(e, r) ...

/* Evaluates to the exception bits that are both requested in E and
   currently set in MXCSR, restricted to FE_ALL_EXCEPT.  MXCSR is read
   with a volatile asm into a local variable each time the macro is
   used.  */
#undef libc_fetestexcept
#define libc_fetestexcept(e) \
  ({ unsigned int mxcsr;						      \
     asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));			      \
     mxcsr & (e) & FE_ALL_EXCEPT; })
/* The float variant is the same operation.  */
#undef libc_fetestexceptf
#define libc_fetestexceptf(e) libc_fetestexcept (e)
// #define libc_fetestexceptl(e) fetestexcept (e)

/* Load MXCSR from the value stored in E->__mxcsr.  No other member of
   *E is read by this macro.  */
#undef libc_fesetenv
#define libc_fesetenv(e) \
  asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr))
/* The float variant is the same operation.  */
#undef libc_fesetenvf
#define libc_fesetenvf(e) libc_fesetenv (e)
// #define libc_fesetenvl(e) (void) fesetenv (e)

/* Install the environment saved in E while preserving the exceptions
   raised so far: first read the current MXCSR, then load MXCSR from
   E->__mxcsr, and finally pass the previously set exception bits
   (masked with FE_ALL_EXCEPT) to __feraiseexcept.  The read must come
   before the load, since the load overwrites the flags.  */
#undef libc_feupdateenv
#define libc_feupdateenv(e) \
  do {									      \
    unsigned int mxcsr;							      \
    asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));			      \
    asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr));		      \
    __feraiseexcept (mxcsr & FE_ALL_EXCEPT);				      \
  } while (0)
/* The float variant is the same operation.  */
#undef libc_feupdateenvf
#define libc_feupdateenvf(e) libc_feupdateenv (e)
// #define libc_feupdateenvl(e) (void) feupdateenv (e)

#endif /* X86_64_MATH_PRIVATE_H */
x86_64: Convert __ieee754_sqrt{,f,l} from macros to inlines. 2012-03-06 23:41:14 +00:00			`#ifndef X86_64_MATH_PRIVATE_H`
			`#define X86_64_MATH_PRIVATE_H 1`
[BZ #3306] 2007-03-27 Jakub Jelinek <jakub@redhat.com> [BZ #3306] * math/math_private.h (math_opt_barrier, math_force_eval): Define. * sysdeps/i386/fpu/math_private.h: New file. * sysdeps/x86_64/fpu/math_private.h: New file. * math/s_nexttowardf.c (__nexttowardf): Use math_opt_barrier and math_force_eval macros. Use "+m" constraint on asm rather than "=m" and "m". * math/s_nextafter.c (__nextafter): Likewise. * sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c (__nexttoward): Likewise. * sysdeps/ieee754/flt-32/s_nextafterf.c (__nextafterf): Likewise. * sysdeps/ieee754/ldbl-128/s_nexttoward.c (__nexttoward): Likewise. * sysdeps/ieee754/ldbl-96/s_nexttoward.c (__nexttoward): Likewise. * sysdeps/i386/fpu/s_nextafterl.c (__nextafterl): Use math_opt_barrier and math_force_eval macros. * sysdeps/ieee754/ldbl-128/s_nextafterl.c (__nextafterl): Likewise. * sysdeps/ieee754/ldbl-96/s_nextafterl.c (__nextafterl): Likewise. * sysdeps/i386/fpu/s_nexttoward.c: Include float.h. (__nexttoward): Use math_opt_barrier and math_force_eval macros. Use "+m" constraint on asm rather than "=m" and "m". Only use asm to force double result if FLT_EVAL_METHOD is 2. * sysdeps/i386/fpu/s_nexttowardf.c: Include float.h. (__nexttowardf): Use math_opt_barrier and math_force_eval macros. Use "+m" constraint on asm rather than "=m" and "m". Only use asm to force double result if FLT_EVAL_METHOD is not 0. * sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c: Include float.h. (__nexttowardf): Use math_opt_barrier and math_force_eval macros. If FLT_EVAL_METHOD is not 0, force x to float using asm. * sysdeps/ieee754/ldbl-opt/s_nexttowardfd.c: Include float.h. (__nldbl_nexttowardf): Use math_opt_barrier and math_force_eval macros. If FLT_EVAL_METHOD is not 0, force x to float using asm. * sysdeps/ieee754/ldbl-96/s_nexttowardf.c: Include float.h. (__nexttowardf): Use math_opt_barrier and math_force_eval macros. If FLT_EVAL_METHOD is not 0, force x to float using asm. * math/bug-nextafter.c (zero, inf): New variables. 
(main): Add new tests. * math/bug-nexttoward.c (zero, inf): New variables. (main): Add new tests. 2007-04-16 20:41:42 +00:00
			`#define math_opt_barrier(x) \`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 12:17:57 +00:00			`({ __typeof(x) __x; \`
			`if (sizeof (x) <= sizeof (double)) \`
			`__asm ("" : "=x" (__x) : "0" (x)); \`
			`else \`
			`__asm ("" : "=t" (__x) : "0" (x)); \`
			`__x; })`
[BZ #3306] 2007-03-27 Jakub Jelinek <jakub@redhat.com> [BZ #3306] * math/math_private.h (math_opt_barrier, math_force_eval): Define. * sysdeps/i386/fpu/math_private.h: New file. * sysdeps/x86_64/fpu/math_private.h: New file. * math/s_nexttowardf.c (__nexttowardf): Use math_opt_barrier and math_force_eval macros. Use "+m" constraint on asm rather than "=m" and "m". * math/s_nextafter.c (__nextafter): Likewise. * sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c (__nexttoward): Likewise. * sysdeps/ieee754/flt-32/s_nextafterf.c (__nextafterf): Likewise. * sysdeps/ieee754/ldbl-128/s_nexttoward.c (__nexttoward): Likewise. * sysdeps/ieee754/ldbl-96/s_nexttoward.c (__nexttoward): Likewise. * sysdeps/i386/fpu/s_nextafterl.c (__nextafterl): Use math_opt_barrier and math_force_eval macros. * sysdeps/ieee754/ldbl-128/s_nextafterl.c (__nextafterl): Likewise. * sysdeps/ieee754/ldbl-96/s_nextafterl.c (__nextafterl): Likewise. * sysdeps/i386/fpu/s_nexttoward.c: Include float.h. (__nexttoward): Use math_opt_barrier and math_force_eval macros. Use "+m" constraint on asm rather than "=m" and "m". Only use asm to force double result if FLT_EVAL_METHOD is 2. * sysdeps/i386/fpu/s_nexttowardf.c: Include float.h. (__nexttowardf): Use math_opt_barrier and math_force_eval macros. Use "+m" constraint on asm rather than "=m" and "m". Only use asm to force double result if FLT_EVAL_METHOD is not 0. * sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c: Include float.h. (__nexttowardf): Use math_opt_barrier and math_force_eval macros. If FLT_EVAL_METHOD is not 0, force x to float using asm. * sysdeps/ieee754/ldbl-opt/s_nexttowardfd.c: Include float.h. (__nldbl_nexttowardf): Use math_opt_barrier and math_force_eval macros. If FLT_EVAL_METHOD is not 0, force x to float using asm. * sysdeps/ieee754/ldbl-96/s_nexttowardf.c: Include float.h. (__nexttowardf): Use math_opt_barrier and math_force_eval macros. If FLT_EVAL_METHOD is not 0, force x to float using asm. * math/bug-nextafter.c (zero, inf): New variables. 
(main): Add new tests. * math/bug-nexttoward.c (zero, inf): New variables. (main): Add new tests. 2007-04-16 20:41:42 +00:00			`#define math_force_eval(x) \`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 12:17:57 +00:00			`do { \`
			`if (sizeof (x) <= sizeof (double)) \`
			`__asm __volatile ("" : : "x" (x)); \`
			`else \`
			`__asm __volatile ("" : : "f" (x)); \`
			`} while (0)`
[BZ #3306] 2007-03-27 Jakub Jelinek <jakub@redhat.com> [BZ #3306] * math/math_private.h (math_opt_barrier, math_force_eval): Define. * sysdeps/i386/fpu/math_private.h: New file. * sysdeps/x86_64/fpu/math_private.h: New file. * math/s_nexttowardf.c (__nexttowardf): Use math_opt_barrier and math_force_eval macros. Use "+m" constraint on asm rather than "=m" and "m". * math/s_nextafter.c (__nextafter): Likewise. * sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c (__nexttoward): Likewise. * sysdeps/ieee754/flt-32/s_nextafterf.c (__nextafterf): Likewise. * sysdeps/ieee754/ldbl-128/s_nexttoward.c (__nexttoward): Likewise. * sysdeps/ieee754/ldbl-96/s_nexttoward.c (__nexttoward): Likewise. * sysdeps/i386/fpu/s_nextafterl.c (__nextafterl): Use math_opt_barrier and math_force_eval macros. * sysdeps/ieee754/ldbl-128/s_nextafterl.c (__nextafterl): Likewise. * sysdeps/ieee754/ldbl-96/s_nextafterl.c (__nextafterl): Likewise. * sysdeps/i386/fpu/s_nexttoward.c: Include float.h. (__nexttoward): Use math_opt_barrier and math_force_eval macros. Use "+m" constraint on asm rather than "=m" and "m". Only use asm to force double result if FLT_EVAL_METHOD is 2. * sysdeps/i386/fpu/s_nexttowardf.c: Include float.h. (__nexttowardf): Use math_opt_barrier and math_force_eval macros. Use "+m" constraint on asm rather than "=m" and "m". Only use asm to force double result if FLT_EVAL_METHOD is not 0. * sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c: Include float.h. (__nexttowardf): Use math_opt_barrier and math_force_eval macros. If FLT_EVAL_METHOD is not 0, force x to float using asm. * sysdeps/ieee754/ldbl-opt/s_nexttowardfd.c: Include float.h. (__nldbl_nexttowardf): Use math_opt_barrier and math_force_eval macros. If FLT_EVAL_METHOD is not 0, force x to float using asm. * sysdeps/ieee754/ldbl-96/s_nexttowardf.c: Include float.h. (__nexttowardf): Use math_opt_barrier and math_force_eval macros. If FLT_EVAL_METHOD is not 0, force x to float using asm. * math/bug-nextafter.c (zero, inf): New variables. 
(main): Add new tests. * math/bug-nexttoward.c (zero, inf): New variables. (main): Add new tests. 2007-04-16 20:41:42 +00:00
Optimize float construction/extraction on x86-64. 2009-08-24 21:52:49 +00:00			`/* We can do a few things better on x86-64. */`

Use -msse2avx option for x86-64 libm functions 2012-01-28 19:48:46 +00:00			`#if defined __AVX__ \|\| defined SSE2AVX`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 12:17:57 +00:00			`# define MOVD "vmovd"`
Simplify use of AVX instructions in internal math macros 2012-01-28 16:19:06 +00:00			`# define STMXCSR "vstmxcsr"`
			`# define LDMXCSR "vldmxcsr"`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 12:17:57 +00:00			`#else`
			`# define MOVD "movd"`
Simplify use of AVX instructions in internal math macros 2012-01-28 16:19:06 +00:00			`# define STMXCSR "stmxcsr"`
			`# define LDMXCSR "ldmxcsr"`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 12:17:57 +00:00			`#endif`

Add ceil implementation for 64-bit machines. On 64-bit machines we should not split doubles into two 32 bit integer and handle the words separately. We have wide registers. This patch implements a 64-bit ceil version. Ideally all other functions will be converted over time. 2009-08-25 01:05:48 +00:00			`/* Direct movement of float into integer register. */`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 12:17:57 +00:00			`#define EXTRACT_WORDS64(i, d) \`
			`do { \`
			`long int i_; \`
			`asm (MOVD " %1, %0" : "=rm" (i_) : "x" ((double) (d))); \`
			`(i) = i_; \`
			`} while (0)`
Add ceil implementation for 64-bit machines. On 64-bit machines we should not split doubles into two 32 bit integer and handle the words separately. We have wide registers. This patch implements a 64-bit ceil version. Ideally all other functions will be converted over time. 2009-08-25 01:05:48 +00:00
			`/* And the reverse. */`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 12:17:57 +00:00			`#define INSERT_WORDS64(d, i) \`
			`do { \`
			`long int i_ = i; \`
			`double d__; \`
			`asm (MOVD " %1, %0" : "=x" (d__) : "rm" (i_)); \`
			`d = d__; \`
			`} while (0)`
Add ceil implementation for 64-bit machines. On 64-bit machines we should not split doubles into two 32 bit integer and handle the words separately. We have wide registers. This patch implements a 64-bit ceil version. Ideally all other functions will be converted over time. 2009-08-25 01:05:48 +00:00
Optimize float construction/extraction on x86-64. 2009-08-24 21:52:49 +00:00			`/* Direct movement of float into integer register. */`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 12:17:57 +00:00			`#define GET_FLOAT_WORD(i, d) \`
			`do { \`
			`int i_; \`
			`asm (MOVD " %1, %0" : "=rm" (i_) : "x" ((float) (d))); \`
			`(i) = i_; \`
			`} while (0)`
Optimize float construction/extraction on x86-64. 2009-08-24 21:52:49 +00:00
			`/* And the reverse. */`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 12:17:57 +00:00			`#define SET_FLOAT_WORD(f, i) \`
			`do { \`
			`int i_ = i; \`
			`float f__; \`
			`asm (MOVD " %1, %0" : "=x" (f__) : "rm" (i_)); \`
			`f = f__; \`
			`} while (0)`
Optimize float construction/extraction on x86-64. 2009-08-24 21:52:49 +00:00
Make inline __isnan, __isinf_ns, __finite generic. For code generation to stay identical on x86_64, this requires that we define the fp word manipulation macros before including the generic header. 2012-03-09 20:38:23 +00:00			`#include_next <math_private.h>`
Optimize libm libm is now somewhat integrated with gcc's -ffinite-math-only option and lots of the wrapper functions have been optimized. 2011-10-12 15:27:51 +00:00
x86_64: Convert __ieee754_sqrt{,f,l} from macros to inlines. 2012-03-06 23:41:14 +00:00			`extern __always_inline double`
			`__ieee754_sqrt (double d)`
			`{`
			`double res;`
Use -msse2avx option for x86-64 libm functions 2012-01-28 19:48:46 +00:00			`#if defined __AVX__ \|\| defined SSE2AVX`
x86_64: Convert __ieee754_sqrt{,f,l} from macros to inlines. 2012-03-06 23:41:14 +00:00			`asm ("vsqrtsd %1, %0, %0" : "=x" (res) : "xm" (d));`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 12:17:57 +00:00			`#else`
x86_64: Convert __ieee754_sqrt{,f,l} from macros to inlines. 2012-03-06 23:41:14 +00:00			`asm ("sqrtsd %1, %0" : "=x" (res) : "xm" (d));`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 12:17:57 +00:00			`#endif`
x86_64: Convert __ieee754_sqrt{,f,l} from macros to inlines. 2012-03-06 23:41:14 +00:00			`return res;`
			`}`

			`extern __always_inline float`
			`__ieee754_sqrtf (float d)`
			`{`
			`float res;`
			`#if defined __AVX__ \|\| defined SSE2AVX`
			`asm ("vsqrtss %1, %0, %0" : "=x" (res) : "xm" (d));`
			`#else`
			`asm ("sqrtss %1, %0" : "=x" (res) : "xm" (d));`
			`#endif`
			`return res;`
			`}`

			`extern __always_inline long double`
			`__ieee754_sqrtl (long double d)`
			`{`
			`long double res;`
			`asm ("fsqrt" : "=t" (res) : "0" (d));`
			`return res;`
			`}`
Provide internal optimizations on x86-64 with SSE4.1 Provide macros so that the internal users can, if possible, directly use the new instructions. Also fix up the mathinline.h header when compiling with SSE4.1 enabled. 2011-10-17 15:23:40 +00:00
			`#ifdef __SSE4_1__`
x86_64: Convert __rint* and __floor* from macros to inlines. 2012-03-06 23:58:51 +00:00			`extern __always_inline double`
			`__rint (double d)`
			`{`
			`double res;`
			`# if defined __AVX__ \|\| defined SSE2AVX`
			`asm ("vroundsd $4, %1, %0, %0" : "=x" (res) : "xm" (d));`
			`# else`
			`asm ("roundsd $4, %1, %0" : "=x" (res) : "xm" (d));`
Provide internal optimizations on x86-64 with SSE4.1 Provide macros so that the internal users can, if possible, directly use the new instructions. Also fix up the mathinline.h header when compiling with SSE4.1 enabled. 2011-10-17 15:23:40 +00:00			`# endif`
x86_64: Convert __rint* and __floor* from macros to inlines. 2012-03-06 23:58:51 +00:00			`return res;`
			`}`

			`extern __always_inline float`
			`__rintf (float d)`
			`{`
			`float res;`
			`# if defined __AVX__ \|\| defined SSE2AVX`
			`asm ("vroundss $4, %1, %0, %0" : "=x" (res) : "xm" (d));`
			`# else`
			`asm ("roundss $4, %1, %0" : "=x" (res) : "xm" (d));`
Provide internal optimizations on x86-64 with SSE4.1 Provide macros so that the internal users can, if possible, directly use the new instructions. Also fix up the mathinline.h header when compiling with SSE4.1 enabled. 2011-10-17 15:23:40 +00:00			`# endif`
x86_64: Convert __rint* and __floor* from macros to inlines. 2012-03-06 23:58:51 +00:00			`return res;`
			`}`
Provide internal optimizations on x86-64 with SSE4.1 Provide macros so that the internal users can, if possible, directly use the new instructions. Also fix up the mathinline.h header when compiling with SSE4.1 enabled. 2011-10-17 15:23:40 +00:00
x86_64: Convert __rint* and __floor* from macros to inlines. 2012-03-06 23:58:51 +00:00			`extern __always_inline double`
			`__floor (double d)`
			`{`
			`double res;`
			`# if defined __AVX__ \|\| defined SSE2AVX`
			`asm ("vroundsd $1, %1, %0, %0" : "=x" (res) : "xm" (d));`
			`# else`
			`asm ("roundsd $1, %1, %0" : "=x" (res) : "xm" (d));`
Provide internal optimizations on x86-64 with SSE4.1 Provide macros so that the internal users can, if possible, directly use the new instructions. Also fix up the mathinline.h header when compiling with SSE4.1 enabled. 2011-10-17 15:23:40 +00:00			`# endif`
x86_64: Convert __rint* and __floor* from macros to inlines. 2012-03-06 23:58:51 +00:00			`return res;`
			`}`

			`extern __always_inline float`
			`__floorf (float d)`
			`{`
			`float res;`
			`# if defined __AVX__ \|\| defined SSE2AVX`
			`asm ("vroundss $1, %1, %0, %0" : "=x" (res) : "xm" (d));`
			`# else`
			`asm ("roundss $1, %1, %0" : "=x" (res) : "xm" (d));`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 12:17:57 +00:00			`# endif`
x86_64: Convert __rint* and __floor* from macros to inlines. 2012-03-06 23:58:51 +00:00			`return res;`
			`}`
			`#endif /* __SSE4_1__ */`
Start optimizing the use of the fenv interfaces in libm itself 2011-10-18 13:00:46 +00:00

			`/* Specialized variants of the <fenv.h> interfaces which only handle`
			`either the FPU or the SSE unit. */`
			`#undef libc_feholdexcept`
Simplify use of AVX instructions in internal math macros 2012-01-28 16:19:06 +00:00			`#define libc_feholdexcept(e) \`
Provide combined internal feholdexcept/fesetround interface 2011-10-18 13:59:04 +00:00			`do { \`
Start optimizing the use of the fenv interfaces in libm itself 2011-10-18 13:00:46 +00:00			`unsigned int mxcsr; \`
Simplify use of AVX instructions in internal math macros 2012-01-28 16:19:06 +00:00			`asm (STMXCSR " %0" : "=m" (*&mxcsr)); \`
Start optimizing the use of the fenv interfaces in libm itself 2011-10-18 13:00:46 +00:00			`(e)->__mxcsr = mxcsr; \`
			`mxcsr = (mxcsr \| 0x1f80) & ~0x3f; \`
Simplify use of AVX instructions in internal math macros 2012-01-28 16:19:06 +00:00			`asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \`
Start optimizing the use of the fenv interfaces in libm itself 2011-10-18 13:00:46 +00:00			`} while (0)`
Use new internal libc_fe* interfaces in more functions 2011-10-18 19:11:31 +00:00			`#undef libc_feholdexceptf`
			`#define libc_feholdexceptf(e) libc_feholdexcept (e)`
Start optimizing the use of the fenv interfaces in libm itself 2011-10-18 13:00:46 +00:00			`// #define libc_feholdexceptl(e) (void) feholdexcept (e)`

Provide combined internal feholdexcept/fesetround interface 2011-10-18 13:59:04 +00:00			`#undef libc_feholdexcept_setround`
Simplify use of AVX instructions in internal math macros 2012-01-28 16:19:06 +00:00			`#define libc_feholdexcept_setround(e, r) \`
Provide combined internal feholdexcept/fesetround interface 2011-10-18 13:59:04 +00:00			`do { \`
			`unsigned int mxcsr; \`
Simplify use of AVX instructions in internal math macros 2012-01-28 16:19:06 +00:00			`asm (STMXCSR " %0" : "=m" (*&mxcsr)); \`
Provide combined internal feholdexcept/fesetround interface 2011-10-18 13:59:04 +00:00			`(e)->__mxcsr = mxcsr; \`
			`mxcsr = ((mxcsr \| 0x1f80) & ~0x603f) \| ((r) << 3); \`
Simplify use of AVX instructions in internal math macros 2012-01-28 16:19:06 +00:00			`asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr)); \`
Provide combined internal feholdexcept/fesetround interface 2011-10-18 13:59:04 +00:00			`} while (0)`
Use new internal libc_fe* interfaces in more functions 2011-10-18 19:11:31 +00:00			`#undef libc_feholdexcept_setroundf`
			`#define libc_feholdexcept_setroundf(e, r) libc_feholdexcept_setround (e, r)`
Provide combined internal feholdexcept/fesetround interface 2011-10-18 13:59:04 +00:00			`// #define libc_feholdexcept_setroundl(e, r) ...`

Use new internal libc_fe* interfaces in more functions 2011-10-18 19:11:31 +00:00			`#undef libc_fetestexcept`
Simplify use of AVX instructions in internal math macros 2012-01-28 16:19:06 +00:00			`#define libc_fetestexcept(e) \`
Use -msse2avx option for x86-64 libm functions 2012-01-28 19:48:46 +00:00			`({ unsigned int mxcsr; \`
			`asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 12:17:57 +00:00			`mxcsr & (e) & FE_ALL_EXCEPT; })`
Use new internal libc_fe* interfaces in more functions 2011-10-18 19:11:31 +00:00			`#undef libc_fetestexceptf`
			`#define libc_fetestexceptf(e) libc_fetestexcept (e)`
			`// #define libc_fetestexceptl(e) fetestexcept (e)`

Start optimizing the use of the fenv interfaces in libm itself 2011-10-18 13:00:46 +00:00			`#undef libc_fesetenv`
Simplify use of AVX instructions in internal math macros 2012-01-28 16:19:06 +00:00			`#define libc_fesetenv(e) \`
			`asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr))`
Use new internal libc_fe* interfaces in more functions 2011-10-18 19:11:31 +00:00			`#undef libc_fesetenvf`
			`#define libc_fesetenvf(e) libc_fesetenv (e)`
Start optimizing the use of the fenv interfaces in libm itself 2011-10-18 13:00:46 +00:00			`// #define libc_fesetenvl(e) (void) fesetenv (e)`
Use new internal libc_fe* interfaces in more functions 2011-10-18 19:11:31 +00:00
			`#undef libc_feupdateenv`
Simplify use of AVX instructions in internal math macros 2012-01-28 16:19:06 +00:00			`#define libc_feupdateenv(e) \`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 12:17:57 +00:00			`do { \`
			`unsigned int mxcsr; \`
Simplify use of AVX instructions in internal math macros 2012-01-28 16:19:06 +00:00			`asm volatile (STMXCSR " %0" : "=m" (*&mxcsr)); \`
			`asm volatile (LDMXCSR " %0" : : "m" ((e)->__mxcsr)); \`
Use VEX encoding in inline math functions on x86-64 when possible 2011-10-25 12:17:57 +00:00			`__feraiseexcept (mxcsr & FE_ALL_EXCEPT); \`
			`} while (0)`
Use new internal libc_fe* interfaces in more functions 2011-10-18 19:11:31 +00:00			`#undef libc_feupdateenvf`
			`#define libc_feupdateenvf(e) libc_feupdateenv (e)`
			`// #define libc_feupdateenvl(e) (void) feupdateenv (e)`
x86_64: Convert __ieee754_sqrt{,f,l} from macros to inlines. 2012-03-06 23:41:14 +00:00
			`#endif /* X86_64_MATH_PRIVATE_H */`