Really fix AVX tests

There is no problem with strcmp, it doesn't use the YMM registers.
The math routines might since gcc perhaps generates such code.
Introduce bit_YMM_USBALE and use it in the math routines.
This commit is contained in:
Ulrich Drepper 2012-01-26 09:45:54 -05:00
parent afc5ed09cb
commit 08cf777f9e
9 changed files with 41 additions and 28 deletions

View File

@ -2,8 +2,17 @@
[BZ #13583]
* sysdeps/x86_64/multiarch/init-arch.h: Define bit_OSXSAVE.
Clean up HAS_* macros.
* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): If
bit_AVX is set also check OSXAVE/XCR0 and reset bit_AVX if necessary.
bit_AVX is set also check OSXAVE/XCR0 and set bit_YMM_Usable if
possible.
* sysdeps/x86_64/fpu/multiarch/e_atan2.c: Use HAS_YMM_USABLE, not
HAS_AVX.
* sysdeps/x86_64/fpu/multiarch/e_exp.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/e_log.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/s_atan.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/s_sin.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/s_tan.c: Likewise.
2012-01-25 Joseph Myers <joseph@codesourcery.com>

View File

@ -14,7 +14,7 @@ extern double __ieee754_atan2_fma4 (double, double);
libm_ifunc (__ieee754_atan2,
HAS_FMA4 ? __ieee754_atan2_fma4
: (HAS_AVX ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
: (HAS_YMM_USABLE ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
strong_alias (__ieee754_atan2, __atan2_finite)
# define __ieee754_atan2 __ieee754_atan2_sse2

View File

@ -14,7 +14,7 @@ extern double __ieee754_exp_fma4 (double);
libm_ifunc (__ieee754_exp,
HAS_FMA4 ? __ieee754_exp_fma4
: (HAS_AVX ? __ieee754_exp_avx : __ieee754_exp_sse2));
: (HAS_YMM_USABLE ? __ieee754_exp_avx : __ieee754_exp_sse2));
strong_alias (__ieee754_exp, __exp_finite)
# define __ieee754_exp __ieee754_exp_sse2

View File

@ -14,7 +14,7 @@ extern double __ieee754_log_fma4 (double);
libm_ifunc (__ieee754_log,
HAS_FMA4 ? __ieee754_log_fma4
: (HAS_AVX ? __ieee754_log_avx
: (HAS_YMM_USABLE ? __ieee754_log_avx
: __ieee754_log_sse2));
strong_alias (__ieee754_log, __log_finite)

View File

@ -12,7 +12,8 @@ extern double __atan_fma4 (double);
# define __atan_fma4 ((void *) 0)
# endif
libm_ifunc (atan, HAS_FMA4 ? __atan_fma4 : HAS_AVX ? __atan_avx : __atan_sse2);
libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 :
HAS_YMM_USABLE ? __atan_avx : __atan_sse2));
# define atan __atan_sse2
#endif

View File

@ -17,10 +17,12 @@ extern double __sin_fma4 (double);
# define __sin_fma4 ((void *) 0)
# endif
libm_ifunc (__cos, HAS_FMA4 ? __cos_fma4 : HAS_AVX ? __cos_avx : __cos_sse2);
libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 :
HAS_YMM_USABLE ? __cos_avx : __cos_sse2));
weak_alias (__cos, cos)
libm_ifunc (__sin, HAS_FMA4 ? __sin_fma4 : HAS_AVX ? __sin_avx : __sin_sse2);
libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 :
HAS_YMM_USABLE ? __sin_avx : __sin_sse2));
weak_alias (__sin, sin)
# define __cos __cos_sse2

View File

@ -12,7 +12,8 @@ extern double __tan_fma4 (double);
# define __tan_fma4 ((void *) 0)
# endif
libm_ifunc (tan, HAS_FMA4 ? __tan_fma4 : HAS_AVX ? __tan_avx : __tan_sse2);
libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 :
HAS_YMM_USABLE ? __tan_avx : __tan_sse2));
# define tan __tan_sse2
#endif

View File

@ -147,13 +147,13 @@ __init_cpu_features (void)
if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
{
/* Reset the AVX bit in case OSXSAVE is disabled. */
if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) == 0
|| ({ unsigned int xcrlow;
unsigned int xcrhigh;
asm ("xgetbv"
: "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
(xcrlow & 6) != 6; }))
__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx &= ~bit_AVX;
if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0
&& ({ unsigned int xcrlow;
unsigned int xcrhigh;
asm ("xgetbv"
: "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
(xcrlow & 6) == 6; }))
__cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable;
}
__cpu_features.family = family;

View File

@ -22,6 +22,7 @@
#define bit_Prefer_SSE_for_memop (1 << 3)
#define bit_Fast_Unaligned_Load (1 << 4)
#define bit_Prefer_PMINUB_for_stringop (1 << 5)
#define bit_YMM_Usable (1 << 6)
#define bit_SSE2 (1 << 26)
#define bit_SSSE3 (1 << 9)
@ -49,6 +50,7 @@
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE
# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
# define index_YMM_Usable FEATURE_INDEX_1*FEATURE_SIZE
#else /* __ASSEMBLER__ */
@ -93,7 +95,7 @@ extern struct cpu_features
extern void __init_cpu_features (void) attribute_hidden;
#define INIT_ARCH()\
# define INIT_ARCH() \
do \
if (__cpu_features.kind == arch_kind_unknown) \
__init_cpu_features (); \
@ -126,23 +128,21 @@ extern const struct cpu_features *__get_cpu_features (void)
# define index_Slow_BSF FEATURE_INDEX_1
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1
# define index_Fast_Unaligned_Load FEATURE_INDEX_1
# define index_YMM_Usable FEATURE_INDEX_1
#define HAS_ARCH_FEATURE(idx, bit) \
((__get_cpu_features ()->feature[idx] & (bit)) != 0)
# define HAS_ARCH_FEATURE(name) \
((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
#define HAS_FAST_REP_STRING \
HAS_ARCH_FEATURE (index_Fast_Rep_String, bit_Fast_Rep_String)
# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String)
#define HAS_FAST_COPY_BACKWARD \
HAS_ARCH_FEATURE (index_Fast_Copy_Backward, bit_Fast_Copy_Backward)
# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward)
#define HAS_SLOW_BSF \
HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF)
# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF)
#define HAS_PREFER_SSE_FOR_MEMOP \
HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop)
# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop)
#define HAS_FAST_UNALIGNED_LOAD \
HAS_ARCH_FEATURE (index_Fast_Unaligned_Load, bit_Fast_Unaligned_Load)
# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
# define HAS_YMM_USABLE HAS_ARCH_FEATURE (YMM_Usable)
#endif /* __ASSEMBLER__ */