mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-25 14:30:06 +00:00
BZ#14059: Fix AVX and FMA4 detection.
Fix AVX and FMA4 detection by following the guidelines set out by Intel and AMD for detecting these features.
This commit is contained in:
parent
0af797def3
commit
1a0994f535
30
ChangeLog
30
ChangeLog
@ -1,3 +1,33 @@
|
||||
2012-05-17 Andreas Jaeger <aj@suse.de>
|
||||
Carlos O'Donell <carlos_odonell@mentor.com>
|
||||
|
||||
[BZ #14059]
|
||||
* sysdeps/x86_64/multiarch/init-arch.h
|
||||
(bit_YMM_Usable): Rename to...
|
||||
(bit_AVX_Usable): ... this.
|
||||
(bit_FMA4_Usable): New macro.
|
||||
(bit_XMM_state): New macro.
|
||||
(bit_YMM_state): New macro.
|
||||
[__ASSEMBLER__] (index_YMM_Usable): Rename to...
|
||||
[__ASSEMBLER__] (index_AVX_Usable): ... this.
|
||||
[__ASSEMBLER__] (index_FMA4_Usable): New macro.
|
||||
(CPUID_OSXSAVE): New macro.
|
||||
(CPUID_AVX): New macro.
|
||||
(CPUID_FMA4): New macro.
|
||||
(index_YMM_Usable): Rename to...
|
||||
(index_AVX_Usable): ... this.
|
||||
(HAS_AVX): Use HAS_ARCH_FEATURE.
|
||||
(HAS_FMA4): Likewise.
|
||||
(HAS_YMM_USABLE): Remove.
|
||||
* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
|
||||
Enable AVX or FMA4 IFF YMM and XMM states are usable and the features
|
||||
are present.
|
||||
* sysdeps/x86_64/multiarch/strcmp.S: Use bit_AVX_Usable.
|
||||
* sysdeps/i386/i686/multiarch/Makefile: Add test-multiarch to tests.
|
||||
* sysdeps/x86_64/multiarch/Makefile: Likewise.
|
||||
* sysdeps/i386/i686/multiarch/test-multiarch.c: New file.
|
||||
* sysdeps/x86_64/multiarch/test-multiarch.c: New file.
|
||||
|
||||
2012-05-17 Chris Metcalf <cmetcalf@tilera.com>
|
||||
|
||||
* math/libm-test.c: Support platforms without multiple rounding modes.
|
||||
|
2
NEWS
2
NEWS
@ -27,7 +27,7 @@ Version 2.16
|
||||
13919, 13920, 13921, 13922, 13923, 13924, 13926, 13927, 13928, 13938,
|
||||
13941, 13942, 13954, 13955, 13956, 13963, 13967, 13970, 13973, 13979,
|
||||
13983, 13986, 14012, 14027, 14033, 14034, 14040, 14043, 14044, 14049,
|
||||
14053, 14055, 14064, 14080, 14083, 14103, 14104, 14109
|
||||
14053, 14055, 14059, 14064, 14080, 14083, 14103, 14104, 14109
|
||||
|
||||
* ISO C11 support:
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
ifeq ($(subdir),csu)
|
||||
aux += init-arch
|
||||
tests += test-multiarch
|
||||
gen-as-const-headers += ifunc-defines.sym
|
||||
endif
|
||||
|
||||
|
1
sysdeps/i386/i686/multiarch/test-multiarch.c
Normal file
1
sysdeps/i386/i686/multiarch/test-multiarch.c
Normal file
@ -0,0 +1 @@
|
||||
#include <sysdeps/x86_64/multiarch/test-multiarch.c>
|
@ -1,5 +1,6 @@
|
||||
ifeq ($(subdir),csu)
|
||||
aux += init-arch
|
||||
tests += test-multiarch
|
||||
gen-as-const-headers += ifunc-defines.sym
|
||||
endif
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* Initialize CPU feature data.
|
||||
This file is part of the GNU C Library.
|
||||
Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
|
||||
Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributed by Ulrich Drepper <drepper@redhat.com>.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
@ -143,16 +143,23 @@ __init_cpu_features (void)
|
||||
else
|
||||
kind = arch_kind_other;
|
||||
|
||||
if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
|
||||
/* Can we call xgetbv? */
|
||||
if (CPUID_OSXSAVE)
|
||||
{
|
||||
/* Reset the AVX bit in case OSXSAVE is disabled. */
|
||||
if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0
|
||||
&& ({ unsigned int xcrlow;
|
||||
unsigned int xcrhigh;
|
||||
asm ("xgetbv"
|
||||
: "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
|
||||
(xcrlow & 6) == 6; }))
|
||||
__cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable;
|
||||
unsigned int xcrlow;
|
||||
unsigned int xcrhigh;
|
||||
asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
|
||||
/* Are the YMM and XMM states both usable? */
|
||||
if ((xcrlow & (bit_YMM_state | bit_XMM_state)) ==
|
||||
(bit_YMM_state | bit_XMM_state))
|
||||
{
|
||||
/* Determine if AVX is usable. */
|
||||
if (CPUID_AVX)
|
||||
__cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable;
|
||||
/* Determine if FMA4 is usable. */
|
||||
if (CPUID_FMA4)
|
||||
__cpu_features.feature[index_FMA4_Usable] |= bit_FMA4_Usable;
|
||||
}
|
||||
}
|
||||
|
||||
__cpu_features.family = family;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* This file is part of the GNU C Library.
|
||||
Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
|
||||
Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
@ -21,8 +21,10 @@
|
||||
#define bit_Prefer_SSE_for_memop (1 << 3)
|
||||
#define bit_Fast_Unaligned_Load (1 << 4)
|
||||
#define bit_Prefer_PMINUB_for_stringop (1 << 5)
|
||||
#define bit_YMM_Usable (1 << 6)
|
||||
#define bit_AVX_Usable (1 << 6)
|
||||
#define bit_FMA4_Usable (1 << 7)
|
||||
|
||||
/* CPUID Feature flags. */
|
||||
#define bit_SSE2 (1 << 26)
|
||||
#define bit_SSSE3 (1 << 9)
|
||||
#define bit_SSE4_1 (1 << 19)
|
||||
@ -33,6 +35,10 @@
|
||||
#define bit_FMA (1 << 12)
|
||||
#define bit_FMA4 (1 << 16)
|
||||
|
||||
/* XCR0 Feature flags. */
|
||||
#define bit_XMM_state (1 << 1)
|
||||
/* XCR0 bit 2: YMM state.  Spelled (1 << N) like the other bit_* masks. */
#define bit_YMM_state (1 << 2)
|
||||
|
||||
#ifdef __ASSEMBLER__
|
||||
|
||||
# include <ifunc-defines.h>
|
||||
@ -49,7 +55,8 @@
|
||||
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE
|
||||
# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
|
||||
# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
|
||||
# define index_YMM_Usable FEATURE_INDEX_1*FEATURE_SIZE
|
||||
# define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE
|
||||
# define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE
|
||||
|
||||
#else /* __ASSEMBLER__ */
|
||||
|
||||
@ -113,35 +120,45 @@ extern const struct cpu_features *__get_cpu_features (void)
|
||||
|
||||
/* Following are the feature tests used throughout libc. */
|
||||
|
||||
/* CPUID_* evaluates to true if the feature flag is enabled.
|
||||
We always use &__cpu_features because the HAS_CPUID_* macros
|
||||
are called only within __init_cpu_features, where we can't
|
||||
call __get_cpu_features without infinite recursion. */
|
||||
# define HAS_CPUID_FLAG(idx, reg, bit) \
|
||||
(((&__cpu_features)->cpuid[idx].reg & (bit)) != 0)
|
||||
|
||||
# define CPUID_OSXSAVE \
|
||||
HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_OSXSAVE)
|
||||
# define CPUID_AVX \
|
||||
HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
|
||||
# define CPUID_FMA4 \
|
||||
HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
|
||||
|
||||
/* HAS_* evaluates to true if we may use the feature at runtime. */
|
||||
# define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2)
|
||||
# define HAS_POPCOUNT HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_POPCOUNT)
|
||||
# define HAS_SSSE3 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSSE3)
|
||||
# define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
|
||||
# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
|
||||
# define HAS_FMA HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_FMA)
|
||||
# define HAS_AVX HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
|
||||
# define HAS_FMA4 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
|
||||
|
||||
# define index_Fast_Rep_String FEATURE_INDEX_1
|
||||
# define index_Fast_Copy_Backward FEATURE_INDEX_1
|
||||
# define index_Slow_BSF FEATURE_INDEX_1
|
||||
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1
|
||||
# define index_Fast_Unaligned_Load FEATURE_INDEX_1
|
||||
# define index_YMM_Usable FEATURE_INDEX_1
|
||||
# define index_AVX_Usable FEATURE_INDEX_1
|
||||
# define index_FMA4_Usable FEATURE_INDEX_1
|
||||
|
||||
# define HAS_ARCH_FEATURE(name) \
|
||||
((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
|
||||
|
||||
# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String)
|
||||
|
||||
# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward)
|
||||
|
||||
# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF)
|
||||
|
||||
# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop)
|
||||
|
||||
# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
|
||||
|
||||
# define HAS_YMM_USABLE HAS_ARCH_FEATURE (YMM_Usable)
|
||||
# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String)
|
||||
# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward)
|
||||
# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF)
|
||||
# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop)
|
||||
# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
|
||||
# define HAS_AVX HAS_ARCH_FEATURE (AVX_Usable)
|
||||
# define HAS_FMA4 HAS_ARCH_FEATURE (FMA4_Usable)
|
||||
|
||||
#endif /* __ASSEMBLER__ */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* strcmp with SSE4.2
|
||||
Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
|
||||
Copyright (C) 2009-2012 Free Software Foundation, Inc.
|
||||
Contributed by Intel Corporation.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
@ -83,6 +83,7 @@
|
||||
.text
|
||||
ENTRY(STRCMP)
|
||||
.type STRCMP, @gnu_indirect_function
|
||||
/* Manually inlined call to __get_cpu_features. */
|
||||
cmpl $0, __cpu_features+KIND_OFFSET(%rip)
|
||||
jne 1f
|
||||
call __init_cpu_features
|
||||
@ -100,13 +101,14 @@ END(STRCMP)
|
||||
# ifdef USE_AS_STRCASECMP_L
|
||||
ENTRY(__strcasecmp)
|
||||
.type __strcasecmp, @gnu_indirect_function
|
||||
/* Manually inlined call to __get_cpu_features. */
|
||||
cmpl $0, __cpu_features+KIND_OFFSET(%rip)
|
||||
jne 1f
|
||||
call __init_cpu_features
|
||||
1:
|
||||
# ifdef HAVE_AVX_SUPPORT
|
||||
leaq __strcasecmp_avx(%rip), %rax
|
||||
testl $bit_AVX, __cpu_features+CPUID_OFFSET+index_AVX(%rip)
|
||||
testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
|
||||
jnz 2f
|
||||
# endif
|
||||
leaq __strcasecmp_sse42(%rip), %rax
|
||||
@ -123,13 +125,14 @@ weak_alias (__strcasecmp, strcasecmp)
|
||||
# ifdef USE_AS_STRNCASECMP_L
|
||||
ENTRY(__strncasecmp)
|
||||
.type __strncasecmp, @gnu_indirect_function
|
||||
/* Manually inlined call to __get_cpu_features. */
|
||||
cmpl $0, __cpu_features+KIND_OFFSET(%rip)
|
||||
jne 1f
|
||||
call __init_cpu_features
|
||||
1:
|
||||
# ifdef HAVE_AVX_SUPPORT
|
||||
leaq __strncasecmp_avx(%rip), %rax
|
||||
testl $bit_AVX, __cpu_features+CPUID_OFFSET+index_AVX(%rip)
|
||||
testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
|
||||
jnz 2f
|
||||
# endif
|
||||
leaq __strncasecmp_sse42(%rip), %rax
|
||||
|
90
sysdeps/x86_64/multiarch/test-multiarch.c
Normal file
90
sysdeps/x86_64/multiarch/test-multiarch.c
Normal file
@ -0,0 +1,90 @@
|
||||
/* Test CPU feature data.
|
||||
This file is part of the GNU C Library.
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <init-arch.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
static char *cpu_flags;
|
||||
|
||||
/* Search for flags in /proc/cpuinfo and store line
|
||||
in cpu_flags. */
|
||||
void
|
||||
get_cpuinfo (void)
|
||||
{
|
||||
FILE *f;
|
||||
char *line = NULL;
|
||||
size_t len = 0;
|
||||
ssize_t read;
|
||||
|
||||
f = fopen ("/proc/cpuinfo", "r");
|
||||
if (f == NULL)
|
||||
{
|
||||
printf ("cannot open /proc/cpuinfo");
|
||||
exit (1);
|
||||
}
|
||||
|
||||
while ((read = getline (&line, &len, f)) != -1)
|
||||
{
|
||||
if (strncmp (line, "flags", 5) == 0)
|
||||
{
|
||||
cpu_flags = strdup (line);
|
||||
break;
|
||||
}
|
||||
}
|
||||
fclose (f);
|
||||
free (line);
|
||||
}
|
||||
|
||||
int
|
||||
check_proc (const char *proc_name, int flag, const char *name)
|
||||
{
|
||||
int found = 0;
|
||||
|
||||
printf ("Checking %s:\n", name);
|
||||
printf (" init-arch %d\n", flag);
|
||||
if (strstr (cpu_flags, proc_name) != NULL)
|
||||
found = 1;
|
||||
printf (" cpuinfo (%s) %d\n", proc_name, found);
|
||||
|
||||
if (found != flag)
|
||||
printf (" *** failure ***\n");
|
||||
|
||||
return (found != flag);
|
||||
}
|
||||
|
||||
static int
|
||||
do_test (int argc, char **argv)
|
||||
{
|
||||
int fails;
|
||||
|
||||
get_cpuinfo ();
|
||||
fails = check_proc ("avx", HAS_AVX, "HAS_AVX");
|
||||
fails += check_proc ("fma4", HAS_FMA4, "HAS_FMA4");
|
||||
fails += check_proc ("sse4_2", HAS_SSE4_2, "HAS_SSE4_2");
|
||||
fails += check_proc ("sse4_1", HAS_SSE4_1, "HAS_SSE4_1");
|
||||
fails += check_proc ("ssse3", HAS_SSSE3, "HAS_SSSE3");
|
||||
fails += check_proc ("popcnt", HAS_POPCOUNT, "HAS_POPCOUNT");
|
||||
|
||||
printf ("%d differences between /proc/cpuinfo and glibc code.\n", fails);
|
||||
|
||||
return (fails != 0);
|
||||
}
|
||||
|
||||
#include "../../../test-skeleton.c"
|
Loading…
Reference in New Issue
Block a user