mirror of
https://sourceware.org/git/glibc.git
synced 2025-01-08 18:30:18 +00:00
Enable AVX2 optimized memset only if -mavx2 works
* config.h.in (HAVE_AVX2_SUPPORT): New #undef.
* sysdeps/i386/configure.ac: Set HAVE_AVX2_SUPPORT and config-cflags-avx2.
* sysdeps/x86_64/configure.ac: Likewise.
* sysdeps/i386/configure: Regenerated.
* sysdeps/x86_64/configure: Likewise.
* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add memset-avx2 only if config-cflags-avx2 is yes.
* sysdeps/x86_64/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Tests for memset_chk and memset only if HAVE_AVX2_SUPPORT is defined.
* sysdeps/x86_64/multiarch/memset.S: Define multiple versions only if HAVE_AVX2_SUPPORT is defined.
* sysdeps/x86_64/multiarch/memset_chk.S: Likewise.
This commit is contained in:
parent
f6c44d4751
commit
f2fef657d8
17
ChangeLog
17
ChangeLog
@ -1,3 +1,20 @@
|
|||||||
|
2014-07-14 H.J. Lu <hongjiu.lu@intel.com>
|
||||||
|
|
||||||
|
* config.h.in (HAVE_AVX2_SUPPORT): New #undef.
|
||||||
|
* sysdeps/i386/configure.ac: Set HAVE_AVX2_SUPPORT and
|
||||||
|
config-cflags-avx2.
|
||||||
|
* sysdeps/x86_64/configure.ac: Likewise.
|
||||||
|
* sysdeps/i386/configure: Regenerated.
|
||||||
|
* sysdeps/x86_64/configure: Likewise.
|
||||||
|
* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
|
||||||
|
memset-avx2 only if config-cflags-avx2 is yes.
|
||||||
|
* sysdeps/x86_64/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list):
|
||||||
|
Tests for memset_chk and memset only if HAVE_AVX2_SUPPORT is
|
||||||
|
defined.
|
||||||
|
* sysdeps/x86_64/multiarch/memset.S: Define multiple versions
|
||||||
|
only if HAVE_AVX2_SUPPORT is defined.
|
||||||
|
* sysdeps/x86_64/multiarch/memset_chk.S: Likewise.
|
||||||
|
|
||||||
2014-07-14 Alan Modra <amodra@gmail.com>
|
2014-07-14 Alan Modra <amodra@gmail.com>
|
||||||
|
|
||||||
[BZ #17153]
|
[BZ #17153]
|
||||||
|
@ -103,6 +103,9 @@
|
|||||||
/* Define if gcc supports FMA4. */
|
/* Define if gcc supports FMA4. */
|
||||||
#undef HAVE_FMA4_SUPPORT
|
#undef HAVE_FMA4_SUPPORT
|
||||||
|
|
||||||
|
/* Define if gcc supports AVX2. */
|
||||||
|
#undef HAVE_AVX2_SUPPORT
|
||||||
|
|
||||||
/* Define if the compiler's exception support is based on libunwind. */
|
/* Define if the compiler's exception support is based on libunwind. */
|
||||||
#undef HAVE_CC_WITH_LIBUNWIND
|
#undef HAVE_CC_WITH_LIBUNWIND
|
||||||
|
|
||||||
|
26
sysdeps/i386/configure
vendored
26
sysdeps/i386/configure
vendored
@ -240,6 +240,32 @@ $as_echo "$libc_cv_cc_novzeroupper" >&6; }
|
|||||||
config_vars="$config_vars
|
config_vars="$config_vars
|
||||||
config-cflags-novzeroupper = $libc_cv_cc_novzeroupper"
|
config-cflags-novzeroupper = $libc_cv_cc_novzeroupper"
|
||||||
|
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX2 support" >&5
|
||||||
|
$as_echo_n "checking for AVX2 support... " >&6; }
|
||||||
|
if ${libc_cv_cc_avx2+:} false; then :
|
||||||
|
$as_echo_n "(cached) " >&6
|
||||||
|
else
|
||||||
|
if { ac_try='${CC-cc} -mavx2 -xc /dev/null -S -o /dev/null'
|
||||||
|
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
||||||
|
(eval $ac_try) 2>&5
|
||||||
|
ac_status=$?
|
||||||
|
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||||||
|
test $ac_status = 0; }; }; then :
|
||||||
|
libc_cv_cc_avx2=yes
|
||||||
|
else
|
||||||
|
libc_cv_cc_avx2=no
|
||||||
|
fi
|
||||||
|
|
||||||
|
fi
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_avx2" >&5
|
||||||
|
$as_echo "$libc_cv_cc_avx2" >&6; }
|
||||||
|
if test $libc_cv_cc_avx2 = yes; then
|
||||||
|
$as_echo "#define HAVE_AVX2_SUPPORT 1" >>confdefs.h
|
||||||
|
|
||||||
|
fi
|
||||||
|
config_vars="$config_vars
|
||||||
|
config-cflags-avx2 = $libc_cv_cc_avx2"
|
||||||
|
|
||||||
$as_echo "#define USE_REGPARMS 1" >>confdefs.h
|
$as_echo "#define USE_REGPARMS 1" >>confdefs.h
|
||||||
|
|
||||||
|
|
||||||
|
@ -88,6 +88,15 @@ LIBC_TRY_CC_OPTION([-mno-vzeroupper],
|
|||||||
])
|
])
|
||||||
LIBC_CONFIG_VAR([config-cflags-novzeroupper], [$libc_cv_cc_novzeroupper])
|
LIBC_CONFIG_VAR([config-cflags-novzeroupper], [$libc_cv_cc_novzeroupper])
|
||||||
|
|
||||||
|
dnl Check if -mavx2 works.
|
||||||
|
AC_CACHE_CHECK(for AVX2 support, libc_cv_cc_avx2, [dnl
|
||||||
|
LIBC_TRY_CC_OPTION([-mavx2], [libc_cv_cc_avx2=yes], [libc_cv_cc_avx2=no])
|
||||||
|
])
|
||||||
|
if test $libc_cv_cc_avx2 = yes; then
|
||||||
|
AC_DEFINE(HAVE_AVX2_SUPPORT)
|
||||||
|
fi
|
||||||
|
LIBC_CONFIG_VAR([config-cflags-avx2], [$libc_cv_cc_avx2])
|
||||||
|
|
||||||
AC_DEFINE(USE_REGPARMS)
|
AC_DEFINE(USE_REGPARMS)
|
||||||
|
|
||||||
dnl It is always possible to access static and hidden symbols in an
|
dnl It is always possible to access static and hidden symbols in an
|
||||||
|
26
sysdeps/x86_64/configure
vendored
26
sysdeps/x86_64/configure
vendored
@ -249,6 +249,32 @@ if test $libc_cv_asm_mpx == yes; then
|
|||||||
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX2 support" >&5
|
||||||
|
$as_echo_n "checking for AVX2 support... " >&6; }
|
||||||
|
if ${libc_cv_cc_avx2+:} false; then :
|
||||||
|
$as_echo_n "(cached) " >&6
|
||||||
|
else
|
||||||
|
if { ac_try='${CC-cc} -mavx2 -xc /dev/null -S -o /dev/null'
|
||||||
|
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
||||||
|
(eval $ac_try) 2>&5
|
||||||
|
ac_status=$?
|
||||||
|
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||||||
|
test $ac_status = 0; }; }; then :
|
||||||
|
libc_cv_cc_avx2=yes
|
||||||
|
else
|
||||||
|
libc_cv_cc_avx2=no
|
||||||
|
fi
|
||||||
|
|
||||||
|
fi
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_cc_avx2" >&5
|
||||||
|
$as_echo "$libc_cv_cc_avx2" >&6; }
|
||||||
|
if test $libc_cv_cc_avx2 = yes; then
|
||||||
|
$as_echo "#define HAVE_AVX2_SUPPORT 1" >>confdefs.h
|
||||||
|
|
||||||
|
fi
|
||||||
|
config_vars="$config_vars
|
||||||
|
config-cflags-avx2 = $libc_cv_cc_avx2"
|
||||||
|
|
||||||
$as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h
|
$as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h
|
||||||
|
|
||||||
# work around problem with autoconf and empty lines at the end of files
|
# work around problem with autoconf and empty lines at the end of files
|
||||||
|
@ -90,6 +90,15 @@ if test $libc_cv_asm_mpx == yes; then
|
|||||||
AC_DEFINE(HAVE_MPX_SUPPORT)
|
AC_DEFINE(HAVE_MPX_SUPPORT)
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
dnl Check if -mavx2 works.
|
||||||
|
AC_CACHE_CHECK(for AVX2 support, libc_cv_cc_avx2, [dnl
|
||||||
|
LIBC_TRY_CC_OPTION([-mavx2], [libc_cv_cc_avx2=yes], [libc_cv_cc_avx2=no])
|
||||||
|
])
|
||||||
|
if test $libc_cv_cc_avx2 = yes; then
|
||||||
|
AC_DEFINE(HAVE_AVX2_SUPPORT)
|
||||||
|
fi
|
||||||
|
LIBC_CONFIG_VAR([config-cflags-avx2], [$libc_cv_cc_avx2])
|
||||||
|
|
||||||
dnl It is always possible to access static and hidden symbols in an
|
dnl It is always possible to access static and hidden symbols in an
|
||||||
dnl position independent way.
|
dnl position independent way.
|
||||||
AC_DEFINE(PI_STATIC_AND_HIDDEN)
|
AC_DEFINE(PI_STATIC_AND_HIDDEN)
|
||||||
|
@ -17,8 +17,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
|
|||||||
strcpy-sse2-unaligned strncpy-sse2-unaligned \
|
strcpy-sse2-unaligned strncpy-sse2-unaligned \
|
||||||
stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
|
stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
|
||||||
strcat-sse2-unaligned strncat-sse2-unaligned \
|
strcat-sse2-unaligned strncat-sse2-unaligned \
|
||||||
strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned \
|
strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned
|
||||||
memset-avx2
|
|
||||||
|
|
||||||
ifeq (yes,$(config-cflags-sse4))
|
ifeq (yes,$(config-cflags-sse4))
|
||||||
sysdep_routines += strcspn-c strpbrk-c strspn-c varshift
|
sysdep_routines += strcspn-c strpbrk-c strspn-c varshift
|
||||||
@ -27,6 +26,10 @@ CFLAGS-strcspn-c.c += -msse4
|
|||||||
CFLAGS-strpbrk-c.c += -msse4
|
CFLAGS-strpbrk-c.c += -msse4
|
||||||
CFLAGS-strspn-c.c += -msse4
|
CFLAGS-strspn-c.c += -msse4
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq (yes,$(config-cflags-avx2))
|
||||||
|
sysdep_routines += memset-avx2
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(subdir),wcsmbs)
|
ifeq ($(subdir),wcsmbs)
|
||||||
|
@ -61,6 +61,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|||||||
__memmove_ssse3)
|
__memmove_ssse3)
|
||||||
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2))
|
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2))
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX2_SUPPORT
|
||||||
/* Support sysdeps/x86_64/multiarch/memset_chk.S. */
|
/* Support sysdeps/x86_64/multiarch/memset_chk.S. */
|
||||||
IFUNC_IMPL (i, name, __memset_chk,
|
IFUNC_IMPL (i, name, __memset_chk,
|
||||||
IFUNC_IMPL_ADD (array, i, __memset_chk, 1, __memset_chk_sse2)
|
IFUNC_IMPL_ADD (array, i, __memset_chk, 1, __memset_chk_sse2)
|
||||||
@ -71,6 +72,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|||||||
IFUNC_IMPL (i, name, memset,
|
IFUNC_IMPL (i, name, memset,
|
||||||
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_sse2)
|
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_sse2)
|
||||||
IFUNC_IMPL_ADD (array, i, memset, HAS_AVX2, __memset_avx2))
|
IFUNC_IMPL_ADD (array, i, memset, HAS_AVX2, __memset_avx2))
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Support sysdeps/x86_64/multiarch/stpncpy.S. */
|
/* Support sysdeps/x86_64/multiarch/stpncpy.S. */
|
||||||
IFUNC_IMPL (i, name, stpncpy,
|
IFUNC_IMPL (i, name, stpncpy,
|
||||||
|
@ -17,12 +17,13 @@
|
|||||||
License along with the GNU C Library; if not, see
|
License along with the GNU C Library; if not, see
|
||||||
<http://www.gnu.org/licenses/>. */
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#ifdef HAVE_AVX2_SUPPORT
|
||||||
#include <sysdep.h>
|
#include <sysdep.h>
|
||||||
#include <shlib-compat.h>
|
#include <shlib-compat.h>
|
||||||
#include <init-arch.h>
|
#include <init-arch.h>
|
||||||
|
|
||||||
/* Define multiple versions only for the definition in lib. */
|
/* Define multiple versions only for the definition in lib. */
|
||||||
#ifndef NOT_IN_libc
|
# ifndef NOT_IN_libc
|
||||||
ENTRY(memset)
|
ENTRY(memset)
|
||||||
.type memset, @gnu_indirect_function
|
.type memset, @gnu_indirect_function
|
||||||
cmpl $0, __cpu_features+KIND_OFFSET(%rip)
|
cmpl $0, __cpu_features+KIND_OFFSET(%rip)
|
||||||
@ -34,26 +35,27 @@ ENTRY(memset)
|
|||||||
leaq __memset_avx2(%rip), %rax
|
leaq __memset_avx2(%rip), %rax
|
||||||
2: ret
|
2: ret
|
||||||
END(memset)
|
END(memset)
|
||||||
#endif
|
# endif
|
||||||
|
|
||||||
#if !defined NOT_IN_libc
|
# if !defined NOT_IN_libc
|
||||||
# undef memset
|
# undef memset
|
||||||
# define memset __memset_sse2
|
# define memset __memset_sse2
|
||||||
|
|
||||||
# undef __memset_chk
|
# undef __memset_chk
|
||||||
# define __memset_chk __memset_chk_sse2
|
# define __memset_chk __memset_chk_sse2
|
||||||
|
|
||||||
# ifdef SHARED
|
# ifdef SHARED
|
||||||
# undef libc_hidden_builtin_def
|
# undef libc_hidden_builtin_def
|
||||||
/* It doesn't make sense to send libc-internal memset calls through a PLT.
|
/* It doesn't make sense to send libc-internal memset calls through a PLT.
|
||||||
The speedup we get from using GPR instruction is likely eaten away
|
The speedup we get from using GPR instruction is likely eaten away
|
||||||
by the indirect call in the PLT. */
|
by the indirect call in the PLT. */
|
||||||
# define libc_hidden_builtin_def(name) \
|
# define libc_hidden_builtin_def(name) \
|
||||||
.globl __GI_memset; __GI_memset = __memset_sse2
|
.globl __GI_memset; __GI_memset = __memset_sse2
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
# undef strong_alias
|
# undef strong_alias
|
||||||
# define strong_alias(original, alias)
|
# define strong_alias(original, alias)
|
||||||
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "../memset.S"
|
#include "../memset.S"
|
||||||
|
@ -22,7 +22,7 @@
|
|||||||
|
|
||||||
/* Define multiple versions only for the definition in lib. */
|
/* Define multiple versions only for the definition in lib. */
|
||||||
#ifndef NOT_IN_libc
|
#ifndef NOT_IN_libc
|
||||||
# ifdef SHARED
|
# if defined SHARED && defined HAVE_AVX2_SUPPORT
|
||||||
ENTRY(__memset_chk)
|
ENTRY(__memset_chk)
|
||||||
.type __memset_chk, @gnu_indirect_function
|
.type __memset_chk, @gnu_indirect_function
|
||||||
cmpl $0, __cpu_features+KIND_OFFSET(%rip)
|
cmpl $0, __cpu_features+KIND_OFFSET(%rip)
|
||||||
|
Loading…
Reference in New Issue
Block a user