mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-09 23:00:07 +00:00
x86-64: Optimize wmemset with SSE2/AVX2/AVX512
The difference between memset and wmemset is byte vs int. Add stubs to SSE2/AVX2/AVX512 memset for wmemset with updated constant and size: SSE2 wmemset: shl $0x2,%rdx movd %esi,%xmm0 mov %rdi,%rax pshufd $0x0,%xmm0,%xmm0 jmp entry_from_wmemset SSE2 memset: movd %esi,%xmm0 mov %rdi,%rax punpcklbw %xmm0,%xmm0 punpcklwd %xmm0,%xmm0 pshufd $0x0,%xmm0,%xmm0 entry_from_wmemset: Since the ERMS versions of wmemset require "rep stosl" instead of "rep stosb", only the vector store stubs of SSE2/AVX2/AVX512 wmemset are added. The SSE2 wmemset is about 3X faster and the AVX2 wmemset is about 6X faster on Haswell. * include/wchar.h (__wmemset_chk): New. * sysdeps/x86_64/memset.S (VDUP_TO_VEC0_AND_SET_RETURN): Renamed to MEMSET_VDUP_TO_VEC0_AND_SET_RETURN. (WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN): New. (WMEMSET_CHK_SYMBOL): Likewise. (WMEMSET_SYMBOL): Likewise. (__wmemset): Add hidden definition. (wmemset): Add weak hidden definition. * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add wmemset_chk-nonshared. * sysdeps/x86_64/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Add __wmemset_sse2_unaligned, __wmemset_avx2_unaligned, __wmemset_avx512_unaligned, __wmemset_chk_sse2_unaligned, __wmemset_chk_avx2_unaligned and __wmemset_chk_avx512_unaligned. * sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S (VDUP_TO_VEC0_AND_SET_RETURN): Renamed to ... (MEMSET_VDUP_TO_VEC0_AND_SET_RETURN): This. (WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN): New. (WMEMSET_SYMBOL): Likewise. * sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S (VDUP_TO_VEC0_AND_SET_RETURN): Renamed to ... (MEMSET_VDUP_TO_VEC0_AND_SET_RETURN): This. (WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN): New. (WMEMSET_SYMBOL): Likewise. * sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S: Updated. (WMEMSET_CHK_SYMBOL): New. (WMEMSET_CHK_SYMBOL (__wmemset_chk, unaligned)): Likewise. (WMEMSET_SYMBOL (__wmemset, unaligned)): Likewise. * sysdeps/x86_64/multiarch/memset.S (WMEMSET_SYMBOL): New. 
(libc_hidden_builtin_def): Also define __GI_wmemset and __GI___wmemset. (weak_alias): New. * sysdeps/x86_64/multiarch/wmemset.c: New file. * sysdeps/x86_64/multiarch/wmemset.h: Likewise. * sysdeps/x86_64/multiarch/wmemset_chk-nonshared.S: Likewise. * sysdeps/x86_64/multiarch/wmemset_chk.c: Likewise. * sysdeps/x86_64/wmemset.S: Likewise. * sysdeps/x86_64/wmemset_chk.S: Likewise.
This commit is contained in:
parent
9cd30491dd
commit
ef9c4cb6c7
42
ChangeLog
42
ChangeLog
@ -1,3 +1,45 @@
|
|||||||
|
2017-06-05 H.J. Lu <hongjiu.lu@intel.com>
|
||||||
|
|
||||||
|
* include/wchar.h (__wmemset_chk): New.
|
||||||
|
* sysdeps/x86_64/memset.S (VDUP_TO_VEC0_AND_SET_RETURN): Renamed
|
||||||
|
to MEMSET_VDUP_TO_VEC0_AND_SET_RETURN.
|
||||||
|
(WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN): New.
|
||||||
|
(WMEMSET_CHK_SYMBOL): Likewise.
|
||||||
|
(WMEMSET_SYMBOL): Likewise.
|
||||||
|
(__wmemset): Add hidden definition.
|
||||||
|
(wmemset): Add weak hidden definition.
|
||||||
|
* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
|
||||||
|
wmemset_chk-nonshared.
|
||||||
|
* sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||||||
|
(__libc_ifunc_impl_list): Add __wmemset_sse2_unaligned,
|
||||||
|
__wmemset_avx2_unaligned, __wmemset_avx512_unaligned,
|
||||||
|
__wmemset_chk_sse2_unaligned, __wmemset_chk_avx2_unaligned
|
||||||
|
and __wmemset_chk_avx512_unaligned.
|
||||||
|
* sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
|
||||||
|
(VDUP_TO_VEC0_AND_SET_RETURN): Renamed to ...
|
||||||
|
(MEMSET_VDUP_TO_VEC0_AND_SET_RETURN): This.
|
||||||
|
(WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN): New.
|
||||||
|
(WMEMSET_SYMBOL): Likewise.
|
||||||
|
* sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
|
||||||
|
(VDUP_TO_VEC0_AND_SET_RETURN): Renamed to ...
|
||||||
|
(MEMSET_VDUP_TO_VEC0_AND_SET_RETURN): This.
|
||||||
|
(WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN): New.
|
||||||
|
(WMEMSET_SYMBOL): Likewise.
|
||||||
|
* sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S: Updated.
|
||||||
|
(WMEMSET_CHK_SYMBOL): New.
|
||||||
|
(WMEMSET_CHK_SYMBOL (__wmemset_chk, unaligned)): Likewise.
|
||||||
|
(WMEMSET_SYMBOL (__wmemset, unaligned)): Likewise.
|
||||||
|
* sysdeps/x86_64/multiarch/memset.S (WMEMSET_SYMBOL): New.
|
||||||
|
(libc_hidden_builtin_def): Also define __GI_wmemset and
|
||||||
|
__GI___wmemset.
|
||||||
|
(weak_alias): New.
|
||||||
|
* sysdeps/x86_64/multiarch/wmemset.c: New file.
|
||||||
|
* sysdeps/x86_64/multiarch/wmemset.h: Likewise.
|
||||||
|
* sysdeps/x86_64/multiarch/wmemset_chk-nonshared.S: Likewise.
|
||||||
|
* sysdeps/x86_64/multiarch/wmemset_chk.c: Likewise.
|
||||||
|
* sysdeps/x86_64/wmemset.S: Likewise.
|
||||||
|
* sysdeps/x86_64/wmemset_chk.S: Likewise.
|
||||||
|
|
||||||
2017-06-05 Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
2017-06-05 Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||||
H.J. Lu <hongjiu.lu@intel.com>
|
H.J. Lu <hongjiu.lu@intel.com>
|
||||||
|
|
||||||
|
@ -157,6 +157,9 @@ extern wchar_t *__wmemmove (wchar_t *__s1, const wchar_t *__s2,
|
|||||||
extern wchar_t *__wcschrnul (const wchar_t *__s, wchar_t __wc)
|
extern wchar_t *__wcschrnul (const wchar_t *__s, wchar_t __wc)
|
||||||
__attribute_pure__;
|
__attribute_pure__;
|
||||||
|
|
||||||
|
extern wchar_t *__wmemset_chk (wchar_t *__s, wchar_t __c, size_t __n,
|
||||||
|
size_t __ns) __THROW;
|
||||||
|
|
||||||
extern int __vfwscanf (__FILE *__restrict __s,
|
extern int __vfwscanf (__FILE *__restrict __s,
|
||||||
const wchar_t *__restrict __format,
|
const wchar_t *__restrict __format,
|
||||||
__gnuc_va_list __arg)
|
__gnuc_va_list __arg)
|
||||||
|
@ -26,13 +26,18 @@
|
|||||||
#define VMOVU movdqu
|
#define VMOVU movdqu
|
||||||
#define VMOVA movdqa
|
#define VMOVA movdqa
|
||||||
|
|
||||||
#define VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
|
#define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
|
||||||
movd d, %xmm0; \
|
movd d, %xmm0; \
|
||||||
movq r, %rax; \
|
movq r, %rax; \
|
||||||
punpcklbw %xmm0, %xmm0; \
|
punpcklbw %xmm0, %xmm0; \
|
||||||
punpcklwd %xmm0, %xmm0; \
|
punpcklwd %xmm0, %xmm0; \
|
||||||
pshufd $0, %xmm0, %xmm0
|
pshufd $0, %xmm0, %xmm0
|
||||||
|
|
||||||
|
#define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
|
||||||
|
movd d, %xmm0; \
|
||||||
|
movq r, %rax; \
|
||||||
|
pshufd $0, %xmm0, %xmm0
|
||||||
|
|
||||||
#define SECTION(p) p
|
#define SECTION(p) p
|
||||||
|
|
||||||
#ifndef MEMSET_SYMBOL
|
#ifndef MEMSET_SYMBOL
|
||||||
@ -40,10 +45,21 @@
|
|||||||
# define MEMSET_SYMBOL(p,s) memset
|
# define MEMSET_SYMBOL(p,s) memset
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef WMEMSET_SYMBOL
|
||||||
|
# define WMEMSET_CHK_SYMBOL(p,s) p
|
||||||
|
# define WMEMSET_SYMBOL(p,s) __wmemset
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "multiarch/memset-vec-unaligned-erms.S"
|
#include "multiarch/memset-vec-unaligned-erms.S"
|
||||||
|
|
||||||
libc_hidden_builtin_def (memset)
|
libc_hidden_builtin_def (memset)
|
||||||
|
|
||||||
|
#if IS_IN (libc)
|
||||||
|
libc_hidden_def (__wmemset)
|
||||||
|
weak_alias (__wmemset, wmemset)
|
||||||
|
libc_hidden_weak (wmemset)
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined SHARED && IS_IN (libc) && !defined USE_MULTIARCH
|
#if defined SHARED && IS_IN (libc) && !defined USE_MULTIARCH
|
||||||
strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
|
strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
|
||||||
.section .gnu.warning.__memset_zero_constant_len_parameter
|
.section .gnu.warning.__memset_zero_constant_len_parameter
|
||||||
|
@ -32,3 +32,7 @@ endif
|
|||||||
ifeq ($(subdir),wcsmbs)
|
ifeq ($(subdir),wcsmbs)
|
||||||
sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c wcscpy-ssse3 wcscpy-c
|
sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c wcscpy-ssse3 wcscpy-c
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(subdir),debug)
|
||||||
|
sysdep_routines += wmemset_chk-nonshared
|
||||||
|
endif
|
||||||
|
@ -300,6 +300,17 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|||||||
__wmemcmp_ssse3)
|
__wmemcmp_ssse3)
|
||||||
IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_sse2))
|
IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_sse2))
|
||||||
|
|
||||||
|
/* Support sysdeps/x86_64/multiarch/wmemset.c. */
|
||||||
|
IFUNC_IMPL (i, name, wmemset,
|
||||||
|
IFUNC_IMPL_ADD (array, i, wmemset, 1,
|
||||||
|
__wmemset_sse2_unaligned)
|
||||||
|
IFUNC_IMPL_ADD (array, i, wmemset,
|
||||||
|
HAS_ARCH_FEATURE (AVX2_Usable),
|
||||||
|
__wmemset_avx2_unaligned)
|
||||||
|
IFUNC_IMPL_ADD (array, i, wmemset,
|
||||||
|
HAS_ARCH_FEATURE (AVX512F_Usable),
|
||||||
|
__wmemset_avx512_unaligned))
|
||||||
|
|
||||||
#ifdef SHARED
|
#ifdef SHARED
|
||||||
/* Support sysdeps/x86_64/multiarch/memcpy_chk.S. */
|
/* Support sysdeps/x86_64/multiarch/memcpy_chk.S. */
|
||||||
IFUNC_IMPL (i, name, __memcpy_chk,
|
IFUNC_IMPL (i, name, __memcpy_chk,
|
||||||
@ -417,6 +428,17 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|||||||
IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSSE3),
|
IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSSE3),
|
||||||
__strncmp_ssse3)
|
__strncmp_ssse3)
|
||||||
IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_sse2))
|
IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_sse2))
|
||||||
|
|
||||||
|
/* Support sysdeps/x86_64/multiarch/wmemset_chk.c. */
|
||||||
|
IFUNC_IMPL (i, name, __wmemset_chk,
|
||||||
|
IFUNC_IMPL_ADD (array, i, __wmemset_chk, 1,
|
||||||
|
__wmemset_chk_sse2_unaligned)
|
||||||
|
IFUNC_IMPL_ADD (array, i, __wmemset_chk,
|
||||||
|
HAS_ARCH_FEATURE (AVX2_Usable),
|
||||||
|
__wmemset_chk_avx2_unaligned)
|
||||||
|
IFUNC_IMPL_ADD (array, i, __wmemset_chk,
|
||||||
|
HAS_ARCH_FEATURE (AVX512F_Usable),
|
||||||
|
__wmemset_chk_avx512_unaligned))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return i;
|
return i;
|
||||||
|
@ -4,13 +4,19 @@
|
|||||||
# define VMOVU vmovdqu
|
# define VMOVU vmovdqu
|
||||||
# define VMOVA vmovdqa
|
# define VMOVA vmovdqa
|
||||||
|
|
||||||
# define VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
|
# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
|
||||||
vmovd d, %xmm0; \
|
vmovd d, %xmm0; \
|
||||||
movq r, %rax; \
|
movq r, %rax; \
|
||||||
vpbroadcastb %xmm0, %ymm0
|
vpbroadcastb %xmm0, %ymm0
|
||||||
|
|
||||||
|
# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
|
||||||
|
vmovd d, %xmm0; \
|
||||||
|
movq r, %rax; \
|
||||||
|
vpbroadcastd %xmm0, %ymm0
|
||||||
|
|
||||||
# define SECTION(p) p##.avx
|
# define SECTION(p) p##.avx
|
||||||
# define MEMSET_SYMBOL(p,s) p##_avx2_##s
|
# define MEMSET_SYMBOL(p,s) p##_avx2_##s
|
||||||
|
# define WMEMSET_SYMBOL(p,s) p##_avx2_##s
|
||||||
|
|
||||||
# include "memset-vec-unaligned-erms.S"
|
# include "memset-vec-unaligned-erms.S"
|
||||||
#endif
|
#endif
|
||||||
|
@ -4,14 +4,21 @@
|
|||||||
# define VMOVU vmovdqu64
|
# define VMOVU vmovdqu64
|
||||||
# define VMOVA vmovdqa64
|
# define VMOVA vmovdqa64
|
||||||
|
|
||||||
# define VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
|
# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
|
||||||
vmovd d, %xmm0; \
|
vmovd d, %xmm0; \
|
||||||
movq r, %rax; \
|
movq r, %rax; \
|
||||||
vpbroadcastb %xmm0, %xmm0; \
|
vpbroadcastb %xmm0, %xmm0; \
|
||||||
vpbroadcastq %xmm0, %zmm0
|
vpbroadcastq %xmm0, %zmm0
|
||||||
|
|
||||||
|
# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
|
||||||
|
vmovd d, %xmm0; \
|
||||||
|
movq r, %rax; \
|
||||||
|
vpbroadcastd %xmm0, %xmm0; \
|
||||||
|
vpbroadcastq %xmm0, %zmm0
|
||||||
|
|
||||||
# define SECTION(p) p##.avx512
|
# define SECTION(p) p##.avx512
|
||||||
# define MEMSET_SYMBOL(p,s) p##_avx512_##s
|
# define MEMSET_SYMBOL(p,s) p##_avx512_##s
|
||||||
|
# define WMEMSET_SYMBOL(p,s) p##_avx512_##s
|
||||||
|
|
||||||
# include "memset-vec-unaligned-erms.S"
|
# include "memset-vec-unaligned-erms.S"
|
||||||
#endif
|
#endif
|
||||||
|
@ -30,6 +30,10 @@
|
|||||||
# define MEMSET_CHK_SYMBOL(p,s) MEMSET_SYMBOL(p, s)
|
# define MEMSET_CHK_SYMBOL(p,s) MEMSET_SYMBOL(p, s)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef WMEMSET_CHK_SYMBOL
|
||||||
|
# define WMEMSET_CHK_SYMBOL(p,s) WMEMSET_SYMBOL(p, s)
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef VZEROUPPER
|
#ifndef VZEROUPPER
|
||||||
# if VEC_SIZE > 16
|
# if VEC_SIZE > 16
|
||||||
# define VZEROUPPER vzeroupper
|
# define VZEROUPPER vzeroupper
|
||||||
@ -79,6 +83,21 @@ END (__bzero)
|
|||||||
weak_alias (__bzero, bzero)
|
weak_alias (__bzero, bzero)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if IS_IN (libc)
|
||||||
|
# if defined SHARED
|
||||||
|
ENTRY_CHK (WMEMSET_CHK_SYMBOL (__wmemset_chk, unaligned))
|
||||||
|
cmpq %rdx, %rcx
|
||||||
|
jb HIDDEN_JUMPTARGET (__chk_fail)
|
||||||
|
END_CHK (WMEMSET_CHK_SYMBOL (__wmemset_chk, unaligned))
|
||||||
|
# endif
|
||||||
|
|
||||||
|
ENTRY (WMEMSET_SYMBOL (__wmemset, unaligned))
|
||||||
|
shlq $2, %rdx
|
||||||
|
WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
|
||||||
|
jmp L(entry_from_bzero)
|
||||||
|
END (WMEMSET_SYMBOL (__wmemset, unaligned))
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined SHARED && IS_IN (libc)
|
#if defined SHARED && IS_IN (libc)
|
||||||
ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned))
|
ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned))
|
||||||
cmpq %rdx, %rcx
|
cmpq %rdx, %rcx
|
||||||
@ -87,8 +106,7 @@ END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned))
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
ENTRY (MEMSET_SYMBOL (__memset, unaligned))
|
ENTRY (MEMSET_SYMBOL (__memset, unaligned))
|
||||||
L(memset_entry):
|
MEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
|
||||||
VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
|
|
||||||
L(entry_from_bzero):
|
L(entry_from_bzero):
|
||||||
cmpq $VEC_SIZE, %rdx
|
cmpq $VEC_SIZE, %rdx
|
||||||
jb L(less_vec)
|
jb L(less_vec)
|
||||||
@ -132,7 +150,7 @@ END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms))
|
|||||||
# endif
|
# endif
|
||||||
|
|
||||||
ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms))
|
ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms))
|
||||||
VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
|
MEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
|
||||||
cmpq $VEC_SIZE, %rdx
|
cmpq $VEC_SIZE, %rdx
|
||||||
jb L(less_vec)
|
jb L(less_vec)
|
||||||
cmpq $(VEC_SIZE * 2), %rdx
|
cmpq $(VEC_SIZE * 2), %rdx
|
||||||
|
@ -58,6 +58,7 @@ END(memset)
|
|||||||
|
|
||||||
#if IS_IN (libc)
|
#if IS_IN (libc)
|
||||||
# define MEMSET_SYMBOL(p,s) p##_sse2_##s
|
# define MEMSET_SYMBOL(p,s) p##_sse2_##s
|
||||||
|
# define WMEMSET_SYMBOL(p,s) p##_sse2_##s
|
||||||
|
|
||||||
# ifdef SHARED
|
# ifdef SHARED
|
||||||
# undef libc_hidden_builtin_def
|
# undef libc_hidden_builtin_def
|
||||||
@ -65,9 +66,15 @@ END(memset)
|
|||||||
The speedup we get from using SSE2 instructions is likely eaten away
|
The speedup we get from using SSE2 instructions is likely eaten away
|
||||||
by the indirect call in the PLT. */
|
by the indirect call in the PLT. */
|
||||||
# define libc_hidden_builtin_def(name) \
|
# define libc_hidden_builtin_def(name) \
|
||||||
.globl __GI_memset; __GI_memset = __memset_sse2_unaligned
|
.globl __GI_memset; __GI_memset = __memset_sse2_unaligned; \
|
||||||
|
.globl __GI_wmemset; __GI_wmemset = __wmemset_sse2_unaligned; \
|
||||||
|
.globl __GI___wmemset; __GI___wmemset = __wmemset_sse2_unaligned
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
# undef weak_alias
|
||||||
|
# define weak_alias(original, alias) \
|
||||||
|
.weak bzero; bzero = __bzero
|
||||||
|
|
||||||
# undef strong_alias
|
# undef strong_alias
|
||||||
# define strong_alias(original, alias)
|
# define strong_alias(original, alias)
|
||||||
#endif
|
#endif
|
||||||
|
33
sysdeps/x86_64/multiarch/wmemset.c
Normal file
33
sysdeps/x86_64/multiarch/wmemset.c
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
/* Multiple versions of wmemset.
|
||||||
|
All versions must be listed in ifunc-impl-list.c.
|
||||||
|
Copyright (C) 2017 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
/* Define multiple versions only for the definition in libc. */
|
||||||
|
#if IS_IN (libc)
|
||||||
|
# define wmemset __redirect_wmemset
|
||||||
|
# define __wmemset __redirect___wmemset
|
||||||
|
# include <wchar.h>
|
||||||
|
# undef wmemset
|
||||||
|
# undef __wmemset
|
||||||
|
|
||||||
|
# define SYMBOL_NAME wmemset
|
||||||
|
# include "wmemset.h"
|
||||||
|
|
||||||
|
libc_ifunc_redirected (__redirect_wmemset, __wmemset, IFUNC_SELECTOR ());
|
||||||
|
weak_alias (__wmemset, wmemset)
|
||||||
|
#endif
|
42
sysdeps/x86_64/multiarch/wmemset.h
Normal file
42
sysdeps/x86_64/multiarch/wmemset.h
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
/* Common definition for wmemset/wmemset_chk ifunc selections.
|
||||||
|
Copyright (C) 2017 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#include <init-arch.h>
|
||||||
|
|
||||||
|
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
|
||||||
|
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
|
||||||
|
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden;
|
||||||
|
|
||||||
|
static inline void *
|
||||||
|
IFUNC_SELECTOR (void)
|
||||||
|
{
|
||||||
|
const struct cpu_features* cpu_features = __get_cpu_features ();
|
||||||
|
|
||||||
|
if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
|
||||||
|
&& CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
|
||||||
|
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
||||||
|
{
|
||||||
|
if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
|
||||||
|
&& !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
|
||||||
|
return OPTIMIZE (avx512_unaligned);
|
||||||
|
else
|
||||||
|
return OPTIMIZE (avx2_unaligned);
|
||||||
|
}
|
||||||
|
|
||||||
|
return OPTIMIZE (sse2_unaligned);
|
||||||
|
}
|
21
sysdeps/x86_64/multiarch/wmemset_chk-nonshared.S
Normal file
21
sysdeps/x86_64/multiarch/wmemset_chk-nonshared.S
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
/* Non-shared version of wmemset_chk for x86-64.
|
||||||
|
Copyright (C) 2017 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#if IS_IN (libc) && !defined SHARED
|
||||||
|
# include "../wmemset_chk.S"
|
||||||
|
#endif
|
31
sysdeps/x86_64/multiarch/wmemset_chk.c
Normal file
31
sysdeps/x86_64/multiarch/wmemset_chk.c
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
/* Multiple versions of wmemset_chk.
|
||||||
|
All versions must be listed in ifunc-impl-list.c.
|
||||||
|
Copyright (C) 2017 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
/* Define multiple versions only for the definition in libc.so. */
|
||||||
|
#if IS_IN (libc) && defined SHARED
|
||||||
|
# define __wmemset_chk __redirect_wmemset_chk
|
||||||
|
# include <wchar.h>
|
||||||
|
# undef __wmemset_chk
|
||||||
|
|
||||||
|
# define SYMBOL_NAME wmemset_chk
|
||||||
|
# include "wmemset.h"
|
||||||
|
|
||||||
|
libc_ifunc_redirected (__redirect_wmemset_chk, __wmemset_chk,
|
||||||
|
IFUNC_SELECTOR ());
|
||||||
|
#endif
|
1
sysdeps/x86_64/wmemset.S
Normal file
1
sysdeps/x86_64/wmemset.S
Normal file
@ -0,0 +1 @@
|
|||||||
|
/* Implemented in memset.S. */
|
33
sysdeps/x86_64/wmemset_chk.S
Normal file
33
sysdeps/x86_64/wmemset_chk.S
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
/* Checking wmemset for x86-64.
|
||||||
|
Copyright (C) 2004-2017 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#include <sysdep.h>
|
||||||
|
#include "asm-syntax.h"
|
||||||
|
|
||||||
|
#ifndef SHARED
|
||||||
|
/* For libc.so this is defined in memset.S.
|
||||||
|
For libc.a, this is a separate source to avoid
|
||||||
|
wmemset bringing in __chk_fail and all routines
|
||||||
|
it calls. */
|
||||||
|
.text
|
||||||
|
ENTRY (__wmemset_chk)
|
||||||
|
cmpq %rdx, %rcx
|
||||||
|
jb __chk_fail
|
||||||
|
jmp wmemset
|
||||||
|
END (__wmemset_chk)
|
||||||
|
#endif
|
Loading…
Reference in New Issue
Block a user