2009-06-23 03:38:41 +00:00
|
|
|
# Per-ISA variants of the memory/string routines (the object names carry
# suffixes such as -sse2, -avx2, -evex).  Everything up to the matching
# endif takes effect only while $(subdir) is "string".
ifeq ($(subdir),string)

# One entry per line.  The last entry also ends in a backslash; the
# "# sysdep_routines" comment line that follows terminates the
# continuation, so entries can be added or removed without editing
# their neighbours.
sysdep_routines += \
  memchr-avx2 \
  memchr-avx2-rtm \
  memchr-evex \
  memchr-evex512 \
  memchr-evex-rtm \
  memchr-sse2 \
  memcmp-avx2-movbe \
  memcmp-avx2-movbe-rtm \
  memcmp-evex-movbe \
  memcmp-sse2 \
  memcmpeq-avx2 \
  memcmpeq-avx2-rtm \
  memcmpeq-evex \
  memcmpeq-sse2 \
  memmove-avx-unaligned-erms \
  memmove-avx-unaligned-erms-rtm \
  memmove-avx512-no-vzeroupper \
  memmove-avx512-unaligned-erms \
  memmove-erms \
  memmove-evex-unaligned-erms \
  memmove-sse2-unaligned-erms \
  memmove-ssse3 \
  memrchr-avx2 \
  memrchr-avx2-rtm \
  memrchr-evex \
  memrchr-sse2 \
  memset-avx2-unaligned-erms \
  memset-avx2-unaligned-erms-rtm \
  memset-avx512-no-vzeroupper \
  memset-avx512-unaligned-erms \
  memset-erms \
  memset-evex-unaligned-erms \
  memset-sse2-unaligned-erms \
  rawmemchr-avx2 \
  rawmemchr-avx2-rtm \
  rawmemchr-evex \
  rawmemchr-evex512 \
  rawmemchr-evex-rtm \
  rawmemchr-sse2 \
  stpcpy-avx2 \
  stpcpy-avx2-rtm \
  stpcpy-evex \
  stpcpy-sse2 \
  stpcpy-sse2-unaligned \
  stpncpy-avx2 \
  stpncpy-avx2-rtm \
  stpncpy-evex \
  stpncpy-sse2-unaligned \
  strcasecmp_l-avx2 \
  strcasecmp_l-avx2-rtm \
  strcasecmp_l-evex \
  strcasecmp_l-sse2 \
  strcasecmp_l-sse4_2 \
  strcat-avx2 \
  strcat-avx2-rtm \
  strcat-evex \
  strcat-sse2 \
  strcat-sse2-unaligned \
  strchr-avx2 \
  strchr-avx2-rtm \
  strchr-evex \
  strchr-evex512 \
  strchr-sse2 \
  strchr-sse2-no-bsf \
  strchrnul-avx2 \
  strchrnul-avx2-rtm \
  strchrnul-evex \
  strchrnul-evex512 \
  strchrnul-sse2 \
  strcmp-avx2 \
  strcmp-avx2-rtm \
  strcmp-evex \
  strcmp-sse2 \
  strcmp-sse2-unaligned \
  strcmp-sse4_2 \
  strcpy-avx2 \
  strcpy-avx2-rtm \
  strcpy-evex \
  strcpy-sse2 \
  strcpy-sse2-unaligned \
  strcspn-sse4 \
  strlen-avx2 \
  strlen-avx2-rtm \
  strlen-evex \
  strlen-evex512 \
  strlen-sse2 \
  strncase_l-avx2 \
  strncase_l-avx2-rtm \
  strncase_l-evex \
  strncase_l-sse2 \
  strncase_l-sse4_2 \
  strncat-avx2 \
  strncat-avx2-rtm \
  strncat-evex \
  strncat-sse2-unaligned \
  strncmp-avx2 \
  strncmp-avx2-rtm \
  strncmp-evex \
  strncmp-sse2 \
  strncmp-sse4_2 \
  strncpy-avx2 \
  strncpy-avx2-rtm \
  strncpy-evex \
  strncpy-sse2-unaligned \
  strnlen-avx2 \
  strnlen-avx2-rtm \
  strnlen-evex \
  strnlen-evex512 \
  strnlen-sse2 \
  strpbrk-sse4 \
  strrchr-avx2 \
  strrchr-avx2-rtm \
  strrchr-evex \
  strrchr-evex512 \
  strrchr-sse2 \
  strspn-sse4 \
  strstr-avx512 \
  strstr-sse2-unaligned \
  varshift \
# sysdep_routines

# Extra compiler flags for the three *-sse4.c sources listed above.
CFLAGS-strcspn-sse4.c += -msse4
CFLAGS-strpbrk-sse4.c += -msse4
CFLAGS-strspn-sse4.c += -msse4

# Extra compiler flags for strstr-avx512.c: the AVX-512 F/VL/DQ/BW and
# BMI/BMI2 options, plus -O3.
CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3
endif
|
2011-12-17 19:39:23 +00:00
|
|
|
|
|
|
|
# Per-ISA variants of the wide-character routines (wcs* and wmem*).
# Everything up to the matching endif takes effect only while $(subdir)
# is "wcsmbs".
ifeq ($(subdir),wcsmbs)

# One entry per line; the "# sysdep_routines" comment line terminates the
# backslash continuation left open by the last entry.
sysdep_routines += \
  wcschr-avx2 \
  wcschr-avx2-rtm \
  wcschr-evex \
  wcschr-evex512 \
  wcschr-sse2 \
  wcscmp-avx2 \
  wcscmp-avx2-rtm \
  wcscmp-evex \
  wcscmp-sse2 \
  wcscpy-ssse3 \
  wcslen-avx2 \
  wcslen-avx2-rtm \
  wcslen-evex \
  wcslen-evex512 \
  wcslen-sse2 \
  wcslen-sse4_1 \
  wcsncmp-avx2 \
  wcsncmp-avx2-rtm \
  wcsncmp-evex \
  wcsnlen-avx2 \
  wcsnlen-avx2-rtm \
  wcsnlen-evex \
  wcsnlen-evex512 \
  wcsnlen-sse4_1 \
  wcsrchr-avx2 \
  wcsrchr-avx2-rtm \
  wcsrchr-evex \
  wcsrchr-evex512 \
  wcsrchr-sse2 \
  wmemchr-avx2 \
  wmemchr-avx2-rtm \
  wmemchr-evex \
  wmemchr-evex512 \
  wmemchr-evex-rtm \
  wmemchr-sse2 \
  wmemcmp-avx2-movbe \
  wmemcmp-avx2-movbe-rtm \
  wmemcmp-evex-movbe \
  wmemcmp-sse2 \
# sysdep_routines
endif
|
x86-64: Optimize wmemset with SSE2/AVX2/AVX512
The difference between memset and wmemset is byte vs int. Add stubs
to SSE2/AVX2/AVX512 memset for wmemset with updated constant and size:
SSE2 wmemset:
shl $0x2,%rdx
movd %esi,%xmm0
mov %rdi,%rax
pshufd $0x0,%xmm0,%xmm0
jmp entry_from_wmemset
SSE2 memset:
movd %esi,%xmm0
mov %rdi,%rax
punpcklbw %xmm0,%xmm0
punpcklwd %xmm0,%xmm0
pshufd $0x0,%xmm0,%xmm0
entry_from_wmemset:
Since the ERMS versions of wmemset require "rep stosl" instead of
"rep stosb", only the vector store stubs of SSE2/AVX2/AVX512 wmemset
are added. The SSE2 wmemset is about 3X faster and the AVX2 wmemset
is about 6X faster on Haswell.
* include/wchar.h (__wmemset_chk): New.
* sysdeps/x86_64/memset.S (VDUP_TO_VEC0_AND_SET_RETURN): Renamed
to MEMSET_VDUP_TO_VEC0_AND_SET_RETURN.
(WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN): New.
(WMEMSET_CHK_SYMBOL): Likewise.
(WMEMSET_SYMBOL): Likewise.
(__wmemset): Add hidden definition.
(wmemset): Add weak hidden definition.
* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
wmemset_chk-nonshared.
* sysdeps/x86_64/multiarch/ifunc-impl-list.c
(__libc_ifunc_impl_list): Add __wmemset_sse2_unaligned,
__wmemset_avx2_unaligned, __wmemset_avx512_unaligned,
__wmemset_chk_sse2_unaligned, __wmemset_chk_avx2_unaligned
and __wmemset_chk_avx512_unaligned.
* sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
(VDUP_TO_VEC0_AND_SET_RETURN): Renamed to ...
(MEMSET_VDUP_TO_VEC0_AND_SET_RETURN): This.
(WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN): New.
(WMEMSET_SYMBOL): Likewise.
* sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
(VDUP_TO_VEC0_AND_SET_RETURN): Renamed to ...
(MEMSET_VDUP_TO_VEC0_AND_SET_RETURN): This.
(WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN): New.
(WMEMSET_SYMBOL): Likewise.
* sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S: Updated.
(WMEMSET_CHK_SYMBOL): New.
(WMEMSET_CHK_SYMBOL (__wmemset_chk, unaligned)): Likewise.
(WMEMSET_SYMBOL (__wmemset, unaligned)): Likewise.
* sysdeps/x86_64/multiarch/memset.S (WMEMSET_SYMBOL): New.
(libc_hidden_builtin_def): Also define __GI_wmemset and
__GI___wmemset.
(weak_alias): New.
* sysdeps/x86_64/multiarch/wmemset.c: New file.
* sysdeps/x86_64/multiarch/wmemset.h: Likewise.
* sysdeps/x86_64/multiarch/wmemset_chk-nonshared.S: Likewise.
* sysdeps/x86_64/multiarch/wmemset_chk.c: Likewise.
* sysdeps/x86_64/wmemset.c: Likewise.
* sysdeps/x86_64/wmemset_chk.c: Likewise.
2017-06-05 18:09:48 +00:00
|
|
|
|
|
|
|
# The *_chk-nonshared objects.  Everything up to the matching endif takes
# effect only while $(subdir) is "debug".
ifeq ($(subdir),debug)

# One entry per line; the "# sysdep_routines" comment line terminates the
# backslash continuation left open by the last entry.
sysdep_routines += \
  memcpy_chk-nonshared \
  memmove_chk-nonshared \
  mempcpy_chk-nonshared \
  memset_chk-nonshared \
  wmemset_chk-nonshared \
# sysdep_routines
endif
|