mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-29 05:51:10 +00:00
faaf733f49
Changes from v1:
Use the vec API for registers.
Replace VPCMP with VPCMPEQ.
Restructure and remove 1 unconditional jump.
Change the page-cross logic to use sall.

This patch implements the following evex512 versions of string functions. The evex512 versions take up to 30% fewer cycles than evex, depending on length and alignment.
- strrchr function using 512-bit vectors.
- wcsrchr function using 512-bit vectors.

Code size data:
strrchr-evex.o 879 bytes
strrchr-evex512.o 601 bytes (-32%)
wcsrchr-evex.o 882 bytes
wcsrchr-evex512.o 572 bytes (-35%)

Placeholder functions, not used by any processor at the moment.

Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
185 lines
3.6 KiB
Makefile
185 lines
3.6 KiB
Makefile
ifeq ($(subdir),string)

# Extra routines appended to sysdep_routines when the current subdir is
# "string".  The list has one entry per line.  Every entry, including the
# last one, ends in a backslash; the closing "# sysdep_routines" comment
# line absorbs the final continuation, so an entry can be added or removed
# without touching its neighbours.  Nothing other than an entry may appear
# between the "+=" line and that closing comment, or the continuation breaks.
# NOTE(review): the name suffixes (-sse2, -avx2, -avx2-rtm, -evex, -evex512,
# ...) presumably select ISA-specific variants of each function -- confirm
# against the matching source files.
sysdep_routines += \
  memchr-avx2 \
  memchr-avx2-rtm \
  memchr-evex \
  memchr-evex512 \
  memchr-evex-rtm \
  memchr-sse2 \
  memcmp-avx2-movbe \
  memcmp-avx2-movbe-rtm \
  memcmp-evex-movbe \
  memcmp-sse2 \
  memcmpeq-avx2 \
  memcmpeq-avx2-rtm \
  memcmpeq-evex \
  memcmpeq-sse2 \
  memmove-avx-unaligned-erms \
  memmove-avx-unaligned-erms-rtm \
  memmove-avx512-no-vzeroupper \
  memmove-avx512-unaligned-erms \
  memmove-erms \
  memmove-evex-unaligned-erms \
  memmove-sse2-unaligned-erms \
  memmove-ssse3 \
  memrchr-avx2 \
  memrchr-avx2-rtm \
  memrchr-evex \
  memrchr-sse2 \
  memset-avx2-unaligned-erms \
  memset-avx2-unaligned-erms-rtm \
  memset-avx512-no-vzeroupper \
  memset-avx512-unaligned-erms \
  memset-erms \
  memset-evex-unaligned-erms \
  memset-sse2-unaligned-erms \
  rawmemchr-avx2 \
  rawmemchr-avx2-rtm \
  rawmemchr-evex \
  rawmemchr-evex512 \
  rawmemchr-evex-rtm \
  rawmemchr-sse2 \
  stpcpy-avx2 \
  stpcpy-avx2-rtm \
  stpcpy-evex \
  stpcpy-sse2 \
  stpcpy-sse2-unaligned \
  stpncpy-avx2 \
  stpncpy-avx2-rtm \
  stpncpy-evex \
  stpncpy-sse2-unaligned \
  strcasecmp_l-avx2 \
  strcasecmp_l-avx2-rtm \
  strcasecmp_l-evex \
  strcasecmp_l-sse2 \
  strcasecmp_l-sse4_2 \
  strcat-avx2 \
  strcat-avx2-rtm \
  strcat-evex \
  strcat-sse2 \
  strcat-sse2-unaligned \
  strchr-avx2 \
  strchr-avx2-rtm \
  strchr-evex \
  strchr-evex512 \
  strchr-sse2 \
  strchr-sse2-no-bsf \
  strchrnul-avx2 \
  strchrnul-avx2-rtm \
  strchrnul-evex \
  strchrnul-evex512 \
  strchrnul-sse2 \
  strcmp-avx2 \
  strcmp-avx2-rtm \
  strcmp-evex \
  strcmp-sse2 \
  strcmp-sse2-unaligned \
  strcmp-sse4_2 \
  strcpy-avx2 \
  strcpy-avx2-rtm \
  strcpy-evex \
  strcpy-sse2 \
  strcpy-sse2-unaligned \
  strcspn-sse4 \
  strlen-avx2 \
  strlen-avx2-rtm \
  strlen-evex \
  strlen-evex512 \
  strlen-sse2 \
  strncase_l-avx2 \
  strncase_l-avx2-rtm \
  strncase_l-evex \
  strncase_l-sse2 \
  strncase_l-sse4_2 \
  strncat-avx2 \
  strncat-avx2-rtm \
  strncat-evex \
  strncat-sse2-unaligned \
  strncmp-avx2 \
  strncmp-avx2-rtm \
  strncmp-evex \
  strncmp-sse2 \
  strncmp-sse4_2 \
  strncpy-avx2 \
  strncpy-avx2-rtm \
  strncpy-evex \
  strncpy-sse2-unaligned \
  strnlen-avx2 \
  strnlen-avx2-rtm \
  strnlen-evex \
  strnlen-evex512 \
  strnlen-sse2 \
  strpbrk-sse4 \
  strrchr-avx2 \
  strrchr-avx2-rtm \
  strrchr-evex \
  strrchr-evex512 \
  strrchr-sse2 \
  strspn-sse4 \
  strstr-avx512 \
  strstr-sse2-unaligned \
  varshift \
# sysdep_routines

# Extra flags appended to the CFLAGS-<file>.c variables of the C sources
# named below: the three *-sse4.c files get -msse4.
# NOTE(review): presumably the build applies CFLAGS-<file> only when
# compiling that one file -- confirm against the including makefiles.
CFLAGS-strcspn-sse4.c += -msse4
CFLAGS-strpbrk-sse4.c += -msse4
CFLAGS-strspn-sse4.c += -msse4

# strstr-avx512.c gets the AVX-512 F/VL/DQ/BW and BMI/BMI2 enabling flags,
# plus an explicit -O3.
CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3
endif

ifeq ($(subdir),wcsmbs)
# Extra routines appended to sysdep_routines when the current subdir is
# "wcsmbs".  The list has one entry per line.  Every entry, including the
# last one, ends in a backslash; the closing "# sysdep_routines" comment
# line absorbs the final continuation.  Nothing other than an entry may
# appear between the "+=" line and that closing comment.
# NOTE(review): the name suffixes (-sse2, -avx2, -evex, -evex512, ...)
# presumably select ISA-specific variants of each function -- confirm
# against the matching source files.
sysdep_routines += \
  wcschr-avx2 \
  wcschr-avx2-rtm \
  wcschr-evex \
  wcschr-evex512 \
  wcschr-sse2 \
  wcscmp-avx2 \
  wcscmp-avx2-rtm \
  wcscmp-evex \
  wcscmp-sse2 \
  wcscpy-ssse3 \
  wcslen-avx2 \
  wcslen-avx2-rtm \
  wcslen-evex \
  wcslen-evex512 \
  wcslen-sse2 \
  wcslen-sse4_1 \
  wcsncmp-avx2 \
  wcsncmp-avx2-rtm \
  wcsncmp-evex \
  wcsnlen-avx2 \
  wcsnlen-avx2-rtm \
  wcsnlen-evex \
  wcsnlen-evex512 \
  wcsnlen-sse4_1 \
  wcsrchr-avx2 \
  wcsrchr-avx2-rtm \
  wcsrchr-evex \
  wcsrchr-evex512 \
  wcsrchr-sse2 \
  wmemchr-avx2 \
  wmemchr-avx2-rtm \
  wmemchr-evex \
  wmemchr-evex512 \
  wmemchr-evex-rtm \
  wmemchr-sse2 \
  wmemcmp-avx2-movbe \
  wmemcmp-avx2-movbe-rtm \
  wmemcmp-evex-movbe \
  wmemcmp-sse2 \
# sysdep_routines
endif

ifeq ($(subdir),debug)
# Extra routines appended to sysdep_routines when the current subdir is
# "debug".  Same layout as any backslash-continued list: each entry ends in
# a backslash and the closing "# sysdep_routines" comment line absorbs the
# final continuation.
# NOTE(review): the "-nonshared" suffix presumably marks variants used only
# in the non-shared (static) build -- confirm against the source files.
sysdep_routines += \
  memcpy_chk-nonshared \
  memmove_chk-nonshared \
  mempcpy_chk-nonshared \
  memset_chk-nonshared \
  wmemset_chk-nonshared \
# sysdep_routines
endif