mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-29 08:11:08 +00:00
721314c980
As indicated in a recent thread, this it is a simple brute-force algorithm that checks the whole needle at a matching character pair (and does so 1 byte at a time after the first 64 bytes of a needle). Also it never skips ahead and thus can match at every haystack position after trying to match all of the needle, which generic implementation avoids. As indicated by Wilco, a 4x larger needle and 16x larger haystack gives a clear 65x slowdown both basic_strstr and __strstr_avx512: "ifuncs": ["basic_strstr", "twoway_strstr", "__strstr_avx512", "__strstr_sse2_unaligned", "__strstr_generic"], { "len_haystack": 65536, "len_needle": 1024, "align_haystack": 0, "align_needle": 0, "fail": 1, "desc": "Difficult bruteforce needle", "timings": [4.0948e+07, 15094.5, 3.20818e+07, 108558, 10839.2] }, { "len_haystack": 1048576, "len_needle": 4096, "align_haystack": 0, "align_needle": 0, "fail": 1, "desc": "Difficult bruteforce needle", "timings": [2.69767e+09, 100797, 2.08535e+09, 495706, 82666.9] } PS: I don't have an AVX512 capable machine to verify this issues, but skimming through the code it does seems to follow what Wilco has described. Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
200 lines
3.8 KiB
Makefile
200 lines
3.8 KiB
Makefile
# Settings applied only while $(subdir) is `string'.
#
# Every name in the list below is appended to sysdep_routines.  Keep one
# entry per line, each ending in a backslash.  The closing
# `# sysdep_routines' comment line absorbs the final continuation, so an
# entry can be added or removed without editing its neighbours.  Do not
# place blank lines or comments inside the list: either one would end the
# continuation early.
ifeq ($(subdir),string)

sysdep_routines += \
  memchr-avx2 \
  memchr-avx2-rtm \
  memchr-evex \
  memchr-evex-rtm \
  memchr-evex512 \
  memchr-sse2 \
  memcmp-avx2-movbe \
  memcmp-avx2-movbe-rtm \
  memcmp-evex-movbe \
  memcmp-sse2 \
  memcmpeq-avx2 \
  memcmpeq-avx2-rtm \
  memcmpeq-evex \
  memcmpeq-sse2 \
  memmove-avx-unaligned-erms \
  memmove-avx-unaligned-erms-rtm \
  memmove-avx512-no-vzeroupper \
  memmove-avx512-unaligned-erms \
  memmove-erms \
  memmove-evex-unaligned-erms \
  memmove-sse2-unaligned-erms \
  memmove-ssse3 \
  memrchr-avx2 \
  memrchr-avx2-rtm \
  memrchr-evex \
  memrchr-sse2 \
  memset-avx2-unaligned-erms \
  memset-avx2-unaligned-erms-rtm \
  memset-avx512-no-vzeroupper \
  memset-avx512-unaligned-erms \
  memset-erms \
  memset-evex-unaligned-erms \
  memset-sse2-unaligned-erms \
  rawmemchr-avx2 \
  rawmemchr-avx2-rtm \
  rawmemchr-evex \
  rawmemchr-evex-rtm \
  rawmemchr-evex512 \
  rawmemchr-sse2 \
  stpcpy-avx2 \
  stpcpy-avx2-rtm \
  stpcpy-evex \
  stpcpy-sse2 \
  stpcpy-sse2-unaligned \
  stpncpy-avx2 \
  stpncpy-avx2-rtm \
  stpncpy-evex \
  stpncpy-sse2-unaligned \
  strcasecmp_l-avx2 \
  strcasecmp_l-avx2-rtm \
  strcasecmp_l-evex \
  strcasecmp_l-sse2 \
  strcasecmp_l-sse4_2 \
  strcat-avx2 \
  strcat-avx2-rtm \
  strcat-evex \
  strcat-sse2 \
  strcat-sse2-unaligned \
  strchr-avx2 \
  strchr-avx2-rtm \
  strchr-evex \
  strchr-evex512 \
  strchr-sse2 \
  strchr-sse2-no-bsf \
  strchrnul-avx2 \
  strchrnul-avx2-rtm \
  strchrnul-evex \
  strchrnul-evex512 \
  strchrnul-sse2 \
  strcmp-avx2 \
  strcmp-avx2-rtm \
  strcmp-evex \
  strcmp-sse2 \
  strcmp-sse2-unaligned \
  strcmp-sse4_2 \
  strcpy-avx2 \
  strcpy-avx2-rtm \
  strcpy-evex \
  strcpy-sse2 \
  strcpy-sse2-unaligned \
  strcspn-sse4 \
  strlen-avx2 \
  strlen-avx2-rtm \
  strlen-evex \
  strlen-evex512 \
  strlen-sse2 \
  strncase_l-avx2 \
  strncase_l-avx2-rtm \
  strncase_l-evex \
  strncase_l-sse2 \
  strncase_l-sse4_2 \
  strncat-avx2 \
  strncat-avx2-rtm \
  strncat-evex \
  strncat-sse2-unaligned \
  strncmp-avx2 \
  strncmp-avx2-rtm \
  strncmp-evex \
  strncmp-sse2 \
  strncmp-sse4_2 \
  strncpy-avx2 \
  strncpy-avx2-rtm \
  strncpy-evex \
  strncpy-sse2-unaligned \
  strnlen-avx2 \
  strnlen-avx2-rtm \
  strnlen-evex \
  strnlen-evex512 \
  strnlen-sse2 \
  strpbrk-sse4 \
  strrchr-avx2 \
  strrchr-avx2-rtm \
  strrchr-evex \
  strrchr-evex512 \
  strrchr-sse2 \
  strspn-sse4 \
  strstr-sse2-unaligned \
  varshift \
# sysdep_routines

# Add -msse4 to the per-file compiler flags of the three *-sse4.c sources.
CFLAGS-strcspn-sse4.c += -msse4
CFLAGS-strpbrk-sse4.c += -msse4
CFLAGS-strspn-sse4.c += -msse4
endif
|
|
|
|
# Settings applied only while $(subdir) is `wcsmbs'.
#
# Every name in the list below is appended to sysdep_routines.  Keep one
# entry per line, each ending in a backslash.  The closing
# `# sysdep_routines' comment line absorbs the final continuation, so an
# entry can be added or removed without editing its neighbours.  Do not
# place blank lines or comments inside the list: either one would end the
# continuation early.
ifeq ($(subdir),wcsmbs)
sysdep_routines += \
  wcpcpy-avx2 \
  wcpcpy-evex \
  wcpcpy-generic \
  wcpncpy-avx2 \
  wcpncpy-evex \
  wcpncpy-generic \
  wcscat-avx2 \
  wcscat-evex \
  wcscat-generic \
  wcschr-avx2 \
  wcschr-avx2-rtm \
  wcschr-evex \
  wcschr-evex512 \
  wcschr-sse2 \
  wcscmp-avx2 \
  wcscmp-avx2-rtm \
  wcscmp-evex \
  wcscmp-sse2 \
  wcscpy-avx2 \
  wcscpy-evex \
  wcscpy-generic \
  wcscpy-ssse3 \
  wcslen-avx2 \
  wcslen-avx2-rtm \
  wcslen-evex \
  wcslen-evex512 \
  wcslen-sse2 \
  wcslen-sse4_1 \
  wcsncat-avx2 \
  wcsncat-evex \
  wcsncat-generic \
  wcsncmp-avx2 \
  wcsncmp-avx2-rtm \
  wcsncmp-evex \
  wcsncpy-avx2 \
  wcsncpy-evex \
  wcsncpy-generic \
  wcsnlen-avx2 \
  wcsnlen-avx2-rtm \
  wcsnlen-evex \
  wcsnlen-evex512 \
  wcsnlen-sse4_1 \
  wcsrchr-avx2 \
  wcsrchr-avx2-rtm \
  wcsrchr-evex \
  wcsrchr-evex512 \
  wcsrchr-sse2 \
  wmemchr-avx2 \
  wmemchr-avx2-rtm \
  wmemchr-evex \
  wmemchr-evex-rtm \
  wmemchr-evex512 \
  wmemchr-sse2 \
  wmemcmp-avx2-movbe \
  wmemcmp-avx2-movbe-rtm \
  wmemcmp-evex-movbe \
  wmemcmp-sse2 \
# sysdep_routines
endif
|
|
|
|
# Settings applied only while $(subdir) is `debug'.
#
# Every name in the list below is appended to sysdep_routines.  Keep one
# entry per line, each ending in a backslash.  The closing
# `# sysdep_routines' comment line absorbs the final continuation, so an
# entry can be added or removed without editing its neighbours.  Do not
# place blank lines or comments inside the list: either one would end the
# continuation early.
ifeq ($(subdir),debug)
sysdep_routines += \
  memcpy_chk-nonshared \
  memmove_chk-nonshared \
  mempcpy_chk-nonshared \
  memset_chk-nonshared \
  wmemset_chk-nonshared \
# sysdep_routines
endif
|