mirror of
https://sourceware.org/git/glibc.git
synced 2025-01-18 22:50:07 +00:00
1457016337
Optimize x86-64 strcmp/wcscmp and strncmp/wcsncmp with AVX2. It uses vector comparison as much as possible. Peak performance observed on a Skylake machine: 9x, 3x, 2.5x and 5.5x for strcmp, strncmp, wcscmp and wcsncmp, respectively. The longer the comparison length, the greater the benefit of using the AVX2 functions, except for strcmp, where the peak is observed at length == 32 bytes. Select AVX2 strcmp/wcscmp on AVX2 machines where vzeroupper is preferred and AVX unaligned load is fast. NB: It uses TZCNT instead of BSF since TZCNT produces the same result as BSF for non-zero input. TZCNT is faster than BSF and is executed as BSF if the machine doesn't support TZCNT. * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add strcmp-avx2, strncmp-avx2, wcscmp-avx2, wcscmp-sse2, wcsncmp-avx2 and wcsncmp-sse2. * sysdeps/x86_64/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Add tests for __strcmp_avx2, __strncmp_avx2, __wcscmp_avx2, __wcsncmp_avx2, __wcscmp_sse2 and __wcsncmp_sse2. * sysdeps/x86_64/multiarch/strcmp.c (OPTIMIZE (avx2)): (IFUNC_SELECTOR): Return OPTIMIZE (avx2) on AVX2 machines if AVX unaligned load is fast and vzeroupper is preferred. * sysdeps/x86_64/multiarch/strncmp.c: Likewise. * sysdeps/x86_64/multiarch/strcmp-avx2.S: New file. * sysdeps/x86_64/multiarch/strncmp-avx2.S: Likewise. * sysdeps/x86_64/multiarch/wcscmp-avx2.S: Likewise. * sysdeps/x86_64/multiarch/wcscmp-sse2.S: Likewise. * sysdeps/x86_64/multiarch/wcscmp.c: Likewise. * sysdeps/x86_64/multiarch/wcsncmp-avx2.S: Likewise. * sysdeps/x86_64/multiarch/wcsncmp-sse2.c: Likewise. * sysdeps/x86_64/multiarch/wcsncmp.c: Likewise. * sysdeps/x86_64/wcscmp.S (__wcscmp): Add alias only if __wcscmp is undefined.
68 lines
2.2 KiB
Makefile
68 lines
2.2 KiB
Makefile
# When $(subdir) is csu, append test-multiarch to the tests list.
ifeq ($(subdir),csu)
tests += test-multiarch
endif

# When $(subdir) is string, append the routine variants listed below to
# sysdep_routines.  The name suffixes (-sse2, -ssse3, -sse4_2, -avx,
# -avx2, -avx512-*, ...) distinguish the variants; entries ending in -c
# presumably correspond to C sources (cf. the CFLAGS-*-c.c variables
# further down) -- confirm against the directory contents.
#
# NOTE: no comment may be placed inside the continued assignment; a '#'
# there would swallow the remainder of the logical line.
ifeq ($(subdir),string)

sysdep_routines += strncat-c stpncpy-c strncpy-c \
                   strcmp-sse2 strcmp-sse2-unaligned strcmp-ssse3 \
                   strcmp-sse4_2 strcmp-avx2 \
                   strncmp-sse2 strncmp-ssse3 strncmp-sse4_2 strncmp-avx2 \
                   memchr-sse2 rawmemchr-sse2 memchr-avx2 rawmemchr-avx2 \
                   memrchr-sse2 memrchr-avx2 \
                   memcmp-sse2 \
                   memcmp-avx2-movbe \
                   memcmp-sse4 memcpy-ssse3 \
                   memmove-ssse3 \
                   memcpy-ssse3-back \
                   memmove-ssse3-back \
                   memmove-avx512-no-vzeroupper \
                   strcasecmp_l-sse2 strcasecmp_l-ssse3 \
                   strcasecmp_l-sse4_2 strcasecmp_l-avx \
                   strncase_l-sse2 strncase_l-ssse3 \
                   strncase_l-sse4_2 strncase_l-avx \
                   strchr-sse2 strchrnul-sse2 strchr-avx2 strchrnul-avx2 \
                   strrchr-sse2 strrchr-avx2 \
                   strlen-sse2 strnlen-sse2 strlen-avx2 strnlen-avx2 \
                   strcat-ssse3 strncat-ssse3 \
                   strcpy-sse2 stpcpy-sse2 \
                   strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
                   strcpy-sse2-unaligned strncpy-sse2-unaligned \
                   stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
                   strcat-sse2 \
                   strcat-sse2-unaligned strncat-sse2-unaligned \
                   strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned \
                   strcspn-sse2 strpbrk-sse2 strspn-sse2 \
                   strcspn-c strpbrk-c strspn-c varshift \
                   memset-avx512-no-vzeroupper \
                   memmove-sse2-unaligned-erms \
                   memmove-avx-unaligned-erms \
                   memmove-avx512-unaligned-erms \
                   memset-sse2-unaligned-erms \
                   memset-avx2-unaligned-erms \
                   memset-avx512-unaligned-erms
# Append -msse4 to the per-file flag variables of these four C sources
# (presumably because they use SSE4 instructions/intrinsics -- confirm
# against the sources).
CFLAGS-varshift.c += -msse4
CFLAGS-strcspn-c.c += -msse4
CFLAGS-strpbrk-c.c += -msse4
CFLAGS-strspn-c.c += -msse4
endif

# When $(subdir) is wcsmbs, append the wide-character routine variants
# listed below to sysdep_routines.
ifeq ($(subdir),wcsmbs)
sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \
                   wmemcmp-avx2-movbe \
                   wmemchr-sse2 wmemchr-avx2 \
                   wcscmp-sse2 wcscmp-avx2 \
                   wcsncmp-sse2 wcsncmp-avx2 \
                   wcscpy-ssse3 wcscpy-c \
                   wcschr-sse2 wcschr-avx2 \
                   wcsrchr-sse2 wcsrchr-avx2 \
                   wcsnlen-sse4_1 wcsnlen-c \
                   wcslen-sse2 wcslen-avx2 wcsnlen-avx2
endif

# When $(subdir) is debug, append the *_chk-nonshared routines listed
# below to sysdep_routines.
ifeq ($(subdir),debug)
sysdep_routines += memcpy_chk-nonshared mempcpy_chk-nonshared \
                   memmove_chk-nonshared memset_chk-nonshared \
                   wmemset_chk-nonshared
endif