# Mirror of https://sourceware.org/git/glibc.git (synced 2025-01-18 22:50:07 +00:00)
# Commit 8b4416d83c:
#   These new memcpy functions are the 32-bit version of x86_64 SSE2 unaligned
#   memcpy.  Memcpy average performance benefit is 18% on Silvermont; other
#   platforms also improved about 35%.  Benchmarked on Silvermont, Haswell,
#   Ivy Bridge, Sandy Bridge and Westmere; performance results attached in
#   https://sourceware.org/ml/libc-alpha/2014-07/msg00157.html
#   * sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S: New file.
#   * sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S: Likewise.
#   * sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S: Likewise.
#   * sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S: Likewise.
#   * sysdeps/i386/i686/multiarch/bcopy.S: Select the sse2_unaligned
#     version if bit_Fast_Unaligned_Load is set.
#   * sysdeps/i386/i686/multiarch/memcpy.S: Likewise.
#   * sysdeps/i386/i686/multiarch/memcpy_chk.S: Likewise.
#   * sysdeps/i386/i686/multiarch/memmove.S: Likewise.
#   * sysdeps/i386/i686/multiarch/memmove_chk.S: Likewise.
#   * sysdeps/i386/i686/multiarch/mempcpy.S: Likewise.
#   * sysdeps/i386/i686/multiarch/mempcpy_chk.S: Likewise.
#   * sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add
#     bcopy-sse2-unaligned, memcpy-sse2-unaligned, memmove-sse2-unaligned
#     and mempcpy-sse2-unaligned.
#   * sysdeps/i386/i686/multiarch/ifunc-impl-list.c (MAX_IFUNC): Set to 4.
#     (__libc_ifunc_impl_list): Test __bcopy_sse2_unaligned,
#     __memmove_chk_sse2_unaligned, __memmove_sse2_unaligned,
#     __memcpy_chk_sse2_unaligned, __memcpy_sse2_unaligned,
#     __mempcpy_chk_sse2_unaligned, and __mempcpy_sse2_unaligned.
# File: Makefile (49 lines, 1.8 KiB)
# Startup-code (csu) additions: CPU-feature detection support used by the
# IFUNC resolvers in this directory.
ifeq ($(subdir),csu)
# init-arch probes CPUID and records feature bits for multiarch dispatch.
aux += init-arch
tests += test-multiarch
# Generated assembler constants describing the feature-bit layout.
gen-as-const-headers += ifunc-defines.sym
endif
# String-routine IFUNC variants built in addition to the generic C versions.
ifeq ($(subdir),string)
# Assembler constants for locale tables (used by strcasecmp/strncasecmp).
gen-as-const-headers += locale-defines.sym
sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
		   memmove-ssse3 memcpy-ssse3-rep mempcpy-ssse3-rep \
		   memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \
		   memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \
		   strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \
		   memcmp-ssse3 memcmp-sse4 varshift \
		   strlen-sse2 strlen-sse2-bsf strncpy-c strcpy-ssse3 \
		   strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 strcpy-sse2 \
		   strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strcat-ssse3 \
		   strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c \
		   strchr-sse2 strrchr-sse2 strchr-sse2-bsf strrchr-sse2-bsf \
		   memchr-sse2 memchr-sse2-bsf \
		   memrchr-sse2 memrchr-sse2-bsf memrchr-c \
		   rawmemchr-sse2 rawmemchr-sse2-bsf \
		   strnlen-sse2 strnlen-c \
		   strcasecmp_l-c strcasecmp-c strcasecmp_l-ssse3 \
		   strncase_l-c strncase-c strncase_l-ssse3 \
		   strcasecmp_l-sse4 strncase_l-sse4 \
		   bcopy-sse2-unaligned memcpy-sse2-unaligned \
		   mempcpy-sse2-unaligned memmove-sse2-unaligned
# SSE4 C variants can only be compiled when the compiler supports -msse4.
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c
CFLAGS-varshift.c += -msse4
CFLAGS-strcspn-c.c += -msse4
CFLAGS-strpbrk-c.c += -msse4
CFLAGS-strspn-c.c += -msse4
endif
endif
# Wide-character routine IFUNC variants.
ifeq ($(subdir),wcsmbs)
sysdep_routines += wcscmp-sse2 wcscmp-c wcslen-sse2 wcslen-c \
		   wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c wcschr-sse2 \
		   wcschr-c wcsrchr-sse2 wcsrchr-c wcscpy-ssse3 wcscpy-c
endif
# AVX-based FMA implementations, built only in the math subdir AND only when
# the compiler supports -mavx (both conditions folded into one ifeq by
# concatenating $(subdir) with $(config-cflags-avx)).
ifeq (mathyes,$(subdir)$(config-cflags-avx))
libm-sysdep_routines += s_fma-fma s_fmaf-fma
# -mfpmath=sse keeps the FMA operands out of the x87 stack so results are
# not double-rounded through 80-bit precision.
CFLAGS-s_fma-fma.c += -mavx -mfpmath=sse
CFLAGS-s_fmaf-fma.c += -mavx -mfpmath=sse
endif