x86-64: Fix memcpy IFUNC selection

Check Fast_Unaligned_Load, instead of Slow_BSF, and also check for
Fast_Copy_Backward to enable __memcpy_ssse3_back.  The existing selection
order is updated to the following selection order (a hedged C sketch of
this dispatch follows the list):

1. __memcpy_avx_unaligned if AVX_Fast_Unaligned_Load bit is set.
2. __memcpy_sse2_unaligned if Fast_Unaligned_Load bit is set.
3. __memcpy_sse2 if SSSE3 isn't available.
4. __memcpy_ssse3_back if Fast_Copy_Backward bit is set.
5. __memcpy_ssse3 otherwise.
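
As a hedged illustration only, the selection order above can be sketched in
C.  `struct feature_bits' and `select_memcpy' below are made-up stand-ins
for the HAS_ARCH_FEATURE/HAS_CPU_FEATURE checks that memcpy.S performs in
assembly; only the branch order mirrors this commit.

/* Sketch of the new memcpy selection order; not glibc code.  */
#include <stdbool.h>
#include <stdio.h>

struct feature_bits
{
  bool avx_fast_unaligned_load;
  bool fast_unaligned_load;
  bool ssse3;
  bool fast_copy_backward;
};

static const char *
select_memcpy (const struct feature_bits *f)
{
  if (f->avx_fast_unaligned_load)
    return "__memcpy_avx_unaligned";
  if (f->fast_unaligned_load)
    return "__memcpy_sse2_unaligned";
  if (!f->ssse3)
    return "__memcpy_sse2";
  if (f->fast_copy_backward)
    return "__memcpy_ssse3_back";
  return "__memcpy_ssse3";
}

int
main (void)
{
  /* E.g. a CPU with SSSE3 and Fast_Copy_Backward but slow unaligned loads.  */
  struct feature_bits f = { .ssse3 = true, .fast_copy_backward = true };
  puts (select_memcpy (&f));	/* prints __memcpy_ssse3_back */
  return 0;
}

The SSSE3 check gates the last two cases because both __memcpy_ssse3 and
__memcpy_ssse3_back require SSSE3.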

	[BZ #18880]
	* sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load,
	instead of Slow_BSF, and also check for Fast_Copy_Backward to
	enable __memcpy_ssse3_back.
commit 14a1d7cc4c
parent 4b230f6a60
Author: H.J. Lu
Date:   2016-03-04 08:37:40 -08:00

2 changed files with 22 additions and 13 deletions

--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2016-03-04  Amit Pawar  <Amit.Pawar@amd.com>
+	    H.J. Lu  <hongjiu.lu@intel.com>
+
+	[BZ #18880]
+	* sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load,
+	instead of Slow_BSF, and also check for Fast_Copy_Backward to
+	enable __memcpy_ssse3_back.
+
 2016-03-03  H.J. Lu  <hongjiu.lu@intel.com>
 
 	[BZ #19758]

--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -35,22 +35,23 @@ ENTRY(__new_memcpy)
 	jz	1f
 	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
 	jz	1f
-	leaq	__memcpy_avx512_no_vzeroupper(%rip), %rax
+	lea	__memcpy_avx512_no_vzeroupper(%rip), %RAX_LP
 	ret
 #endif
-1:	leaq	__memcpy_avx_unaligned(%rip), %rax
+1:	lea	__memcpy_avx_unaligned(%rip), %RAX_LP
 	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-	jz	2f
-	ret
-2:	leaq	__memcpy_sse2(%rip), %rax
-	HAS_ARCH_FEATURE (Slow_BSF)
-	jnz	3f
-	leaq	__memcpy_sse2_unaligned(%rip), %rax
-	ret
-3:	HAS_CPU_FEATURE (SSSE3)
-	jz	4f
-	leaq	__memcpy_ssse3(%rip), %rax
-4:	ret
+	jnz	2f
+	lea	__memcpy_sse2_unaligned(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+	jnz	2f
+	lea	__memcpy_sse2(%rip), %RAX_LP
+	HAS_CPU_FEATURE (SSSE3)
+	jz	2f
+	lea	__memcpy_ssse3_back(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Fast_Copy_Backward)
+	jnz	2f
+	lea	__memcpy_ssse3(%rip), %RAX_LP
+2:	ret
 END(__new_memcpy)
 # undef ENTRY
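
For background, the dispatch above is a GNU indirect function (IFUNC): the
dynamic linker calls the resolver (__new_memcpy here) during relocation and
binds memcpy to whichever implementation's address it returns.  The C
program below is only a hedged, stand-alone illustration of that mechanism
using GCC's ifunc attribute and __builtin_cpu_supports; the names and copy
routines are made up, and it is not how glibc itself wires up __new_memcpy.

/* Minimal IFUNC demo (ELF/GCC specific); illustrative only.  */
#include <stddef.h>
#include <stdio.h>
#include <string.h>

static void *
copy_ssse3_like (void *dst, const void *src, size_t n)
{
  return memcpy (dst, src, n);	/* stand-in body */
}

static void *
copy_sse2_like (void *dst, const void *src, size_t n)
{
  return memcpy (dst, src, n);	/* stand-in body */
}

/* Resolver: runs once at relocation time and returns the chosen variant.  */
static void *(*resolve_my_memcpy (void)) (void *, const void *, size_t)
{
  __builtin_cpu_init ();
  if (__builtin_cpu_supports ("ssse3"))
    return copy_ssse3_like;
  return copy_sse2_like;
}

void *my_memcpy (void *, const void *, size_t)
  __attribute__ ((ifunc ("resolve_my_memcpy")));

int
main (void)
{
  char buf[6];
  my_memcpy (buf, "hello", sizeof buf);
  puts (buf);
  return 0;
}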