mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-22 02:40:08 +00:00
x86-64: Fix memcpy IFUNC selection
Check Fast_Unaligned_Load, instead of Slow_BSF, and also check for Fast_Copy_Backward to enable __memcpy_ssse3_back. The existing selection order is replaced with the following selection order: 1. __memcpy_avx_unaligned if AVX_Fast_Unaligned_Load bit is set. 2. __memcpy_sse2_unaligned if Fast_Unaligned_Load bit is set. 3. __memcpy_sse2 if SSSE3 isn't available. 4. __memcpy_ssse3_back if Fast_Copy_Backward bit is set. 5. __memcpy_ssse3. [BZ #18880] * sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load, instead of Slow_BSF, and also check for Fast_Copy_Backward to enable __memcpy_ssse3_back.
This commit is contained in:
parent
4b230f6a60
commit
14a1d7cc4c
@ -1,3 +1,11 @@
|
|||||||
|
2016-03-04 Amit Pawar <Amit.Pawar@amd.com>
|
||||||
|
H.J. Lu <hongjiu.lu@intel.com>
|
||||||
|
|
||||||
|
[BZ #18880]
|
||||||
|
* sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load,
|
||||||
|
instead of Slow_BSF, and also check for Fast_Copy_Backward to
|
||||||
|
enable __memcpy_ssse3_back.
|
||||||
|
|
||||||
2016-03-03 H.J. Lu <hongjiu.lu@intel.com>
|
2016-03-03 H.J. Lu <hongjiu.lu@intel.com>
|
||||||
|
|
||||||
[BZ #19758]
|
[BZ #19758]
|
||||||
|
@ -35,22 +35,23 @@ ENTRY(__new_memcpy)
|
|||||||
jz 1f
|
jz 1f
|
||||||
HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
|
HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
|
||||||
jz 1f
|
jz 1f
|
||||||
leaq __memcpy_avx512_no_vzeroupper(%rip), %rax
|
lea __memcpy_avx512_no_vzeroupper(%rip), %RAX_LP
|
||||||
ret
|
ret
|
||||||
#endif
|
#endif
|
||||||
1: leaq __memcpy_avx_unaligned(%rip), %rax
|
1: lea __memcpy_avx_unaligned(%rip), %RAX_LP
|
||||||
HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
|
HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
|
||||||
jz 2f
|
jnz 2f
|
||||||
ret
|
lea __memcpy_sse2_unaligned(%rip), %RAX_LP
|
||||||
2: leaq __memcpy_sse2(%rip), %rax
|
HAS_ARCH_FEATURE (Fast_Unaligned_Load)
|
||||||
HAS_ARCH_FEATURE (Slow_BSF)
|
jnz 2f
|
||||||
jnz 3f
|
lea __memcpy_sse2(%rip), %RAX_LP
|
||||||
leaq __memcpy_sse2_unaligned(%rip), %rax
|
HAS_CPU_FEATURE (SSSE3)
|
||||||
ret
|
jz 2f
|
||||||
3: HAS_CPU_FEATURE (SSSE3)
|
lea __memcpy_ssse3_back(%rip), %RAX_LP
|
||||||
jz 4f
|
HAS_ARCH_FEATURE (Fast_Copy_Backward)
|
||||||
leaq __memcpy_ssse3(%rip), %rax
|
jnz 2f
|
||||||
4: ret
|
lea __memcpy_ssse3(%rip), %RAX_LP
|
||||||
|
2: ret
|
||||||
END(__new_memcpy)
|
END(__new_memcpy)
|
||||||
|
|
||||||
# undef ENTRY
|
# undef ENTRY
|
||||||
|
Loading…
Reference in New Issue
Block a user