mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-22 13:00:06 +00:00
Skip SSE4.2 versions on Intel Silvermont
SSE2/SSSE3 versions are faster than SSE4.2 versions on Intel Silvermont.
This commit is contained in:
parent
89cd956937
commit
6308fd9a46
ChangeLog (14 lines changed)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2013-06-28 Liubov Dmitrieva <liubov.dmitrieva@intel.com>
+
+	* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): Set
+	bit_Slow_SSE4_2 and bit_Prefer_PMINUB_for_stringop for Intel
+	Silvermont.
+	* sysdeps/x86_64/multiarch/init-arch.h (bit_Slow_SSE4_2): New
+	macro.
+	(index_Slow_SSE4_2): Likewise.
+	(index_Prefer_PMINUB_for_stringop): Likewise.
+	* sysdeps/x86_64/multiarch/strchr.S: Skip SSE4.2 version if
+	bit_Slow_SSE4_2 is set.
+	* sysdeps/x86_64/multiarch/strcmp.S: Likewise.
+	* sysdeps/x86_64/multiarch/strrchr.S: Likewise.
+
 2013-06-28 Ryan S. Arnold <rsa@linux.vnet.ibm.com>
 
 	* sysdeps/powerpc/Makefile: Add comment about generating an offset to
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -81,8 +81,16 @@ __init_cpu_features (void)
 case 0x37:
   /* Unaligned load versions are faster than SSSE3
      on Silvermont. */
+#if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
+# error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
+#endif
+#if index_Fast_Unaligned_Load != index_Slow_SSE4_2
+# error index_Fast_Unaligned_Load != index_Slow_SSE4_2
+#endif
   __cpu_features.feature[index_Fast_Unaligned_Load]
-    |= bit_Fast_Unaligned_Load;
+    |= (bit_Fast_Unaligned_Load
+        | bit_Prefer_PMINUB_for_stringop
+        | bit_Slow_SSE4_2);
   break;
 
 default:
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -23,6 +23,7 @@
 #define bit_AVX_Usable (1 << 6)
 #define bit_FMA_Usable (1 << 7)
 #define bit_FMA4_Usable (1 << 8)
+#define bit_Slow_SSE4_2 (1 << 9)
 
 /* CPUID Feature flags. */
 
@@ -62,6 +63,7 @@
 # define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE
 # define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE
 # define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE
+# define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE
 
 #else /* __ASSEMBLER__ */
 
@@ -156,9 +158,11 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_Fast_Copy_Backward FEATURE_INDEX_1
 # define index_Slow_BSF FEATURE_INDEX_1
 # define index_Fast_Unaligned_Load FEATURE_INDEX_1
+# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1
 # define index_AVX_Usable FEATURE_INDEX_1
 # define index_FMA_Usable FEATURE_INDEX_1
 # define index_FMA4_Usable FEATURE_INDEX_1
+# define index_Slow_SSE4_2 FEATURE_INDEX_1
 
 # define HAS_ARCH_FEATURE(name) \
   ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
--- a/sysdeps/x86_64/multiarch/strchr.S
+++ b/sysdeps/x86_64/multiarch/strchr.S
@@ -29,6 +29,8 @@ ENTRY(strchr)
 	jne 1f
 	call __init_cpu_features
 1:	leaq __strchr_sse2(%rip), %rax
+	testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+	jnz 2f
 	testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
 	jz 2f
 	leaq __strchr_sse42(%rip), %rax
--- a/sysdeps/x86_64/multiarch/strcmp.S
+++ b/sysdeps/x86_64/multiarch/strcmp.S
@@ -88,14 +88,16 @@ ENTRY(STRCMP)
 	jne 1f
 	call __init_cpu_features
 1:
+	testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+	jnz 2f
 	leaq STRCMP_SSE42(%rip), %rax
 	testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
-	jnz 2f
-	leaq STRCMP_SSSE3(%rip), %rax
+	jnz 3f
+2:	leaq STRCMP_SSSE3(%rip), %rax
 	testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
-	jnz 2f
+	jnz 3f
 	leaq STRCMP_SSE2(%rip), %rax
-2:	ret
+3:	ret
 END(STRCMP)
 
 # ifdef USE_AS_STRCASECMP_L
@@ -109,16 +111,18 @@ ENTRY(__strcasecmp)
 # ifdef HAVE_AVX_SUPPORT
 	leaq __strcasecmp_avx(%rip), %rax
 	testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
-	jnz 2f
+	jnz 3f
 # endif
+	testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+	jnz 2f
 	leaq __strcasecmp_sse42(%rip), %rax
 	testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
-	jnz 2f
-	leaq __strcasecmp_ssse3(%rip), %rax
+	jnz 3f
+2:	leaq __strcasecmp_ssse3(%rip), %rax
 	testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
-	jnz 2f
+	jnz 3f
 	leaq __strcasecmp_sse2(%rip), %rax
-2:	ret
+3:	ret
 END(__strcasecmp)
 weak_alias (__strcasecmp, strcasecmp)
 # endif
@@ -133,16 +137,18 @@ ENTRY(__strncasecmp)
 # ifdef HAVE_AVX_SUPPORT
 	leaq __strncasecmp_avx(%rip), %rax
 	testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
-	jnz 2f
+	jnz 3f
 # endif
+	testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+	jnz 2f
 	leaq __strncasecmp_sse42(%rip), %rax
 	testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
-	jnz 2f
-	leaq __strncasecmp_ssse3(%rip), %rax
+	jnz 3f
+2:	leaq __strncasecmp_ssse3(%rip), %rax
 	testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
-	jnz 2f
+	jnz 3f
 	leaq __strncasecmp_sse2(%rip), %rax
-2:	ret
+3:	ret
 END(__strncasecmp)
 weak_alias (__strncasecmp, strncasecmp)
 # endif
--- a/sysdeps/x86_64/multiarch/strrchr.S
+++ b/sysdeps/x86_64/multiarch/strrchr.S
@@ -32,6 +32,8 @@ ENTRY(strrchr)
 	jne 1f
 	call __init_cpu_features
 1:	leaq __strrchr_sse2(%rip), %rax
+	testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+	jnz 2f
 	testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
 	jz 2f
 	leaq __strrchr_sse42(%rip), %rax
Loading…
Reference in New Issue
Block a user