mirror of
https://sourceware.org/git/glibc.git
synced 2025-01-07 10:00:07 +00:00
5082a287d5
Adding a 512-bit EVEX version of strstr. The algorithm works as follows: (1) We spend a few cycles at the beginning to peek into the needle. We locate an edge in the needle (first occurrence of 2 consecutive distinct characters) and also store the first 64 bytes into a zmm register. (2) We search for the edge in the haystack by looking into one cache line of the haystack at a time. This avoids having to read past a page boundary which can cause a seg fault. (3) If an edge is found in the haystack we first compare the first 64 bytes of the needle (already stored in a zmm register) before we proceed with a full string compare performed byte by byte. Benchmarking results: (old = strstr_sse2_unaligned, new = strstr_avx512) Geometric mean of all benchmarks: new / old = 0.66 Difficult skiptable(0) : new / old = 0.02 Difficult skiptable(1) : new / old = 0.01 Difficult 2-way : new / old = 0.25 Difficult testing first 2 : new / old = 1.26 Difficult skiptable(0) : new / old = 0.05 Difficult skiptable(1) : new / old = 0.06 Difficult 2-way : new / old = 0.26 Difficult testing first 2 : new / old = 1.05 Difficult skiptable(0) : new / old = 0.42 Difficult skiptable(1) : new / old = 0.24 Difficult 2-way : new / old = 0.21 Difficult testing first 2 : new / old = 1.04 Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
67 lines
2.3 KiB
C
67 lines
2.3 KiB
C
/* Multiple versions of strstr.
|
|
All versions must be listed in ifunc-impl-list.c.
|
|
Copyright (C) 2012-2022 Free Software Foundation, Inc.
|
|
This file is part of the GNU C Library.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<https://www.gnu.org/licenses/>. */
|
|
|
|
/* Redefine strstr so that the compiler won't complain about the type
|
|
mismatch with the IFUNC selector in strong_alias, below. */
|
|
#undef strstr
|
|
#define strstr __redirect_strstr
|
|
#include <string.h>
|
|
#undef strstr
|
|
|
|
/* Set STRSTR to __strstr_sse2 before string/strstr.c is included below.
   NOTE(review): presumably string/strstr.c names the function it defines
   via STRSTR, so the included code becomes __strstr_sse2 -- confirm
   against that file.  */
#define STRSTR __strstr_sse2
|
|
/* In SHARED builds, replace libc_hidden_builtin_def so that it ignores
   its argument and always expands to a __hidden_ver1 invocation naming
   __strstr_sse2 and __GI_strstr.
   NOTE(review): this looks like it binds the internal __GI_strstr name
   to the __strstr_sse2 variant -- confirm against the __hidden_ver1
   definition.  */
#ifdef SHARED
|
|
# undef libc_hidden_builtin_def
|
|
# define libc_hidden_builtin_def(name) \
|
|
__hidden_ver1 (__strstr_sse2, __GI_strstr, __strstr_sse2);
|
|
#endif
|
|
|
|
/* Pull in the C implementation with the macro settings made above.  */
#include "string/strstr.c"
|
|
|
|
/* The three implementations that IFUNC_SELECTOR in this file can return.
   Each is declared with the type of __redirect_strstr and marked
   attribute_hidden.  */
extern __typeof (__redirect_strstr) __strstr_sse2_unaligned attribute_hidden;
|
|
extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden;
|
|
extern __typeof (__redirect_strstr) __strstr_avx512 attribute_hidden;
|
|
|
|
#include "init-arch.h"
|
|
|
|
/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
|
|
ifunc symbol properly. */
|
|
extern __typeof (__redirect_strstr) __libc_strstr;
|
|
|
|
static inline void *
|
|
IFUNC_SELECTOR (void)
|
|
{
|
|
const struct cpu_features *cpu_features = __get_cpu_features ();
|
|
|
|
if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
|
|
&& CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
|
&& CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
|
|
&& CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
|
|
&& CPU_FEATURE_USABLE_P (cpu_features, BMI2))
|
|
return __strstr_avx512;
|
|
|
|
if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
|
|
return __strstr_sse2_unaligned;
|
|
|
|
return __strstr_sse2;
|
|
}
|
|
|
|
/* Define __libc_strstr through libc_ifunc_redirected, passing
   __redirect_strstr as the first argument and the result of
   IFUNC_SELECTOR () as the implementation expression.
   NOTE(review): presumably the first argument supplies the symbol's
   type -- confirm against the libc_ifunc_redirected definition.  */
libc_ifunc_redirected (__redirect_strstr, __libc_strstr, IFUNC_SELECTOR ());
|
|
/* Remove any strstr macro so the alias below uses the plain identifier.  */
#undef strstr
|
|
/* Make strstr an alias of __libc_strstr.  */
strong_alias (__libc_strstr, strstr)
|