x86: Add EVEX optimized memchr family not safe for RTM

No bug.

This commit adds a new implementation for EVEX memchr that is not safe
for RTM because it uses vzeroupper. The benefit is that by using
ymm0-ymm15 it can use vpcmpeq and vpternlogd in the 4x loop which is
faster than the RTM safe version which cannot use vpcmpeq because
there is no EVEX encoding for the instruction. All parts of the
implementation aside from the 4x loop are the same for the two
versions and the optimization is only relevant for large sizes.

Tigerlake:
size  , algn  , Pos   , Cur T , New T , Win     , Dif
512   , 6     , 192   , 9.2   , 9.04  , no-RTM  , 0.16
512   , 7     , 224   , 9.19  , 8.98  , no-RTM  , 0.21
2048  , 0     , 256   , 10.74 , 10.54 , no-RTM  , 0.2
2048  , 0     , 512   , 14.81 , 14.87 , RTM     , 0.06
2048  , 0     , 1024  , 22.97 , 22.57 , no-RTM  , 0.4
2048  , 0     , 2048  , 37.49 , 34.51 , no-RTM  , 2.98   <--

Icelake:
size  , algn  , Pos   , Cur T , New T , Win     , Dif
512   , 6     , 192   , 7.6   , 7.3   , no-RTM  , 0.3
512   , 7     , 224   , 7.63  , 7.27  , no-RTM  , 0.36
2048  , 0     , 256   , 8.48  , 8.38  , no-RTM  , 0.1
2048  , 0     , 512   , 11.57 , 11.42 , no-RTM  , 0.15
2048  , 0     , 1024  , 17.92 , 17.38 , no-RTM  , 0.54
2048  , 0     , 2048  , 30.37 , 27.34 , no-RTM  , 3.03   <--

test-memchr, test-wmemchr, and test-rawmemchr are all passing.

Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
This commit is contained in:
Noah Goldstein 2021-05-04 19:02:40 -04:00
parent 6ea916adfa
commit 104c7b1967
10 changed files with 217 additions and 41 deletions

View File

@ -77,7 +77,9 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c \
strncmp-evex \ strncmp-evex \
strncpy-evex \ strncpy-evex \
strnlen-evex \ strnlen-evex \
strrchr-evex strrchr-evex \
memchr-evex-rtm \
rawmemchr-evex-rtm
CFLAGS-varshift.c += -msse4 CFLAGS-varshift.c += -msse4
CFLAGS-strcspn-c.c += -msse4 CFLAGS-strcspn-c.c += -msse4
CFLAGS-strpbrk-c.c += -msse4 CFLAGS-strpbrk-c.c += -msse4
@ -110,7 +112,8 @@ sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \
wcsnlen-evex \ wcsnlen-evex \
wcsrchr-evex \ wcsrchr-evex \
wmemchr-evex \ wmemchr-evex \
wmemcmp-evex-movbe wmemcmp-evex-movbe \
wmemchr-evex-rtm
endif endif
ifeq ($(subdir),debug) ifeq ($(subdir),debug)

View File

@ -0,0 +1,55 @@
/* Common definition for ifunc selection optimized with EVEX.
All versions must be listed in ifunc-impl-list.c.
Copyright (C) 2017-2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <init-arch.h>
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_rtm) attribute_hidden;
/* Return the implementation of REDIRECT_NAME to bind for the running
   CPU.  Preference order: EVEX (the RTM-safe flavour when RTM is
   usable), then AVX2 (again the RTM flavour when RTM is usable, and the
   plain flavour only if Prefer_No_VZEROUPPER is not set), and finally
   the SSE2 fallback.  */
static inline void *
IFUNC_SELECTOR (void)
{
  const struct cpu_features *features = __get_cpu_features ();

  /* Every vector variant requires AVX2, BMI2 and fast unaligned AVX
     loads; without all three only SSE2 is eligible.  */
  if (!(CPU_FEATURE_USABLE_P (features, AVX2)
	&& CPU_FEATURE_USABLE_P (features, BMI2)
	&& CPU_FEATURES_ARCH_P (features, AVX_Fast_Unaligned_Load)))
    return OPTIMIZE (sse2);

  /* EVEX variants need both AVX512VL and AVX512BW.  */
  if (CPU_FEATURE_USABLE_P (features, AVX512VL)
      && CPU_FEATURE_USABLE_P (features, AVX512BW))
    {
      if (!CPU_FEATURE_USABLE_P (features, RTM))
	return OPTIMIZE (evex);

      return OPTIMIZE (evex_rtm);
    }

  /* AVX2 only from here on.  */
  if (CPU_FEATURE_USABLE_P (features, RTM))
    return OPTIMIZE (avx2_rtm);

  /* The plain AVX2 version is skipped when the CPU prefers to avoid
     vzeroupper.  */
  if (CPU_FEATURES_ARCH_P (features, Prefer_No_VZEROUPPER))
    return OPTIMIZE (sse2);

  return OPTIMIZE (avx2);
}

View File

@ -52,6 +52,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (AVX512BW) && CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)), && CPU_FEATURE_USABLE (BMI2)),
__memchr_evex) __memchr_evex)
IFUNC_IMPL_ADD (array, i, memchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__memchr_evex_rtm)
IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_sse2)) IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_sse2))
/* Support sysdeps/x86_64/multiarch/memcmp.c. */ /* Support sysdeps/x86_64/multiarch/memcmp.c. */
@ -288,6 +293,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (AVX512BW) && CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)), && CPU_FEATURE_USABLE (BMI2)),
__rawmemchr_evex) __rawmemchr_evex)
IFUNC_IMPL_ADD (array, i, rawmemchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__rawmemchr_evex_rtm)
IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_sse2)) IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_sse2))
/* Support sysdeps/x86_64/multiarch/strlen.c. */ /* Support sysdeps/x86_64/multiarch/strlen.c. */
@ -708,6 +718,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (AVX512BW) && CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)), && CPU_FEATURE_USABLE (BMI2)),
__wmemchr_evex) __wmemchr_evex)
IFUNC_IMPL_ADD (array, i, wmemchr,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wmemchr_evex_rtm)
IFUNC_IMPL_ADD (array, i, wmemchr, 1, __wmemchr_sse2)) IFUNC_IMPL_ADD (array, i, wmemchr, 1, __wmemchr_sse2))
/* Support sysdeps/x86_64/multiarch/wmemcmp.c. */ /* Support sysdeps/x86_64/multiarch/wmemcmp.c. */

View File

@ -0,0 +1,8 @@
/* RTM-safe build of the EVEX memchr.  Defining USE_IN_RTM makes
   memchr-evex.S define VZEROUPPER as empty and select the 4x loop
   that does not use ymm0-ymm15.  */
/* Default entry-point name; a file that includes this one may define
   MEMCHR first to override it.  */
#ifndef MEMCHR
# define MEMCHR __memchr_evex_rtm
#endif
#define USE_IN_RTM 1
/* Override the default section suffix (p##.evex) used by
   memchr-evex.S.  */
#define SECTION(p) p##.evex.rtm
#include "memchr-evex.S"

View File

@ -38,10 +38,32 @@
# define CHAR_SIZE 1 # define CHAR_SIZE 1
# endif # endif
/* In the 4x loop the RTM and non-RTM versions have data pointer
off by VEC_SIZE * 4 with RTM version being VEC_SIZE * 4 greater.
This is represented by BASE_OFFSET. Also, because the RTM
version uses vpcmp, which stores a bit per element compared, whereas
the non-RTM version uses vpcmpeq, which stores a bit per byte
compared, RET_SCALE of CHAR_SIZE is only relevant for the RTM
version. */
# ifdef USE_IN_RTM
# define VZEROUPPER
# define BASE_OFFSET (VEC_SIZE * 4)
# define RET_SCALE CHAR_SIZE
# else
# define VZEROUPPER vzeroupper
# define BASE_OFFSET 0
# define RET_SCALE 1
# endif
/* In the return from 4x loop memchr and rawmemchr versions have
data pointers off by VEC_SIZE * 4 with memchr version being
VEC_SIZE * 4 greater. */
# ifdef USE_AS_RAWMEMCHR # ifdef USE_AS_RAWMEMCHR
# define RET_OFFSET (BASE_OFFSET - (VEC_SIZE * 4))
# define RAW_PTR_REG rcx # define RAW_PTR_REG rcx
# define ALGN_PTR_REG rdi # define ALGN_PTR_REG rdi
# else # else
# define RET_OFFSET BASE_OFFSET
# define RAW_PTR_REG rdi # define RAW_PTR_REG rdi
# define ALGN_PTR_REG rcx # define ALGN_PTR_REG rcx
# endif # endif
@ -57,11 +79,15 @@
# define YMM5 ymm21 # define YMM5 ymm21
# define YMM6 ymm22 # define YMM6 ymm22
# ifndef SECTION
# define SECTION(p) p##.evex
# endif
# define VEC_SIZE 32 # define VEC_SIZE 32
# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) # define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
# define PAGE_SIZE 4096 # define PAGE_SIZE 4096
.section .text.evex,"ax",@progbits .section SECTION(.text),"ax",@progbits
ENTRY (MEMCHR) ENTRY (MEMCHR)
# ifndef USE_AS_RAWMEMCHR # ifndef USE_AS_RAWMEMCHR
/* Check for zero length. */ /* Check for zero length. */
@ -237,14 +263,15 @@ L(cross_page_continue):
/* Check if at last CHAR_PER_VEC * 4 length. */ /* Check if at last CHAR_PER_VEC * 4 length. */
subq $(CHAR_PER_VEC * 4), %rdx subq $(CHAR_PER_VEC * 4), %rdx
jbe L(last_4x_vec_or_less_cmpeq) jbe L(last_4x_vec_or_less_cmpeq)
addq $VEC_SIZE, %rdi /* +VEC_SIZE if USE_IN_RTM otherwise +VEC_SIZE * 5. */
addq $(VEC_SIZE + (VEC_SIZE * 4 - BASE_OFFSET)), %rdi
/* Align data to VEC_SIZE * 4 for the loop and readjust length. /* Align data to VEC_SIZE * 4 for the loop and readjust length.
*/ */
# ifdef USE_AS_WMEMCHR # ifdef USE_AS_WMEMCHR
movl %edi, %ecx movl %edi, %ecx
andq $-(4 * VEC_SIZE), %rdi andq $-(4 * VEC_SIZE), %rdi
andl $(VEC_SIZE * 4 - 1), %ecx subl %edi, %ecx
/* NB: Divide bytes by 4 to get the wchar_t count. */ /* NB: Divide bytes by 4 to get the wchar_t count. */
sarl $2, %ecx sarl $2, %ecx
addq %rcx, %rdx addq %rcx, %rdx
@ -254,15 +281,28 @@ L(cross_page_continue):
subq %rdi, %rdx subq %rdi, %rdx
# endif # endif
# else # else
addq $VEC_SIZE, %rdi addq $(VEC_SIZE + (VEC_SIZE * 4 - BASE_OFFSET)), %rdi
andq $-(4 * VEC_SIZE), %rdi andq $-(4 * VEC_SIZE), %rdi
# endif # endif
# ifdef USE_IN_RTM
vpxorq %XMMZERO, %XMMZERO, %XMMZERO vpxorq %XMMZERO, %XMMZERO, %XMMZERO
# else
/* copy ymmmatch to ymm0 so we can use vpcmpeq which is not
encodable with EVEX registers (ymm16-ymm31). */
vmovdqa64 %YMMMATCH, %ymm0
# endif
/* Compare 4 * VEC at a time forward. */ /* Compare 4 * VEC at a time forward. */
.p2align 4 .p2align 4
L(loop_4x_vec): L(loop_4x_vec):
/* Two versions of the loop. One does not require
vzeroupper because it does not use ymm0-ymm15; the other requires
vzeroupper because it uses ymm0-ymm15. The reason why ymm0-ymm15
is used at all is because there is no EVEX encoding of vpcmpeq and
with vpcmpeq this loop can be performed more efficiently. The
non-vzeroupper version is safe for RTM while the vzeroupper
version should be preferred if RTM is not supported. */
# ifdef USE_IN_RTM
/* It would be possible to save some instructions using 4x VPCMP /* It would be possible to save some instructions using 4x VPCMP
but bottleneck on port 5 makes it not worth it. */ but bottleneck on port 5 makes it not worth it. */
VPCMP $4, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k1 VPCMP $4, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k1
@ -273,12 +313,55 @@ L(loop_4x_vec):
/* Reduce VEC2 / VEC3 with min and VEC1 with zero mask. */ /* Reduce VEC2 / VEC3 with min and VEC1 with zero mask. */
VPMINU %YMM2, %YMM3, %YMM3{%k1}{z} VPMINU %YMM2, %YMM3, %YMM3{%k1}{z}
VPCMP $0, %YMM3, %YMMZERO, %k2 VPCMP $0, %YMM3, %YMMZERO, %k2
# else
/* Since vptern can only take 3x vectors, it is fastest to do 1 vec
separately with EVEX vpcmp. */
# ifdef USE_AS_WMEMCHR
/* vptern can only accept masks for epi32/epi64 so can only save
instruction using not equals mask on vptern with wmemchr. */
VPCMP $4, (%rdi), %YMMMATCH, %k1
# else
VPCMP $0, (%rdi), %YMMMATCH, %k1
# endif
/* Compare 3x with vpcmpeq and or them all together with vptern.
*/
VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm2
VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm3
VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm4
# ifdef USE_AS_WMEMCHR
/* This takes the not of or between ymm2, ymm3, ymm4 as well as
combines result from VEC0 with zero mask. */
vpternlogd $1, %ymm2, %ymm3, %ymm4{%k1}{z}
vpmovmskb %ymm4, %ecx
# else
/* 254 is mask for oring ymm2, ymm3, ymm4 into ymm4. */
vpternlogd $254, %ymm2, %ymm3, %ymm4
vpmovmskb %ymm4, %ecx
kmovd %k1, %eax
# endif
# endif
# ifdef USE_AS_RAWMEMCHR # ifdef USE_AS_RAWMEMCHR
subq $-(VEC_SIZE * 4), %rdi subq $-(VEC_SIZE * 4), %rdi
# endif
# ifdef USE_IN_RTM
kortestd %k2, %k3 kortestd %k2, %k3
# else
# ifdef USE_AS_WMEMCHR
/* ecx contains not of matches. All 1s means no matches. incl will
overflow and set zeroflag if that is the case. */
incl %ecx
# else
/* If either VEC1 (eax) or VEC2-VEC4 (ecx) are not zero. Adding
to ecx is not an issue because if eax is non-zero it will be
used for returning the match. If it is zero the add does
nothing. */
addq %rax, %rcx
# endif
# endif
# ifdef USE_AS_RAWMEMCHR
jz L(loop_4x_vec) jz L(loop_4x_vec)
# else # else
kortestd %k2, %k3
jnz L(loop_4x_vec_end) jnz L(loop_4x_vec_end)
subq $-(VEC_SIZE * 4), %rdi subq $-(VEC_SIZE * 4), %rdi
@ -288,10 +371,11 @@ L(loop_4x_vec):
/* Fall through into less than 4 remaining vectors of length case. /* Fall through into less than 4 remaining vectors of length case.
*/ */
VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0 VPCMP $0, BASE_OFFSET(%rdi), %YMMMATCH, %k0
addq $(BASE_OFFSET - VEC_SIZE), %rdi
kmovd %k0, %eax kmovd %k0, %eax
addq $(VEC_SIZE * 3), %rdi VZEROUPPER
.p2align 4
L(last_4x_vec_or_less): L(last_4x_vec_or_less):
/* Check if first VEC contained match. */ /* Check if first VEC contained match. */
testl %eax, %eax testl %eax, %eax
@ -338,73 +422,78 @@ L(loop_4x_vec_end):
/* rawmemchr will fall through into this if match was found in /* rawmemchr will fall through into this if match was found in
loop. */ loop. */
# if defined USE_IN_RTM || defined USE_AS_WMEMCHR
/* k1 has not of matches with VEC1. */ /* k1 has not of matches with VEC1. */
kmovd %k1, %eax kmovd %k1, %eax
# ifdef USE_AS_WMEMCHR # ifdef USE_AS_WMEMCHR
subl $((1 << CHAR_PER_VEC) - 1), %eax subl $((1 << CHAR_PER_VEC) - 1), %eax
# else # else
incl %eax incl %eax
# endif
# else
/* eax already has matches for VEC1. */
testl %eax, %eax
# endif # endif
jnz L(last_vec_x1_return) jnz L(last_vec_x1_return)
# ifdef USE_IN_RTM
VPCMP $0, %YMM2, %YMMZERO, %k0 VPCMP $0, %YMM2, %YMMZERO, %k0
kmovd %k0, %eax kmovd %k0, %eax
# else
vpmovmskb %ymm2, %eax
# endif
testl %eax, %eax testl %eax, %eax
jnz L(last_vec_x2_return) jnz L(last_vec_x2_return)
# ifdef USE_IN_RTM
kmovd %k2, %eax kmovd %k2, %eax
testl %eax, %eax testl %eax, %eax
jnz L(last_vec_x3_return) jnz L(last_vec_x3_return)
kmovd %k3, %eax kmovd %k3, %eax
tzcntl %eax, %eax tzcntl %eax, %eax
# ifdef USE_AS_RAWMEMCHR leaq (VEC_SIZE * 3 + RET_OFFSET)(%rdi, %rax, CHAR_SIZE), %rax
leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
# else # else
leaq (VEC_SIZE * 7)(%rdi, %rax, CHAR_SIZE), %rax vpmovmskb %ymm3, %eax
/* Combine matches in VEC3 (eax) with matches in VEC4 (ecx). */
salq $VEC_SIZE, %rcx
orq %rcx, %rax
tzcntq %rax, %rax
leaq (VEC_SIZE * 2 + RET_OFFSET)(%rdi, %rax), %rax
VZEROUPPER
# endif # endif
ret ret
.p2align 4 .p2align 4
L(last_vec_x1_return): L(last_vec_x1_return):
tzcntl %eax, %eax tzcntl %eax, %eax
# ifdef USE_AS_RAWMEMCHR # if defined USE_AS_WMEMCHR || RET_OFFSET != 0
# ifdef USE_AS_WMEMCHR
/* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
leaq (%rdi, %rax, CHAR_SIZE), %rax leaq RET_OFFSET(%rdi, %rax, CHAR_SIZE), %rax
# else
addq %rdi, %rax
# endif
# else # else
/* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ addq %rdi, %rax
leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax
# endif # endif
VZEROUPPER
ret ret
.p2align 4 .p2align 4
L(last_vec_x2_return): L(last_vec_x2_return):
tzcntl %eax, %eax tzcntl %eax, %eax
# ifdef USE_AS_RAWMEMCHR /* NB: Multiply bytes by RET_SCALE to get the wchar_t count
/* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ if relevant (RET_SCALE = CHAR_SIZE if USE_AS_WMEMCHR and
leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax USE_IN_RTM are both defined). Otherwise RET_SCALE = 1. */
# else leaq (VEC_SIZE + RET_OFFSET)(%rdi, %rax, RET_SCALE), %rax
/* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ VZEROUPPER
leaq (VEC_SIZE * 5)(%rdi, %rax, CHAR_SIZE), %rax
# endif
ret ret
# ifdef USE_IN_RTM
.p2align 4 .p2align 4
L(last_vec_x3_return): L(last_vec_x3_return):
tzcntl %eax, %eax tzcntl %eax, %eax
# ifdef USE_AS_RAWMEMCHR
/* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax leaq (VEC_SIZE * 2 + RET_OFFSET)(%rdi, %rax, CHAR_SIZE), %rax
# else
/* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
leaq (VEC_SIZE * 6)(%rdi, %rax, CHAR_SIZE), %rax
# endif
ret ret
# endif
# ifndef USE_AS_RAWMEMCHR # ifndef USE_AS_RAWMEMCHR
L(last_4x_vec_or_less_cmpeq): L(last_4x_vec_or_less_cmpeq):

View File

@ -24,7 +24,7 @@
# undef memchr # undef memchr
# define SYMBOL_NAME memchr # define SYMBOL_NAME memchr
# include "ifunc-avx2.h" # include "ifunc-evex.h"
libc_ifunc_redirected (__redirect_memchr, memchr, IFUNC_SELECTOR ()); libc_ifunc_redirected (__redirect_memchr, memchr, IFUNC_SELECTOR ());
strong_alias (memchr, __memchr) strong_alias (memchr, __memchr)

View File

@ -0,0 +1,3 @@
/* rawmemchr flavour of the RTM-safe EVEX memchr: set the entry-point
   name, enable the USE_AS_RAWMEMCHR code paths, and reuse
   memchr-evex-rtm.S for everything else.  */
#define MEMCHR __rawmemchr_evex_rtm
#define USE_AS_RAWMEMCHR 1
#include "memchr-evex-rtm.S"

View File

@ -26,7 +26,7 @@
# undef __rawmemchr # undef __rawmemchr
# define SYMBOL_NAME rawmemchr # define SYMBOL_NAME rawmemchr
# include "ifunc-avx2.h" # include "ifunc-evex.h"
libc_ifunc_redirected (__redirect_rawmemchr, __rawmemchr, libc_ifunc_redirected (__redirect_rawmemchr, __rawmemchr,
IFUNC_SELECTOR ()); IFUNC_SELECTOR ());

View File

@ -0,0 +1,3 @@
/* wmemchr flavour of the RTM-safe EVEX memchr: set the entry-point
   name, enable the USE_AS_WMEMCHR code paths, and reuse
   memchr-evex-rtm.S for everything else.  */
#define MEMCHR __wmemchr_evex_rtm
#define USE_AS_WMEMCHR 1
#include "memchr-evex-rtm.S"

View File

@ -26,7 +26,7 @@
# undef __wmemchr # undef __wmemchr
# define SYMBOL_NAME wmemchr # define SYMBOL_NAME wmemchr
# include "ifunc-avx2.h" # include "ifunc-evex.h"
libc_ifunc_redirected (__redirect_wmemchr, __wmemchr, IFUNC_SELECTOR ()); libc_ifunc_redirected (__redirect_wmemchr, __wmemchr, IFUNC_SELECTOR ());
weak_alias (__wmemchr, wmemchr) weak_alias (__wmemchr, wmemchr)