Loongarch: Add ifunc support and add different versions of strlen

strlen-lasx is implemented with LASX SIMD instructions (256-bit).
strlen-lsx is implemented with LSX SIMD instructions (128-bit).
strlen-aligned is implemented with basic LoongArch instructions and never uses unaligned memory accesses.
This commit is contained in:
dengjianbo 2023-08-08 14:15:44 +08:00 committed by caiyinyu
parent cb7954c4c2
commit 135407f431
9 changed files with 418 additions and 0 deletions

View File

@ -0,0 +1,7 @@
# When building the string subdirectory, add the three strlen implementation
# objects (aligned, LSX and LASX variants) to sysdep_routines.
ifeq ($(subdir),string)
sysdep_routines += \
strlen-aligned \
strlen-lsx \
strlen-lasx \
# sysdep_routines
endif

View File

@ -0,0 +1,41 @@
/* Enumerate available IFUNC implementations of a function.  LoongArch64 version.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <assert.h>
#include <string.h>
#include <wchar.h>
#include <ldsodefs.h>
#include <ifunc-impl-list.h>
#include <stdio.h>
/* List the IFUNC implementations available for the function called NAME.

   NAME  - name of the function being queried.
   ARRAY - output array the IFUNC_IMPL_ADD entries refer to.
   MAX   - value passed by the caller; it is copied into `i', which is the
	   value handed to IFUNC_IMPL/IFUNC_IMPL_ADD.

   Only strlen is handled here.  Its candidates are __strlen_lasx (usable
   when SUPPORT_LASX), __strlen_lsx (usable when SUPPORT_LSX) and
   __strlen_aligned (always usable); the two SIMD entries are left out of
   soft-float builds.

   NOTE(review): IFUNC_IMPL and IFUNC_IMPL_ADD come from <ifunc-impl-list.h>
   and are not visible here, so how they use `i' (as a capacity or as a
   running index) and whether IFUNC_IMPL returns on a match must be confirmed
   there.  As written, the final statement returns `i', which was initialised
   to MAX; if that path is what runs when NAME matches no entry, confirm that
   returning MAX rather than 0 is intended.  */
size_t
__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
size_t max)
{
size_t i = max;
/* strlen: the SIMD variants first, the basic-instruction variant last.  */
IFUNC_IMPL (i, name, strlen,
#if !defined __loongarch_soft_float
IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_LASX, __strlen_lasx)
IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_LSX, __strlen_lsx)
#endif
IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned)
)
return i;
}

View File

@ -0,0 +1,40 @@
/* Common definition for strlen ifunc selections.
All versions must be listed in ifunc-impl-list.c.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <ldsodefs.h>
#include <ifunc-init.h>
/* Hidden declarations of the candidate implementations.  The LASX and LSX
   variants are declared only when not building for soft float; the aligned
   variant is always declared.
   NOTE(review): REDIRECT_NAME and OPTIMIZE are not defined in this header;
   presumably they come from <ifunc-init.h> and are derived from the
   including file's SYMBOL_NAME -- confirm.  */
#if !defined __loongarch_soft_float
extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
#endif
extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
/* Choose the implementation to bind the symbol to.

   Unless this is a soft-float build, the LASX variant is returned when
   SUPPORT_LASX is set, otherwise the LSX variant when SUPPORT_LSX is set.
   In every remaining case the aligned variant is returned.  */
static inline void *
IFUNC_SELECTOR (void)
{
#ifndef __loongarch_soft_float
  /* Widest vector extension first.  */
  if (SUPPORT_LASX)
    return OPTIMIZE (lasx);

  if (SUPPORT_LSX)
    return OPTIMIZE (lsx);
#endif

  /* Fallback that needs no vector support.  */
  return OPTIMIZE (aligned);
}

View File

@ -0,0 +1,100 @@
/* Optimized strlen implementation using basic LoongArch instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include <sys/regdef.h>
#include <sys/asm.h>
/* Inside libc this file provides the __strlen_aligned variant; in any other
   build it defines strlen itself.  */
#if IS_IN (libc)
# define STRLEN __strlen_aligned
#else
# define STRLEN strlen
#endif
/* STRLEN: the string pointer arrives in a0 and the length is returned in a0.

   Memory is only read with ld.d from addresses rounded down to a multiple
   of 8, one 64-bit word at a time.

   Zero-byte test used below, for a word x:
     (x - 0x0101010101010101) & ~x & 0x8080808080808080
   is non-zero exactly when x contains a zero byte, and its lowest set bit
   lies in the first such byte.  L(count_pos) turns that bit position into a
   byte index with ctz.d / 8, i.e. the lowest-order byte is treated as the
   lowest address.

   Register roles:
     a1 = original pointer        a0 = address of the word being examined
     a2 = 0x0101010101010101      t3 = 0x8080808080808080
     a3 = 0x7f7f7f7f7f7f7f7f      t2 = current word, t0/t1 = scratch.  */
LEAF(STRLEN, 6)
/* Keep the original pointer and round a0 down to an 8-byte boundary.  */
move a1, a0
bstrins.d a0, zero, 2, 0
/* a2 is built in three steps (lu12i.w, ori, bstrins.d), interleaved with the
   first load and the computation of the bit offset 8 * (s & 7) in t1.  */
lu12i.w a2, 0x01010
li.w t0, -1
ld.d t2, a0, 0
andi t1, a1, 0x7
ori a2, a2, 0x101
slli.d t1, t1, 3
bstrins.d a2, a2, 63, 32
/* t1 = -1 << (8 * (s & 7)): zero bits cover the bytes that precede s.  */
sll.d t1, t0, t1
/* t3 = a2 << 7 = 0x80 in every byte; a3 = ~t3.  */
slli.d t3, a2, 7
nor a3, zero, t3
/* Set the bytes before s to 0xff so they cannot be taken for a terminator.  */
orn t2, t2, t1
/* Full zero-byte test on the first word.  */
sub.d t0, t2, a2
nor t1, t2, a3
and t0, t0, t1
bnez t0, L(count_pos)
addi.d a0, a0, 8
/* Fast loop, two words per iteration, using only (x - a2) & t3.  That test
   is non-zero when x has a zero byte but can also fire for bytes that have
   bit 7 set, so a hit is re-checked in L(more_check).  */
L(loop_16_7bit):
ld.d t2, a0, 0
sub.d t1, t2, a2
and t0, t1, t3
bnez t0, L(more_check)
ld.d t2, a0, 8
sub.d t1, t2, a2
and t0, t1, t3
addi.d a0, a0, 16
beqz t0, L(loop_16_7bit)
/* The hit was in the second word: step a0 back so it addresses that word.  */
addi.d a0, a0, -8
/* Re-check the candidate word (still in t2, with x - a2 in t1) using the
   full test.  If it has no zero byte, skip it and continue in the loop that
   applies the full test to every word.  */
L(more_check):
nor t0, t2, a3
and t0, t1, t0
bnez t0, L(count_pos)
addi.d a0, a0, 8
/* Same two-words-per-iteration structure, with the full zero-byte test.  */
L(loop_16_8bit):
ld.d t2, a0, 0
sub.d t1, t2, a2
nor t0, t2, a3
and t0, t0, t1
bnez t0, L(count_pos)
ld.d t2, a0, 8
addi.d a0, a0, 16
sub.d t1, t2, a2
nor t0, t2, a3
and t0, t0, t1
beqz t0, L(loop_16_8bit)
/* The hit was in the second word: make a0 address it.  */
addi.d a0, a0, -8
/* a0 = address of the word holding the terminator, t0 = test result.
   Length = (a0 - a1) + index of the first zero byte (ctz.d (t0) / 8).  */
L(count_pos):
ctz.d t1, t0
sub.d a0, a0, a1
srli.d t1, t1, 3
add.d a0, a0, t1
jr ra
END(STRLEN)
libc_hidden_builtin_def (STRLEN)

View File

@ -0,0 +1,63 @@
/* Optimized strlen implementation using LoongArch LASX SIMD instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include <sys/regdef.h>
#include <sys/asm.h>
/* Only assembled for libc, and only when not building for soft float.  */
#if IS_IN (libc) && !defined __loongarch_soft_float
# define STRLEN __strlen_lasx
/* __strlen_lasx: the string pointer arrives in a0 and the length is returned
   in a0.

   The string is scanned in 32-byte blocks read with xvld from addresses that
   are multiples of 32.  A block is reduced to a 32-bit mask in which bit i
   is 1 when byte i of the block is non-zero; counting the trailing ones of
   that mask (cto.w) gives the index of the first zero byte.

   Register roles: a1 = original pointer, a0 = address of the current block,
   t0 = mask, t1 = -1.  */
LEAF(STRLEN, 6)
/* Keep the original pointer and round a0 down to a 32-byte boundary.  */
move a1, a0
bstrins.d a0, zero, 4, 0
li.d t1, -1
xvld xr0, a0, 0
/* xvmsknz.b leaves a 16-bit non-zero-byte mask in each 128-bit half.
   xvpickve.w copies word 4 (the start of the upper half) into xr1, and
   vilvl.h interleaves the low halfwords so that the low 32 bits of vr0 hold
   the lower-half mask in bits 0-15 and the upper-half mask in bits 16-31.  */
xvmsknz.b xr0, xr0
xvpickve.w xr1, xr0, 4
vilvl.h vr0, vr1, vr0
/* Move the low 32 bits of the vector register (accessed as fa0) to t0.  */
movfr2gr.s t0, fa0 # sign extend
/* Arithmetic shift right by the low 5 bits of a1 (s & 31): drops the bits
   for the bytes before s and fills the top with copies of bit 31.  */
sra.w t0, t0, a1
/* All ones means no zero byte from s to the end of this block.  */
beq t0, t1, L(loop)
/* Terminator is in the first block: the trailing-ones count is the length.  */
cto.w a0, t0
jr ra
/* Advance one 32-byte block at a time until a block contains a zero byte
   (xvsetanyeqz.b sets fcc0 in that case; bceqz loops while fcc0 is 0).  */
L(loop):
xvld xr0, a0, 32
addi.d a0, a0, 32
xvsetanyeqz.b fcc0, xr0
bceqz fcc0, L(loop)
/* Build the 32-bit mask for this block as above.  a0 becomes the block's
   offset from the start of the string.  */
xvmsknz.b xr0, xr0
sub.d a0, a0, a1
xvpickve.w xr1, xr0, 4
vilvl.h vr0, vr1, vr0
movfr2gr.s t0, fa0
/* Length = block offset + index of the first zero byte in the block.  */
cto.w t0, t0
add.d a0, a0, t0
jr ra
END(STRLEN)
libc_hidden_builtin_def (STRLEN)
#endif

View File

@ -0,0 +1,71 @@
/* Optimized strlen implementation using LoongArch LSX SIMD instructions.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
#include <sys/regdef.h>
#include <sys/asm.h>
/* Only assembled for libc, and only when not building for soft float.  */
#if IS_IN (libc) && !defined __loongarch_soft_float
# define STRLEN __strlen_lsx
/* __strlen_lsx: the string pointer arrives in a0 and the length is returned
   in a0.

   The string is scanned in 32-byte blocks, each read as two 16-byte vld
   loads from an address that is a multiple of 32.  A block is reduced to a
   32-bit mask in which bit i is 1 when byte i of the block is non-zero;
   counting the trailing ones of that mask (cto.w) gives the index of the
   first zero byte.

   Register roles: a1 = original pointer, a0 = address of the current block,
   t0 = mask, t1 = -1.  */
LEAF(STRLEN, 6)
/* Keep the original pointer and round a0 down to a 32-byte boundary.  */
move a1, a0
bstrins.d a0, zero, 4, 0
vld vr0, a0, 0
vld vr1, a0, 16
li.d t1, -1
/* vmsknz.b leaves a 16-bit non-zero-byte mask in each register; vilvl.h
   interleaves the low halfwords so that the low 32 bits of vr0 hold the
   mask of bytes 0-15 in bits 0-15 and of bytes 16-31 in bits 16-31.  */
vmsknz.b vr0, vr0
vmsknz.b vr1, vr1
vilvl.h vr0, vr1, vr0
/* Move the low 32 bits of vr0 (accessed as fa0) to t0.  */
movfr2gr.s t0, fa0
/* Arithmetic shift right by the low 5 bits of a1 (s & 31): drops the bits
   for the bytes before s and fills the top with copies of bit 31.  */
sra.w t0, t0, a1
/* All ones means no zero byte from s to the end of this block.  */
beq t0, t1, L(loop)
/* Terminator is in the first block: the trailing-ones count is the length.  */
cto.w a0, t0
jr ra
/* Padding; presumably keeps L(loop) at an aligned address -- confirm.  */
nop
nop
nop
/* Advance one 32-byte block at a time.  vmin.bu takes the unsigned byte-wise
   minimum of the two halves, which has a zero byte whenever either half
   does; vsetanyeqz.b sets fcc0 in that case and bceqz loops while fcc0
   is 0.  */
L(loop):
vld vr0, a0, 32
vld vr1, a0, 48
addi.d a0, a0, 32
vmin.bu vr2, vr0, vr1
vsetanyeqz.b fcc0, vr2
bceqz fcc0, L(loop)
/* Build the 32-bit mask for this block as above.  a0 becomes the block's
   offset from the start of the string.  */
vmsknz.b vr0, vr0
vmsknz.b vr1, vr1
vilvl.h vr0, vr1, vr0
sub.d a0, a0, a1
movfr2gr.s t0, fa0
/* Length = block offset + index of the first zero byte in the block.  */
cto.w t0, t0
add.d a0, a0, t0
jr ra
END(STRLEN)
libc_hidden_builtin_def (STRLEN)
#endif

View File

@ -0,0 +1,37 @@
/* Multiple versions of strlen.
All versions must be listed in ifunc-impl-list.c.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* Define multiple versions only for the definition in libc. */
#if IS_IN (libc)
/* Include <string.h> with strlen renamed to __redirect_strlen, so the
   header's declaration lands on that name and strlen itself stays free to
   be defined below.  */
# define strlen __redirect_strlen
# include <string.h>
# undef strlen
/* SYMBOL_NAME must be set before including the selector header.
   NOTE(review): presumably REDIRECT_NAME and OPTIMIZE used in
   ifunc-strlen.h are derived from it -- confirm in <ifunc-init.h>.  */
# define SYMBOL_NAME strlen
# include "ifunc-strlen.h"
/* Define strlen through libc_ifunc_redirected, passing the redirected
   prototype name, the symbol name and the IFUNC_SELECTOR () call whose
   result picks the implementation.  */
libc_ifunc_redirected (__redirect_strlen, strlen, IFUNC_SELECTOR ());
/* Shared builds additionally declare __GI_strlen with hidden visibility.
   NOTE(review): presumably this is the alias used for calls made from
   inside libc -- confirm against the __hidden_ver1 definition.  */
# ifdef SHARED
__hidden_ver1 (strlen, __GI_strlen, __redirect_strlen)
__attribute__ ((visibility ("hidden"))) __attribute_copy__ (strlen);
# endif
#endif

View File

@ -89,6 +89,14 @@
#define fs5 $f29
#define fs6 $f30
#define fs7 $f31
/* Condition flag registers fcc0-fcc7.  The vector strlen variants write
   fcc0 with [x]vsetanyeqz.b and branch on it with bceqz.  */
#define fcc0 $fcc0
#define fcc1 $fcc1
#define fcc2 $fcc2
#define fcc3 $fcc3
#define fcc4 $fcc4
#define fcc5 $fcc5
#define fcc6 $fcc6
#define fcc7 $fcc7
/* vrN names: registers used by the LSX (128-bit) instructions.  */
#define vr0 $vr0
#define vr1 $vr1
@ -98,6 +106,30 @@
#define vr5 $vr5
#define vr6 $vr6
#define vr7 $vr7
/* Remaining LSX register names, vr8 through vr31.  */
#define vr8 $vr8
#define vr9 $vr9
#define vr10 $vr10
#define vr11 $vr11
#define vr12 $vr12
#define vr13 $vr13
#define vr14 $vr14
#define vr15 $vr15
#define vr16 $vr16
#define vr17 $vr17
#define vr18 $vr18
#define vr19 $vr19
#define vr20 $vr20
#define vr21 $vr21
#define vr22 $vr22
#define vr23 $vr23
#define vr24 $vr24
#define vr25 $vr25
#define vr26 $vr26
#define vr27 $vr27
#define vr28 $vr28
#define vr29 $vr29
#define vr30 $vr30
#define vr31 $vr31
/* xrN names: registers used by the LASX (256-bit) instructions.  */
#define xr0 $xr0
#define xr1 $xr1
@ -107,5 +139,30 @@
#define xr5 $xr5
#define xr6 $xr6
#define xr7 $xr7
/* Remaining LASX register names, xr8 through xr31.  Each name is defined
   exactly once.  */
#define xr8 $xr8
#define xr9 $xr9
#define xr10 $xr10
#define xr11 $xr11
#define xr12 $xr12
#define xr13 $xr13
#define xr14 $xr14
#define xr15 $xr15
#define xr16 $xr16
#define xr17 $xr17
#define xr18 $xr18
#define xr19 $xr19
#define xr20 $xr20
#define xr21 $xr21
#define xr22 $xr22
#define xr23 $xr23
#define xr24 $xr24
#define xr25 $xr25
#define xr26 $xr26
#define xr27 $xr27
#define xr28 $xr28
#define xr29 $xr29
#define xr30 $xr30
#define xr31 $xr31
#endif /* _SYS_REGDEF_H */

View File

@ -25,5 +25,7 @@
/* Non-zero when the corresponding HWCAP_LOONGARCH_* bit is set in
   GLRO (dl_hwcap).  These are the conditions the strlen selector and the
   implementation list test.
   NOTE(review): dl_hwcap is presumably the hardware-capability word supplied
   at program start -- confirm in <ldsodefs.h>.  */
#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
/* Expands to nothing here.  */
#define INIT_ARCH()
#endif /* _CPU_FEATURES_LOONGARCH64_H */