mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-11 05:40:06 +00:00
AArch64: Add memset_zva64
Add a specialized memset for the common ZVA size of 64 bytes to avoid the
overhead of reading the ZVA size from DCZID_EL0 at run time. Since the code is
identical to __memset_falkor, remove the latter.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
(cherry picked from commit 3d7090f14b)
This commit is contained in:
parent
7e999181c2
commit
ff17116c1e
@ -101,19 +101,19 @@ L(tail64):
|
||||
ret
|
||||
|
||||
L(try_zva):
|
||||
#ifdef ZVA_MACRO
|
||||
zva_macro
|
||||
#else
|
||||
#ifndef ZVA64_ONLY
|
||||
.p2align 3
|
||||
mrs tmp1, dczid_el0
|
||||
tbnz tmp1w, 4, L(no_zva)
|
||||
and tmp1w, tmp1w, 15
|
||||
cmp tmp1w, 4 /* ZVA size is 64 bytes. */
|
||||
b.ne L(zva_128)
|
||||
|
||||
nop
|
||||
#endif
|
||||
/* Write the first and last 64 byte aligned block using stp rather
|
||||
than using DC ZVA. This is faster on some cores.
|
||||
*/
|
||||
.p2align 4
|
||||
L(zva_64):
|
||||
str q0, [dst, 16]
|
||||
stp q0, q0, [dst, 32]
|
||||
@ -123,7 +123,6 @@ L(zva_64):
|
||||
sub count, dstend, dst /* Count is now 128 too large. */
|
||||
sub count, count, 128+64+64 /* Adjust count and bias for loop. */
|
||||
add dst, dst, 128
|
||||
nop
|
||||
1: dc zva, dst
|
||||
add dst, dst, 64
|
||||
subs count, count, 64
|
||||
@ -134,6 +133,7 @@ L(zva_64):
|
||||
stp q0, q0, [dstend, -32]
|
||||
ret
|
||||
|
||||
#ifndef ZVA64_ONLY
|
||||
.p2align 3
|
||||
L(zva_128):
|
||||
cmp tmp1w, 5 /* ZVA size is 128 bytes. */
|
||||
|
@ -12,10 +12,10 @@ sysdep_routines += \
|
||||
memmove_mops \
|
||||
memset_a64fx \
|
||||
memset_emag \
|
||||
memset_falkor \
|
||||
memset_generic \
|
||||
memset_kunpeng \
|
||||
memset_mops \
|
||||
memset_zva64 \
|
||||
strlen_asimd \
|
||||
strlen_generic \
|
||||
# sysdep_routines
|
||||
|
@ -59,9 +59,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
IFUNC_IMPL_ADD (array, i, memmove, mops, __memmove_mops)
|
||||
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic))
|
||||
IFUNC_IMPL (i, name, memset,
|
||||
/* Enable this on non-falkor processors too so that other cores
|
||||
can do a comparative analysis with __memset_generic. */
|
||||
IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_falkor)
|
||||
IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_zva64)
|
||||
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_emag)
|
||||
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_kunpeng)
|
||||
#if HAVE_AARCH64_SVE_ASM
|
||||
|
@ -28,7 +28,7 @@
|
||||
|
||||
extern __typeof (__redirect_memset) __libc_memset;
|
||||
|
||||
extern __typeof (__redirect_memset) __memset_falkor attribute_hidden;
|
||||
extern __typeof (__redirect_memset) __memset_zva64 attribute_hidden;
|
||||
extern __typeof (__redirect_memset) __memset_emag attribute_hidden;
|
||||
extern __typeof (__redirect_memset) __memset_kunpeng attribute_hidden;
|
||||
extern __typeof (__redirect_memset) __memset_a64fx attribute_hidden;
|
||||
@ -47,18 +47,17 @@ select_memset_ifunc (void)
|
||||
{
|
||||
if (IS_A64FX (midr) && zva_size == 256)
|
||||
return __memset_a64fx;
|
||||
return __memset_generic;
|
||||
}
|
||||
|
||||
if (IS_KUNPENG920 (midr))
|
||||
return __memset_kunpeng;
|
||||
|
||||
if ((IS_FALKOR (midr) || IS_PHECDA (midr)) && zva_size == 64)
|
||||
return __memset_falkor;
|
||||
|
||||
if (IS_EMAG (midr))
|
||||
return __memset_emag;
|
||||
|
||||
if (zva_size == 64)
|
||||
return __memset_zva64;
|
||||
|
||||
return __memset_generic;
|
||||
}
|
||||
|
||||
|
@ -1,54 +0,0 @@
|
||||
/* Memset for falkor.
|
||||
Copyright (C) 2017-2021 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library. If not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <memset-reg.h>
|
||||
|
||||
/* Reading dczid_el0 is expensive on falkor so move it into the ifunc
|
||||
resolver and assume ZVA size of 64 bytes. The IFUNC resolver takes care to
|
||||
use this function only when ZVA is enabled. */
|
||||
|
||||
#if IS_IN (libc)
|
||||
.macro zva_macro
|
||||
.p2align 4
|
||||
/* Write the first and last 64 byte aligned block using stp rather
|
||||
than using DC ZVA. This is faster on some cores. */
|
||||
str q0, [dst, 16]
|
||||
stp q0, q0, [dst, 32]
|
||||
bic dst, dst, 63
|
||||
stp q0, q0, [dst, 64]
|
||||
stp q0, q0, [dst, 96]
|
||||
sub count, dstend, dst /* Count is now 128 too large. */
|
||||
sub count, count, 128+64+64 /* Adjust count and bias for loop. */
|
||||
add dst, dst, 128
|
||||
1: dc zva, dst
|
||||
add dst, dst, 64
|
||||
subs count, count, 64
|
||||
b.hi 1b
|
||||
stp q0, q0, [dst, 0]
|
||||
stp q0, q0, [dst, 32]
|
||||
stp q0, q0, [dstend, -64]
|
||||
stp q0, q0, [dstend, -32]
|
||||
ret
|
||||
.endm
|
||||
|
||||
# define ZVA_MACRO zva_macro
|
||||
# define MEMSET __memset_falkor
|
||||
# include <sysdeps/aarch64/memset.S>
|
||||
#endif
|
27
sysdeps/aarch64/multiarch/memset_zva64.S
Normal file
@ -0,0 +1,27 @@
|
||||
/* Optimized memset for zva size = 64.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library. If not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
#define ZVA64_ONLY 1
|
||||
#define MEMSET __memset_zva64
|
||||
#undef libc_hidden_builtin_def
|
||||
#define libc_hidden_builtin_def(X)
|
||||
|
||||
#include "../memset.S"
|
Loading…
Reference in New Issue
Block a user