aarch64: Optimize __libc_mtag_tag_region
This is a target hook for memory tagging; the original was a naive implementation. The optimized version relies on "dc gva" to tag 64 bytes at a time for large allocations, and handles the small cases without adding too many branches. This was not benchmarked on real hardware, but it is expected to be faster than the naive implementation.
parent 383bc24028
commit 23fd760add
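For orientation, the control flow of the new routine can be sketched in C roughly as follows. This is an illustration only, under stated assumptions: tag_region_sketch and the mte_* helpers are hypothetical names, the helpers stand in for the stg (tag one 16-byte granule), st2g (tag 32 bytes) and "dc gva" (tag a whole 64-byte block) instructions, and the real implementation is the assembly in the diff below.

#include <stddef.h>
#include <stdint.h>

/* Hypothetical stand-ins for the MTE tag stores, stubbed out so the
   sketch compiles anywhere: stg tags one 16-byte granule, st2g tags
   two granules (32 bytes), dc gva tags a whole 64-byte block.  */
static void mte_stg (char *addr)    { (void) addr; }
static void mte_st2g (char *addr)   { (void) addr; }
static void mte_dc_gva (char *addr) { (void) addr; }

/* Sketch of the dispatch in the diff below: P is 16 byte aligned,
   SIZE is a multiple of 16, and the tag is taken from P itself.  */
void *
tag_region_sketch (void *p, size_t size)
{
  char *dst = p;
  char *end = dst + size;

  if (size <= 96)
    {
      if (size & 64)
	{
	  /* 64..96 bytes: 64 from the start, 32 from the end; the
	     stores may overlap instead of branching per size.  */
	  mte_st2g (dst);
	  mte_st2g (dst + 32);
	  mte_st2g (end - 32);
	  return p;
	}
      if (size == 0)
	return p;
      /* 16, 32 or 48 bytes: first, middle and last granule, again
	 letting the stores alias.  */
      mte_stg (dst);
      mte_stg (dst + (size >> 5) * 16);
      mte_stg (end - 16);
      return p;
    }

  if (size >= 160)
    {
      /* Large case: tag 64 bytes at the head with st2g, sweep the
	 64-byte-aligned middle with dc gva, then fix up the tail.  */
      mte_st2g (dst);
      mte_st2g (dst + 32);
      char *blk = (char *) ((uintptr_t) dst & ~(uintptr_t) 63);
      for (blk += 64; blk + 64 <= end; blk += 64)
	mte_dc_gva (blk);
      mte_st2g (end - 64);
      mte_st2g (end - 32);
      return p;
    }

  /* 96..160 bytes (the real code also lands here when the ZVA block
     size is not 64 bytes): a plain st2g loop plus a fix-up store.  */
  do
    {
      mte_st2g (dst);
      dst += 32;
    }
  while (dst + 32 <= end);
  mte_st2g (end - 32);
  return p;
}

Note how the small cases deliberately let stores overlap rather than branch on exact sizes, and how the ZVA path only tags the unaligned head and tail itself, leaving "dc gva" to sweep the aligned middle.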
@@ -20,32 +20,94 @@
 #ifdef USE_MTAG
 
 /* Use the same register names and assignments as memset.  */
 
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, MTE, LP64 ABI.
+ *
+ * Interface contract:
+ * Address is 16 byte aligned and size is multiple of 16.
+ * Returns the passed pointer.
+ * The memory region may remain untagged if tagging is not enabled.
+ */
+
 	.arch armv8.5-a
 	.arch_extension memtag
 
-/* NB, only supported on variants with 64-bit pointers.  */
-
-/* FIXME: This is a minimal implementation.  We could do better than
-   this for larger values of COUNT.  */
-
 #define dstin	x0
 #define count	x1
 #define dst	x2
+#define dstend	x3
+#define tmp	x4
+#define zva_val	x4
 
-ENTRY_ALIGN(__libc_mtag_tag_region, 6)
+ENTRY (__libc_mtag_tag_region)
+	PTR_ARG (0)
+	SIZE_ARG (1)
 
-	mov	dst, dstin
-L(loop):
-	stg	dst, [dst], #16
-	subs	count, count, 16
-	bne	L(loop)
-#if 0
-	/* This is not currently needed, since for now we are only called
-	   to tag memory that is taggable.  */
-	ldg	dstin, [dstin] // Recover the tag created (might be untagged).
-#endif
+	add	dstend, dstin, count
+
+	cmp	count, 96
+	b.hi	L(set_long)
+
+	tbnz	count, 6, L(set96)
+
+	/* Set 0, 16, 32, or 48 bytes.  */
+	lsr	tmp, count, 5
+	add	tmp, dstin, tmp, lsl 4
+	cbz	count, L(end)
+	stg	dstin, [dstin]
+	stg	dstin, [tmp]
+	stg	dstin, [dstend, -16]
+L(end):
 	ret
+
+	.p2align 4
+	/* Set 64..96 bytes.  Write 64 bytes from the start and
+	   32 bytes from the end.  */
+L(set96):
+	st2g	dstin, [dstin]
+	st2g	dstin, [dstin, 32]
+	st2g	dstin, [dstend, -32]
+	ret
+
+	.p2align 4
+	/* Size is > 96 bytes.  */
+L(set_long):
+	cmp	count, 160
+	b.lo	L(no_zva)
+
+#ifndef SKIP_ZVA_CHECK
+	mrs	zva_val, dczid_el0
+	and	zva_val, zva_val, 31
+	cmp	zva_val, 4		/* ZVA size is 64 bytes.  */
+	b.ne	L(no_zva)
+#endif
+	st2g	dstin, [dstin]
+	st2g	dstin, [dstin, 32]
+	bic	dst, dstin, 63
+	sub	count, dstend, dst	/* Count is now 64 too large.  */
+	sub	count, count, 128	/* Adjust count and bias for loop.  */
+
+	.p2align 4
+L(zva_loop):
+	add	dst, dst, 64
+	dc	gva, dst
+	subs	count, count, 64
+	b.hi	L(zva_loop)
+	st2g	dstin, [dstend, -64]
+	st2g	dstin, [dstend, -32]
+	ret
+
+L(no_zva):
+	sub	dst, dstin, 32		/* Dst is biased by -32.  */
+	sub	count, count, 64	/* Adjust count for loop.  */
+L(no_zva_loop):
+	st2g	dstin, [dst, 32]
+	st2g	dstin, [dst, 64]!
+	subs	count, count, 64
+	b.hi	L(no_zva_loop)
+	st2g	dstin, [dstend, -64]
+	st2g	dstin, [dstend, -32]
+	ret
+
 END (__libc_mtag_tag_region)
 #endif /* USE_MTAG */
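A note on the ZVA gate, sketched below under stated assumptions (the helper name is made up): DCZID_EL0 is readable from EL0, its bits [3:0] (BS) encode log2 of the DC ZVA/GVA block size in 4-byte words, and bit 4 (DZP) is set when those instructions are prohibited. Masking with 31 and comparing against 4 therefore accepts exactly an enabled block of 4 * 2^4 = 64 bytes, the only geometry L(zva_loop) handles; anything else falls back to the st2g loop.

#include <stdint.h>

/* Hypothetical helper mirroring the #ifndef SKIP_ZVA_CHECK guard.  */
static int
zva_block_is_64_bytes (void)
{
#ifdef __aarch64__
  uint64_t dczid;
  /* DCZID_EL0: bit 4 = DZP (DC ZVA/GVA prohibited),
     bits [3:0] = log2 (block size in 4-byte words).  */
  __asm__ ("mrs %0, dczid_el0" : "=r" (dczid));
  return (dczid & 31) == 4;	/* DZP clear and 4 << 4 == 64 bytes.  */
#else
  return 0;			/* No dc gva path off AArch64.  */
#endif
}

Judging from the code, defining SKIP_ZVA_CHECK lets a build that already knows the block size is 64 bytes compile the check out entirely.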