/* Mirror of https://sourceware.org/git/glibc.git
   (synced 2025-01-01 07:20:15 +00:00; 219 lines, 4.8 KiB, ArmAsm).  */
/* memchr - find a character in a memory zone using base integer registers

   Copyright (C) 2018-2024 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library. If not, see
   <https://www.gnu.org/licenses/>. */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Use base integer registers.
 */

/* Arguments and results.  Several names below deliberately share a
   register; each note says why the sharing is safe.  */

/* Start address of the range to search.  */
#define srcin		x0
/* Character to find; only its low 8 bits are used (and ..., 255).  */
#define chrin		x1
/* Number of bytes to search.  */
#define cntin		x2

/* Return value.  Shares x0 with srcin: srcin is read for the last time
   (cmp srcin, srcend) before result is first written.  */
#define result		x0

/* Search byte replicated into all eight bytes of a qword.  Shares x1
   with chrin, which it replaces.  */
#define repchr		x1

/* Scratch registers.  tmp1 shares x2 with cntin: cntin is only read by
   the initial cbz and by "add srcend, srcin, cntin", both of which come
   before the first write to tmp1.  */
#define tmp1		x2
#define tmp2		x3
#define tmp3		x4
#define tmp4		x5

/* Address of the next 16-byte block to load (post-incremented by ldp).  */
#define src		x6
/* One past the last byte to search: srcin + cntin.  */
#define srcend		x7
/* srcend rounded up to a 16-byte boundary.  */
#define srcend16	x8

/* Zero when the block just examined is the last one, non-zero otherwise.  */
#define anymore		x9

/* Holds REP8_01 for the whole function.  */
#define zeroones	x10

/* The two qwords of the current block, XORed with repchr so that a
   matching byte becomes zero.  */
#define data1		x11
#define data2		x12

/* Zero-byte detection masks for data1 and data2 respectively.  */
#define has_chr1	x13
#define has_chr2	x14

/* 0x01 in every byte: multiplier that replicates the search byte, and
   subtrahend of the zero-byte test.  */
#define REP8_01 0x0101010101010101
/* 0x7f in every byte: OR-mask of the zero-byte test.  */
#define REP8_7f 0x7f7f7f7f7f7f7f7f

/* __memchr_nosimd - memchr using only the base integer registers.

   In:	srcin (x0)  start address
	chrin (x1)  character to find; only the low 8 bits are used
	cntin (x2)  number of bytes to search
   Out:	result (x0) address of the first byte in [srcin, srcin + cntin)
		    equal to the character, or 0 if there is none.

   Works on x0-x14 and the condition flags only; the code between ENTRY
   and END never touches sp and never stores to memory.

   Memory is read in whole 16-byte aligned blocks (src is srcin with the
   low four bits cleared), so bytes of the first and last block that lie
   outside the requested range are loaded as well and then masked out or
   rejected by the final bounds check.  */
ENTRY (__memchr_nosimd)

	/* PTR_ARG and SIZE_ARG are defined in sysdep.h; presumably they
	   normalise the pointer in x0 and the size in x2 for the target
	   ABI -- confirm against sysdep.h.  */
	PTR_ARG (0)
	SIZE_ARG (2)

	/* Do not dereference srcin if there are no bytes to compare.  */
	cbz	cntin, L(none_chr)

	/* Is the start address 16-byte aligned?  The flags set here are
	   consumed by the "b.eq L(loop)" below; none of the instructions
	   in between modifies them.  */
	tst	srcin, 15
	bic	src, srcin, 15

	mov	zeroones, REP8_01
	and	repchr, chrin, 255
	/* Generate a qword integer as |c|c|c|c|c|c|c|c|.  */
	mul	repchr, repchr, zeroones

	/* Last use of cntin; x2 is reused as tmp1 from here on.  */
	add	srcend, srcin, cntin
	/*
	 * srcend16 is the address of the block following the last block.
	 *
	 * [A block is 16-byte aligned and sized.]
	 */
	add	srcend16, srcend, 15
	bic	srcend16, srcend16, 15

	/* Aligned start: no leading bytes to mask, go straight to the loop.  */
	b.eq	L(loop)

	/* Load the first block, which contains the start address.  */
	ldp	data1, data2, [src], 16

	/* tmp3 = all-ones shifted by 8 * (srcin & 7) bits (a register shift
	   only uses the low six bits of the amount): ones cover the bytes
	   at or after the start address within a qword.  */
	lsl	tmp1, srcin, 3
	mov	tmp2, ~0
#ifdef __AARCH64EB__
	lsr	tmp3, tmp2, tmp1
#else
	lsl	tmp3, tmp2, tmp1
#endif
	/* Is the start address in the first or the second qword?  These
	   flags are consumed by the csinv/csel pair below.  */
	tst	srcin, 8

	/*
	 * Transform any byte in the block to zero using an XOR operation
	 * if that byte equals the char to search for.  In this way,
	 * searching for the char becomes detecting a zero byte in the
	 * resulting two qwords.
	 */
	eor	data1, data1, repchr
	eor	data2, data2, repchr

	/*
	 * Set the unused bytes (before the start address) to 0xff, so
	 * that they cannot trigger the zero detection.
	 */
	orn	tmp1, data1, tmp3
	orn	tmp2, data2, tmp3

	/* Start in the first qword (eq): mask data1, keep data2 as is.
	   Start in the second qword: data1 becomes all ones (ignored
	   entirely) and data2 is masked.  */
	csinv	data1, tmp1, xzr, eq
	csel	data2, data2, tmp2, eq

	/*
	 * When the first and last block are the same, there are two cases:
	 *  o. The memory range to search lies within one block.
	 *      (start address - end address) < 0
	 *
	 *  o. The memory range is so large that the end address wraps
	 *     around.
	 *      (start address - end address) > 0
	 *
	 * anymore becomes 0 only in the first case, i.e. when the
	 * subtraction is negative and src already equals srcend16;
	 * otherwise it is all ones.  The flags produced by ccmp are also
	 * consumed by the ccmp at L(find_chr).
	 */
	cmp	srcin, srcend
	ccmp	src, srcend16, 0, mi
	csetm	anymore, ne
	b	L(find_chr)

	.p2align 4
L(loop):
	ldp	data1, data2, [src], 16

	/* anymore == 0 (Z set) once the block just loaded is the last one.
	   The flags stay live until the ccmp at the end of L(find_chr).  */
	subs	anymore, src, srcend16

	/*
	 * Transform any byte in the block to zero using an XOR operation
	 * if that byte equals the char to search for.
	 */
	eor	data1, data1, repchr
	eor	data2, data2, repchr

L(find_chr):
	/*
	 * Use the following integer test to find out whether any byte in
	 * a qword is zero.  If the qword does not contain a zero-valued
	 * byte, the test result is zero.
	 *
	 *  (qword - 0x0101010101010101) & ~(qword) & 0x8080808080808080
	 * =
	 *  (qword - 0x0101010101010101) & ~(qword | 0x7f7f7f7f7f7f7f7f)
	 *
	 * None of the instructions up to the ccmp below sets the flags.
	 */
	sub	tmp1, data1, zeroones
	sub	tmp2, data2, zeroones

	orr	tmp3, data1, REP8_7f
	orr	tmp4, data2, REP8_7f

	bic	has_chr1, tmp1, tmp3
	bic	has_chr2, tmp2, tmp4

	/* If more blocks remain (ne), test whether either mask is
	   non-zero; on the last block force NZCV = 0 so that "eq" is
	   false and the loop ends.  */
	orr	tmp1, has_chr1, has_chr2
	ccmp	tmp1, 0, 0, ne

	/* Nothing found and more blocks to go.  */
	b.eq	L(loop)

	/* Candidate in the first qword of the block?  src has already been
	   advanced past the block, hence the -16.  */
	cbz	has_chr1, 1f
	sub	result, src, 16
#ifdef __AARCH64EB__
	rev	data1, data1
#else
	rev	has_chr1, has_chr1
#endif
	b	L(done)

	/* Neither qword has a match: this was the last block, return 0.
	   Otherwise the candidate is in the second qword (-8).  */
1:	cbz	has_chr2, L(none_chr)
	sub	result, src, 8
#ifdef __AARCH64EB__
	rev	data1, data2
#else
	rev	has_chr1, has_chr2
#endif

L(done):
#ifdef __AARCH64EB__
	/*
	 * For big-endian, has_chr1/has_chr2 cannot be used directly
	 * because the two qwords are byte-reversed relative to memory
	 * order after loading.  The char detection therefore has to be
	 * performed again on the selected qword, which has been
	 * byte-swapped into data1 above.
	 */
	sub	tmp1, data1, zeroones
	orr	tmp3, data1, REP8_7f
	bic	has_chr1, tmp1, tmp3
	rev	has_chr1, has_chr1
#endif

	/*
	 * has_chr1 is now the byte-reversed detection mask of the qword
	 * that holds the candidate.  A byte equal to the char is marked by
	 * its top bit; the mask is only reliable for the first occurrence,
	 * not for later ones.  clz thus gives 8 * (index of the first
	 * match) and "lsr 3" turns it into a byte offset.
	 *
	 * anymore == 0 means the candidate is in the last block, which may
	 * extend past srcend, so it is compared with srcend.  Otherwise
	 * ccmp forces NZCV = 0b1000 (N set), making "mi" true so that the
	 * candidate is accepted unconditionally.
	 */
	cmp	anymore, 0
	clz	tmp1, has_chr1
	add	result, result, tmp1, lsr 3
	ccmp	result, srcend, 8, eq	/* NZCV = 0b1000 (N set) if ne.  */
	/* Keep the candidate if it lies before srcend, else return 0.  */
	csel	result, result, xzr, mi
	ret

L(none_chr):
	mov	result, 0
	ret

END (__memchr_nosimd)