Add a simple rawmemchr implementation. Use strlen for rawmemchr(s, '\0'),
as it is the fastest way to search for '\0'. Otherwise use memchr with an
infinite size. This is 3x faster on benchtests for large sizes. Passes
GLIBC tests.

	* sysdeps/aarch64/rawmemchr.S (__rawmemchr): New file.
	* sysdeps/aarch64/strlen.S (__strlen): Change to __strlen to avoid PLT.
This commit is contained in:
Wilco Dijkstra 2016-06-20 17:48:20 +01:00
parent b998e16e71
commit 58ec4fb881
3 changed files with 50 additions and 2 deletions

View File

@ -1,3 +1,8 @@
2016-06-20 Wilco Dijkstra <wdijkstr@arm.com>
* sysdeps/aarch64/rawmemchr.S (__rawmemchr): New file.
* sysdeps/aarch64/strlen.S (__strlen): Change to __strlen to avoid PLT.
2016-06-20 Wilco Dijkstra <wdijkstr@arm.com>
* sysdeps/aarch64/memcpy.S (memcpy):

View File

@ -0,0 +1,42 @@
/* rawmemchr - find a character in a memory zone
Copyright (C) 2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library. If not, see
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
/* Special case rawmemchr (s, 0) as strlen, otherwise tailcall memchr.
Call strlen without setting up a full frame - it preserves x14/x15.
*/
/* void *__rawmemchr (const void *s, int c)
   In:  x0 = s, w1 = c.  Out: x0 = address of first byte equal to c.
   Searching for '\0' is special-cased as s + strlen (s); any other
   character tail-calls memchr with an unbounded size.  */
ENTRY (__rawmemchr)
cbz w1, L(do_strlen)	// c == 0: use strlen instead
mov x2, -1	// size = SIZE_MAX ("infinite" search length)
b __memchr	// tail-call memchr (s, c, -1); it returns for us
L(do_strlen):
mov x15, x30	// save LR by hand; per the note above, strlen preserves x14/x15
cfi_return_column (x15)	// tell the unwinder the return address now lives in x15
mov x14, x0	// save s across the call (x0 is clobbered by strlen)
bl __strlen	// x0 = strlen (s); __strlen alias avoids a PLT indirection
add x0, x14, x0	// result = s + length = address of the terminating '\0'
ret x15	// return via the saved LR
END (__rawmemchr)
weak_alias (__rawmemchr, rawmemchr)
libc_hidden_builtin_def (__rawmemchr)

View File

@ -84,7 +84,7 @@
whether the first fetch, which may be misaligned, crosses a page
boundary. */
ENTRY_ALIGN (strlen, 6)
ENTRY_ALIGN (__strlen, 6)
and tmp1, srcin, MIN_PAGE_SIZE - 1
mov zeroones, REP8_01
cmp tmp1, MIN_PAGE_SIZE - 16
@ -213,5 +213,6 @@ L(page_cross):
csel data1, data1, tmp4, eq
csel data2, data2, tmp2, eq
b L(page_cross_entry)
END (strlen)
END (__strlen)
weak_alias (__strlen, strlen)
libc_hidden_builtin_def (strlen)