glibc/sysdeps/i386/i586/strlen.S
Roland McGrath 7a12c6bba7 Fri May 3 13:32:08 1996 Roland McGrath <roland@delasyd.gnu.ai.mit.edu>
* intl/Makefile (CPPFLAGS): Change $(nlsdir) to $(i18ndir) in
	LOCALE_ALIAS_PATH.

Fri May  3 03:14:02 1996  Ulrich Drepper  <drepper@cygnus.com>

	* intl/Makefile (routines): Add l10nflist and explodename.
	(distribute): Add loadinfo.h and locale.alias.
	(install-others): New variable to install locale.alias.

	* intl/dcgettext.c, intl/finddomain.c, intl/gettextP.h,
	intl/loadmsgcat.c: Adapt for upcoming gettext-0.10.13.  Some code
	is now shared with the locale implementation.

	* intl/explodename.c, intl/l10nflist.c, intl/loadinfo.h: New file.
        Extracted from finddomain.c.  This is also used in the locale
        implementation.

	* intl/locale.alias: New file.  Locale alias database compatible
        with X Window System's locale alias file.  Can now be used in
        locale and gettext code.

	* libio/stdio.h: Add prototypes for asprintf and vasprintf.

	* locale/C-collate.c, locale/C-ctype.c, locale/C-messages.c,
	locale/C-monetary.c, locale/C-numeric.c, locale/C-time.c: Add new
	field in structure with name of locale ("C" in this case).

	* locale/Makefile (routines): Add findlocale.

	* locale/findlocale.c: New file.  Instead of trying to load the
        directly described file we now try to be much smarter when this
        fails.  Use the same code as gettext does.

	* locale/loadlocale.c, locale/setlocale.c: Rewrite to know about
        new loading scheme.

	* locale/localeinfo.h: Adapt prototypes and declarations for new
	setlocale implementation.  Remove definition of u32_t type.  We
	now use u_int32_t from <sys/types.h>.

	* locale/programs/charset.h (ILLEGAL_CHAR_VALUE): Provide type
        with constant.

	* locale/programs/config.h, locale/lc-collate.c,
        locale/localeinfo.h, locale/programs/ld-collate.c,
        locale/programs/ld-ctype.c, locale/programs/ld-messages.c,
        locale/programs/ld-monetary.c, locale/programs/ld-numeric.c,
        locale/programs/ld-time.c, locale/weight.h, string/strcoll.c:
        Change to use u_int32_t and u_int16_t.

	* locale/programs/localedef.c (construct_output_path): Change name
        of output locale to contain normalized form of the character set
        portion.

	* string/Makefile (routines): Add argz-ctsep and argz-next.
	(tests): Add tst-strlen.

	* string/argz-ctsep.c: New file.  Implement reverse operation
	from argz-stringify.

	* string/argz-next.c: Non-inline version of function from argz.h.

	* string/argz.h, string/envz.h: Make usable as global header file.

	* string/envz.c: Fix declarations to use size_t where prototypes
	say so.

	* string/tst-strlen.c: New file.  Another test for critical
        situation in strlen implementations.

	* sysdeps/i386/i586/strlen.S: Fix bug with highest byte in word
        being zero.

	* wctype/test_wctype.c: Fix controlling comparison after change to
        32 bit character class array.

Fri May  3 12:53:12 1996  Roland McGrath  <roland@delasyd.gnu.ai.mit.edu>

	* sysdeps/unix/sysv/linux/sys/socket.h: Remove spurious doubled line.

Thu May  2 22:50:52 1996  Andreas Schwab  <schwab@issan.informatik.uni-dortmund.de>

	* sysdeps/unix/sysv/linux/getpriority.c: New file.
	* sysdeps/unix/sysv/linux/syscalls.list: Add s_getpriority.

Thu May  2 22:41:31 1996  Andreas Schwab  <schwab@issan.informatik.uni-dortmund.de>

	* sysdeps/unix/sysv/linux/m68k/fpu_control.h (_FPU_DEFAULT):
	Disable all exceptions.

Thu May  2 22:33:14 1996  Andreas Schwab  <schwab@issan.informatik.uni-dortmund.de>

	* sysdeps/m68k/fpu/e_acos.c, sysdeps/m68k/fpu/e_acosf.c,
	sysdeps/m68k/fpu/e_fmod.c, sysdeps/m68k/fpu/e_fmodf.c,
	sysdeps/m68k/fpu/isinfl.c, sysdeps/m68k/fpu/isnanl.c,
	sysdeps/m68k/fpu/s_atan.c, sysdeps/m68k/fpu/s_atanf.c,
	sysdeps/m68k/fpu/s_frexp.c, sysdeps/m68k/fpu/s_frexpf.c,
	sysdeps/m68k/fpu/s_ilogb.c, sysdeps/m68k/fpu/s_ilogbf.c,
	sysdeps/m68k/fpu/s_isinf.c, sysdeps/m68k/fpu/s_isinff.c,
	sysdeps/m68k/fpu/s_ldexp.c, sysdeps/m68k/fpu/s_ldexpf.c,
	sysdeps/m68k/fpu/s_modf.c, sysdeps/m68k/fpu/s_modff.c: Don't
	define __NO_MATH_INLINES, which is already defined on command
	line.

Thu May  2 22:18:28 1996  Andreas Schwab  <schwab@issan.informatik.uni-dortmund.de>

	* sysdeps/libm-ieee754/e_j0f.c (__ieee754_j0f, __ieee754_y0f):
	Replace 0x80000000 by 0x48000000.
	* sysdeps/libm-ieee754/e_j1f.c (__ieee754_j1f): Likewise.

Thu May  2 21:30:33 1996  Andreas Schwab  <schwab@issan.informatik.uni-dortmund.de>

	* sunrpc/svc_simple.c: Make global variable pl local to
	registerrpc.

Thu May  2 00:24:04 1996  Andreas Schwab  <schwab@issan.informatik.uni-dortmund.de>

	* time/Makefile (tz-cflags): New variable.
	(CFLAGS-tzfile.c): New variable.
	(CFLAGS-zic.c): Add $(tz-cflags).
	(tz-cc): Remove variable.
	($(objpfx)tzfile.o, $(objpfx)zic.o): Remove targets.

	* sysdeps/mach/hurd/getcwd.c: Jump out of both loops when we find a
	name, instead of checking for reaching end of buffer, which happens
	when the match is the last entry in the buffer.
1996-05-03 17:44:31 +00:00

183 lines
5.5 KiB
ArmAsm

/* strlen -- Compute length of NUL terminated string.
Highly optimized version for ix86, x>=5.
Copyright (C) 1995, 1996 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include <sysdep.h>
/* This version is especially optimized for the i586 (and following?)
processors. This is mainly done by using the two pipelines. The
version optimized for i486 is weak in this aspect because to get
as much parallelism we have to execute some *more* instructions.
The code below is structured to reflect the pairing of the instructions
as *I think* it is. I have no processor data book to verify this.
If you find something you think is incorrect let me know. */
/* The magic value which is used throughout the code. */
#define magic 0xfefefeff
/*
INPUT PARAMETERS:
str (sp + 4)
*/
.text
/* strlen: return in %eax the number of bytes that precede the first NUL
byte of the string whose address is passed at 4(%esp).

In:       4(%esp) = address of the string
Out:      %eax    = address of the NUL byte minus the start address
Modifies: %eax, %ecx, %edx and the flags; nothing is written to memory.
NOTE(review): presumably %ecx and %edx are caller-saved under the calling
convention in use -- confirm against the ABI.

Outline:
1. Test up to three single bytes until the pointer is 4-byte aligned.
2. Scan one aligned 32-bit word per step (unrolled four times) and test
   each word for a zero byte with the magic constant.
3. Once a word is flagged, look at its bytes one by one to locate the NUL.
Because whole words are loaded, up to three bytes beyond the NUL may be
read.

The instruction order is deliberate (see the pairing note at the top of
the file) and the loop relies on the carry flag surviving the decl, so
do not reorder instructions without re-checking both. */
ENTRY(strlen)
movl 4(%esp), %eax /* %eax = string address; used as running pointer */
movl %eax, %ecx /* copy it so the alignment can be extracted */
andl $3, %ecx /* %ecx = address & 3; this also leaves %ch == 0 */
jz L11 /* already aligned => go straight to the word loop */
cmpb %ch, (%eax) /* compare byte with %ch (== 0): is it NUL? */
je L2 /* yes => %eax points at the NUL, compute length */
incl %eax /* advance to the next byte */
cmpl $3, %ecx /* was the misalignment 3? */
je L11 /* yes => one byte was enough, pointer is aligned now */
cmpb %ch, (%eax) /* is the second unaligned byte NUL? */
je L2 /* yes => compute length */
incl %eax /* advance to the next byte */
cmpl $2, %ecx /* was the misalignment 2? */
je L11 /* yes => two bytes were enough, pointer is aligned now */
cmpb %ch, (%eax) /* misalignment was 1: is the third byte NUL? */
je L2 /* yes => compute length */
incl %eax /* advance; the pointer is now 4-byte aligned */
/* Word loop.  magic (0xfefefeff) has zero bits -- "holes" -- at bit
positions 8, 16 and 24; the carry flag acts as a fourth hole at bit 32,
so there is one hole directly above every byte of the word.

Adding magic to a word:
- If every byte is non-zero, a carry ripples out of each byte into the
  hole above it, so all four holes change (the top one shows up as
  CF = 1).
- The lowest zero byte produces no carry (0x00 + 0xff, or
  0x00 + 0xfe + carry-in 1, both give 0xff), so the hole above it keeps
  its value.
Hence the word contains a NUL exactly when at least one hole is unchanged.

Implementation: %edx is 0 on entry to every step, so the subl/decl pair
leaves %edx = ~word.  CF from the addl tells whether the bit 32 hole
changed (decl does not touch CF).  ((word + magic) ^ ~word) & ~magic has
a bit set for every one of the holes 8/16/24 that did NOT change.  When
that value is zero the step falls through with %edx == 0 again, which
re-establishes the precondition for the next step. */
L11: xorl %edx, %edx /* establish %edx == 0 for the first step */
L1:
/* Unrolled step 1 of 4. */
movl (%eax), %ecx /* %ecx = next aligned word (4 bytes) */
addl $4, %eax /* advance pointer past this word already */
subl %ecx, %edx /* %edx = 0 - word */
addl $magic, %ecx /* %ecx = word + magic; CF = carry out of bit 31 */
decl %edx /* %edx = -word - 1 = ~word; CF is left untouched */
jnc L3 /* no carry from the addl => word contains a NUL */
xorl %ecx, %edx /* (word+magic) ^ ~word */
andl $~magic, %edx /* keep only the hole bits 8, 16 and 24 */
jne L3 /* some hole unchanged => word contains a NUL */
/* Unrolled step 2 of 4; %edx == 0 here. */
movl (%eax), %ecx /* %ecx = next aligned word (4 bytes) */
addl $4, %eax /* advance pointer past this word already */
subl %ecx, %edx /* %edx = 0 - word */
addl $magic, %ecx /* %ecx = word + magic; CF = carry out of bit 31 */
decl %edx /* %edx = ~word; CF is left untouched */
jnc L3 /* no carry from the addl => word contains a NUL */
xorl %ecx, %edx /* (word+magic) ^ ~word */
andl $~magic, %edx /* keep only the hole bits 8, 16 and 24 */
jne L3 /* some hole unchanged => word contains a NUL */
/* Unrolled step 3 of 4; %edx == 0 here. */
movl (%eax), %ecx /* %ecx = next aligned word (4 bytes) */
addl $4, %eax /* advance pointer past this word already */
subl %ecx, %edx /* %edx = 0 - word */
addl $magic, %ecx /* %ecx = word + magic; CF = carry out of bit 31 */
decl %edx /* %edx = ~word; CF is left untouched */
jnc L3 /* no carry from the addl => word contains a NUL */
xorl %ecx, %edx /* (word+magic) ^ ~word */
andl $~magic, %edx /* keep only the hole bits 8, 16 and 24 */
jne L3 /* some hole unchanged => word contains a NUL */
/* Unrolled step 4 of 4; %edx == 0 here. */
movl (%eax), %ecx /* %ecx = next aligned word (4 bytes) */
addl $4, %eax /* advance pointer past this word already */
subl %ecx, %edx /* %edx = 0 - word */
addl $magic, %ecx /* %ecx = word + magic; CF = carry out of bit 31 */
decl %edx /* %edx = ~word; CF is left untouched */
jnc L3 /* no carry from the addl => word contains a NUL */
xorl %ecx, %edx /* (word+magic) ^ ~word */
andl $~magic, %edx /* keep only the hole bits 8, 16 and 24 */
je L1 /* all holes changed => no NUL, loop with %edx == 0 */
/* A NUL is somewhere in the word just examined.  %ecx still holds
word + magic and %eax already points past the word, so undo both and
then test the bytes from lowest address to highest. */
L3: subl $4, %eax /* point back at the first byte of the word */
subl $magic, %ecx /* recover the original word */
cmpb $0, %cl /* first byte NUL? */
jz L2 /* yes => compute length */
inc %eax /* advance to the second byte */
testb %ch, %ch /* second byte NUL? */
jz L2 /* yes => compute length */
shrl $16, %ecx /* move the upper two bytes into %cl/%ch */
incl %eax /* advance to the third byte */
cmpb $0, %cl /* third byte NUL? */
jz L2 /* yes => compute length */
incl %eax /* The fourth byte is not tested: the word is known
to contain a NUL and the first three bytes are
non-zero, so the NUL must be the fourth byte. */
L2: subl 4(%esp), %eax /* length = address of the NUL byte minus the
start address passed by the caller; returned
in %eax */
ret