Detect EOL on-the-fly in strstr, strcasestr and memmem.

This commit is contained in:
Maxim Kuvyrkov 2012-05-28 22:50:15 -07:00
parent 20a71f2c8a
commit 400726deef
7 changed files with 108 additions and 15 deletions

View File

@ -1,3 +1,15 @@
2012-08-21 Maxim Kuvyrkov <maxim@codesourcery.com>
[BZ #11607]
* NEWS: Add an entry.
* string/str-two-way.h (AVAILABLE1, AVAILABLE2, RET0_IF_0): New macros,
define their defaults.
(two_way_short_needle): Detect end-of-string on-the-fly.
* string/strcasestr.c, string/strstr.c (AVAILABLE): Update.
(AVAILABLE1, AVAILABLE2, RET0_IF_0, AVAILABLE_USES_J): Define.
* string/bug-strcasestr1.c: New test.
* string/Makefile: Run it.
2012-08-21 Maxim Kuvyrkov <maxim@codesourcery.com>
[BZ #11607]

7
NEWS
View File

@ -9,8 +9,8 @@ Version 2.17
* The following bugs are resolved with this release:
6778, 6808, 9685, 13717, 13939, 14042, 14090, 14166, 14150, 14151, 14154,
14157, 14166, 14173, 14195, 14283, 14298, 14303, 14307, 14328, 14331,
6778, 6808, 9685, 11607, 13717, 13939, 14042, 14090, 14166, 14150, 14151,
14154, 14157, 14166, 14173, 14195, 14283, 14298, 14303, 14307, 14328, 14331,
14336, 14337, 14347, 14349
* Support for STT_GNU_IFUNC symbols added for s390 and s390x.
@ -25,6 +25,9 @@ Version 2.17
* SystemTap static probes have been added into the dynamic linker.
Implemented by Gary Benson.
* Optimizations of string functions strstr, strcasestr and memmem.
Implemented by Maxim Kuvyrkov.
* The minimum Linux kernel version that this version of the GNU C Library
can be used with is 2.6.16.

View File

@ -56,7 +56,7 @@ tests := tester inl-tester noinl-tester testcopy test-ffs \
tst-strtok tst-strxfrm bug-strcoll1 tst-strfry \
bug-strtok1 $(addprefix test-,$(strop-tests)) \
bug-envz1 tst-strxfrm2 tst-endian tst-svc2 \
bug-strstr1 bug-strchr1 tst-strtok_r
bug-strstr1 bug-strcasestr1 bug-strchr1 tst-strtok_r
include ../Rules
@ -74,6 +74,7 @@ CFLAGS-stratcliff.c = -fno-builtin
CFLAGS-test-ffs.c = -fno-builtin
CFLAGS-tst-inlcall.c = -fno-builtin
CFLAGS-bug-strstr1.c = -fno-builtin
CFLAGS-bug-strcasestr1.c = -fno-builtin
ifeq ($(cross-compiling),no)
tests: $(objpfx)tst-svc.out

39
string/bug-strcasestr1.c Normal file
View File

@ -0,0 +1,39 @@
/* Test for non-submitted strcasestr bug.
Copyright (C) 2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <stdio.h>
#include <string.h>
#define TEST_FUNCTION do_test ()
static int
do_test (void)
{
const char haystack[] = "AOKB";
const char needle[] = "OK";
const char *sub = strcasestr (haystack, needle);
if (sub == NULL)
{
fprintf (stderr, "BUG: didn't find \"%s\" in \"%s\"\n", needle, haystack);
return 1;
}
return 0;
}
#include "../test-skeleton.c"

View File

@ -75,6 +75,16 @@
# define CMP_FUNC memcmp
#endif
#ifndef AVAILABLE1
# define AVAILABLE1(h, h_l, j, n_l) AVAILABLE (h, h_l, j, n_l)
#endif
#ifndef AVAILABLE2
# define AVAILABLE2(h, h_l, j, n_l) (1)
#endif
#ifndef RET0_IF_0
# define RET0_IF_0(a) /* nothing */
#endif
/* Perform a critical factorization of NEEDLE, of length NEEDLE_LEN.
Return the index of the first byte in the right half, and set
*PERIOD to the global period of the right half.
@ -266,37 +276,58 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
required, and any mismatch results in a maximal shift. */
period = MAX (suffix, needle_len - suffix) + 1;
j = 0;
while (AVAILABLE (haystack, haystack_len, j, needle_len))
while (AVAILABLE1 (haystack, haystack_len, j, needle_len))
{
unsigned char haystack_char;
/* TODO: The first-character loop can be sped up by adapting
longword-at-a-time implementation of memchr/strchr. */
if (needle_suffix
!= CANON_ELEMENT (haystack[suffix + j]))
!= (haystack_char = CANON_ELEMENT (haystack[suffix + j])))
{
RET0_IF_0 (haystack_char);
++j;
continue;
}
/* Scan for matches in right half. */
i = suffix + 1;
while (i < needle_len && (CANON_ELEMENT (needle[i])
== CANON_ELEMENT (haystack[i + j])))
++i;
while (i < needle_len)
{
if (CANON_ELEMENT (needle[i])
!= (haystack_char = CANON_ELEMENT (haystack[i + j])))
{
RET0_IF_0 (haystack_char);
break;
}
++i;
}
if (needle_len <= i)
{
/* Scan for matches in left half. */
i = suffix - 1;
while (i != SIZE_MAX && (CANON_ELEMENT (needle[i])
== CANON_ELEMENT (haystack[i + j])))
--i;
while (i != SIZE_MAX)
{
if (CANON_ELEMENT (needle[i])
!= (haystack_char = CANON_ELEMENT (haystack[i + j])))
{
RET0_IF_0 (haystack_char);
break;
}
--i;
}
if (i == SIZE_MAX)
return (RETURN_TYPE) (haystack + j);
j += period;
}
else
j += i - suffix + 1;
if (!AVAILABLE2 (haystack, haystack_len, j, needle_len))
break;
}
}
ret0: __attribute__ ((unused))
return NULL;
}
@ -433,6 +464,9 @@ two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
}
#undef AVAILABLE
#undef AVAILABLE1
#undef AVAILABLE2
#undef CANON_ELEMENT
#undef CMP_FUNC
#undef RET0_IF_0
#undef RETURN_TYPE

View File

@ -1,6 +1,5 @@
/* Return the offset of one string within another.
Copyright (C) 1994, 1996-2000, 2004, 2008, 2009, 2010
Free Software Foundation, Inc.
Copyright (C) 1994-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@ -44,6 +43,9 @@
#define AVAILABLE(h, h_l, j, n_l) \
(!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \
&& ((h_l) = (j) + (n_l)))
#define AVAILABLE1(h, h_l, j, n_l) (true)
#define AVAILABLE2(h, h_l, j, n_l) AVAILABLE (h, h_l, j, n_l)
#define RET0_IF_0(a) if (!a) goto ret0
#define CANON_ELEMENT(c) TOLOWER (c)
#define CMP_FUNC(p1, p2, l) \
__strncasecmp ((const char *) (p1), (const char *) (p2), l)

View File

@ -1,6 +1,5 @@
/* Return the offset of one string within another.
Copyright (C) 1994,1996,1997,2000,2001,2003,2008,2009
Free Software Foundation, Inc.
Copyright (C) 1994-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@ -36,6 +35,9 @@
#define AVAILABLE(h, h_l, j, n_l) \
(!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \
&& ((h_l) = (j) + (n_l)))
#define AVAILABLE1(h, h_l, j, n_l) (true)
#define AVAILABLE2(h, h_l, j, n_l) AVAILABLE (h, h_l, j, n_l)
#define RET0_IF_0(a) if (!a) goto ret0
#include "str-two-way.h"
#undef strstr