glibc/benchtests/bench-strstr.c
Wilco Dijkstra 3ae725dfb6 Improve strstr performance
Improve strstr performance.  Strstr tends to be slow because it uses
many calls to memchr and a slow byte loop to scan for the next match.
Performance is significantly improved by using strnlen on larger blocks
and using strchr to search for the next matching character.  strcasestr
can also use strnlen to scan ahead, and memmem can use memchr to check
for the next match.

On the GLIBC bench tests the performance gains on Cortex-A72 are:
strstr: +25%
strcasestr: +4.3%
memmem: +18%

On a 256KB dataset strstr performance improves by 67%, strcasestr by 47%.

    Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
2018-07-16 17:51:52 +01:00

180 lines
4.2 KiB
C

/* Measure strstr functions.
Copyright (C) 2013-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* TEST_MAIN and TEST_NAME are set before bench-string.h is included so
   the header can see them.  NOTE(review): their exact effect is defined
   in bench-string.h, which is not part of this file -- presumably they
   select the "main program" variant of the harness and name the
   benchmark; confirm there.  */
#define TEST_MAIN
#define TEST_NAME "strstr"
#include "bench-string.h"
/* Pull a private copy of ../string/strstr.c into this translation unit.
   STRSTR is set to simple_strstr first; presumably strstr.c names the
   function it defines via STRSTR, which is what makes simple_strstr
   available to the IMPL registration further down -- confirm in
   strstr.c.  */
#define STRSTR simple_strstr
/* Expands to nothing, so any use of libc_hidden_builtin_def inside the
   included file disappears here.  */
#define libc_hidden_builtin_def(X)
/* Redirect references to __strnlen in the included file to strnlen.  */
#define __strnlen strnlen
#include "../string/strstr.c"
/* Straightforward reference strstr: try every possible start position
   in S1 and compare S2 against it one byte at a time.

   Returns a pointer to the first occurrence of S2 in S1, S1 itself when
   S2 is the empty string, or NULL when S2 does not occur in S1.

   Lengths and indices are all size_t, matching what strlen returns, so
   no signed/unsigned comparison or narrowing conversion takes place.  */
static char *
stupid_strstr (const char *s1, const char *s2)
{
  size_t s1len = strlen (s1);
  size_t s2len = strlen (s2);

  /* Must be checked first: it guarantees that the unsigned subtraction
     below cannot wrap around.  */
  if (s2len > s1len)
    return NULL;

  /* Highest start position at which S2 still fits inside S1.  */
  size_t last_start = s1len - s2len;

  for (size_t i = 0; i <= last_start; ++i)
    {
      size_t j;

      for (j = 0; j < s2len; ++j)
        if (s1[i + j] != s2[j])
          break;

      /* The inner loop ran to completion, so every byte matched.  */
      if (j == s2len)
        return (char *) s1 + i;
    }

  return NULL;
}
/* Function-pointer type matching the strstr signature: two const
   strings in, pointer into the first one (or NULL) out.  */
typedef char *(*proto_t) (const char *, const char *);
/* Register the three implementations that are benchmarked.  The second
   argument is 0 for the two routines built in this file and 1 for the
   library strstr.  NOTE(review): IMPL is defined outside this file;
   presumably the flag marks the implementation under test -- confirm
   against bench-string.h.  */
IMPL (stupid_strstr, 0)
IMPL (simple_strstr, 0)
IMPL (strstr, 1)
/* Time one implementation on one haystack/needle pair.

   IMPL is called INNER_LOOP_ITERS times in a row on haystack S1 and
   needle S2, and the mean time per call is printed through
   TIMING_PRINT_MEAN.  The value returned by the last call is then
   compared with EXP_RESULT (NULL when no match is expected); on a
   mismatch a diagnostic is written to stderr and the file-scope RET,
   which test_main returns, is set to 1.  */
static void
do_one_test (impl_t *impl, const char *s1, const char *s2, char *exp_result)
{
  size_t i, iters = INNER_LOOP_ITERS;
  timing_t start, stop, cur;
  /* Start from the expected value so that a build with a zero iteration
     count does not report a spurious mismatch.  */
  char *res = exp_result;

  TIMING_NOW (start);
  for (i = 0; i < iters; ++i)
    {
      res = CALL (impl, s1, s2);
    }
  TIMING_NOW (stop);

  TIMING_DIFF (cur, start, stop);
  TIMING_PRINT_MEAN ((double) cur, (double) iters);

  /* Checked outside the timed region so the measurement itself is not
     disturbed.  */
  if (res != exp_result)
    {
      fprintf (stderr, "Wrong result in function %s: got %p, expected %p\n",
               impl->name, (void *) res, (void *) exp_result);
      ret = 1;
    }
}
/* Build one haystack/needle pair and run every implementation on it.

   ALIGN1 and ALIGN2 are byte offsets into buf1 and buf2 at which the
   haystack and the needle start.  LEN1 and LEN2 are their lengths, not
   counting the terminating NUL.  Callers are expected to pass
   LEN2 <= LEN1; the "found" layout below places the needle at offset
   LEN1 - LEN2.

   The needle is the 16-character pattern D repeated and truncated to
   LEN2 bytes.  When FAIL is non-zero the haystack is the same repeated
   pattern with one byte changed in every copy, so the expected result
   is NULL.  Otherwise the haystack is '0' padding followed by the
   needle itself, so the expected result is the haystack's last LEN2
   bytes.  */
static void
do_test (size_t align1, size_t align2, size_t len1, size_t len2,
         int fail)
{
  char *s1 = (char *) (buf1 + align1);
  char *s2 = (char *) (buf2 + align2);
  static const char d[] = "1234567890abcdef";
  /* Pattern length without the NUL.  A local constant rather than a
     macro, so that nothing leaks into the rest of the file.  */
  const size_t dl = sizeof (d) - 1;

  /* Needle: whole copies of the pattern, then a partial copy.  */
  char *ss2 = s2;
  for (size_t left = len2; left > 0; )
    {
      size_t t = left > dl ? dl : left;
      memcpy (ss2, d, t);
      ss2 += t;
      left -= t;
    }
  s2[len2] = '\0';

  if (fail)
    {
      /* Offset, inside each pattern copy, of the byte that is bumped so
         that it no longer equals the needle's byte at that offset.  For
         LEN2 == 0 this wraps to SIZE_MAX and the guard below then never
         fires.  */
      size_t spoil = len2 > 7 ? 7 : len2 - 1;
      char *ss1 = s1;

      for (size_t left = len1; left > 0; )
        {
          size_t t = left > dl ? dl : left;
          memcpy (ss1, d, t);
          /* Only touch bytes that were just written; a short final
             chunk may end before the spoil offset.  */
          if (spoil < t)
            ++ss1[spoil];
          ss1 += t;
          left -= t;
        }
    }
  else
    {
      memset (s1, '0', len1);
      memcpy (s1 + len1 - len2, s2, len2);
    }
  s1[len1] = '\0';

  /* All four values are size_t, hence %zu.  */
  printf ("Length %4zu/%zu, alignment %2zu/%2zu, %s:",
          len1, len2, align1, align2, fail ? "fail" : "found");

  FOR_EACH_IMPL (impl, 0)
    do_one_test (impl, s1, s2, fail ? NULL : s1 + len1 - len2);

  putchar ('\n');
}
/* Benchmark entry point.

   Prints a header row with the name of every implementation, then runs
   do_test for needle lengths 2..31 against haystacks of 2..15 times the
   needle length, at each combination of the alignments listed below for
   haystack and needle, once with a match present and once without.
   Two final runs use a haystack of page_size - 1 bytes with a 16-byte
   needle.  Returns the file-scope RET.  */
static int
test_main (void)
{
  /* Alignments applied to haystack and needle.  The order here fixes
     the order of the result lines.  */
  static const size_t alignments[] = { 0, 3, 9, 15 };
  const size_t nalign = sizeof (alignments) / sizeof (alignments[0]);

  test_init ();

  /* Header row: 23 columns of padding, then one tab-separated name per
     implementation.  */
  printf ("%23s", "");
  FOR_EACH_IMPL (impl, 0)
    printf ("\t%s", impl->name);
  putchar ('\n');

  for (size_t klen = 2; klen < 32; ++klen)
    for (size_t hlen = 2 * klen; hlen < 16 * klen; hlen += klen)
      for (size_t h = 0; h < nalign; ++h)
        for (size_t k = 0; k < nalign; ++k)
          for (int fail = 0; fail <= 1; ++fail)
            do_test (alignments[h], alignments[k], hlen, klen, fail);

  for (int fail = 0; fail <= 1; ++fail)
    do_test (0, 0, page_size - 1, 16, fail);

  return ret;
}
#include <support/test-driver.c>