From 9f1151705e3d861a4d6d680e74158cd708119053 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Tue, 3 Jan 2012 07:54:15 -0500 Subject: [PATCH] Optimize regex a bit --- ChangeLog | 4 ++++ posix/regcomp.c | 36 ++++++++++++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 737c4ba196..73b1d419bf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2012-01-03 Ulrich Drepper + + * posix/regcomp.c (init_word_char): Optimize a bit for sane encodings. + 2012-01-01 Ulrich Drepper * posix/getconf.c: Update copyright year. diff --git a/posix/regcomp.c b/posix/regcomp.c index 34ee845081..6771dbb299 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -1,5 +1,5 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002-2007,2009,2010,2011 Free Software Foundation, Inc. + Copyright (C) 2002-2007,2009,2010,2011,2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa . @@ -926,10 +926,38 @@ static void internal_function init_word_char (re_dfa_t *dfa) { - int i, j, ch; dfa->word_ops_used = 1; - for (i = 0, ch = 0; i < BITSET_WORDS; ++i) - for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + int i = 0; + int ch = 0; + if (BE (dfa->map_notascii == 0, 1)) + { + if (sizeof (dfa->word_char[0]) == 8) + { + dfa->word_char[0] = UINT64_C (0x03ff000000000000); + dfa->word_char[1] = UINT64_C (0x07fffffe87fffffe); + i = 2; + } + else if (sizeof (dfa->word_char[0]) == 4) + { + dfa->word_char[0] = UINT32_C (0x00000000); + dfa->word_char[1] = UINT32_C (0x03ff0000); + dfa->word_char[2] = UINT32_C (0x87fffffe); + dfa->word_char[3] = UINT32_C (0x07fffffe); + i = 4; + } + else + abort (); + ch = 128; + + if (BE (dfa->is_utf8, 1)) + { + memset (&dfa->word_char[i], '\0', (SBC_MAX - ch) / 8); + return; + } + } + + for (; i < BITSET_WORDS; ++i) + for (int j = 0; j < BITSET_WORD_BITS; ++j, ++ch) if (isalnum (ch) || ch == '_') dfa->word_char[i] |= (bitset_word_t) 1 << j; }