mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-23 03:10:05 +00:00
[BZ #1302]
2005-09-06 Paul Eggert <eggert@cs.ucla.edu> Ulrich Drepper <drepper@redhat.com> [BZ #1302] Change bitset word type from unsigned int to unsigned long int, as this has better performance on typical 64-bit hosts. Change bitset type name to bitset_t. * posix/regcomp.c (build_equiv_class, build_charclass) (build_range_exp, build_collating_symbol): Prefer bitset_t to re_bitset_ptr_t in prototypes, when the actual argument is a bitset. This is merely a style issue, but it makes it clearer that an entire array is expected. (re_compile_fastmap_iter, init_dfa, init_word_char, optimize_subexps, lower_subexp): Adjust for new bitset_t definition. (lower_subexp, parse_bracket_exp, build_charclass_op): Likewise. * posix/regex_internal.h (bitset_set, bitset_clear, bitset_contain, bitset_not, bitset_merge, bitset_set_all, bitset_mask): Likewise. * posix/regexec.c (check_dst_limits_calc_pos_1, check_subexp_matching_top, build_trtable, group_nodes_into_DFAstates): Likewise. * posix/regcomp.c (utf8_sb_map): Don't assume initializer == 0xffffffff. * posix/regex_internal.h (BITSET_WORD_BITS): Renamed from UINT_BITS. All uses changed. (BITSET_WORDS): Renamed from BITSET_UINTS. All uses changed. (bitset_word_t): New type, replacing 'unsigned int' for bitset uses. All uses changed. (BITSET_WORD_MAX): New macro. (bitset_set, bitset_clear, bitset_contain, bitset_empty) (bitset_set_all, bitset_copy): Adjust for bitset_t change. (bitset_empty, bitset_copy): Prefer sizeof (bitset_t) to multiplying it out ourselves. (bitset_not_merge): Remove; unused. (bitset_contain): Return bool, not unsigned int with one bit on. All callers changed. * posix/regexec.c (build_trtable): Don't assume bitset_t has no stricter alignment than re_node_set; do this by defining a new internal type struct dests_alloc and using it to allocate memory.
This commit is contained in:
parent
1873e3cd1a
commit
2c05d33f90
39
ChangeLog
39
ChangeLog
@ -1,3 +1,42 @@
|
||||
2005-09-06 Paul Eggert <eggert@cs.ucla.edu>
|
||||
Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
[BZ #1302]
|
||||
Change bitset word type from unsigned int to unsigned long int,
|
||||
as this has better performance on typical 64-bit hosts. Change
|
||||
bitset type name to bitset_t.
|
||||
* posix/regcomp.c (build_equiv_class, build_charclass)
|
||||
(build_range_exp, build_collating_symbol):
|
||||
Prefer bitset_t to re_bitset_ptr_t in prototypes, when the actual
|
||||
argument is a bitset. This is merely a style issue, but it makes
|
||||
it clearer that an entire array is expected.
|
||||
(re_compile_fastmap_iter, init_dfa, init_word_char, optimize_subexps,
|
||||
lower_subexp): Adjust for new bitset_t definition.
|
||||
(lower_subexp, parse_bracket_exp, build_charclass_op): Likewise.
|
||||
* posix/regex_internal.h (bitset_set, bitset_clear, bitset_contain,
|
||||
bitset_not, bitset_merge, bitset_set_all, bitset_mask): Likewise.
|
||||
* posix/regexec.c (check_dst_limits_calc_pos_1,
|
||||
check_subexp_matching_top, build_trtable, group_nodes_into_DFAstates):
|
||||
Likewise.
|
||||
* posix/regcomp.c (utf8_sb_map): Don't assume initializer
|
||||
== 0xffffffff.
|
||||
* posix/regex_internal.h (BITSET_WORD_BITS): Renamed from UINT_BITS.
|
||||
All uses changed.
|
||||
(BITSET_WORDS): Renamed from BITSET_UINTS. All uses changed.
|
||||
(bitset_word_t): New type, replacing 'unsigned int' for bitset uses.
|
||||
All uses changed.
|
||||
(BITSET_WORD_MAX): New macro.
|
||||
(bitset_set, bitset_clear, bitset_contain, bitset_empty)
|
||||
(bitset_set_all, bitset_copy): Adjust for bitset_t change.
|
||||
(bitset_empty, bitset_copy):
|
||||
Prefer sizeof (bitset_t) to multiplying it out ourselves.
|
||||
(bitset_not_merge): Remove; unused.
|
||||
(bitset_contain): Return bool, not unsigned int with one bit on.
|
||||
All callers changed.
|
||||
* posix/regexec.c (build_trtable): Don't assume bitset_t has no
|
||||
stricter alignment than re_node_set; do this by defining a new
|
||||
internal type struct dests_alloc and using it to allocate memory.
|
||||
|
||||
2005-09-27 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
[BZ #1230]
|
||||
|
@ -113,21 +113,21 @@ static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset,
|
||||
# endif /* not RE_ENABLE_I18N */
|
||||
#endif /* not _LIBC */
|
||||
#ifdef RE_ENABLE_I18N
|
||||
static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
|
||||
static reg_errcode_t build_equiv_class (bitset_t sbcset,
|
||||
re_charset_t *mbcset,
|
||||
int *equiv_class_alloc,
|
||||
const unsigned char *name);
|
||||
static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
|
||||
re_bitset_ptr_t sbcset,
|
||||
bitset_t sbcset,
|
||||
re_charset_t *mbcset,
|
||||
int *char_class_alloc,
|
||||
const unsigned char *class_name,
|
||||
reg_syntax_t syntax);
|
||||
#else /* not RE_ENABLE_I18N */
|
||||
static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
|
||||
static reg_errcode_t build_equiv_class (bitset_t sbcset,
|
||||
const unsigned char *name);
|
||||
static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
|
||||
re_bitset_ptr_t sbcset,
|
||||
bitset_t sbcset,
|
||||
const unsigned char *class_name,
|
||||
reg_syntax_t syntax);
|
||||
#endif /* not RE_ENABLE_I18N */
|
||||
@ -354,7 +354,7 @@ re_compile_fastmap_iter (bufp, init_state, fastmap)
|
||||
&& dfa->nodes[node].type == CHARACTER
|
||||
&& dfa->nodes[node].mb_partial)
|
||||
*p++ = dfa->nodes[node].opr.c;
|
||||
memset (&state, 0, sizeof (state));
|
||||
memset (&state, '\0', sizeof (state));
|
||||
if (mbrtowc (&wc, (const char *) buf, p - buf,
|
||||
&state) == p - buf
|
||||
&& (__wcrtomb ((char *) buf, towlower (wc), &state)
|
||||
@ -365,11 +365,15 @@ re_compile_fastmap_iter (bufp, init_state, fastmap)
|
||||
}
|
||||
else if (type == SIMPLE_BRACKET)
|
||||
{
|
||||
int i, j, ch;
|
||||
for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
|
||||
for (j = 0; j < UINT_BITS; ++j, ++ch)
|
||||
if (dfa->nodes[node].opr.sbcset[i] & (1u << j))
|
||||
re_set_fastmap (fastmap, icase, ch);
|
||||
int i, ch;
|
||||
for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
|
||||
{
|
||||
int j;
|
||||
bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
|
||||
for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
|
||||
if (w & ((bitset_word_t) 1 << j))
|
||||
re_set_fastmap (fastmap, icase, ch);
|
||||
}
|
||||
}
|
||||
#ifdef RE_ENABLE_I18N
|
||||
else if (type == COMPLEX_BRACKET)
|
||||
@ -388,13 +392,11 @@ re_compile_fastmap_iter (bufp, init_state, fastmap)
|
||||
is a valid collation element, and don't catch
|
||||
'b' since 'b' is the only collation element
|
||||
which starts from 'b'. */
|
||||
int j, ch;
|
||||
const int32_t *table = (const int32_t *)
|
||||
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
|
||||
for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
|
||||
for (j = 0; j < UINT_BITS; ++j, ++ch)
|
||||
if (table[ch] < 0)
|
||||
re_set_fastmap (fastmap, icase, ch);
|
||||
for (i = 0; i < SBC_MAX; ++i)
|
||||
if (table[i] < 0)
|
||||
re_set_fastmap (fastmap, icase, i);
|
||||
}
|
||||
# else
|
||||
if (dfa->mb_cur_max > 1)
|
||||
@ -581,14 +583,10 @@ weak_alias (__regerror, regerror)
|
||||
UTF-8 is used. Otherwise we would allocate memory just to initialize
|
||||
it the same all the time. UTF-8 is the preferred encoding so this is
|
||||
a worthwhile optimization. */
|
||||
static const bitset utf8_sb_map =
|
||||
static const bitset_t utf8_sb_map =
|
||||
{
|
||||
/* Set the first 128 bits. */
|
||||
# if UINT_MAX == 0xffffffff
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
|
||||
# else
|
||||
# error "Add case for new unsigned int size"
|
||||
# endif
|
||||
[0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
|
||||
};
|
||||
#endif
|
||||
|
||||
@ -908,20 +906,17 @@ init_dfa (dfa, pat_len)
|
||||
{
|
||||
int i, j, ch;
|
||||
|
||||
dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
|
||||
dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
|
||||
if (BE (dfa->sb_char == NULL, 0))
|
||||
return REG_ESPACE;
|
||||
|
||||
/* Clear all bits by, then set those corresponding to single
|
||||
byte chars. */
|
||||
bitset_empty (dfa->sb_char);
|
||||
|
||||
for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
|
||||
for (j = 0; j < UINT_BITS; ++j, ++ch)
|
||||
/* Set the bits corresponding to single byte chars. */
|
||||
for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
|
||||
for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
|
||||
{
|
||||
wint_t wch = __btowc (ch);
|
||||
if (wch != WEOF)
|
||||
dfa->sb_char[i] |= 1u << j;
|
||||
dfa->sb_char[i] |= (bitset_word_t) 1 << j;
|
||||
# ifndef _LIBC
|
||||
if (isascii (ch) && wch != ch)
|
||||
dfa->map_notascii = 1;
|
||||
@ -946,10 +941,10 @@ init_word_char (dfa)
|
||||
{
|
||||
int i, j, ch;
|
||||
dfa->word_ops_used = 1;
|
||||
for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
|
||||
for (j = 0; j < UINT_BITS; ++j, ++ch)
|
||||
for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
|
||||
for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
|
||||
if (isalnum (ch) || ch == '_')
|
||||
dfa->word_char[i] |= 1u << j;
|
||||
dfa->word_char[i] |= (bitset_word_t) 1 << j;
|
||||
}
|
||||
|
||||
/* Free the work areas which are only used while compiling. */
|
||||
@ -1096,8 +1091,9 @@ optimize_utf8 (dfa)
|
||||
case COMPLEX_BRACKET:
|
||||
return;
|
||||
case SIMPLE_BRACKET:
|
||||
/* Just double check. */
|
||||
for (i = 0x80 / UINT_BITS; i < BITSET_UINTS; ++i)
|
||||
/* Just double check. The non-ASCII range starts at 0x80. */
|
||||
assert (0x80 % BITSET_WORD_BITS == 0);
|
||||
for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
|
||||
if (dfa->nodes[node].opr.sbcset[i])
|
||||
return;
|
||||
break;
|
||||
@ -1282,8 +1278,8 @@ optimize_subexps (extra, node)
|
||||
node->left->parent = node;
|
||||
|
||||
dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
|
||||
if (other_idx < CHAR_BIT * sizeof dfa->used_bkref_map)
|
||||
dfa->used_bkref_map &= ~(1u << other_idx);
|
||||
if (other_idx < BITSET_WORD_BITS)
|
||||
dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
|
||||
}
|
||||
|
||||
return REG_NOERROR;
|
||||
@ -1331,8 +1327,9 @@ lower_subexp (err, preg, node)
|
||||
very common, so we do not lose much. An example that triggers
|
||||
this case is the sed "script" /\(\)/x. */
|
||||
&& node->left != NULL
|
||||
&& (node->token.opr.idx >= CHAR_BIT * sizeof dfa->used_bkref_map
|
||||
|| !(dfa->used_bkref_map & (1u << node->token.opr.idx))))
|
||||
&& (node->token.opr.idx >= BITSET_WORD_BITS
|
||||
|| !(dfa->used_bkref_map
|
||||
& ((bitset_word_t) 1 << node->token.opr.idx))))
|
||||
return node->left;
|
||||
|
||||
/* Convert the SUBEXP node to the concatenation of an
|
||||
@ -2666,7 +2663,7 @@ build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
|
||||
# else /* not RE_ENABLE_I18N */
|
||||
build_range_exp (sbcset, start_elem, end_elem)
|
||||
# endif /* not RE_ENABLE_I18N */
|
||||
re_bitset_ptr_t sbcset;
|
||||
bitset_t sbcset;
|
||||
bracket_elem_t *start_elem, *end_elem;
|
||||
{
|
||||
unsigned int start_ch, end_ch;
|
||||
@ -2788,7 +2785,7 @@ build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
|
||||
# else /* not RE_ENABLE_I18N */
|
||||
build_collating_symbol (sbcset, name)
|
||||
# endif /* not RE_ENABLE_I18N */
|
||||
re_bitset_ptr_t sbcset;
|
||||
bitset_t sbcset;
|
||||
const unsigned char *name;
|
||||
{
|
||||
size_t name_len = strlen ((const char *) name);
|
||||
@ -2931,7 +2928,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
|
||||
build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
|
||||
re_charset_t *mbcset;
|
||||
int *range_alloc;
|
||||
re_bitset_ptr_t sbcset;
|
||||
bitset_t sbcset;
|
||||
bracket_elem_t *start_elem, *end_elem;
|
||||
{
|
||||
unsigned int ch;
|
||||
@ -3014,7 +3011,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
|
||||
build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
|
||||
re_charset_t *mbcset;
|
||||
int *coll_sym_alloc;
|
||||
re_bitset_ptr_t sbcset;
|
||||
bitset_t sbcset;
|
||||
const unsigned char *name;
|
||||
{
|
||||
int32_t elem, idx;
|
||||
@ -3099,7 +3096,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
|
||||
_NL_COLLATE_SYMB_EXTRAMB);
|
||||
}
|
||||
#endif
|
||||
sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
|
||||
sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
|
||||
#ifdef RE_ENABLE_I18N
|
||||
mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
@ -3309,12 +3306,12 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
|
||||
mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
|
||||
if (BE (mbc_tree == NULL, 0))
|
||||
goto parse_bracket_exp_espace;
|
||||
for (sbc_idx = 0; sbc_idx < BITSET_UINTS; ++sbc_idx)
|
||||
for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
|
||||
if (sbcset[sbc_idx])
|
||||
break;
|
||||
/* If there are no bits set in sbcset, there is no point
|
||||
of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
|
||||
if (sbc_idx < BITSET_UINTS)
|
||||
if (sbc_idx < BITSET_WORDS)
|
||||
{
|
||||
/* Build a tree for simple bracket. */
|
||||
br_token.type = SIMPLE_BRACKET;
|
||||
@ -3464,7 +3461,7 @@ build_equiv_class (sbcset, mbcset, equiv_class_alloc, name)
|
||||
#else /* not RE_ENABLE_I18N */
|
||||
build_equiv_class (sbcset, name)
|
||||
#endif /* not RE_ENABLE_I18N */
|
||||
re_bitset_ptr_t sbcset;
|
||||
bitset_t sbcset;
|
||||
const unsigned char *name;
|
||||
{
|
||||
#if defined _LIBC
|
||||
@ -3560,7 +3557,7 @@ build_charclass (trans, sbcset, mbcset, char_class_alloc, class_name, syntax)
|
||||
build_charclass (trans, sbcset, class_name, syntax)
|
||||
#endif /* not RE_ENABLE_I18N */
|
||||
RE_TRANSLATE_TYPE trans;
|
||||
re_bitset_ptr_t sbcset;
|
||||
bitset_t sbcset;
|
||||
const unsigned char *class_name;
|
||||
reg_syntax_t syntax;
|
||||
{
|
||||
@ -3649,7 +3646,7 @@ build_charclass_op (dfa, trans, class_name, extra, non_match, err)
|
||||
re_token_t br_token;
|
||||
bin_tree_t *tree;
|
||||
|
||||
sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
|
||||
sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
|
||||
#ifdef RE_ENABLE_I18N
|
||||
mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
|
@ -39,6 +39,9 @@
|
||||
#if defined HAVE_WCTYPE_H || defined _LIBC
|
||||
# include <wctype.h>
|
||||
#endif /* HAVE_WCTYPE_H || _LIBC */
|
||||
#if defined HAVE_STDBOOL_H || defined _LIBC
|
||||
# include <stdbool.h>
|
||||
#endif /* HAVE_STDBOOL_H || _LIBC */
|
||||
#if defined _LIBC
|
||||
# include <bits/libc-lock.h>
|
||||
#else
|
||||
@ -120,26 +123,31 @@
|
||||
extern const char __re_error_msgid[] attribute_hidden;
|
||||
extern const size_t __re_error_msgid_idx[] attribute_hidden;
|
||||
|
||||
/* Number of bits in an unsinged int. */
|
||||
#define UINT_BITS (sizeof (unsigned int) * CHAR_BIT)
|
||||
/* Number of unsigned int in an bit_set. */
|
||||
#define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS)
|
||||
typedef unsigned int bitset[BITSET_UINTS];
|
||||
typedef unsigned int *re_bitset_ptr_t;
|
||||
typedef const unsigned int *re_const_bitset_ptr_t;
|
||||
/* An integer used to represent a set of bits. It must be unsigned,
|
||||
and must be at least as wide as unsigned int. */
|
||||
typedef unsigned long int bitset_word_t;
|
||||
/* All bits set in a bitset_word_t. */
|
||||
#define BITSET_WORD_MAX ULONG_MAX
|
||||
/* Number of bits in a bitset_word_t. */
|
||||
#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT)
|
||||
/* Number of bitset_word_t in a bit_set. */
|
||||
#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS)
|
||||
typedef bitset_word_t bitset_t[BITSET_WORDS];
|
||||
typedef bitset_word_t *re_bitset_ptr_t;
|
||||
typedef const bitset_word_t *re_const_bitset_ptr_t;
|
||||
|
||||
#define bitset_set(set,i) (set[i / UINT_BITS] |= 1u << i % UINT_BITS)
|
||||
#define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1u << i % UINT_BITS))
|
||||
#define bitset_contain(set,i) (set[i / UINT_BITS] & (1u << i % UINT_BITS))
|
||||
#define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS)
|
||||
#define bitset_set_all(set) \
|
||||
memset (set, 255, sizeof (unsigned int) * BITSET_UINTS)
|
||||
#define bitset_copy(dest,src) \
|
||||
memcpy (dest, src, sizeof (unsigned int) * BITSET_UINTS)
|
||||
static inline void bitset_not (bitset set);
|
||||
static inline void bitset_merge (bitset dest, const bitset src);
|
||||
static inline void bitset_not_merge (bitset dest, const bitset src);
|
||||
static inline void bitset_mask (bitset dest, const bitset src);
|
||||
#define bitset_set(set,i) \
|
||||
(set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS)
|
||||
#define bitset_clear(set,i) \
|
||||
(set[i / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << i % BITSET_WORD_BITS))
|
||||
#define bitset_contain(set,i) \
|
||||
(set[i / BITSET_WORD_BITS] & ((bitset_word_t) 1 << i % BITSET_WORD_BITS))
|
||||
#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t))
|
||||
#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t))
|
||||
#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t))
|
||||
static inline void bitset_not (bitset_t set);
|
||||
static inline void bitset_merge (bitset_t dest, const bitset_t src);
|
||||
static inline void bitset_mask (bitset_t dest, const bitset_t src);
|
||||
|
||||
#define PREV_WORD_CONSTRAINT 0x0001
|
||||
#define PREV_NOTWORD_CONSTRAINT 0x0002
|
||||
@ -648,8 +656,8 @@ struct re_dfa_t
|
||||
int nbackref; /* The number of backreference in this dfa. */
|
||||
|
||||
/* Bitmap expressing which backreference is used. */
|
||||
unsigned int used_bkref_map;
|
||||
unsigned int completed_bkref_map;
|
||||
bitset_word_t used_bkref_map;
|
||||
bitset_word_t completed_bkref_map;
|
||||
|
||||
unsigned int has_plural_match : 1;
|
||||
/* If this dfa has "multibyte node", which is a backreference or
|
||||
@ -660,7 +668,7 @@ struct re_dfa_t
|
||||
unsigned int map_notascii : 1;
|
||||
unsigned int word_ops_used : 1;
|
||||
int mb_cur_max;
|
||||
bitset word_char;
|
||||
bitset_t word_char;
|
||||
reg_syntax_t syntax;
|
||||
int *subexp_map;
|
||||
#ifdef DEBUG
|
||||
@ -734,34 +742,26 @@ typedef struct
|
||||
|
||||
/* Inline functions for bitset operation. */
|
||||
static inline void
|
||||
bitset_not (bitset set)
|
||||
bitset_not (bitset_t set)
|
||||
{
|
||||
int bitset_i;
|
||||
for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
|
||||
for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
|
||||
set[bitset_i] = ~set[bitset_i];
|
||||
}
|
||||
|
||||
static inline void
|
||||
bitset_merge (bitset dest, const bitset src)
|
||||
bitset_merge (bitset_t dest, const bitset_t src)
|
||||
{
|
||||
int bitset_i;
|
||||
for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
|
||||
for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
|
||||
dest[bitset_i] |= src[bitset_i];
|
||||
}
|
||||
|
||||
static inline void
|
||||
bitset_not_merge (bitset dest, const bitset src)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < BITSET_UINTS; ++i)
|
||||
dest[i] |= ~src[i];
|
||||
}
|
||||
|
||||
static inline void
|
||||
bitset_mask (bitset dest, const bitset src)
|
||||
bitset_mask (bitset_t dest, const bitset_t src)
|
||||
{
|
||||
int bitset_i;
|
||||
for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
|
||||
for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
|
||||
dest[bitset_i] &= src[bitset_i];
|
||||
}
|
||||
|
||||
|
@ -213,7 +213,7 @@ static unsigned int find_collation_sequence_value (const unsigned char *mbs,
|
||||
static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
|
||||
const re_dfastate_t *state,
|
||||
re_node_set *states_node,
|
||||
bitset *states_ch) internal_function;
|
||||
bitset_t *states_ch) internal_function;
|
||||
static int check_node_accept (const re_match_context_t *mctx,
|
||||
const re_token_t *node, int idx)
|
||||
internal_function;
|
||||
@ -1980,9 +1980,9 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
|
||||
if (ent->node != node)
|
||||
continue;
|
||||
|
||||
if (subexp_idx
|
||||
< CHAR_BIT * sizeof ent->eps_reachable_subexps_map
|
||||
&& !(ent->eps_reachable_subexps_map & (1u << subexp_idx)))
|
||||
if (subexp_idx < BITSET_WORD_BITS
|
||||
&& !(ent->eps_reachable_subexps_map
|
||||
& ((bitset_word_t) 1 << subexp_idx)))
|
||||
continue;
|
||||
|
||||
/* Recurse trying to reach the OP_OPEN_SUBEXP and
|
||||
@ -2008,9 +2008,9 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
|
||||
if (cpos == 0 && (boundaries & 2))
|
||||
return 0;
|
||||
|
||||
if (subexp_idx
|
||||
< CHAR_BIT * sizeof ent->eps_reachable_subexps_map)
|
||||
ent->eps_reachable_subexps_map &= ~(1u << subexp_idx);
|
||||
if (subexp_idx < BITSET_WORD_BITS)
|
||||
ent->eps_reachable_subexps_map
|
||||
&= ~((bitset_word_t) 1 << subexp_idx);
|
||||
}
|
||||
while (ent++->more);
|
||||
}
|
||||
@ -2477,8 +2477,9 @@ check_subexp_matching_top (mctx, cur_nodes, str_idx)
|
||||
{
|
||||
int node = cur_nodes->elems[node_idx];
|
||||
if (dfa->nodes[node].type == OP_OPEN_SUBEXP
|
||||
&& dfa->nodes[node].opr.idx < CHAR_BIT * sizeof dfa->used_bkref_map
|
||||
&& dfa->used_bkref_map & (1u << dfa->nodes[node].opr.idx))
|
||||
&& dfa->nodes[node].opr.idx < BITSET_WORD_BITS
|
||||
&& (dfa->used_bkref_map
|
||||
& ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
|
||||
{
|
||||
err = match_ctx_add_subtop (mctx, node, str_idx);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
@ -3363,31 +3364,37 @@ build_trtable (dfa, state)
|
||||
{
|
||||
reg_errcode_t err;
|
||||
int i, j, ch, need_word_trtable = 0;
|
||||
unsigned int elem, mask;
|
||||
int dests_node_malloced = 0, dest_states_malloced = 0;
|
||||
bitset_word_t elem, mask;
|
||||
bool dests_node_malloced = false;
|
||||
bool dest_states_malloced = false;
|
||||
int ndests; /* Number of the destination states from `state'. */
|
||||
re_dfastate_t **trtable;
|
||||
re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
|
||||
re_node_set follows, *dests_node;
|
||||
bitset *dests_ch;
|
||||
bitset acceptable;
|
||||
bitset_t *dests_ch;
|
||||
bitset_t acceptable;
|
||||
|
||||
struct dests_alloc
|
||||
{
|
||||
re_node_set dests_node[SBC_MAX];
|
||||
bitset_t dests_ch[SBC_MAX];
|
||||
} *dests_alloc;
|
||||
|
||||
/* We build DFA states which corresponds to the destination nodes
|
||||
from `state'. `dests_node[i]' represents the nodes which i-th
|
||||
destination state contains, and `dests_ch[i]' represents the
|
||||
characters which i-th destination state accepts. */
|
||||
if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
|
||||
dests_node = (re_node_set *)
|
||||
alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
|
||||
if (__libc_use_alloca (sizeof (struct dests_alloc)))
|
||||
dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
|
||||
else
|
||||
{
|
||||
dests_node = (re_node_set *)
|
||||
malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
|
||||
if (BE (dests_node == NULL, 0))
|
||||
dests_alloc = re_malloc (struct dests_alloc, 1);
|
||||
if (BE (dests_alloc == NULL, 0))
|
||||
return 0;
|
||||
dests_node_malloced = 1;
|
||||
dests_node_malloced = true;
|
||||
}
|
||||
dests_ch = (bitset *) (dests_node + SBC_MAX);
|
||||
dests_node = dests_alloc->dests_node;
|
||||
dests_ch = dests_alloc->dests_ch;
|
||||
|
||||
/* Initialize transition table. */
|
||||
state->word_trtable = state->trtable = NULL;
|
||||
@ -3398,7 +3405,7 @@ build_trtable (dfa, state)
|
||||
if (BE (ndests <= 0, 0))
|
||||
{
|
||||
if (dests_node_malloced)
|
||||
free (dests_node);
|
||||
free (dests_alloc);
|
||||
/* Return 0 in case of an error, 1 otherwise. */
|
||||
if (ndests == 0)
|
||||
{
|
||||
@ -3413,7 +3420,7 @@ build_trtable (dfa, state)
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
goto out_free;
|
||||
|
||||
if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
|
||||
if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
|
||||
+ ndests * 3 * sizeof (re_dfastate_t *)))
|
||||
dest_states = (re_dfastate_t **)
|
||||
alloca (ndests * 3 * sizeof (re_dfastate_t *));
|
||||
@ -3430,10 +3437,10 @@ out_free:
|
||||
for (i = 0; i < ndests; ++i)
|
||||
re_node_set_free (dests_node + i);
|
||||
if (dests_node_malloced)
|
||||
free (dests_node);
|
||||
free (dests_alloc);
|
||||
return 0;
|
||||
}
|
||||
dest_states_malloced = 1;
|
||||
dest_states_malloced = true;
|
||||
}
|
||||
dest_states_word = dest_states + ndests;
|
||||
dest_states_nl = dest_states_word + ndests;
|
||||
@ -3495,8 +3502,8 @@ out_free:
|
||||
goto out_free;
|
||||
|
||||
/* For all characters ch...: */
|
||||
for (i = 0; i < BITSET_UINTS; ++i)
|
||||
for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
|
||||
for (i = 0; i < BITSET_WORDS; ++i)
|
||||
for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
|
||||
elem;
|
||||
mask <<= 1, elem >>= 1, ++ch)
|
||||
if (BE (elem & 1, 0))
|
||||
@ -3526,8 +3533,8 @@ out_free:
|
||||
goto out_free;
|
||||
|
||||
/* For all characters ch...: */
|
||||
for (i = 0; i < BITSET_UINTS; ++i)
|
||||
for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
|
||||
for (i = 0; i < BITSET_WORDS; ++i)
|
||||
for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
|
||||
elem;
|
||||
mask <<= 1, elem >>= 1, ++ch)
|
||||
if (BE (elem & 1, 0))
|
||||
@ -3568,7 +3575,7 @@ out_free:
|
||||
re_node_set_free (dests_node + i);
|
||||
|
||||
if (dests_node_malloced)
|
||||
free (dests_node);
|
||||
free (dests_alloc);
|
||||
|
||||
return 1;
|
||||
}
|
||||
@ -3583,13 +3590,13 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
|
||||
const re_dfa_t *dfa;
|
||||
const re_dfastate_t *state;
|
||||
re_node_set *dests_node;
|
||||
bitset *dests_ch;
|
||||
bitset_t *dests_ch;
|
||||
{
|
||||
reg_errcode_t err;
|
||||
int result;
|
||||
int i, j, k;
|
||||
int ndests; /* Number of the destinations from `state'. */
|
||||
bitset accepts; /* Characters a node can accept. */
|
||||
bitset_t accepts; /* Characters a node can accept. */
|
||||
const re_node_set *cur_nodes = &state->nodes;
|
||||
bitset_empty (accepts);
|
||||
ndests = 0;
|
||||
@ -3624,7 +3631,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
|
||||
#ifdef RE_ENABLE_I18N
|
||||
else if (type == OP_UTF8_PERIOD)
|
||||
{
|
||||
memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
|
||||
memset (accepts, '\xff', sizeof (bitset_t) / 2);
|
||||
if (!(dfa->syntax & RE_DOT_NEWLINE))
|
||||
bitset_clear (accepts, '\n');
|
||||
if (dfa->syntax & RE_DOT_NOT_NULL)
|
||||
@ -3640,7 +3647,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
|
||||
{
|
||||
if (constraint & NEXT_NEWLINE_CONSTRAINT)
|
||||
{
|
||||
int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
|
||||
bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
|
||||
bitset_empty (accepts);
|
||||
if (accepts_newline)
|
||||
bitset_set (accepts, NEWLINE_CHAR);
|
||||
@ -3655,7 +3662,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
|
||||
|
||||
if (constraint & NEXT_WORD_CONSTRAINT)
|
||||
{
|
||||
unsigned int any_set = 0;
|
||||
bitset_word_t any_set = 0;
|
||||
if (type == CHARACTER && !node->word_char)
|
||||
{
|
||||
bitset_empty (accepts);
|
||||
@ -3663,18 +3670,18 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
|
||||
}
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (dfa->mb_cur_max > 1)
|
||||
for (j = 0; j < BITSET_UINTS; ++j)
|
||||
for (j = 0; j < BITSET_WORDS; ++j)
|
||||
any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
|
||||
else
|
||||
#endif
|
||||
for (j = 0; j < BITSET_UINTS; ++j)
|
||||
for (j = 0; j < BITSET_WORDS; ++j)
|
||||
any_set |= (accepts[j] &= dfa->word_char[j]);
|
||||
if (!any_set)
|
||||
continue;
|
||||
}
|
||||
if (constraint & NEXT_NOTWORD_CONSTRAINT)
|
||||
{
|
||||
unsigned int any_set = 0;
|
||||
bitset_word_t any_set = 0;
|
||||
if (type == CHARACTER && node->word_char)
|
||||
{
|
||||
bitset_empty (accepts);
|
||||
@ -3682,11 +3689,11 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
|
||||
}
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (dfa->mb_cur_max > 1)
|
||||
for (j = 0; j < BITSET_UINTS; ++j)
|
||||
for (j = 0; j < BITSET_WORDS; ++j)
|
||||
any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
|
||||
else
|
||||
#endif
|
||||
for (j = 0; j < BITSET_UINTS; ++j)
|
||||
for (j = 0; j < BITSET_WORDS; ++j)
|
||||
any_set |= (accepts[j] &= ~dfa->word_char[j]);
|
||||
if (!any_set)
|
||||
continue;
|
||||
@ -3697,10 +3704,10 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
|
||||
state. Above, we make sure that accepts is not empty. */
|
||||
for (j = 0; j < ndests; ++j)
|
||||
{
|
||||
bitset intersec; /* Intersection sets, see below. */
|
||||
bitset remains;
|
||||
bitset_t intersec; /* Intersection sets, see below. */
|
||||
bitset_t remains;
|
||||
/* Flags, see below. */
|
||||
int has_intersec, not_subset, not_consumed;
|
||||
bitset_word_t has_intersec, not_subset, not_consumed;
|
||||
|
||||
/* Optimization, skip if this state doesn't accept the character. */
|
||||
if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
|
||||
@ -3708,7 +3715,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
|
||||
|
||||
/* Enumerate the intersection set of this state and `accepts'. */
|
||||
has_intersec = 0;
|
||||
for (k = 0; k < BITSET_UINTS; ++k)
|
||||
for (k = 0; k < BITSET_WORDS; ++k)
|
||||
has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
|
||||
/* And skip if the intersection set is empty. */
|
||||
if (!has_intersec)
|
||||
@ -3716,7 +3723,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
|
||||
|
||||
/* Then check if this state is a subset of `accepts'. */
|
||||
not_subset = not_consumed = 0;
|
||||
for (k = 0; k < BITSET_UINTS; ++k)
|
||||
for (k = 0; k < BITSET_WORDS; ++k)
|
||||
{
|
||||
not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
|
||||
not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
|
||||
|
Loading…
Reference in New Issue
Block a user