regex: Unnest nested functions in regcomp.c

This refactor moves four functions out of a nested scope and converts
them into static always_inline functions. The parse_bracket_exp locals
collseqwc, table_size, symb_table, and extra are now initialized to zero
because they are passed as function arguments.

On x86-64, .text is 16 bytes larger, likely due to the 4 stores.
This is negligible compared to the amount of work that regcomp has to do
looking up the collation weights, or in other functions.

If the non-buildable `sysdeps/generic/dl-machine.h` doesn't count,
this patch removes the last `auto inline` usage from glibc.

Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
This commit is contained in:
Fangrui Song 2021-11-02 10:07:59 -07:00
parent db432f033d
commit fdcd177fd3

View File

@ -2831,30 +2831,18 @@ build_collating_symbol (bitset_t sbcset, const unsigned char *name)
} }
#endif /* not _LIBC */ #endif /* not _LIBC */
/* This function parse bracket expression like "[abc]", "[a-c]",
"[[.a-a.]]" etc. */
static bin_tree_t *
parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
reg_syntax_t syntax, reg_errcode_t *err)
{
#ifdef _LIBC #ifdef _LIBC
const unsigned char *collseqmb; /* Local function for parse_bracket_exp used in _LIBC environment.
const char *collseqwc;
uint32_t nrules;
int32_t table_size;
const int32_t *symb_table;
const unsigned char *extra;
/* Local function for parse_bracket_exp used in _LIBC environment.
Seek the collating symbol entry corresponding to NAME. Seek the collating symbol entry corresponding to NAME.
Return the index of the symbol in the SYMB_TABLE, Return the index of the symbol in the SYMB_TABLE,
or -1 if not found. */ or -1 if not found. */
auto inline int32_t static inline int32_t
__attribute__ ((always_inline)) __attribute__ ((always_inline))
seek_collating_symbol_entry (const unsigned char *name, size_t name_len) seek_collating_symbol_entry (const unsigned char *name, size_t name_len,
{ const int32_t *symb_table, int32_t table_size,
const unsigned char *extra)
{
int32_t elem; int32_t elem;
for (elem = 0; elem < table_size; elem++) for (elem = 0; elem < table_size; elem++)
@ -2871,21 +2859,23 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
return elem; return elem;
} }
return -1; return -1;
} }
/* Local function for parse_bracket_exp used in _LIBC environment. /* Local function for parse_bracket_exp used in _LIBC environment.
Look up the collation sequence value of BR_ELEM. Look up the collation sequence value of BR_ELEM.
Return the value if succeeded, UINT_MAX otherwise. */ Return the value if succeeded, UINT_MAX otherwise. */
auto inline unsigned int static inline unsigned int
__attribute__ ((always_inline)) __attribute__ ((always_inline))
lookup_collation_sequence_value (bracket_elem_t *br_elem) lookup_collation_sequence_value (bracket_elem_t *br_elem, uint32_t nrules,
{ const unsigned char *collseqmb,
const char *collseqwc, int32_t table_size,
const int32_t *symb_table,
const unsigned char *extra)
{
if (br_elem->type == SB_CHAR) if (br_elem->type == SB_CHAR)
{ {
/* /* if (MB_CUR_MAX == 1) */
if (MB_CUR_MAX == 1)
*/
if (nrules == 0) if (nrules == 0)
return collseqmb[br_elem->opr.ch]; return collseqmb[br_elem->opr.ch];
else else
@ -2906,7 +2896,9 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
{ {
int32_t elem, idx; int32_t elem, idx;
elem = seek_collating_symbol_entry (br_elem->opr.name, elem = seek_collating_symbol_entry (br_elem->opr.name,
sym_name_len); sym_name_len,
symb_table, table_size,
extra);
if (elem != -1) if (elem != -1)
{ {
/* We found the entry. */ /* We found the entry. */
@ -2936,20 +2928,24 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
return collseqmb[br_elem->opr.name[0]]; return collseqmb[br_elem->opr.name[0]];
} }
return UINT_MAX; return UINT_MAX;
} }
/* Local function for parse_bracket_exp used in _LIBC environment. /* Local function for parse_bracket_exp used in _LIBC environment.
Build the range expression which starts from START_ELEM, and ends Build the range expression which starts from START_ELEM, and ends
at END_ELEM. The result are written to MBCSET and SBCSET. at END_ELEM. The result are written to MBCSET and SBCSET.
RANGE_ALLOC is the allocated size of mbcset->range_starts, and RANGE_ALLOC is the allocated size of mbcset->range_starts, and
mbcset->range_ends, is a pointer argument since we may mbcset->range_ends, is a pointer argument since we may
update it. */ update it. */
auto inline reg_errcode_t static inline reg_errcode_t
__attribute__ ((always_inline)) __attribute__ ((always_inline))
build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
bracket_elem_t *start_elem, bracket_elem_t *end_elem) bracket_elem_t *start_elem, bracket_elem_t *end_elem,
{ re_dfa_t *dfa, reg_syntax_t syntax, uint32_t nrules,
const unsigned char *collseqmb, const char *collseqwc,
int32_t table_size, const int32_t *symb_table,
const unsigned char *extra)
{
unsigned int ch; unsigned int ch;
uint32_t start_collseq; uint32_t start_collseq;
uint32_t end_collseq; uint32_t end_collseq;
@ -2963,8 +2959,10 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
return REG_ERANGE; return REG_ERANGE;
/* FIXME: Implement rational ranges here, too. */ /* FIXME: Implement rational ranges here, too. */
start_collseq = lookup_collation_sequence_value (start_elem); start_collseq = lookup_collation_sequence_value (start_elem, nrules, collseqmb, collseqwc,
end_collseq = lookup_collation_sequence_value (end_elem); table_size, symb_table, extra);
end_collseq = lookup_collation_sequence_value (end_elem, nrules, collseqmb, collseqwc,
table_size, symb_table, extra);
/* Check start/end collation sequence values. */ /* Check start/end collation sequence values. */
if (__glibc_unlikely (start_collseq == UINT_MAX if (__glibc_unlikely (start_collseq == UINT_MAX
|| end_collseq == UINT_MAX)) || end_collseq == UINT_MAX))
@ -2985,7 +2983,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
/* There is not enough space, need realloc. */ /* There is not enough space, need realloc. */
uint32_t *new_array_start; uint32_t *new_array_start;
uint32_t *new_array_end; uint32_t *new_array_end;
Idx new_nranges; int new_nranges;
/* +1 in case of mbcset->nranges is 0. */ /* +1 in case of mbcset->nranges is 0. */
new_nranges = 2 * mbcset->nranges + 1; new_nranges = 2 * mbcset->nranges + 1;
@ -3011,9 +3009,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
for (ch = 0; ch < SBC_MAX; ch++) for (ch = 0; ch < SBC_MAX; ch++)
{ {
uint32_t ch_collseq; uint32_t ch_collseq;
/* /* if (MB_CUR_MAX == 1) */
if (MB_CUR_MAX == 1)
*/
if (nrules == 0) if (nrules == 0)
ch_collseq = collseqmb[ch]; ch_collseq = collseqmb[ch];
else else
@ -3022,24 +3018,27 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
bitset_set (sbcset, ch); bitset_set (sbcset, ch);
} }
return REG_NOERROR; return REG_NOERROR;
} }
/* Local function for parse_bracket_exp used in _LIBC environment. /* Local function for parse_bracket_exp used in _LIBC environment.
Build the collating element which is represented by NAME. Build the collating element which is represented by NAME.
The result are written to MBCSET and SBCSET. The result are written to MBCSET and SBCSET.
COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
pointer argument since we may update it. */ pointer argument since we may update it. */
auto inline reg_errcode_t static inline reg_errcode_t
__attribute__ ((always_inline)) __attribute__ ((always_inline))
build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
Idx *coll_sym_alloc, const unsigned char *name) int *coll_sym_alloc, const unsigned char *name,
{ uint32_t nrules, int32_t table_size,
const int32_t *symb_table, const unsigned char *extra)
{
int32_t elem, idx; int32_t elem, idx;
size_t name_len = strlen ((const char *) name); size_t name_len = strlen ((const char *) name);
if (nrules != 0) if (nrules != 0)
{ {
elem = seek_collating_symbol_entry (name, name_len); elem = seek_collating_symbol_entry (name, name_len, symb_table,
table_size, extra);
if (elem != -1) if (elem != -1)
{ {
/* We found the entry. */ /* We found the entry. */
@ -3063,7 +3062,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
{ {
/* Not enough, realloc it. */ /* Not enough, realloc it. */
/* +1 in case of mbcset->ncoll_syms is 0. */ /* +1 in case of mbcset->ncoll_syms is 0. */
Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
/* Use realloc since mbcset->coll_syms is NULL /* Use realloc since mbcset->coll_syms is NULL
if *alloc == 0. */ if *alloc == 0. */
int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t, int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
@ -3086,7 +3085,23 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
return REG_NOERROR; return REG_NOERROR;
} }
} }
} }
#endif /* _LIBC */
/* This function parse bracket expression like "[abc]", "[a-c]",
"[[.a-a.]]" etc. */
static bin_tree_t *
parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
reg_syntax_t syntax, reg_errcode_t *err)
{
#ifdef _LIBC
const unsigned char *collseqmb;
const char *collseqwc = NULL;
uint32_t nrules;
int32_t table_size = 0;
const int32_t *symb_table = NULL;
const unsigned char *extra = NULL;
#endif #endif
re_token_t br_token; re_token_t br_token;
@ -3230,7 +3245,9 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
#ifdef _LIBC #ifdef _LIBC
*err = build_range_exp (sbcset, mbcset, &range_alloc, *err = build_range_exp (sbcset, mbcset, &range_alloc,
&start_elem, &end_elem); &start_elem, &end_elem,
dfa, syntax, nrules, collseqmb, collseqwc,
table_size, symb_table, extra);
#else #else
# ifdef RE_ENABLE_I18N # ifdef RE_ENABLE_I18N
*err = build_range_exp (syntax, sbcset, *err = build_range_exp (syntax, sbcset,
@ -3283,7 +3300,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
#ifdef RE_ENABLE_I18N #ifdef RE_ENABLE_I18N
mbcset, &coll_sym_alloc, mbcset, &coll_sym_alloc,
#endif /* RE_ENABLE_I18N */ #endif /* RE_ENABLE_I18N */
start_elem.opr.name); start_elem.opr.name,
nrules, table_size, symb_table, extra);
if (__glibc_unlikely (*err != REG_NOERROR)) if (__glibc_unlikely (*err != REG_NOERROR))
goto parse_bracket_exp_free_return; goto parse_bracket_exp_free_return;
break; break;