From 2d87db5b5341bd6b714f175c1c268b7136444a71 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Tue, 6 Sep 2005 21:15:13 +0000 Subject: [PATCH] * posix/regex_internal.h (re_sub_match_top_t): Remove unused member next_last_offset. (struct re_dfa_t): Remove unused member states_alloc. * posix/regcomp.c (init_dfa): Don't initialize unused members. 2005-08-25 Paul Eggert * posix/regexec.c (set_regs): Don't alloca with an unbounded size. alloca modernization/simplification for regex. * posix/regex.c: Remove portability cruft for alloca. This no longer needs to be at the start of the file, and can be moved into regex_internal.h and simplified. * posix/regex_internal.h: Include . (__libc_use_alloca) [!defined _LIBC]: New macro. * posix/regexec.c (build_trtable): Remove "#ifdef _LIBC", since the code now works outside glibc. 2005-09-06 Ulrich Drepper * include/regex.h: Remove use of _RE_ARGS. 2005-08-25 Paul Eggert * posix/regexec.c (find_recover_state): Change "err" to "*err". 2005-08-24 Paul Eggert * posix/regcomp.c (regerror): Pointer args are 'restrict', as per POSIX. * posix/regex.h (regerror): Likewise. * manual/pattern.texi (POSIX Regexp Compilation): Likewise. Similarly for regcomp and regexec. Also, first 2 args of regexec and 2nd arg of regerror are const. * posix/regex.c: Do not include , as POSIX no longer requires this. (The code never needed it.) 2005-08-20 Paul Eggert * posix/regexec.c (sift_states_bkref): re_node_set_insert returns int, not reg_errcode_t. * posix/regex_internal.c (calc_state_hash): Put 'inline' before type, since some broken compilers warn about it otherwise. * posix/regcomp.c (create_initial_state): Remove duplicate decl. 2005-08-20 Paul Eggert * posix/regex.h (_RE_ARGS): Remove. No longer needed, since we assume C89 or better. All uses removed. 2005-09-06 Ulrich Drepper * posix/regex.c: Prevent using C++ compilers. 2005-08-19 Paul Eggert * posix/regcomp.c (duplicate_node): Return new index, not an error code, and let the caller return REG_ESPACE if out of space. This removes an uninitialied-variable warning with GCC 4.0.1, and also avoids taking the address of a local variable. All callers changed. 2005-09-06 Ulrich Drepper * include/time.h (__strptime_internal): Rename parameter to avoid bogus compiler warning. 2005-08-19 Jim Meyering * posix/regexec.c (proceed_next_node): Redo local variables to avoid GCC shadowing warnings. 2005-09-06 Ulrich Drepper * posix/regex_internal.c (re_acquire_state): Minor code rearrangement. (re_acquire_state_context): Likewise. 2005-08-19 Paul Eggert * posix/regex_internal.c (re_string_realloc_buffers): (re_node_set_insert, re_node_set_insert_last, re_dfa_add_node): Rename local variables to avoid GCC shadowing warnings. 2005-07-08 Eric Blake Paul Eggert * posix/regcomp.c (init_dfa): Store __btowc value in wint_t, not wchar_t. Remove now-unnecessary cast. (build_range_exp): Likewise. --- ChangeLog | 95 ++++++++++++++++++++++++++++++++++++++++++ include/regex.h | 49 ++++++++++------------ include/time.h | 2 +- manual/pattern.texi | 6 +-- posix/regcomp.c | 74 ++++++++++++++++---------------- posix/regex.c | 31 ++------------ posix/regex.h | 83 ++++++++++++++---------------------- posix/regex_internal.c | 68 ++++++++++++++---------------- posix/regex_internal.h | 17 +++++++- posix/regexec.c | 45 ++++++++++++++------ 10 files changed, 273 insertions(+), 197 deletions(-) diff --git a/ChangeLog b/ChangeLog index ed6e786277..23e8cc9ae2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,98 @@ +2005-08-31 Paul Eggert + + * posix/regex_internal.h (re_sub_match_top_t): Remove unused member + next_last_offset. + (struct re_dfa_t): Remove unused member states_alloc. + * posix/regcomp.c (init_dfa): Don't initialize unused members. + +2005-08-25 Paul Eggert + + * posix/regexec.c (set_regs): Don't alloca with an unbounded size. + + alloca modernization/simplification for regex. + * posix/regex.c: Remove portability cruft for alloca. This no longer + needs to be at the start of the file, and can be moved into + regex_internal.h and simplified. + * posix/regex_internal.h: Include . + (__libc_use_alloca) [!defined _LIBC]: New macro. + * posix/regexec.c (build_trtable): Remove "#ifdef _LIBC", + since the code now works outside glibc. + +2005-09-06 Ulrich Drepper + + * include/regex.h: Remove use of _RE_ARGS. + +2005-08-25 Paul Eggert + + * posix/regexec.c (find_recover_state): Change "err" to "*err". + +2005-08-24 Paul Eggert + + * posix/regcomp.c (regerror): Pointer args are 'restrict', + as per POSIX. + * posix/regex.h (regerror): Likewise. + * manual/pattern.texi (POSIX Regexp Compilation): Likewise. + Similarly for regcomp and regexec. Also, first 2 args of regexec + and 2nd arg of regerror are const. + + * posix/regex.c: Do not include , as POSIX no longer + requires this. (The code never needed it.) + +2005-08-20 Paul Eggert + + * posix/regexec.c (sift_states_bkref): re_node_set_insert returns + int, not reg_errcode_t. + + * posix/regex_internal.c (calc_state_hash): Put 'inline' before type, + since some broken compilers warn about it otherwise. + + * posix/regcomp.c (create_initial_state): Remove duplicate decl. + +2005-08-20 Paul Eggert + + * posix/regex.h (_RE_ARGS): Remove. No longer needed, since we assume + C89 or better. All uses removed. + +2005-09-06 Ulrich Drepper + + * posix/regex.c: Prevent using C++ compilers. + +2005-08-19 Paul Eggert + + * posix/regcomp.c (duplicate_node): Return new index, not an error + code, and let the caller return REG_ESPACE if out of space. This + removes an uninitialied-variable warning with GCC 4.0.1, and also + avoids taking the address of a local variable. All callers + changed. + +2005-09-06 Ulrich Drepper + + * include/time.h (__strptime_internal): Rename parameter to avoid + bogus compiler warning. + +2005-08-19 Jim Meyering + + * posix/regexec.c (proceed_next_node): Redo local variables to + avoid GCC shadowing warnings. + +2005-09-06 Ulrich Drepper + + * posix/regex_internal.c (re_acquire_state): Minor code rearrangement. + (re_acquire_state_context): Likewise. + +2005-08-19 Paul Eggert + + * posix/regex_internal.c (re_string_realloc_buffers): + (re_node_set_insert, re_node_set_insert_last, re_dfa_add_node): + Rename local variables to avoid GCC shadowing warnings. + +2005-07-08 Eric Blake + Paul Eggert + + * posix/regcomp.c (init_dfa): Store __btowc value in wint_t, not + wchar_t. Remove now-unnecessary cast. + (build_range_exp): Likewise. + 2005-08-31 Bob Wilson * sysdeps/ieee754/flt-32/e_hypotf.c (__ieee754_hypotf): Remove the diff --git a/include/regex.h b/include/regex.h index 81789be897..a7606c5e0d 100644 --- a/include/regex.h +++ b/include/regex.h @@ -2,45 +2,42 @@ #include /* Document internal interfaces. */ -extern reg_syntax_t __re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); +extern reg_syntax_t __re_set_syntax (reg_syntax_t syntax); -extern const char *__re_compile_pattern - _RE_ARGS ((const char *pattern, size_t length, - struct re_pattern_buffer *buffer)); +extern const char *__re_compile_pattern (const char *pattern, size_t length, + struct re_pattern_buffer *buffer); -extern int __re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); +extern int __re_compile_fastmap (struct re_pattern_buffer *buffer); -extern int __re_search - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, int range, struct re_registers *regs)); +extern int __re_search (struct re_pattern_buffer *buffer, const char *string, + int length, int start, int range, + struct re_registers *regs); extern int __re_search_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, int range, struct re_registers *regs, int stop)); + (struct re_pattern_buffer *buffer, const char *string1, + int length1, const char *string2, int length2, + int start, int range, struct re_registers *regs, int stop); extern int __re_match - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, struct re_registers *regs)); + (struct re_pattern_buffer *buffer, const char *string, + int length, int start, struct re_registers *regs); extern int __re_match_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, struct re_registers *regs, int stop)); + (struct re_pattern_buffer *buffer, const char *string1, + int length1, const char *string2, int length2, + int start, struct re_registers *regs, int stop); extern void __re_set_registers - _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, - unsigned num_regs, regoff_t *starts, regoff_t *ends)); + (struct re_pattern_buffer *buffer, struct re_registers *regs, + unsigned num_regs, regoff_t *starts, regoff_t *ends); -extern int __regcomp _RE_ARGS ((regex_t *__preg, const char *__pattern, - int __cflags)); +extern int __regcomp (regex_t *__preg, const char *__pattern, int __cflags); -extern int __regexec _RE_ARGS ((const regex_t *__preg, - const char *__string, size_t __nmatch, - regmatch_t __pmatch[], int __eflags)); +extern int __regexec (const regex_t *__preg, const char *__string, + size_t __nmatch, regmatch_t __pmatch[], int __eflags); -extern size_t __regerror _RE_ARGS ((int __errcode, const regex_t *__preg, - char *__errbuf, size_t __errbuf_size)); +extern size_t __regerror (int __errcode, const regex_t *__preg, + char *__errbuf, size_t __errbuf_size); -extern void __regfree _RE_ARGS ((regex_t *__preg)); +extern void __regfree (regex_t *__preg); #endif diff --git a/include/time.h b/include/time.h index 3d8fef8bf9..c2171ef449 100644 --- a/include/time.h +++ b/include/time.h @@ -94,7 +94,7 @@ enum ptime_locale_status { not, loc, raw }; extern char * __strptime_internal (const char *rp, const char *fmt, struct tm *tm, enum ptime_locale_status *decided, - int era_cnt, __locale_t loc) + int era_cnt, __locale_t locparam) internal_function; extern double __difftime (time_t time1, time_t time0); diff --git a/manual/pattern.texi b/manual/pattern.texi index 872fde05fb..c2a42cd843 100644 --- a/manual/pattern.texi +++ b/manual/pattern.texi @@ -721,7 +721,7 @@ expression into it by calling @code{regcomp}. @comment regex.h @comment POSIX.2 -@deftypefun int regcomp (regex_t *@var{compiled}, const char *@var{pattern}, int @var{cflags}) +@deftypefun int regcomp (regex_t *restrict @var{compiled}, const char *restrict @var{pattern}, int @var{cflags}) The function @code{regcomp} ``compiles'' a regular expression into a data structure that you can use with @code{regexec} to match against a string. The compiled regular expression format is designed for @@ -870,7 +870,7 @@ unless the regular expression contains anchor characters (@samp{^} or @comment regex.h @comment POSIX.2 -@deftypefun int regexec (regex_t *@var{compiled}, char *@var{string}, size_t @var{nmatch}, regmatch_t @var{matchptr} @t{[]}, int @var{eflags}) +@deftypefun int regexec (const regex_t *restrict @var{compiled}, const char *restrict @var{string}, size_t @var{nmatch}, regmatch_t @var{matchptr}[restrict], int @var{eflags}) This function tries to match the compiled regular expression @code{*@var{compiled}} against @var{string}. @@ -1049,7 +1049,7 @@ the function @code{regerror} to turn it into an error message string. @comment regex.h @comment POSIX.2 -@deftypefun size_t regerror (int @var{errcode}, regex_t *@var{compiled}, char *@var{buffer}, size_t @var{length}) +@deftypefun size_t regerror (int @var{errcode}, const regex_t *restrict @var{compiled}, char *restrict @var{buffer}, size_t @var{length}) This function produces an error message string for the error code @var{errcode}, and stores the string in @var{length} bytes of memory starting at @var{buffer}. For the @var{compiled} argument, supply the diff --git a/posix/regcomp.c b/posix/regcomp.c index 37e06797ac..d8205331d7 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -34,7 +34,6 @@ static reg_errcode_t create_initial_state (re_dfa_t *dfa); static void optimize_utf8 (re_dfa_t *dfa); #endif static reg_errcode_t analyze (regex_t *preg); -static reg_errcode_t create_initial_state (re_dfa_t *dfa); static reg_errcode_t preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), void *extra); @@ -51,8 +50,7 @@ static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node); static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node, int root_node, unsigned int constraint); -static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx, - unsigned int constraint); +static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint); static int search_duplicated_node (re_dfa_t *dfa, int org_node, unsigned int constraint); static reg_errcode_t calc_eclosure (re_dfa_t *dfa); @@ -536,8 +534,8 @@ weak_alias (__regcomp, regcomp) size_t regerror (errcode, preg, errbuf, errbuf_size) int errcode; - const regex_t *preg; - char *errbuf; + const regex_t *__restrict preg; + char *__restrict errbuf; size_t errbuf_size; { const char *msg; @@ -857,8 +855,6 @@ init_dfa (dfa, pat_len) dfa->nodes_alloc = pat_len + 1; dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); - dfa->states_alloc = pat_len + 1; - /* table_size = 2 ^ ceil(log pat_len) */ for (table_size = 1; table_size > 0; table_size <<= 1) if (table_size > pat_len) @@ -918,11 +914,11 @@ init_dfa (dfa, pat_len) for (i = 0, ch = 0; i < BITSET_UINTS; ++i) for (j = 0; j < UINT_BITS; ++j, ++ch) { - wchar_t wch = __btowc (ch); + wint_t wch = __btowc (ch); if (wch != WEOF) dfa->sb_char[i] |= 1 << j; # ifndef _LIBC - if (isascii (ch) && wch != (wchar_t) ch) + if (isascii (ch) && wch != ch) dfa->map_notascii = 1; # endif } @@ -1469,7 +1465,6 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node, int top_org_node, top_clone_node, root_node; unsigned int init_constraint; { - reg_errcode_t err; int org_node, clone_node, ret; unsigned int constraint = init_constraint; for (org_node = top_org_node, clone_node = top_clone_node;;) @@ -1483,9 +1478,9 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node, edests of the back reference. */ org_dest = dfa->nexts[org_node]; re_node_set_empty (dfa->edests + clone_node); - err = duplicate_node (&clone_dest, dfa, org_dest, constraint); - if (BE (err != REG_NOERROR, 0)) - return err; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; dfa->nexts[clone_node] = dfa->nexts[org_node]; ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); if (BE (ret < 0, 0)) @@ -1521,9 +1516,9 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node, } constraint |= dfa->nodes[org_node].opr.ctx_type; } - err = duplicate_node (&clone_dest, dfa, org_dest, constraint); - if (BE (err != REG_NOERROR, 0)) - return err; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); if (BE (ret < 0, 0)) return REG_ESPACE; @@ -1539,9 +1534,10 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node, if (clone_dest == -1) { /* There are no such a duplicated node, create a new one. */ - err = duplicate_node (&clone_dest, dfa, org_dest, constraint); - if (BE (err != REG_NOERROR, 0)) - return err; + reg_errcode_t err; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); if (BE (ret < 0, 0)) return REG_ESPACE; @@ -1560,9 +1556,9 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node, } org_dest = dfa->edests[org_node].elems[1]; - err = duplicate_node (&clone_dest, dfa, org_dest, constraint); - if (BE (err != REG_NOERROR, 0)) - return err; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); if (BE (ret < 0, 0)) return REG_ESPACE; @@ -1593,27 +1589,27 @@ search_duplicated_node (dfa, org_node, constraint) } /* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. - The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded, - otherwise return the error code. */ + Return the index of the new node, or -1 if insufficient storage is + available. */ -static reg_errcode_t -duplicate_node (new_idx, dfa, org_idx, constraint) +static int +duplicate_node (dfa, org_idx, constraint) re_dfa_t *dfa; - int *new_idx, org_idx; + int org_idx; unsigned int constraint; { int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); - if (BE (dup_idx == -1, 0)) - return REG_ESPACE; - dfa->nodes[dup_idx].constraint = constraint; - if (dfa->nodes[org_idx].type == ANCHOR) - dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type; - dfa->nodes[dup_idx].duplicated = 1; + if (BE (dup_idx != -1, 1)) + { + dfa->nodes[dup_idx].constraint = constraint; + if (dfa->nodes[org_idx].type == ANCHOR) + dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type; + dfa->nodes[dup_idx].duplicated = 1; - /* Store the index of the original node. */ - dfa->org_indices[dup_idx] = org_idx; - *new_idx = dup_idx; - return REG_NOERROR; + /* Store the index of the original node. */ + dfa->org_indices[dup_idx] = org_idx; + } + return dup_idx; } static reg_errcode_t @@ -2683,7 +2679,9 @@ build_range_exp (sbcset, start_elem, end_elem) # ifdef RE_ENABLE_I18N { - wchar_t wc, start_wc, end_wc; + wchar_t wc; + wint_t start_wc; + wint_t end_wc; wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch diff --git a/posix/regex.c b/posix/regex.c index 7a4f304cdd..d2d4f28e01 100644 --- a/posix/regex.c +++ b/posix/regex.c @@ -1,5 +1,5 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002, 2003 Free Software Foundation, Inc. + Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa . @@ -22,28 +22,9 @@ #include "config.h" #endif -#ifdef _AIX -#pragma alloca -#else -# ifndef allocax /* predefined by HP cc +Olibcalls */ -# ifdef __GNUC__ -# define alloca(size) __builtin_alloca (size) -# else -# if HAVE_ALLOCA_H -# include -# else -# ifdef __hpux - void *alloca (); -# else -# if !defined __OS2__ && !defined WIN32 - char *alloca (); -# else -# include /* OS/2 defines alloca in here */ -# endif -# endif -# endif -# endif -# endif +/* Make sure noone compiles this code with a C++ compiler. */ +#ifdef __cplusplus +# error "This is C code, use a C compiler" #endif #ifdef _LIBC @@ -71,10 +52,6 @@ # include "../locale/localeinfo.h" #endif -/* POSIX says that must be included (by the caller) before - . */ -#include - /* On some systems, limits.h sets RE_DUP_MAX to a lower value than GNU regex allows. Include it before , which correctly #undefs RE_DUP_MAX and sets it to the right value. */ diff --git a/posix/regex.h b/posix/regex.h index 1f73d23866..e5ec398106 100644 --- a/posix/regex.h +++ b/posix/regex.h @@ -1,6 +1,6 @@ /* Definitions for data structures and routines for the regular expression library. - Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003 + Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -452,38 +452,21 @@ typedef struct /* Declarations for routines. */ -/* To avoid duplicating every routine declaration -- once with a - prototype (if we are ANSI), and once without (if we aren't) -- we - use the following macro to declare argument types. This - unfortunately clutters up the declarations a bit, but I think it's - worth it. */ - -#if __STDC__ - -# define _RE_ARGS(args) args - -#else /* not __STDC__ */ - -# define _RE_ARGS(args) () - -#endif /* not __STDC__ */ - /* Sets the current default syntax to SYNTAX, and return the old syntax. You can also simply assign to the `re_syntax_options' variable. */ -extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); +extern reg_syntax_t re_set_syntax (reg_syntax_t syntax); /* Compile the regular expression PATTERN, with length LENGTH and syntax given by the global `re_syntax_options', into the buffer BUFFER. Return NULL if successful, and an error string if not. */ -extern const char *re_compile_pattern - _RE_ARGS ((const char *pattern, size_t length, - struct re_pattern_buffer *buffer)); +extern const char *re_compile_pattern (const char *pattern, size_t length, + struct re_pattern_buffer *buffer); /* Compile a fastmap for the compiled pattern in BUFFER; used to accelerate searches. Return 0 if successful and -2 if was an internal error. */ -extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); +extern int re_compile_fastmap (struct re_pattern_buffer *buffer); /* Search in the string STRING (with length LENGTH) for the pattern @@ -491,31 +474,29 @@ extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); characters. Return the starting position of the match, -1 for no match, or -2 for an internal error. Also return register information in REGS (if REGS and BUFFER->no_sub are nonzero). */ -extern int re_search - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, int range, struct re_registers *regs)); +extern int re_search (struct re_pattern_buffer *buffer, const char *string, + int length, int start, int range, + struct re_registers *regs); /* Like `re_search', but search in the concatenation of STRING1 and STRING2. Also, stop searching at index START + STOP. */ -extern int re_search_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, int range, struct re_registers *regs, int stop)); +extern int re_search_2 (struct re_pattern_buffer *buffer, const char *string1, + int length1, const char *string2, int length2, + int start, int range, struct re_registers *regs, + int stop); /* Like `re_search', but return how many characters in STRING the regexp in BUFFER matched, starting at position START. */ -extern int re_match - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, struct re_registers *regs)); +extern int re_match (struct re_pattern_buffer *buffer, const char *string, + int length, int start, struct re_registers *regs); /* Relates to `re_match' as `re_search_2' relates to `re_search'. */ -extern int re_match_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, struct re_registers *regs, int stop)); +extern int re_match_2 (struct re_pattern_buffer *buffer, const char *string1, + int length1, const char *string2, int length2, + int start, struct re_registers *regs, int stop); /* Set REGS to hold NUM_REGS registers, storing them in STARTS and @@ -530,15 +511,15 @@ extern int re_match_2 Unless this function is called, the first search or match using PATTERN_BUFFER will allocate its own register data, without freeing the old data. */ -extern void re_set_registers - _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, - unsigned num_regs, regoff_t *starts, regoff_t *ends)); +extern void re_set_registers (struct re_pattern_buffer *buffer, + struct re_registers *regs, unsigned num_regs, + regoff_t *starts, regoff_t *ends); #if defined _REGEX_RE_COMP || defined _LIBC # ifndef _CRAY /* 4.2 bsd compatibility. */ -extern char *re_comp _RE_ARGS ((const char *)); -extern int re_exec _RE_ARGS ((const char *)); +extern char *re_comp (const char *); +extern int re_exec (const char *); # endif #endif @@ -563,19 +544,19 @@ extern int re_exec _RE_ARGS ((const char *)); #endif /* POSIX compatibility. */ -extern int regcomp _RE_ARGS ((regex_t *__restrict __preg, - const char *__restrict __pattern, - int __cflags)); +extern int regcomp (regex_t *__restrict __preg, + const char *__restrict __pattern, + int __cflags); -extern int regexec _RE_ARGS ((const regex_t *__restrict __preg, - const char *__restrict __string, size_t __nmatch, - regmatch_t __pmatch[__restrict_arr], - int __eflags)); +extern int regexec (const regex_t *__restrict __preg, + const char *__restrict __string, size_t __nmatch, + regmatch_t __pmatch[__restrict_arr], + int __eflags); -extern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg, - char *__errbuf, size_t __errbuf_size)); +extern size_t regerror (int __errcode, const regex_t *__restrict __preg, + char *__restrict __errbuf, size_t __errbuf_size); -extern void regfree _RE_ARGS ((regex_t *__preg)); +extern void regfree (regex_t *__preg); #ifdef __cplusplus diff --git a/posix/regex_internal.c b/posix/regex_internal.c index baa58443ac..821ed7f45f 100644 --- a/posix/regex_internal.c +++ b/posix/regex_internal.c @@ -35,7 +35,7 @@ static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa, const re_node_set *nodes, unsigned int context, unsigned int hash) internal_function; -static unsigned int inline calc_state_hash (const re_node_set *nodes, +static inline unsigned int calc_state_hash (const re_node_set *nodes, unsigned int context) internal_function; /* Functions for string operation. */ @@ -148,26 +148,26 @@ re_string_realloc_buffers (pstr, new_buf_len) #ifdef RE_ENABLE_I18N if (pstr->mb_cur_max > 1) { - wint_t *new_array = re_realloc (pstr->wcs, wint_t, new_buf_len); - if (BE (new_array == NULL, 0)) + wint_t *new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); + if (BE (new_wcs == NULL, 0)) return REG_ESPACE; - pstr->wcs = new_array; + pstr->wcs = new_wcs; if (pstr->offsets != NULL) { - int *new_array = re_realloc (pstr->offsets, int, new_buf_len); - if (BE (new_array == NULL, 0)) + int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len); + if (BE (new_offsets == NULL, 0)) return REG_ESPACE; - pstr->offsets = new_array; + pstr->offsets = new_offsets; } } #endif /* RE_ENABLE_I18N */ if (pstr->mbs_allocated) { - unsigned char *new_array = re_realloc (pstr->mbs, unsigned char, - new_buf_len); - if (BE (new_array == NULL, 0)) + unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char, + new_buf_len); + if (BE (new_mbs == NULL, 0)) return REG_ESPACE; - pstr->mbs = new_array; + pstr->mbs = new_mbs; } pstr->bufs_len = new_buf_len; return REG_NOERROR; @@ -1227,12 +1227,12 @@ re_node_set_insert (set, elem) /* Realloc if we need. */ if (set->alloc == set->nelem) { - int *new_array; + int *new_elems; set->alloc = set->alloc * 2; - new_array = re_realloc (set->elems, int, set->alloc); - if (BE (new_array == NULL, 0)) + new_elems = re_realloc (set->elems, int, set->alloc); + if (BE (new_elems == NULL, 0)) return -1; - set->elems = new_array; + set->elems = new_elems; } /* Move the elements which follows the new element. Test the @@ -1267,12 +1267,12 @@ re_node_set_insert_last (set, elem) /* Realloc if we need. */ if (set->alloc == set->nelem) { - int *new_array; + int *new_elems; set->alloc = (set->alloc + 1) * 2; - new_array = re_realloc (set->elems, int, set->alloc); - if (BE (new_array == NULL, 0)) + new_elems = re_realloc (set->elems, int, set->alloc); + if (BE (new_elems == NULL, 0)) return -1; - set->elems = new_array; + set->elems = new_elems; } /* Insert the new element. */ @@ -1349,11 +1349,11 @@ re_dfa_add_node (dfa, token) int *new_nexts, *new_indices; re_node_set *new_edests, *new_eclosures; - re_token_t *new_array = re_realloc (dfa->nodes, re_token_t, + re_token_t *new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc); - if (BE (new_array == NULL, 0)) + if (BE (new_nodes == NULL, 0)) return -1; - dfa->nodes = new_array; + dfa->nodes = new_nodes; new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc); new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc); new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc); @@ -1379,7 +1379,7 @@ re_dfa_add_node (dfa, token) return dfa->nodes_len++; } -static unsigned int inline +static inline unsigned int calc_state_hash (nodes, context) const re_node_set *nodes; unsigned int context; @@ -1429,13 +1429,10 @@ re_acquire_state (err, dfa, nodes) /* There are no appropriate state in the dfa, create the new one. */ new_state = create_ci_newstate (dfa, nodes, hash); - if (BE (new_state != NULL, 1)) - return new_state; - else - { - *err = REG_ESPACE; - return NULL; - } + if (BE (new_state == NULL, 0)) + *err = REG_ESPACE; + + return new_state; } /* Search for the state whose node_set is equivalent to NODES and @@ -1477,13 +1474,10 @@ re_acquire_state_context (err, dfa, nodes, context) } /* There are no appropriate state in `dfa', create the new one. */ new_state = create_cd_newstate (dfa, nodes, context, hash); - if (BE (new_state != NULL, 1)) - return new_state; - else - { - *err = REG_ESPACE; - return NULL; - } + if (BE (new_state == NULL, 0)) + *err = REG_ESPACE; + + return new_state; } /* Finish initialization of the new state NEWSTATE, and using its hash value diff --git a/posix/regex_internal.h b/posix/regex_internal.h index 6db384416a..cdaa8fcfde 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -430,6 +430,21 @@ static unsigned char re_string_fetch_byte_case (re_string_t *pstr) #define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx)) #define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx)) +#include + +#ifndef _LIBC +# if HAVE_ALLOCA +/* The OS usually guarantees only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + allocate anything larger than 4096 bytes. Also care for the possibility + of a few compiler-allocated temporary stack slots. */ +# define __libc_use_alloca(n) ((n) < 4032) +# else +/* alloca is implemented with malloc, so just use malloc. */ +# define __libc_use_alloca(n) 0 +# endif +#endif + #define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t))) #define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t))) #define re_free(p) free (p) @@ -541,7 +556,6 @@ typedef struct { int str_idx; int node; - int next_last_offset; state_array_t *path; int alasts; /* Allocation size of LASTS. */ int nlasts; /* The number of LASTS. */ @@ -632,7 +646,6 @@ struct re_dfa_t /* number of subexpressions `re_nsub' is in regex_t. */ unsigned int state_hash_mask; - int states_alloc; int init_node; int nbackref; /* The number of backreference in this dfa. */ diff --git a/posix/regexec.c b/posix/regexec.c index e635261d05..bdb2c4cf9e 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -1237,8 +1237,7 @@ proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs) struct re_fail_stack_t *fs; { re_dfa_t *const dfa = mctx->dfa; - int i, err, dest_node; - dest_node = -1; + int i, err; if (IS_EPSILON_NODE (dfa->nodes[node].type)) { re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; @@ -1304,6 +1303,7 @@ proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs) if (naccepted == 0) { + int dest_node; err = re_node_set_insert (eps_via_nodes, node); if (BE (err < 0, 0)) return -2; @@ -1317,7 +1317,7 @@ proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs) if (naccepted != 0 || check_node_accept (mctx, dfa->nodes + node, *pidx)) { - dest_node = dfa->nexts[node]; + int dest_node = dfa->nexts[node]; *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted; if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, @@ -1395,6 +1395,7 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack) struct re_fail_stack_t *fs; struct re_fail_stack_t fs_body = { 0, 2, NULL }; regmatch_t *prev_idx_match; + int prev_idx_match_malloced = 0; #ifdef DEBUG assert (nmatch > 1); @@ -1413,7 +1414,18 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack) cur_node = dfa->init_node; re_node_set_init_empty (&eps_via_nodes); - prev_idx_match = (regmatch_t *) alloca (sizeof (regmatch_t) * nmatch); + if (__libc_use_alloca (nmatch * sizeof (regmatch_t))) + prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t)); + else + { + prev_idx_match = re_malloc (regmatch_t, nmatch); + if (prev_idx_match == NULL) + { + free_fail_stack_return (fs); + return REG_ESPACE; + } + prev_idx_match_malloced = 1; + } memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;) @@ -1431,6 +1443,8 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack) if (reg_idx == nmatch) { re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); return free_fail_stack_return (fs); } cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, @@ -1439,6 +1453,8 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack) else { re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); return REG_NOERROR; } } @@ -1452,6 +1468,8 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack) if (BE (cur_node == -2, 0)) { re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); free_fail_stack_return (fs); return REG_ESPACE; } @@ -1461,11 +1479,15 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack) else { re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); return REG_NOMATCH; } } } re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); return free_fail_stack_return (fs); } @@ -2133,7 +2155,10 @@ sift_states_bkref (mctx, sctx, str_idx, candidates) enabled_idx = first_idx; do { - int subexp_len, to_idx, dst_node; + int subexp_len; + int to_idx; + int dst_node; + int ret; re_dfastate_t *cur_state; if (entry->node != node) @@ -2159,8 +2184,8 @@ sift_states_bkref (mctx, sctx, str_idx, candidates) } local_sctx.last_node = node; local_sctx.last_str_idx = str_idx; - err = re_node_set_insert (&local_sctx.limits, enabled_idx); - if (BE (err < 0, 0)) + ret = re_node_set_insert (&local_sctx.limits, enabled_idx); + if (BE (ret < 0, 0)) { err = REG_ESPACE; goto free_return; @@ -2388,7 +2413,7 @@ find_recover_state (err, mctx) cur_state = merge_state_with_log (err, mctx, NULL); } - while (err == REG_NOERROR && cur_state == NULL); + while (*err == REG_NOERROR && cur_state == NULL); return cur_state; } @@ -3310,12 +3335,10 @@ build_trtable (dfa, state) from `state'. `dests_node[i]' represents the nodes which i-th destination state contains, and `dests_ch[i]' represents the characters which i-th destination state accepts. */ -#ifdef _LIBC if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX)) dests_node = (re_node_set *) alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX); else -#endif { dests_node = (re_node_set *) malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX); @@ -3349,13 +3372,11 @@ build_trtable (dfa, state) if (BE (err != REG_NOERROR, 0)) goto out_free; -#ifdef _LIBC if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX + ndests * 3 * sizeof (re_dfastate_t *))) dest_states = (re_dfastate_t **) alloca (ndests * 3 * sizeof (re_dfastate_t *)); else -#endif { dest_states = (re_dfastate_t **) malloc (ndests * 3 * sizeof (re_dfastate_t *));