mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-21 20:40:05 +00:00
Update.
2003-12-23 Paolo Bonzini <bonzini@gnu.org> * posix/regex_internal.c (re_dfa_add_node): Initialize opt_subexp. * posix/regex_internal.h (re_token_type_t): Put OP_DUP_PLUS among the tokens, rather than among the epsilon-transiting nodes. (re_token_t): Add the opt_subexp flag. * posix/regcomp.c (optimize_utf8, calc_first, calc_next, calc_epsdest): Don't consider OP_DUP_PLUS. (mark_opt_subexp, mark_opt_subexp_iter): New functions. (parse_dup_op): Mostly rewritten, lowering OP_DUP_PLUS to OP_DUP_ASTERISK and marking optional subexpressions as such using mark_opt_subexp. * posix/regexec.c (set_regs): Initialize PREV_INDEX_MATCH and pass it to update_regs. (update_regs): Use the PREV_INDEX_MATCH parameter, together with the opt_subexp flag, in order to discard a final empty match of a repeated subexpression. * posix/BOOST.tests: Adjust test vectors. * posix/PCRE.tests: Likewise. * posix/rxspencer/tests: Likewise. 2003-12-17 Paolo Bonzini <bonzini@gnu.org> 2003-12-16 Paolo Bonzini <bonzini@gnu.org> 2003-12-17 Paolo Bonzini <bonzini@gnu.org> 2003-12-16 Jakub Jelinek <jakub@redhat.com> 2003-04-06 Kaz Kojima <kkojima@rr.iij4u.or.jp> 2003-02-20 Paolo Bonzini <bonzini@gnu.org> 2003-01-12 Franz Sirl <Franz.Sirl-kernel@lauterbach.com> 2003-01-09 Richard Henderson <rth@redhat.com> 2003-01-09 Richard Henderson <rth@redhat.com> 2003-01-03 Paul Eggert <eggert@twinsun.com>
This commit is contained in:
parent
cb5b9388da
commit
6b6557e8b3
41
ChangeLog
41
ChangeLog
@ -1,3 +1,24 @@
|
||||
2003-12-23 Paolo Bonzini <bonzini@gnu.org>
|
||||
|
||||
* posix/regex_internal.c (re_dfa_add_node): Initialize opt_subexp.
|
||||
* posix/regex_internal.h (re_token_type_t): Put OP_DUP_PLUS
|
||||
among the tokens, rather than among the epsilon-transiting nodes.
|
||||
(re_token_t): Add the opt_subexp flag.
|
||||
* posix/regcomp.c (optimize_utf8, calc_first,
|
||||
calc_next, calc_epsdest): Don't consider OP_DUP_PLUS.
|
||||
(mark_opt_subexp, mark_opt_subexp_iter): New functions.
|
||||
(parse_dup_op): Mostly rewritten, lowering OP_DUP_PLUS to
|
||||
OP_DUP_ASTERISK and marking optional subexpressions
|
||||
as such using mark_opt_subexp.
|
||||
* posix/regexec.c (set_regs): Initialize PREV_INDEX_MATCH
|
||||
and pass it to update_regs.
|
||||
(update_regs): Use the PREV_INDEX_MATCH parameter, together
|
||||
with the opt_subexp flag, in order to discard a final empty
|
||||
match of a repeated subexpression.
|
||||
* posix/BOOST.tests: Adjust test vectors.
|
||||
* posix/PCRE.tests: Likewise.
|
||||
* posix/rxspencer/tests: Likewise.
|
||||
|
||||
2000-05-22 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* sysdeps/i386/fpu/bits/mathinline.h (__expm1_code): Avoid using ?:
|
||||
@ -9,7 +30,7 @@
|
||||
* posix/tst-rxspencer.c: Likewise.
|
||||
Based on a patch by Alex Davis <alex14641@yahoo.com>.
|
||||
|
||||
2002-12-17 Paolo Bonzini <bonzini@gnu.org>
|
||||
2003-12-17 Paolo Bonzini <bonzini@gnu.org>
|
||||
|
||||
* posix/regex_internal.h [!_LIBC] (internal_function): Define.
|
||||
(re_string_allocate, re_string_construct, re_string_reconstruct,
|
||||
@ -60,7 +81,7 @@
|
||||
(re_string_peek_byte_case, re_fetch_byte_case): Change
|
||||
declaration from ANSI to K&R.
|
||||
|
||||
2002-12-16 Paolo Bonzini <bonzini@gnu.org>
|
||||
2003-12-16 Paolo Bonzini <bonzini@gnu.org>
|
||||
|
||||
* posix/regexec.c (build_trtable): Don't allocate the trtable
|
||||
until state->word_trtable is known. Don't hardcode UINT_BITS
|
||||
@ -172,7 +193,7 @@
|
||||
* sysdeps/unix/sysv/linux/powerpc/sys/procfs.h [!__PPC64_ELF_H]: Extend
|
||||
conditional to include typedef elf_vrreg_t.
|
||||
|
||||
2002-12-17 Paolo Bonzini <bonzini@gnu.org>
|
||||
2003-12-17 Paolo Bonzini <bonzini@gnu.org>
|
||||
|
||||
* posix/regexec.c (re_search_internal): Limit search to the
|
||||
beginning of the buffer if the initial states are empty for
|
||||
@ -197,7 +218,7 @@
|
||||
* posix/regex_internal.h: Make sure the regex code compile
|
||||
with non-GCC compilers by hiding attributes.
|
||||
|
||||
2002-12-16 Jakub Jelinek <jakub@redhat.com>
|
||||
2003-12-16 Jakub Jelinek <jakub@redhat.com>
|
||||
Paolo Bonzini <bonzini@gnu.org>
|
||||
|
||||
* posix/regexec.c (group_nodes_into_DFAstates): Never produce
|
||||
@ -6084,7 +6105,7 @@
|
||||
invalid input if the -c option is used.
|
||||
(main): Correctly append IGNORE string for -c option.
|
||||
|
||||
2002-04-06 Kaz Kojima <kkojima@rr.iij4u.or.jp>
|
||||
2003-04-06 Kaz Kojima <kkojima@rr.iij4u.or.jp>
|
||||
|
||||
* sysdeps/sh/bits/atomic.h: Moved to ...
|
||||
* sysdeps/unix/sysv/linux/sh/bits/atomic.h: ... here. Add comments.
|
||||
@ -8037,7 +8058,7 @@
|
||||
* inet/rcmd.c (rresvport_af): Avoid using invliad values. Wrap
|
||||
around in search if port IPPORT_RESERVED/2 has been test.
|
||||
|
||||
2002-02-20 Paolo Bonzini <bonzini@gnu.org>
|
||||
2003-02-20 Paolo Bonzini <bonzini@gnu.org>
|
||||
|
||||
* posix/regcomp.c: Remove inclusions.
|
||||
* posix/regexec.c: Likewise.
|
||||
@ -9032,7 +9053,7 @@
|
||||
* io/ftwtest-sh: Add test for relative path argument to nftw()
|
||||
with FTW_CHDIR option.
|
||||
|
||||
2002-01-12 Franz Sirl <Franz.Sirl-kernel@lauterbach.com>
|
||||
2003-01-12 Franz Sirl <Franz.Sirl-kernel@lauterbach.com>
|
||||
|
||||
* sysdeps/unix/sysv/linux/kernel-features.h (__ASSUME_VFORK_SYSCALL):
|
||||
Define for powerpc.
|
||||
@ -9045,12 +9066,12 @@
|
||||
(INLINE_SYSCALL): Make use of INTERNAL_SYSCALL.
|
||||
* sysdeps/unix/sysv/linux/powerpc/powerpc32/vfork.S: New file.
|
||||
|
||||
2002-01-09 Richard Henderson <rth@redhat.com>
|
||||
2003-01-09 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* sysdeps/alpha/dl-machine.h (elf_machine_rela): Compute
|
||||
DTPREL64 and TPREL64 without loadbase applied.
|
||||
|
||||
2002-01-09 Richard Henderson <rth@redhat.com>
|
||||
2003-01-09 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* sysdeps/unix/alpha/sysdep.S: Use correct definition of errno
|
||||
for NOT_IN_libc.
|
||||
@ -9443,7 +9464,7 @@
|
||||
|
||||
* sysdeps/unix/sysv/linux/m68k/bits/stat.h: Add nanosecond fields.
|
||||
|
||||
2002-01-03 Paul Eggert <eggert@twinsun.com>
|
||||
2003-01-03 Paul Eggert <eggert@twinsun.com>
|
||||
|
||||
* malloc/obstack.h (__INT_TO_PTR) [__STDC__]: Cast result to
|
||||
(void *) to avoid diagnostic with native c89 on SGI IRIX 6.5
|
||||
|
@ -432,27 +432,22 @@ a(b|c){2,4}d abcbd 0 5 3 4
|
||||
a(b|c){2,4}d abcbcd 0 6 4 5
|
||||
a(b|c){2,}d abcd 0 4 2 3
|
||||
a(b|c){2,}d abcbd 0 5 3 4
|
||||
; perl only:
|
||||
a(b|c?)+d abcd 0 4 3 3
|
||||
a(b+|((c)*))+d abd 0 3 2 2 2 2 -1 -1
|
||||
a(b+|((c)*))+d abcd 0 4 3 3 3 3 2 3
|
||||
; perl only: these conflict with the POSIX test below
|
||||
;a(b|c?)+d abcd 0 4 3 3
|
||||
;a(b+|((c)*))+d abd 0 3 2 2 2 2 -1 -1
|
||||
;a(b+|((c)*))+d abcd 0 4 3 3 3 3 2 3
|
||||
|
||||
; posix only:
|
||||
- match_default extended REG_EXTENDED REG_STARTEND
|
||||
|
||||
; XXX FIXME the following 4 tests fail ATM
|
||||
;a(b|c?)+d abcd 0 4 2 3
|
||||
;a(b|((c)*))+d abcd 0 4 2 3 2 3 2 3
|
||||
;a(b+|((c)*))+d abd 0 3 1 2 -1 -1 -1 -1
|
||||
;a(b+|((c)*))+d abcd 0 4 2 3 2 3 2 3
|
||||
|
||||
a(b|c?)+d abcd 0 4 2 3
|
||||
a(b|((c)*))+d abcd 0 4 2 3 2 3 2 3
|
||||
a(b+|((c)*))+d abd 0 3 1 2 -1 -1 -1 -1
|
||||
a(b+|((c)*))+d abcd 0 4 2 3 2 3 2 3
|
||||
a(b|((c)*))+d ad 0 2 1 1 1 1 -1 -1
|
||||
|
||||
; XXX FIXME the following 3 tests fail ATM
|
||||
;a(b|((c)*))*d abcd 0 4 2 3 2 3 2 3
|
||||
;a(b+|((c)*))*d abd 0 3 1 2 -1 -1 -1 -1
|
||||
;a(b+|((c)*))*d abcd 0 4 2 3 2 3 2 3
|
||||
|
||||
a(b|((c)*))*d abcd 0 4 2 3 2 3 2 3
|
||||
a(b+|((c)*))*d abd 0 3 1 2 -1 -1 -1 -1
|
||||
a(b+|((c)*))*d abcd 0 4 2 3 2 3 2 3
|
||||
a(b|((c)*))*d ad 0 2 1 1 1 1 -1 -1
|
||||
|
||||
- match_default normal REG_PERL
|
||||
|
@ -1151,13 +1151,13 @@ No match
|
||||
/(abc|)+/
|
||||
abc
|
||||
0: abc
|
||||
1:
|
||||
1: abc
|
||||
abcabc
|
||||
0: abcabc
|
||||
1:
|
||||
1: abc
|
||||
abcabcabc
|
||||
0: abcabcabc
|
||||
1:
|
||||
1: abc
|
||||
xyz
|
||||
0:
|
||||
1:
|
||||
@ -1165,46 +1165,44 @@ No match
|
||||
/([a]*)*/
|
||||
a
|
||||
0: a
|
||||
1:
|
||||
1: a
|
||||
aaaaa
|
||||
0: aaaaa
|
||||
1:
|
||||
1: aaaaa
|
||||
|
||||
/([ab]*)*/
|
||||
a
|
||||
0: a
|
||||
1:
|
||||
1: a
|
||||
b
|
||||
0: b
|
||||
1:
|
||||
1: b
|
||||
ababab
|
||||
0: ababab
|
||||
1:
|
||||
1: ababab
|
||||
aaaabcde
|
||||
0: aaaab
|
||||
1:
|
||||
1: aaaab
|
||||
bbbb
|
||||
0: bbbb
|
||||
1:
|
||||
1: bbbb
|
||||
|
||||
/([^a]*)*/
|
||||
b
|
||||
0: b
|
||||
1:
|
||||
1: b
|
||||
bbbb
|
||||
0: bbbb
|
||||
1:
|
||||
1: bbbb
|
||||
aaa
|
||||
0:
|
||||
1:
|
||||
|
||||
/([^ab]*)*/
|
||||
cccc
|
||||
0: cccc
|
||||
1:
|
||||
1: cccc
|
||||
abab
|
||||
0:
|
||||
1:
|
||||
|
||||
/abc/
|
||||
abc
|
||||
|
249
posix/regcomp.c
249
posix/regcomp.c
@ -135,6 +135,8 @@ static bin_tree_t *re_dfa_add_tree_node (re_dfa_t *dfa,
|
||||
const re_token_t *token)
|
||||
__attribute ((noinline));
|
||||
static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
|
||||
static void mark_opt_subexp (const bin_tree_t *src, re_dfa_t *dfa);
|
||||
static void mark_opt_subexp_iter (const bin_tree_t *src, re_dfa_t *dfa, int idx);
|
||||
|
||||
/* This table gives an error message for each of the error codes listed
|
||||
in regex.h. Obviously the order here has to be same as there.
|
||||
@ -1031,7 +1033,6 @@ optimize_utf8 (dfa)
|
||||
case END_OF_RE:
|
||||
case OP_DUP_ASTERISK:
|
||||
case OP_DUP_QUESTION:
|
||||
case OP_DUP_PLUS:
|
||||
case OP_OPEN_SUBEXP:
|
||||
case OP_CLOSE_SUBEXP:
|
||||
break;
|
||||
@ -1150,6 +1151,7 @@ calc_first (dfa, node)
|
||||
case OP_CLOSE_BRACKET:
|
||||
case OP_OPEN_DUP_NUM:
|
||||
case OP_CLOSE_DUP_NUM:
|
||||
case OP_DUP_PLUS:
|
||||
case OP_NON_MATCH_LIST:
|
||||
case OP_OPEN_COLL_ELEM:
|
||||
case OP_CLOSE_COLL_ELEM:
|
||||
@ -1176,14 +1178,6 @@ calc_first (dfa, node)
|
||||
case OP_CLOSE_SUBEXP:
|
||||
node->first = idx;
|
||||
break;
|
||||
case OP_DUP_PLUS:
|
||||
#ifdef DEBUG
|
||||
assert (node->left != NULL);
|
||||
#endif
|
||||
if (node->left->first == -1)
|
||||
calc_first (dfa, node->left);
|
||||
node->first = node->left->first;
|
||||
break;
|
||||
case OP_ALT:
|
||||
node->first = idx;
|
||||
break;
|
||||
@ -1223,7 +1217,6 @@ calc_next (dfa, node)
|
||||
switch (type)
|
||||
{
|
||||
case OP_DUP_ASTERISK:
|
||||
case OP_DUP_PLUS:
|
||||
node->next = idx;
|
||||
break;
|
||||
case CONCAT:
|
||||
@ -1258,7 +1251,6 @@ calc_epsdest (dfa, node)
|
||||
if (node->type == 0)
|
||||
{
|
||||
if (dfa->nodes[idx].type == OP_DUP_ASTERISK
|
||||
|| dfa->nodes[idx].type == OP_DUP_PLUS
|
||||
|| dfa->nodes[idx].type == OP_DUP_QUESTION)
|
||||
{
|
||||
if (node->left->first == -1)
|
||||
@ -2377,8 +2369,8 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err)
|
||||
/* This function parse repetition operators like "*", "+", "{1,3}" etc. */
|
||||
|
||||
static bin_tree_t *
|
||||
parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
|
||||
bin_tree_t *dup_elem;
|
||||
parse_dup_op (elem, regexp, dfa, token, syntax, err)
|
||||
bin_tree_t *elem;
|
||||
re_string_t *regexp;
|
||||
re_dfa_t *dfa;
|
||||
re_token_t *token;
|
||||
@ -2386,15 +2378,14 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
|
||||
reg_errcode_t *err;
|
||||
{
|
||||
re_token_t dup_token;
|
||||
bin_tree_t *tree = dup_elem, *work_tree;
|
||||
int start_idx = re_string_cur_idx (regexp);
|
||||
bin_tree_t *tree = NULL;
|
||||
int i, start, end, start_idx = re_string_cur_idx (regexp);
|
||||
re_token_t start_token = *token;
|
||||
|
||||
if (token->type == OP_OPEN_DUP_NUM)
|
||||
{
|
||||
int i;
|
||||
int end = 0;
|
||||
int start = fetch_number (regexp, token, syntax);
|
||||
bin_tree_t *elem;
|
||||
end = 0;
|
||||
start = fetch_number (regexp, token, syntax);
|
||||
if (start == -1)
|
||||
{
|
||||
if (token->type == CHARACTER && token->opr.c == ',')
|
||||
@ -2415,123 +2406,104 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
|
||||
if (BE (start == -2 || end == -2, 0))
|
||||
{
|
||||
/* Invalid sequence. */
|
||||
if (token->type == OP_CLOSE_DUP_NUM)
|
||||
goto parse_dup_op_invalid_interval;
|
||||
else
|
||||
goto parse_dup_op_ebrace;
|
||||
}
|
||||
if (BE ((start == 0 && end == 0) || tree == NULL, 0))
|
||||
{
|
||||
/* We treat "<re>{0}" and "<re>{0,0}" as null string.
|
||||
Similarly "<re>{0}{m,n}". */
|
||||
fetch_token (token, regexp, syntax);
|
||||
return NULL;
|
||||
if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
|
||||
{
|
||||
if (token->type == END_OF_RE)
|
||||
*err = REG_EBRACE;
|
||||
else
|
||||
*err = REG_BADBR;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* If the syntax bit is set, rollback. */
|
||||
re_string_set_index (regexp, start_idx);
|
||||
*token = start_token;
|
||||
token->type = CHARACTER;
|
||||
/* mb_partial and word_char bits should be already initialized by
|
||||
peek_token. */
|
||||
return elem;
|
||||
}
|
||||
|
||||
/* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
|
||||
elem = tree;
|
||||
for (i = 1; i < start; ++i)
|
||||
if (BE (end != -1 && start > end, 0))
|
||||
{
|
||||
work_tree = duplicate_tree (elem, dfa);
|
||||
tree = create_tree (dfa, tree, work_tree, CONCAT, 0);
|
||||
if (BE (work_tree == NULL || tree == NULL, 0))
|
||||
goto parse_dup_op_espace;
|
||||
}
|
||||
|
||||
if (end == -1)
|
||||
{
|
||||
/* We treat "<re>{0,}" as "<re>*". */
|
||||
dup_token.type = OP_DUP_ASTERISK;
|
||||
if (start > 0)
|
||||
{
|
||||
elem = duplicate_tree (elem, dfa);
|
||||
work_tree = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token);
|
||||
tree = create_tree (dfa, tree, work_tree, CONCAT, 0);
|
||||
if (BE (elem == NULL || work_tree == NULL || tree == NULL, 0))
|
||||
goto parse_dup_op_espace;
|
||||
}
|
||||
else
|
||||
{
|
||||
tree = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token);
|
||||
if (BE (tree == NULL, 0))
|
||||
goto parse_dup_op_espace;
|
||||
}
|
||||
}
|
||||
else if (BE (start > end, 0))
|
||||
{
|
||||
/* First number greater than first. */
|
||||
/* First number greater than second. */
|
||||
*err = REG_BADBR;
|
||||
return NULL;
|
||||
}
|
||||
else if (end - start > 0)
|
||||
{
|
||||
/* Then extract "<re>{0,m}" to "<re>?<re>?...<re>?". */
|
||||
dup_token.type = OP_DUP_QUESTION;
|
||||
if (start > 0)
|
||||
{
|
||||
elem = duplicate_tree (elem, dfa);
|
||||
elem = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token);
|
||||
tree = create_tree (dfa, tree, elem, CONCAT, 0);
|
||||
if (BE (elem == NULL || tree == NULL, 0))
|
||||
goto parse_dup_op_espace;
|
||||
}
|
||||
else
|
||||
{
|
||||
tree = elem = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token);
|
||||
if (BE (tree == NULL, 0))
|
||||
goto parse_dup_op_espace;
|
||||
}
|
||||
for (i = 1; i < end - start; ++i)
|
||||
{
|
||||
work_tree = duplicate_tree (elem, dfa);
|
||||
tree = create_tree (dfa, tree, work_tree, CONCAT, 0);
|
||||
if (BE (work_tree == NULL || tree == NULL, 0))
|
||||
{
|
||||
*err = REG_ESPACE;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Treat "<re>{0}*" etc. as "<re>{0}". */
|
||||
else if (tree == NULL)
|
||||
;
|
||||
else
|
||||
{
|
||||
tree = re_dfa_add_tree_node (dfa, tree, NULL, token);
|
||||
if (BE (tree == NULL, 0))
|
||||
start = (token->type == OP_DUP_PLUS) ? 1 : 0;
|
||||
end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
|
||||
}
|
||||
|
||||
/* Treat "<re>{0}*" etc. as "<re>{0}". */
|
||||
if (BE (elem == NULL, 0))
|
||||
start = end = 0;
|
||||
|
||||
/* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
|
||||
else if (BE (start > 0, 0))
|
||||
{
|
||||
tree = elem;
|
||||
for (i = 2; i <= start; ++i)
|
||||
{
|
||||
*err = REG_ESPACE;
|
||||
return NULL;
|
||||
elem = duplicate_tree (elem, dfa);
|
||||
tree = create_tree (dfa, tree, elem, CONCAT, 0);
|
||||
if (BE (elem == NULL || tree == NULL, 0))
|
||||
goto parse_dup_op_espace;
|
||||
}
|
||||
}
|
||||
|
||||
if (BE (end != start, 1))
|
||||
{
|
||||
dup_token.type = (end == -1 ? OP_DUP_ASTERISK : OP_DUP_QUESTION);
|
||||
if (BE (start > 0, 0))
|
||||
{
|
||||
elem = duplicate_tree (elem, dfa);
|
||||
if (BE (elem == NULL, 0))
|
||||
goto parse_dup_op_espace;
|
||||
|
||||
/* This subexpression will be marked as optional, so that
|
||||
empty matches do not touch the registers. */
|
||||
mark_opt_subexp (elem, dfa);
|
||||
|
||||
/* Prepare the tree with the modifier. */
|
||||
elem = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token);
|
||||
tree = create_tree (dfa, tree, elem, CONCAT, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* We do not need to duplicate the tree because we have not
|
||||
created it yet. */
|
||||
mark_opt_subexp (elem, dfa);
|
||||
tree = elem = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token);
|
||||
}
|
||||
|
||||
if (BE (elem == NULL || tree == NULL, 0))
|
||||
goto parse_dup_op_espace;
|
||||
|
||||
/* This loop is actually executed only when end != -1,
|
||||
to rewrite <re>{0,n} as <re>?<re>?<re>?... We have
|
||||
already created the start+1-th copy. */
|
||||
for (i = start + 2; i <= end; ++i)
|
||||
{
|
||||
elem = duplicate_tree (elem, dfa);
|
||||
tree = create_tree (dfa, tree, elem, CONCAT, 0);
|
||||
if (BE (elem == NULL || tree == NULL, 0))
|
||||
{
|
||||
*err = REG_ESPACE;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fetch_token (token, regexp, syntax);
|
||||
return tree;
|
||||
|
||||
parse_dup_op_espace:
|
||||
*err = REG_ESPACE;
|
||||
return NULL;
|
||||
|
||||
parse_dup_op_ebrace:
|
||||
if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
|
||||
{
|
||||
*err = REG_EBRACE;
|
||||
return NULL;
|
||||
}
|
||||
goto parse_dup_op_rollback;
|
||||
parse_dup_op_invalid_interval:
|
||||
if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
|
||||
{
|
||||
*err = REG_BADBR;
|
||||
return NULL;
|
||||
}
|
||||
parse_dup_op_rollback:
|
||||
re_string_set_index (regexp, start_idx);
|
||||
*token = start_token;
|
||||
token->type = CHARACTER;
|
||||
/* mb_partial and word_char bits should be already initialized by
|
||||
peek_token. */
|
||||
return dup_elem;
|
||||
}
|
||||
|
||||
/* Size of the names for collating symbol/equivalence_class/character_class.
|
||||
@ -3737,6 +3709,47 @@ re_dfa_add_tree_node (dfa, left, right, token)
|
||||
return create_tree (dfa, left, right, 0, new_idx);
|
||||
}
|
||||
|
||||
/* Mark the tree SRC as an optional subexpression. */
|
||||
|
||||
static void
|
||||
mark_opt_subexp (src, dfa)
|
||||
const bin_tree_t *src;
|
||||
re_dfa_t *dfa;
|
||||
{
|
||||
/* Pass an OPT_SUBEXP_IDX which is != 1 if the duplicated tree is
|
||||
a subexpression. */
|
||||
if (src->type == CONCAT
|
||||
&& src->left->type == NON_TYPE
|
||||
&& dfa->nodes[src->left->node_idx].type == OP_OPEN_SUBEXP)
|
||||
mark_opt_subexp_iter (src, dfa, dfa->nodes[src->left->node_idx].opr.idx);
|
||||
}
|
||||
|
||||
|
||||
/* Recursive tree walker for mark_opt_subexp. */
|
||||
|
||||
static void
|
||||
mark_opt_subexp_iter (src, dfa, idx)
|
||||
const bin_tree_t *src;
|
||||
re_dfa_t *dfa;
|
||||
{
|
||||
int node_idx;
|
||||
|
||||
if (src->type == NON_TYPE)
|
||||
{
|
||||
node_idx = src->node_idx;
|
||||
if ((dfa->nodes[node_idx].type == OP_OPEN_SUBEXP
|
||||
|| dfa->nodes[node_idx].type == OP_CLOSE_SUBEXP)
|
||||
&& dfa->nodes[node_idx].opr.idx == idx)
|
||||
dfa->nodes[node_idx].opt_subexp = 1;
|
||||
}
|
||||
|
||||
if (src->left != NULL)
|
||||
mark_opt_subexp_iter (src->left, dfa, idx);
|
||||
|
||||
if (src->right != NULL)
|
||||
mark_opt_subexp_iter (src->right, dfa, idx);
|
||||
}
|
||||
|
||||
|
||||
/* Duplicate the node SRC, and return new node. */
|
||||
|
||||
|
@ -1306,6 +1306,7 @@ re_dfa_add_node (dfa, token, mode)
|
||||
dfa->nodes_alloc = new_nodes_alloc;
|
||||
}
|
||||
dfa->nodes[dfa->nodes_len] = token;
|
||||
dfa->nodes[dfa->nodes_len].opt_subexp = 0;
|
||||
dfa->nodes[dfa->nodes_len].duplicated = 0;
|
||||
dfa->nodes[dfa->nodes_len].constraint = 0;
|
||||
return dfa->nodes_len++;
|
||||
|
@ -281,6 +281,7 @@ typedef struct
|
||||
#endif
|
||||
unsigned int constraint : 10; /* context constraint */
|
||||
unsigned int duplicated : 1;
|
||||
unsigned int opt_subexp : 1;
|
||||
#ifdef RE_ENABLE_I18N
|
||||
/* These 2 bits can be moved into the union if needed (e.g. if running out
|
||||
of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */
|
||||
|
@ -62,7 +62,8 @@ static int check_halt_node_context (const re_dfa_t *dfa, int node,
|
||||
static int check_halt_state_context (const regex_t *preg,
|
||||
const re_dfastate_t *state,
|
||||
const re_match_context_t *mctx, int idx) internal_function;
|
||||
static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,
|
||||
static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch,
|
||||
regmatch_t *prev_idx_match, int cur_node,
|
||||
int cur_idx, int nmatch) internal_function;
|
||||
static int proceed_next_node (const regex_t *preg, int nregs, regmatch_t *regs,
|
||||
const re_match_context_t *mctx,
|
||||
@ -1277,11 +1278,13 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
|
||||
regmatch_t *pmatch;
|
||||
int fl_backtrack;
|
||||
{
|
||||
re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
|
||||
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
|
||||
int idx, cur_node, real_nmatch;
|
||||
re_node_set eps_via_nodes;
|
||||
struct re_fail_stack_t *fs;
|
||||
struct re_fail_stack_t fs_body = {0, 2, NULL};
|
||||
struct re_fail_stack_t fs_body = { 0, 2, NULL };
|
||||
regmatch_t *prev_idx_match;
|
||||
|
||||
#ifdef DEBUG
|
||||
assert (nmatch > 1);
|
||||
assert (mctx->state_log != NULL);
|
||||
@ -1290,15 +1293,23 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
|
||||
{
|
||||
fs = &fs_body;
|
||||
fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
|
||||
if (fs->stack == NULL)
|
||||
return REG_ESPACE;
|
||||
}
|
||||
else
|
||||
fs = NULL;
|
||||
|
||||
cur_node = dfa->init_node;
|
||||
real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
|
||||
re_node_set_init_empty (&eps_via_nodes);
|
||||
|
||||
prev_idx_match = (regmatch_t *) alloca (sizeof (regmatch_t) * real_nmatch);
|
||||
memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * real_nmatch);
|
||||
|
||||
for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
|
||||
{
|
||||
update_regs (dfa, pmatch, cur_node, idx, real_nmatch);
|
||||
update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, real_nmatch);
|
||||
|
||||
if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
|
||||
{
|
||||
int reg_idx;
|
||||
@ -1362,31 +1373,55 @@ free_fail_stack_return (fs)
|
||||
}
|
||||
|
||||
static void
|
||||
update_regs (dfa, pmatch, cur_node, cur_idx, nmatch)
|
||||
update_regs (dfa, pmatch, prev_idx_match, cur_node, cur_idx, nmatch)
|
||||
re_dfa_t *dfa;
|
||||
regmatch_t *pmatch;
|
||||
regmatch_t *pmatch, *prev_idx_match;
|
||||
int cur_node, cur_idx, nmatch;
|
||||
{
|
||||
int type = dfa->nodes[cur_node].type;
|
||||
int reg_num;
|
||||
if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP)
|
||||
return;
|
||||
reg_num = dfa->nodes[cur_node].opr.idx + 1;
|
||||
if (reg_num >= nmatch)
|
||||
return;
|
||||
if (type == OP_OPEN_SUBEXP)
|
||||
{
|
||||
int reg_num = dfa->nodes[cur_node].opr.idx + 1;
|
||||
|
||||
/* We are at the first node of this sub expression. */
|
||||
pmatch[reg_num].rm_so = cur_idx;
|
||||
pmatch[reg_num].rm_eo = -1;
|
||||
if (reg_num < nmatch)
|
||||
{
|
||||
pmatch[reg_num].rm_so = cur_idx;
|
||||
pmatch[reg_num].rm_eo = -1;
|
||||
}
|
||||
}
|
||||
else if (type == OP_CLOSE_SUBEXP)
|
||||
/* We are at the first node of this sub expression. */
|
||||
pmatch[reg_num].rm_eo = cur_idx;
|
||||
{
|
||||
int reg_num = dfa->nodes[cur_node].opr.idx + 1;
|
||||
if (reg_num < nmatch)
|
||||
{
|
||||
/* We are at the last node of this sub expression. */
|
||||
if (pmatch[reg_num].rm_so < cur_idx)
|
||||
{
|
||||
pmatch[reg_num].rm_eo = cur_idx;
|
||||
/* This is a non-empty match or we are not inside an optional
|
||||
subexpression. Accept this right away. */
|
||||
memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (dfa->nodes[cur_node].opt_subexp
|
||||
&& prev_idx_match[reg_num].rm_so != -1)
|
||||
/* We transited through an empty match for an optional
|
||||
subexpression, like (a?)*, and this is not the subexp's
|
||||
first match. Copy back the old content of the registers
|
||||
so that matches of an inner subexpression are undone as
|
||||
well, like in ((a?))*. */
|
||||
memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
|
||||
else
|
||||
/* We completed a subexpression, but it may be part of
|
||||
an optional one, so do not update PREV_IDX_MATCH. */
|
||||
pmatch[reg_num].rm_eo = cur_idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define NUMBER_OF_STATE 1
|
||||
|
||||
/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
|
||||
and sift the nodes in each states according to the following rules.
|
||||
Updated state_log will be wrote to STATE_LOG.
|
||||
@ -3279,7 +3314,7 @@ out_free:
|
||||
character ch. See group_nodes_into_DFAstates. */
|
||||
for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
|
||||
;
|
||||
|
||||
|
||||
/* j-th destination accepts the word character ch. */
|
||||
if (IS_WORD_CHAR (ch))
|
||||
trtable[ch] = dest_states_word[j];
|
||||
@ -3298,7 +3333,7 @@ out_free:
|
||||
2 * SBC_MAX);
|
||||
if (BE (trtable == NULL, 0))
|
||||
goto out_free;
|
||||
|
||||
|
||||
/* For all characters ch...: */
|
||||
for (i = 0; i < BITSET_UINTS; ++i)
|
||||
for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
|
||||
@ -3310,13 +3345,13 @@ out_free:
|
||||
character ch. See group_nodes_into_DFAstates. */
|
||||
for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
|
||||
;
|
||||
|
||||
|
||||
/* j-th destination accepts the word character ch. */
|
||||
trtable[ch] = dest_states[j];
|
||||
trtable[ch + SBC_MAX] = dest_states_word[j];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* new line */
|
||||
if (bitset_contain (acceptable, NEWLINE_CHAR))
|
||||
{
|
||||
|
@ -431,7 +431,7 @@ a(b|c)*d - abcd abcd c
|
||||
a(b|c)+d - abd abd b
|
||||
a(b|c)+d - abcd abcd c
|
||||
a(b|c?)+d - ad ad @d
|
||||
a(b|c?)+d - abcd abcd @d
|
||||
a(b|c?)+d - abcd abcd c
|
||||
a(b|c){0,0}d - ad ad -
|
||||
a(b|c){0,1}d - ad ad -
|
||||
a(b|c){0,1}d - abd abd b
|
||||
@ -452,9 +452,8 @@ a(b|c){2,4}d - abcbd abcbd b
|
||||
a(b|c){2,4}d - abcbcd abcbcd c
|
||||
a(b|c){2,}d - abcd abcd c
|
||||
a(b|c){2,}d - abcbd abcbd b
|
||||
a(b+|((c)*))+d - abd abd @d,@d,-
|
||||
# XXX Needs to be checked.
|
||||
#a(b+|((c)*))+d - abcd abcd @d,@d,-
|
||||
a(b+|((c)*))+d - abd abd b,-,-
|
||||
a(b+|((c)*))+d - abcd abcd c,c,c
|
||||
|
||||
# check out the STARTEND option
|
||||
[abc] &# a(b)c b
|
||||
|
Loading…
Reference in New Issue
Block a user