ICU-11369 Regex, fix incorrect optimization of patterns with a zero length quantifier {0}
X-SVN-Rev: 36727
This commit is contained in:
parent
9db433b745
commit
f5d14979c6
@ -2339,7 +2339,15 @@ UBool RegexCompile::compileInlineInterval() {
|
||||
int32_t topOfBlock = blockTopLoc(FALSE);
|
||||
if (fIntervalUpper == 0) {
|
||||
// Pathological case. Attempt no matches, as if the block doesn't exist.
|
||||
// Discard the generated code for the block.
|
||||
// If the block included parens, discard the info pertaining to them as well.
|
||||
fRXPat->fCompiledPat->setSize(topOfBlock);
|
||||
if (fMatchOpenParen >= topOfBlock) {
|
||||
fMatchOpenParen = -1;
|
||||
}
|
||||
if (fMatchCloseParen >= topOfBlock) {
|
||||
fMatchCloseParen = -1;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
@ -187,7 +187,9 @@ private:
|
||||
int32_t fMatchOpenParen; // The position in the compiled pattern
|
||||
// of the slot reserved for a state save
|
||||
// at the start of the most recently processed
|
||||
// parenthesized block.
|
||||
// parenthesized block. Updated when processing
|
||||
// a close to the location for the corresponding open.
|
||||
|
||||
int32_t fMatchCloseParen; // The position in the pattern of the first
|
||||
// location after the most recently processed
|
||||
// parenthesized block.
|
||||
|
10
icu4c/source/test/testdata/regextst.txt
vendored
10
icu4c/source/test/testdata/regextst.txt
vendored
@ -1201,6 +1201,15 @@
|
||||
"A|B|\U00012345" "hello <0>\U00012345</0>"
|
||||
"A|B|\U00010000" "hello \ud800"
|
||||
|
||||
# Bug 11369
|
||||
# Incorrect optimization of patterns with a zero length quantifier {0}
|
||||
|
||||
"(.|b)(|b){0}\$(?#xxx){3}(?>\D*)" "AAAAABBBBBCCCCCDDDDEEEEE"
|
||||
"(|b)ab(c)" "<0><1></1>ab<2>c</2></0>"
|
||||
"(|b){0}a{3}(D*)" "<0>aaa<2></2></0>"
|
||||
"(|b){0,1}a{3}(D*)" "<0><1></1>aaa<2></2></0>"
|
||||
"((|b){0})a{3}(D*)" "<0><1></1>aaa<3></3></0>"
|
||||
|
||||
# Bug 11370
|
||||
# Max match length computation of look-behind expression gives result that is too big to fit in the
|
||||
# 24 bit operand portion of the compiled code. Expressions should fail to compile
|
||||
@ -1209,6 +1218,7 @@
|
||||
"(?<!(0123456789a){10000000})x" E "no match"
|
||||
"(?<!\\ubeaf(\\ubeaf{11000}){11000})" E "no match"
|
||||
|
||||
|
||||
# Random debugging, Temporary
|
||||
#
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user