ICU-4888 regex pattern compilation bug with flag settings.

X-SVN-Rev: 18703
This commit is contained in:
Andy Heninger 2005-10-20 15:54:01 +00:00
parent c642152760
commit 8a1e1ba83a
3 changed files with 22 additions and 5 deletions

View File

@ -60,7 +60,7 @@ RegexCompile::RegexCompile(RegexPattern *rxp, UErrorCode &status) : fParenStack(
fCharNum = 0;
fQuoteMode = FALSE;
fInBackslashQuote = FALSE;
fModeFlags = fRXPat->fFlags;
fModeFlags = fRXPat->fFlags | 0x80000000;
fEOLComments = TRUE;
fMatchOpenParen = -1;
@ -339,11 +339,10 @@ UBool RegexCompile::doParseActions(EParseAction action)
fRXPat->fCompiledPat->addElement(URX_BUILD(URX_STATE_SAVE, 2), *fStatus);
fRXPat->fCompiledPat->addElement(URX_BUILD(URX_JMP, 3), *fStatus);
fRXPat->fCompiledPat->addElement(URX_BUILD(URX_FAIL, 0), *fStatus);
fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
fParenStack.push(-1, *fStatus); // Begin a Paren Stack Frame
fParenStack.push( 3, *fStatus); // Push location of first NOP
// Standard open nonCapture paren action emits the two NOPs and
// sets up the paren stack frame.
doParseActions((EParseAction)doOpenNonCaptureParen);
break;
case doPatFinish:
@ -1333,6 +1332,7 @@ UBool RegexCompile::doParseActions(EParseAction action)
case doSetMatchMode:
// We've got a (?i) or similar. The match mode is being changed, but
// the change is not scoped to a parenthesized block.
U_ASSERT(fNewModeFlags < 0);
fModeFlags = fNewModeFlags;
// Prevent any string from spanning across the change of match mode.
@ -1363,6 +1363,7 @@ UBool RegexCompile::doParseActions(EParseAction action)
fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP
// Set the current mode flags to the new values.
U_ASSERT(fNewModeFlags < 0);
fModeFlags = fNewModeFlags;
}
break;
@ -1620,6 +1621,7 @@ void RegexCompile::insertOp(int32_t where) {
// the compiled pattern. (Negative values are frame boundaries, and don't need fixing.)
for (loc=0; loc<fParenStack.size(); loc++) {
int32_t x = fParenStack.elementAti(loc);
U_ASSERT(x < code->size());
if (x>where) {
x++;
fParenStack.setElementAt(x, loc);
@ -1726,6 +1728,7 @@ void RegexCompile::handleCloseParen() {
// the value they had at the open paren. Saved value is
// at the top of the paren stack.
fModeFlags = fParenStack.popi();
U_ASSERT(fModeFlags < 0);
// Do any additional fixups, depending on the specific kind of
// parenthesized grouping this is

View File

@ -154,6 +154,9 @@ private:
// Data associated with the generation of the pcode for the match engine
//
int32_t fModeFlags; // Match Flags. (Case Insensitive, etc.)
// Always has high bit (31) set so that flag values
// on the paren stack are distinguished from relocatable
// pcode addresses.
int32_t fNewModeFlags; // New flags, while compiling (?i, holds state
// until last flag is scanned.
UBool fSetModeFlag; // true for (?ismx, false for (?-ismx

View File

@ -518,6 +518,17 @@
"[$](P|C|D);" "$P;"
"[$$](P|C|D);" "<0>$<1>P</1>;</0>"
#
# bug 4888?
#
"((a){2})|(#)" is "no"
"((a){2})|(#)" is "<0><1>a<2>a</2></1></0>#"
#"((a){2})|(#)" is "a<0><3>#</3></0>"
"((a|b){2})|c" is "<0>c</0>"
"((a|b){2})|c" is "<0>C</0>"
"((a|b){2})|c" s "C"
#
# Random debugging, Temporary
#