ICU-7224 bug in regular expressions with negative lookahead

X-SVN-Rev: 26848
This commit is contained in:
Andy Heninger 2009-11-06 01:27:56 +00:00
parent 8f8f3449e5
commit 44bf429a55
2 changed files with 14 additions and 6 deletions

View File

@ -1,8 +1,7 @@
//
// file: regexcmp.cpp
//
// Copyright (C) 2002-2008 International Business Machines Corporation and others.
// Copyright (C) 2002-2009 International Business Machines Corporation and others.
// All Rights Reserved.
//
// This file contains the ICU regular expression compiler, which is responsible
@ -596,7 +595,7 @@ UBool RegexCompile::doParseActions(int32_t action)
fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The STATE_SAVE location
fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP location
// Instructions #5 and #6 will be added when the ')' is encountered.
// Instructions #5 - #7 will be added when the ')' is encountered.
}
break;
@ -2132,7 +2131,7 @@ void RegexCompile::handleCloseParen() {
fRXPat->fCompiledPat->addElement(op, *fStatus);
op = URX_BUILD(URX_BACKTRACK, 0);
fRXPat->fCompiledPat->addElement(op, *fStatus);
op = URX_BUILD(URX_LA_END, 0);
op = URX_BUILD(URX_LA_END, dataLoc);
fRXPat->fCompiledPat->addElement(op, *fStatus);
// Patch the URX_STATE_SAVE near the top of the block.

View File

@ -1,4 +1,4 @@
# Copyright (c) 2001-2007 International Business Machines
# Copyright (c) 2001-2009 International Business Machines
# Corporation and others. All Rights Reserved.
#
# file:
@ -38,7 +38,7 @@
#
# a Use non-Anchoring Bounds.
# b Use Transparent Bounds.
# The a and t options only make a difference if
# The a and b options only make a difference if
# a <r>region</r> has been specified in the string.
# z|Z hitEnd was expected (z) or not expected (Z).
# With neither, hitEnd is not checked.
@ -49,6 +49,9 @@
# Look-ahead expressions
#
"(?!0{5})(\d{5})" "<0><1>00001</1></0>zzzz"
"(?!0{5})(\d{5})z" "<0><1>00001</1>z</0>zzz"
"(?!0{5})(\d{5})(?!y)" "<0><1>00001</1></0>zzzz"
"abc(?=def)" "<0>abc</0>def"
"(.*)(?=c)" "<0><1>ab</1></0>cdef"
@ -1010,6 +1013,12 @@
"a+?" "<0>a</0>aaaaaaaaaaaa"
"a+?" M "<0>aaaaaaaaaaaaa</0>"
#
# Bug 7224. Expression to validate zip codes.
#
"(?!0{5})(\d{5})(?!-?0{4})(-?\d{4})?" "<0><1>94040</1><2>-3344</2></0>"
"(?!0{5})(\d{5})(?!-?0{4})(-?\d{4})?" "94040-0000"
"(?!0{5})(\d{5})(?!-?0{4})(-?\d{4})?" "00000-3344"
#
# Random debugging, Temporary
#