ICU-7224 bug in regular expressions with negative lookahead

X-SVN-Rev: 26848
This commit is contained in:
Andy Heninger 2009-11-06 01:27:56 +00:00
parent 8f8f3449e5
commit 44bf429a55
2 changed files with 14 additions and 6 deletions

View File

@ -1,8 +1,7 @@
// //
// file: regexcmp.cpp // file: regexcmp.cpp
// //
// Copyright (C) 2002-2008 International Business Machines Corporation and others. // Copyright (C) 2002-2009 International Business Machines Corporation and others.
// All Rights Reserved. // All Rights Reserved.
// //
// This file contains the ICU regular expression compiler, which is responsible // This file contains the ICU regular expression compiler, which is responsible
@ -596,7 +595,7 @@ UBool RegexCompile::doParseActions(int32_t action)
fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The STATE_SAVE location fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The STATE_SAVE location
fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP location fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP location
// Instructions #5 and #6 will be added when the ')' is encountered. // Instructions #5 - #7 will be added when the ')' is encountered.
} }
break; break;
@ -2132,7 +2131,7 @@ void RegexCompile::handleCloseParen() {
fRXPat->fCompiledPat->addElement(op, *fStatus); fRXPat->fCompiledPat->addElement(op, *fStatus);
op = URX_BUILD(URX_BACKTRACK, 0); op = URX_BUILD(URX_BACKTRACK, 0);
fRXPat->fCompiledPat->addElement(op, *fStatus); fRXPat->fCompiledPat->addElement(op, *fStatus);
op = URX_BUILD(URX_LA_END, 0); op = URX_BUILD(URX_LA_END, dataLoc);
fRXPat->fCompiledPat->addElement(op, *fStatus); fRXPat->fCompiledPat->addElement(op, *fStatus);
// Patch the URX_SAVE near the top of the block. // Patch the URX_SAVE near the top of the block.

View File

@ -1,4 +1,4 @@
# Copyright (c) 2001-2007 International Business Machines # Copyright (c) 2001-2009 International Business Machines
# Corporation and others. All Rights Reserved. # Corporation and others. All Rights Reserved.
# #
# file: # file:
@ -38,7 +38,7 @@
# #
# a Use non-Anchoring Bounds. # a Use non-Anchoring Bounds.
# b Use Transparent Bounds. # b Use Transparent Bounds.
# The a and t options only make a difference if # The a and b options only make a difference if
# a <r>region</r> has been specified in the string. # a <r>region</r> has been specified in the string.
# z|Z hitEnd was expected(z) or not expected (Z). # z|Z hitEnd was expected(z) or not expected (Z).
# With neither, hitEnd is not checked. # With neither, hitEnd is not checked.
@ -49,6 +49,9 @@
# Look-ahead expressions # Look-ahead expressions
# #
"(?!0{5})(\d{5})" "<0><1>00001</1></0>zzzz"
"(?!0{5})(\d{5})z" "<0><1>00001</1>z</0>zzz"
"(?!0{5})(\d{5})(?!y)" "<0><1>00001</1></0>zzzz"
"abc(?=def)" "<0>abc</0>def" "abc(?=def)" "<0>abc</0>def"
"(.*)(?=c)" "<0><1>ab</1></0>cdef" "(.*)(?=c)" "<0><1>ab</1></0>cdef"
@ -1010,6 +1013,12 @@
"a+?" "<0>a</0>aaaaaaaaaaaa" "a+?" "<0>a</0>aaaaaaaaaaaa"
"a+?" M "<0>aaaaaaaaaaaaa</0>" "a+?" M "<0>aaaaaaaaaaaaa</0>"
#
# Bug 7224. Expression to validate zip codes.
#
"(?!0{5})(\d{5})(?!-?0{4})(-?\d{4})?" "<0><1>94040</1><2>-3344</2></0>"
"(?!0{5})(\d{5})(?!-?0{4})(-?\d{4})?" "94040-0000"
"(?!0{5})(\d{5})(?!-?0{4})(-?\d{4})?" "00000-3344"
# #
# Random debugging, Temporary # Random debugging, Temporary
# #