ICU-11049 fix regex find() memory overrun.
X-SVN-Rev: 36124
This commit is contained in:
parent
1c22b8cf53
commit
e03585d7cf
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**************************************************************************
|
||||
* Copyright (C) 2002-2013 International Business Machines Corporation *
|
||||
* Copyright (C) 2002-2014 International Business Machines Corporation *
|
||||
* and others. All rights reserved. *
|
||||
**************************************************************************
|
||||
*/
|
||||
@ -983,7 +983,7 @@ UBool RegexMatcher::findUsingChunk() {
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
if (pos >= testLen) {
|
||||
if (startPos > testLen) {
|
||||
fMatch = FALSE;
|
||||
fHitEnd = TRUE;
|
||||
return FALSE;
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "intltest.h"
|
||||
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
|
||||
|
||||
#include "unicode/localpointer.h"
|
||||
#include "unicode/regex.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/ucnv.h"
|
||||
@ -140,7 +141,9 @@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch
|
||||
case 23: name = "TestCaseInsensitiveStarters";
|
||||
if (exec) TestCaseInsensitiveStarters();
|
||||
break;
|
||||
|
||||
case 24: name = "TestBug11049";
|
||||
if (exec) TestBug11049();
|
||||
break;
|
||||
default: name = "";
|
||||
break; //needed to end loop
|
||||
}
|
||||
@ -5303,5 +5306,51 @@ void RegexTest::TestCaseInsensitiveStarters() {
|
||||
}
|
||||
|
||||
|
||||
void RegexTest::TestBug11049() {
|
||||
// Original bug report: pattern with match start consisting of one of several individual characters,
|
||||
// and the text being matched ending with a supplementary character. find() would read past the
|
||||
// end of the input text when searching for potential match starting points.
|
||||
|
||||
// To see the problem, the text must exactly fill an allocated buffer, so that valgrind will
|
||||
// detect the bad read.
|
||||
|
||||
UnicodeString patternString("A|B|C");
|
||||
UnicodeString txtString = UnicodeString("a string \\ud800\\udc00").unescape();
|
||||
UChar *exactBuffer = new UChar[txtString.length()];
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
txtString.extract(exactBuffer, txtString.length(), status);
|
||||
UText *ut = utext_openUChars(NULL, exactBuffer, txtString.length(), &status);
|
||||
|
||||
LocalPointer<RegexPattern> pattern(RegexPattern::compile(patternString, 0, status));
|
||||
REGEX_CHECK_STATUS;
|
||||
LocalPointer<RegexMatcher> matcher(pattern->matcher(status));
|
||||
matcher->reset(ut);
|
||||
REGEX_CHECK_STATUS;
|
||||
UBool result = matcher->find();
|
||||
REGEX_ASSERT(result == FALSE);
|
||||
|
||||
// Verify that match starting on the last char in input will be found.
|
||||
txtString = UnicodeString("string matches at end C");
|
||||
matcher->reset(txtString);
|
||||
result = matcher->find();
|
||||
REGEX_ASSERT(result == TRUE);
|
||||
|
||||
// Put an unpaired surrogate at the end of the input text,
|
||||
// let valgrind verify that find() doesn't look off the end.
|
||||
txtString = UnicodeString("a string \\ud800").unescape();
|
||||
delete [] exactBuffer;
|
||||
exactBuffer = new UChar[txtString.length()];
|
||||
txtString.extract(exactBuffer, txtString.length(), status);
|
||||
utext_openUChars(ut, exactBuffer, txtString.length(), &status);
|
||||
matcher->reset(ut);
|
||||
result = matcher->find();
|
||||
REGEX_ASSERT(result == FALSE);
|
||||
REGEX_CHECK_STATUS;
|
||||
|
||||
utext_close(ut);
|
||||
delete [] exactBuffer;
|
||||
}
|
||||
|
||||
|
||||
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
|
||||
|
||||
|
@ -49,6 +49,7 @@ public:
|
||||
virtual void CheckInvBufSize();
|
||||
virtual void Bug10459();
|
||||
virtual void TestCaseInsensitiveStarters();
|
||||
virtual void TestBug11049();
|
||||
|
||||
// The following functions are internal to the regexp tests.
|
||||
virtual void assertUText(const char *expected, UText *actual, const char *file, int line);
|
||||
|
9
icu4c/source/test/testdata/regextst.txt
vendored
9
icu4c/source/test/testdata/regextst.txt
vendored
@ -1192,6 +1192,15 @@
|
||||
"^(\w+\d\w+:\w+)$" "<0><1>DiesIst1Beispiel:text</1></0>"
|
||||
"^(\w+\d\w+:\w+)$" i "<0><1>DiesIst1Beispiel:text</1></0>"
|
||||
|
||||
# Bug 11049
|
||||
# Edge cases in find() when pattern match begins with set of code points
|
||||
# and the match begins at the end of the string.
|
||||
|
||||
"A|B|C" "hello <0>A</0>"
|
||||
"A|B|C" "hello \U00011234"
|
||||
"A|B|\U00012345" "hello <0>\U00012345</0>"
|
||||
"A|B|\U00010000" "hello \ud800"
|
||||
|
||||
# Random debugging, Temporary
|
||||
#
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user