ICU-11049 fix regex find() memory overrun.
X-SVN-Rev: 36124
This commit is contained in:
parent
1c22b8cf53
commit
e03585d7cf
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
**************************************************************************
|
**************************************************************************
|
||||||
* Copyright (C) 2002-2013 International Business Machines Corporation *
|
* Copyright (C) 2002-2014 International Business Machines Corporation *
|
||||||
* and others. All rights reserved. *
|
* and others. All rights reserved. *
|
||||||
**************************************************************************
|
**************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -983,7 +983,7 @@ UBool RegexMatcher::findUsingChunk() {
|
|||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (pos >= testLen) {
|
if (startPos > testLen) {
|
||||||
fMatch = FALSE;
|
fMatch = FALSE;
|
||||||
fHitEnd = TRUE;
|
fHitEnd = TRUE;
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
#include "intltest.h"
|
#include "intltest.h"
|
||||||
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
|
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
|
||||||
|
|
||||||
|
#include "unicode/localpointer.h"
|
||||||
#include "unicode/regex.h"
|
#include "unicode/regex.h"
|
||||||
#include "unicode/uchar.h"
|
#include "unicode/uchar.h"
|
||||||
#include "unicode/ucnv.h"
|
#include "unicode/ucnv.h"
|
||||||
@ -140,7 +141,9 @@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch
|
|||||||
case 23: name = "TestCaseInsensitiveStarters";
|
case 23: name = "TestCaseInsensitiveStarters";
|
||||||
if (exec) TestCaseInsensitiveStarters();
|
if (exec) TestCaseInsensitiveStarters();
|
||||||
break;
|
break;
|
||||||
|
case 24: name = "TestBug11049";
|
||||||
|
if (exec) TestBug11049();
|
||||||
|
break;
|
||||||
default: name = "";
|
default: name = "";
|
||||||
break; //needed to end loop
|
break; //needed to end loop
|
||||||
}
|
}
|
||||||
@ -5303,5 +5306,51 @@ void RegexTest::TestCaseInsensitiveStarters() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void RegexTest::TestBug11049() {
|
||||||
|
// Original bug report: pattern with match start consisting of one of several individual characters,
|
||||||
|
// and the text being matched ending with a supplementary character. find() would read past the
|
||||||
|
// end of the input text when searching for potential match starting points.
|
||||||
|
|
||||||
|
// To see the problem, the text must exactly fill an allocated buffer, so that valgrind will
|
||||||
|
// detect the bad read.
|
||||||
|
|
||||||
|
UnicodeString patternString("A|B|C");
|
||||||
|
UnicodeString txtString = UnicodeString("a string \\ud800\\udc00").unescape();
|
||||||
|
UChar *exactBuffer = new UChar[txtString.length()];
|
||||||
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
|
txtString.extract(exactBuffer, txtString.length(), status);
|
||||||
|
UText *ut = utext_openUChars(NULL, exactBuffer, txtString.length(), &status);
|
||||||
|
|
||||||
|
LocalPointer<RegexPattern> pattern(RegexPattern::compile(patternString, 0, status));
|
||||||
|
REGEX_CHECK_STATUS;
|
||||||
|
LocalPointer<RegexMatcher> matcher(pattern->matcher(status));
|
||||||
|
matcher->reset(ut);
|
||||||
|
REGEX_CHECK_STATUS;
|
||||||
|
UBool result = matcher->find();
|
||||||
|
REGEX_ASSERT(result == FALSE);
|
||||||
|
|
||||||
|
// Verify that match starting on the last char in input will be found.
|
||||||
|
txtString = UnicodeString("string matches at end C");
|
||||||
|
matcher->reset(txtString);
|
||||||
|
result = matcher->find();
|
||||||
|
REGEX_ASSERT(result == TRUE);
|
||||||
|
|
||||||
|
// Put an unpaired surrogate at the end of the input text,
|
||||||
|
// let valgrind verify that find() doesn't look off the end.
|
||||||
|
txtString = UnicodeString("a string \\ud800").unescape();
|
||||||
|
delete [] exactBuffer;
|
||||||
|
exactBuffer = new UChar[txtString.length()];
|
||||||
|
txtString.extract(exactBuffer, txtString.length(), status);
|
||||||
|
utext_openUChars(ut, exactBuffer, txtString.length(), &status);
|
||||||
|
matcher->reset(ut);
|
||||||
|
result = matcher->find();
|
||||||
|
REGEX_ASSERT(result == FALSE);
|
||||||
|
REGEX_CHECK_STATUS;
|
||||||
|
|
||||||
|
utext_close(ut);
|
||||||
|
delete [] exactBuffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
|
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
|
||||||
|
|
||||||
|
@ -49,6 +49,7 @@ public:
|
|||||||
virtual void CheckInvBufSize();
|
virtual void CheckInvBufSize();
|
||||||
virtual void Bug10459();
|
virtual void Bug10459();
|
||||||
virtual void TestCaseInsensitiveStarters();
|
virtual void TestCaseInsensitiveStarters();
|
||||||
|
virtual void TestBug11049();
|
||||||
|
|
||||||
// The following functions are internal to the regexp tests.
|
// The following functions are internal to the regexp tests.
|
||||||
virtual void assertUText(const char *expected, UText *actual, const char *file, int line);
|
virtual void assertUText(const char *expected, UText *actual, const char *file, int line);
|
||||||
|
9
icu4c/source/test/testdata/regextst.txt
vendored
9
icu4c/source/test/testdata/regextst.txt
vendored
@ -1192,6 +1192,15 @@
|
|||||||
"^(\w+\d\w+:\w+)$" "<0><1>DiesIst1Beispiel:text</1></0>"
|
"^(\w+\d\w+:\w+)$" "<0><1>DiesIst1Beispiel:text</1></0>"
|
||||||
"^(\w+\d\w+:\w+)$" i "<0><1>DiesIst1Beispiel:text</1></0>"
|
"^(\w+\d\w+:\w+)$" i "<0><1>DiesIst1Beispiel:text</1></0>"
|
||||||
|
|
||||||
|
# Bug 11049
|
||||||
|
# Edge cases in find() when pattern match begins with set of code points
|
||||||
|
# and the match begins at the end of the string.
|
||||||
|
|
||||||
|
"A|B|C" "hello <0>A</0>"
|
||||||
|
"A|B|C" "hello \U00011234"
|
||||||
|
"A|B|\U00012345" "hello <0>\U00012345</0>"
|
||||||
|
"A|B|\U00010000" "hello \ud800"
|
||||||
|
|
||||||
# Random debugging, Temporary
|
# Random debugging, Temporary
|
||||||
#
|
#
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user