ICU-11371 Improved checking of regular expression pattern size limits.

X-SVN-Rev: 36801
This commit is contained in:
Andy Heninger 2014-12-02 21:58:18 +00:00
parent b3bd196383
commit 63758dca88
7 changed files with 321 additions and 252 deletions

View File

@ -647,6 +647,7 @@ typedef enum UErrorCode {
U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */ U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */
U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */ U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */
U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */ U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */
U_REGEX_PATTERN_TOO_BIG, /**< Pattern exceeds limits on size or complexity. @draft ICU 55 */
U_REGEX_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for regexp errors */ U_REGEX_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for regexp errors */
/* /*

View File

@ -1,7 +1,7 @@
/* /*
****************************************************************************** ******************************************************************************
* *
* Copyright (C) 1997-2011, International Business Machines * Copyright (C) 1997-2014, International Business Machines
* Corporation and others. All Rights Reserved. * Corporation and others. All Rights Reserved.
* *
****************************************************************************** ******************************************************************************
@ -165,7 +165,8 @@ _uRegexErrorName[U_REGEX_ERROR_LIMIT - U_REGEX_ERROR_START] = {
"U_REGEX_INVALID_RANGE", "U_REGEX_INVALID_RANGE",
"U_REGEX_STACK_OVERFLOW", "U_REGEX_STACK_OVERFLOW",
"U_REGEX_TIME_OUT", "U_REGEX_TIME_OUT",
"U_REGEX_STOPPED_BY_CALLER" "U_REGEX_STOPPED_BY_CALLER",
"U_REGEX_PATTERN_TOO_BIG"
}; };
static const char * const static const char * const

File diff suppressed because it is too large Load Diff

View File

@ -104,6 +104,13 @@ private:
void fixLiterals(UBool split=FALSE); // Generate code for pending literal characters. void fixLiterals(UBool split=FALSE); // Generate code for pending literal characters.
void insertOp(int32_t where); // Open up a slot for a new op in the void insertOp(int32_t where); // Open up a slot for a new op in the
// generated code at the specified location. // generated code at the specified location.
void appendOp(int32_t op); // Append a new op to the compiled pattern.
void appendOp(int32_t type, int32_t val); // Build & append a new op to the compiled pattern.
int32_t buildOp(int32_t type, int32_t val); // Construct a new pcode instruction.
int32_t allocateData(int32_t size); // Allocate space in the matcher data area.
// Return index of the newly allocated data.
int32_t allocateStackData(int32_t size); // Allocate space in the match back-track stack frame.
// Return offset index in the frame.
int32_t minMatchLength(int32_t start, int32_t minMatchLength(int32_t start,
int32_t end); int32_t end);
int32_t maxMatchLength(int32_t start, int32_t maxMatchLength(int32_t start,

View File

@ -1,5 +1,5 @@
// //
// Copyright (C) 2002-2013 International Business Machines Corporation // Copyright (C) 2002-2014 International Business Machines Corporation
// and others. All rights reserved. // and others. All rights reserved.
// //
// file: regeximp.h // file: regeximp.h
@ -241,7 +241,6 @@ enum {
// //
// Convenience macros for assembling and disassembling a compiled operation. // Convenience macros for assembling and disassembling a compiled operation.
// //
#define URX_BUILD(type, val) (int32_t)((type << 24) | (val))
#define URX_TYPE(x) ((uint32_t)(x) >> 24) #define URX_TYPE(x) ((uint32_t)(x) >> 24)
#define URX_VAL(x) ((x) & 0xffffff) #define URX_VAL(x) ((x) & 0xffffff)

View File

@ -144,6 +144,9 @@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch
case 24: name = "TestBug11049"; case 24: name = "TestBug11049";
if (exec) TestBug11049(); if (exec) TestBug11049();
break; break;
case 25: name = "TestBug11371";
if (exec) TestBug11371();
break;
default: name = ""; default: name = "";
break; //needed to end loop break; //needed to end loop
} }
@ -5367,6 +5370,49 @@ void RegexTest::TestCase11049(const char *pattern, const char *data, UBool expec
} }
void RegexTest::TestBug11371() {
if (quick) {
logln("Skipping test. Runs in exhuastive mode only.");
return;
}
UErrorCode status = U_ZERO_ERROR;
UnicodeString patternString;
for (int i=0; i<8000000; i++) {
patternString.append(UnicodeString("()"));
}
LocalPointer<RegexPattern> compiledPat(RegexPattern::compile(patternString, 0, status));
if (status != U_REGEX_PATTERN_TOO_BIG) {
errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.",
__FILE__, __LINE__, u_errorName(status));
}
status = U_ZERO_ERROR;
patternString = "(";
for (int i=0; i<20000000; i++) {
patternString.append(UnicodeString("A++"));
}
patternString.append(UnicodeString("){0}B++"));
LocalPointer<RegexPattern> compiledPat2(RegexPattern::compile(patternString, 0, status));
if (status != U_REGEX_PATTERN_TOO_BIG) {
errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.",
__FILE__, __LINE__, u_errorName(status));
}
// Pattern with too much string data, such that string indexes overflow operand data field size
// in compiled instruction.
status = U_ZERO_ERROR;
patternString = "";
while (patternString.length() < 0x00ffffff) {
patternString.append(UnicodeString("stuff and things dont you know, these are a few of my favorite strings\n"));
}
patternString.append(UnicodeString("X? trailing string"));
LocalPointer<RegexPattern> compiledPat3(RegexPattern::compile(patternString, 0, status));
if (status != U_REGEX_PATTERN_TOO_BIG) {
errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.",
__FILE__, __LINE__, u_errorName(status));
}
}
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */

View File

@ -50,6 +50,7 @@ public:
virtual void Bug10459(); virtual void Bug10459();
virtual void TestCaseInsensitiveStarters(); virtual void TestCaseInsensitiveStarters();
virtual void TestBug11049(); virtual void TestBug11049();
virtual void TestBug11371();
// The following functions are internal to the regexp tests. // The following functions are internal to the regexp tests.
virtual void assertUText(const char *expected, UText *actual, const char *file, int line); virtual void assertUText(const char *expected, UText *actual, const char *file, int line);