ICU-11371 Improved checking of regular expression pattern size limits.
X-SVN-Rev: 36801
This commit is contained in:
parent
b3bd196383
commit
63758dca88
@ -647,6 +647,7 @@ typedef enum UErrorCode {
|
|||||||
U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */
|
U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */
|
||||||
U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */
|
U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */
|
||||||
U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */
|
U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */
|
||||||
|
U_REGEX_PATTERN_TOO_BIG, /**< Pattern exceeds limits on size or complexity. @draft ICU 55 */
|
||||||
U_REGEX_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for regexp errors */
|
U_REGEX_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for regexp errors */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
******************************************************************************
|
******************************************************************************
|
||||||
*
|
*
|
||||||
* Copyright (C) 1997-2011, International Business Machines
|
* Copyright (C) 1997-2014, International Business Machines
|
||||||
* Corporation and others. All Rights Reserved.
|
* Corporation and others. All Rights Reserved.
|
||||||
*
|
*
|
||||||
******************************************************************************
|
******************************************************************************
|
||||||
@ -165,7 +165,8 @@ _uRegexErrorName[U_REGEX_ERROR_LIMIT - U_REGEX_ERROR_START] = {
|
|||||||
"U_REGEX_INVALID_RANGE",
|
"U_REGEX_INVALID_RANGE",
|
||||||
"U_REGEX_STACK_OVERFLOW",
|
"U_REGEX_STACK_OVERFLOW",
|
||||||
"U_REGEX_TIME_OUT",
|
"U_REGEX_TIME_OUT",
|
||||||
"U_REGEX_STOPPED_BY_CALLER"
|
"U_REGEX_STOPPED_BY_CALLER",
|
||||||
|
"U_REGEX_PATTERN_TOO_BIG"
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char * const
|
static const char * const
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -104,6 +104,13 @@ private:
|
|||||||
void fixLiterals(UBool split=FALSE); // Generate code for pending literal characters.
|
void fixLiterals(UBool split=FALSE); // Generate code for pending literal characters.
|
||||||
void insertOp(int32_t where); // Open up a slot for a new op in the
|
void insertOp(int32_t where); // Open up a slot for a new op in the
|
||||||
// generated code at the specified location.
|
// generated code at the specified location.
|
||||||
|
void appendOp(int32_t op); // Append a new op to the compiled pattern.
|
||||||
|
void appendOp(int32_t type, int32_t val); // Build & append a new op to the compiled pattern.
|
||||||
|
int32_t buildOp(int32_t type, int32_t val); // Construct a new pcode instruction.
|
||||||
|
int32_t allocateData(int32_t size); // Allocate space in the matcher data area.
|
||||||
|
// Return index of the newly allocated data.
|
||||||
|
int32_t allocateStackData(int32_t size); // Allocate space in the match back-track stack frame.
|
||||||
|
// Return offset index in the frame.
|
||||||
int32_t minMatchLength(int32_t start,
|
int32_t minMatchLength(int32_t start,
|
||||||
int32_t end);
|
int32_t end);
|
||||||
int32_t maxMatchLength(int32_t start,
|
int32_t maxMatchLength(int32_t start,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
//
|
//
|
||||||
// Copyright (C) 2002-2013 International Business Machines Corporation
|
// Copyright (C) 2002-2014 International Business Machines Corporation
|
||||||
// and others. All rights reserved.
|
// and others. All rights reserved.
|
||||||
//
|
//
|
||||||
// file: regeximp.h
|
// file: regeximp.h
|
||||||
@ -241,7 +241,6 @@ enum {
|
|||||||
//
|
//
|
||||||
// Convenience macros for assembling and disassembling a compiled operation.
|
// Convenience macros for assembling and disassembling a compiled operation.
|
||||||
//
|
//
|
||||||
#define URX_BUILD(type, val) (int32_t)((type << 24) | (val))
|
|
||||||
#define URX_TYPE(x) ((uint32_t)(x) >> 24)
|
#define URX_TYPE(x) ((uint32_t)(x) >> 24)
|
||||||
#define URX_VAL(x) ((x) & 0xffffff)
|
#define URX_VAL(x) ((x) & 0xffffff)
|
||||||
|
|
||||||
|
@ -144,6 +144,9 @@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch
|
|||||||
case 24: name = "TestBug11049";
|
case 24: name = "TestBug11049";
|
||||||
if (exec) TestBug11049();
|
if (exec) TestBug11049();
|
||||||
break;
|
break;
|
||||||
|
case 25: name = "TestBug11371";
|
||||||
|
if (exec) TestBug11371();
|
||||||
|
break;
|
||||||
default: name = "";
|
default: name = "";
|
||||||
break; //needed to end loop
|
break; //needed to end loop
|
||||||
}
|
}
|
||||||
@ -5367,6 +5370,49 @@ void RegexTest::TestCase11049(const char *pattern, const char *data, UBool expec
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void RegexTest::TestBug11371() {
|
||||||
|
if (quick) {
|
||||||
|
logln("Skipping test. Runs in exhuastive mode only.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
|
UnicodeString patternString;
|
||||||
|
|
||||||
|
for (int i=0; i<8000000; i++) {
|
||||||
|
patternString.append(UnicodeString("()"));
|
||||||
|
}
|
||||||
|
LocalPointer<RegexPattern> compiledPat(RegexPattern::compile(patternString, 0, status));
|
||||||
|
if (status != U_REGEX_PATTERN_TOO_BIG) {
|
||||||
|
errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.",
|
||||||
|
__FILE__, __LINE__, u_errorName(status));
|
||||||
|
}
|
||||||
|
|
||||||
|
status = U_ZERO_ERROR;
|
||||||
|
patternString = "(";
|
||||||
|
for (int i=0; i<20000000; i++) {
|
||||||
|
patternString.append(UnicodeString("A++"));
|
||||||
|
}
|
||||||
|
patternString.append(UnicodeString("){0}B++"));
|
||||||
|
LocalPointer<RegexPattern> compiledPat2(RegexPattern::compile(patternString, 0, status));
|
||||||
|
if (status != U_REGEX_PATTERN_TOO_BIG) {
|
||||||
|
errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.",
|
||||||
|
__FILE__, __LINE__, u_errorName(status));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pattern with too much string data, such that string indexes overflow operand data field size
|
||||||
|
// in compiled instruction.
|
||||||
|
status = U_ZERO_ERROR;
|
||||||
|
patternString = "";
|
||||||
|
while (patternString.length() < 0x00ffffff) {
|
||||||
|
patternString.append(UnicodeString("stuff and things dont you know, these are a few of my favorite strings\n"));
|
||||||
|
}
|
||||||
|
patternString.append(UnicodeString("X? trailing string"));
|
||||||
|
LocalPointer<RegexPattern> compiledPat3(RegexPattern::compile(patternString, 0, status));
|
||||||
|
if (status != U_REGEX_PATTERN_TOO_BIG) {
|
||||||
|
errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.",
|
||||||
|
__FILE__, __LINE__, u_errorName(status));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
|
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
|
||||||
|
|
||||||
|
@ -50,6 +50,7 @@ public:
|
|||||||
virtual void Bug10459();
|
virtual void Bug10459();
|
||||||
virtual void TestCaseInsensitiveStarters();
|
virtual void TestCaseInsensitiveStarters();
|
||||||
virtual void TestBug11049();
|
virtual void TestBug11049();
|
||||||
|
virtual void TestBug11371();
|
||||||
|
|
||||||
// The following functions are internal to the regexp tests.
|
// The following functions are internal to the regexp tests.
|
||||||
virtual void assertUText(const char *expected, UText *actual, const char *file, int line);
|
virtual void assertUText(const char *expected, UText *actual, const char *file, int line);
|
||||||
|
Loading…
Reference in New Issue
Block a user