ICU-12930 Fix assertion failure in regex compile.

X-SVN-Rev: 39663
This commit is contained in:
Andy Heninger 2017-02-10 23:30:24 +00:00
parent 870b039296
commit 8e5c017e03
2 changed files with 34 additions and 26 deletions

View File

@ -2637,6 +2637,16 @@ void RegexCompile::findCaseInsensitiveStarters(UChar32 c, UnicodeSet *starterCh
}
// Saturating add: yields val + delta, pinned at INT32_MAX whenever the
// sum would not stay below INT32_MAX.
// Callers pass positive values for both val and delta.
static int32_t safeIncrement(int32_t val, int32_t delta) {
    // Room left before val reaches INT32_MAX.
    const int32_t headroom = INT32_MAX - val;
    return (delta < headroom) ? (val + delta) : INT32_MAX;
}
//------------------------------------------------------------------------------
@ -2737,7 +2747,7 @@ void RegexCompile::matchStartType() {
fRXPat->fInitialChars->add(URX_VAL(op));
numInitialStrings += 2;
}
currentLen++;
currentLen = safeIncrement(currentLen, 1);
atStart = FALSE;
break;
@ -2750,7 +2760,7 @@ void RegexCompile::matchStartType() {
fRXPat->fInitialChars->addAll(*s);
numInitialStrings += 2;
}
currentLen++;
currentLen = safeIncrement(currentLen, 1);
atStart = FALSE;
break;
@ -2787,7 +2797,7 @@ void RegexCompile::matchStartType() {
fRXPat->fInitialChars->addAll(*s);
numInitialStrings += 2;
}
currentLen++;
currentLen = safeIncrement(currentLen, 1);
atStart = FALSE;
break;
@ -2802,7 +2812,7 @@ void RegexCompile::matchStartType() {
fRXPat->fInitialChars->addAll(sc);
numInitialStrings += 2;
}
currentLen++;
currentLen = safeIncrement(currentLen, 1);
atStart = FALSE;
break;
@ -2819,7 +2829,7 @@ void RegexCompile::matchStartType() {
fRXPat->fInitialChars->addAll(s);
numInitialStrings += 2;
}
currentLen++;
currentLen = safeIncrement(currentLen, 1);
atStart = FALSE;
break;
@ -2836,7 +2846,7 @@ void RegexCompile::matchStartType() {
fRXPat->fInitialChars->addAll(s);
numInitialStrings += 2;
}
currentLen++;
currentLen = safeIncrement(currentLen, 1);
atStart = FALSE;
break;
@ -2855,7 +2865,7 @@ void RegexCompile::matchStartType() {
fRXPat->fInitialChars->addAll(s);
numInitialStrings += 2;
}
currentLen++;
currentLen = safeIncrement(currentLen, 1);
atStart = FALSE;
break;
@ -2879,7 +2889,7 @@ void RegexCompile::matchStartType() {
}
numInitialStrings += 2;
}
currentLen++;
currentLen = safeIncrement(currentLen, 1);
atStart = FALSE;
break;
@ -2895,7 +2905,7 @@ void RegexCompile::matchStartType() {
fRXPat->fInitialChars->complement();
numInitialStrings += 2;
}
currentLen++;
currentLen = safeIncrement(currentLen, 1);
atStart = FALSE;
break;
@ -2975,7 +2985,7 @@ void RegexCompile::matchStartType() {
fRXPat->fInitialStringLen = stringLen;
}
currentLen += stringLen;
currentLen = safeIncrement(currentLen, stringLen);
atStart = FALSE;
}
break;
@ -3000,7 +3010,7 @@ void RegexCompile::matchStartType() {
fRXPat->fInitialChars->addAll(s);
numInitialStrings += 2; // Matching on an initial string not possible.
}
currentLen += stringLen;
currentLen = safeIncrement(currentLen, stringLen);
atStart = FALSE;
}
break;
@ -3258,7 +3268,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
case URX_DOTANY_ALL: // . matches one or two.
case URX_DOTANY:
case URX_DOTANY_UNIX:
currentLen++;
currentLen = safeIncrement(currentLen, 1);
break;
@ -3310,7 +3320,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
{
loc++;
int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
currentLen += URX_VAL(stringLenOp);
currentLen = safeIncrement(currentLen, URX_VAL(stringLenOp));
}
break;
@ -3323,7 +3333,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
// Assume a min length of one for now. A min length of zero causes
// optimization failures for a pattern like "string"+
// currentLen += URX_VAL(stringLenOp);
currentLen += 1;
currentLen = safeIncrement(currentLen, 1);
}
break;
@ -3433,18 +3443,6 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
return currentLen;
}
// Add delta to val without overflowing int32_t; the result saturates
// at INT32_MAX.
// Both val and delta are expected to be positive.
static int32_t safeIncrement(int32_t val, int32_t delta) {
    if (delta >= INT32_MAX - val) {
        // Not enough room left: clamp instead of wrapping.
        return INT32_MAX;
    }
    return val + delta;
}
//------------------------------------------------------------------------------
//
// maxMatchLength Calculate the length of the longest string that could

View File

@ -1346,6 +1346,16 @@
"(?<!aą)spam" "**bą<0>spam</0>**"
"(?<!ąabc)spam" "**ąabx<0>spam</0>**"
# Bug #12930
#
# Minimum Match Length computation, int32_t overflow on an empty set in the pattern.
# The empty set, with no match possible, has a min match length of INT32_MAX.
# That value was subsequently incremented, which overflowed and caused an
# assertion failure on pattern compile.
"[^\u0000-\U0010ffff]bc?" "bc no match"
"[^\u0000-\U0010ffff]?bc?" "<0>bc</0> has a match"
# Random debugging, Temporary
#