ICU-12930 Fix assertion failure in regex compile.

X-SVN-Rev: 39663
This commit is contained in:
Andy Heninger 2017-02-10 23:30:24 +00:00
parent 870b039296
commit 8e5c017e03
2 changed files with 34 additions and 26 deletions

View File

@ -2637,6 +2637,16 @@ void RegexCompile::findCaseInsensitiveStarters(UChar32 c, UnicodeSet *starterCh
} }
// Saturating add for match-length bookkeeping.
//   Returns val + delta, except that the result is pinned at INT32_MAX
//   whenever the sum would reach or exceed INT32_MAX.
//   val and delta are assumed to be positive; negative inputs are not handled.
static int32_t safeIncrement(int32_t val, int32_t delta) {
    // Room left before hitting INT32_MAX; computed by subtraction so the
    // check itself cannot overflow for positive val.
    const int32_t headroom = INT32_MAX - val;
    if (delta >= headroom) {
        return INT32_MAX;
    }
    return val + delta;
}
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
@ -2737,7 +2747,7 @@ void RegexCompile::matchStartType() {
fRXPat->fInitialChars->add(URX_VAL(op)); fRXPat->fInitialChars->add(URX_VAL(op));
numInitialStrings += 2; numInitialStrings += 2;
} }
currentLen++; currentLen = safeIncrement(currentLen, 1);
atStart = FALSE; atStart = FALSE;
break; break;
@ -2750,7 +2760,7 @@ void RegexCompile::matchStartType() {
fRXPat->fInitialChars->addAll(*s); fRXPat->fInitialChars->addAll(*s);
numInitialStrings += 2; numInitialStrings += 2;
} }
currentLen++; currentLen = safeIncrement(currentLen, 1);
atStart = FALSE; atStart = FALSE;
break; break;
@ -2787,7 +2797,7 @@ void RegexCompile::matchStartType() {
fRXPat->fInitialChars->addAll(*s); fRXPat->fInitialChars->addAll(*s);
numInitialStrings += 2; numInitialStrings += 2;
} }
currentLen++; currentLen = safeIncrement(currentLen, 1);
atStart = FALSE; atStart = FALSE;
break; break;
@ -2802,7 +2812,7 @@ void RegexCompile::matchStartType() {
fRXPat->fInitialChars->addAll(sc); fRXPat->fInitialChars->addAll(sc);
numInitialStrings += 2; numInitialStrings += 2;
} }
currentLen++; currentLen = safeIncrement(currentLen, 1);
atStart = FALSE; atStart = FALSE;
break; break;
@ -2819,7 +2829,7 @@ void RegexCompile::matchStartType() {
fRXPat->fInitialChars->addAll(s); fRXPat->fInitialChars->addAll(s);
numInitialStrings += 2; numInitialStrings += 2;
} }
currentLen++; currentLen = safeIncrement(currentLen, 1);
atStart = FALSE; atStart = FALSE;
break; break;
@ -2836,7 +2846,7 @@ void RegexCompile::matchStartType() {
fRXPat->fInitialChars->addAll(s); fRXPat->fInitialChars->addAll(s);
numInitialStrings += 2; numInitialStrings += 2;
} }
currentLen++; currentLen = safeIncrement(currentLen, 1);
atStart = FALSE; atStart = FALSE;
break; break;
@ -2855,7 +2865,7 @@ void RegexCompile::matchStartType() {
fRXPat->fInitialChars->addAll(s); fRXPat->fInitialChars->addAll(s);
numInitialStrings += 2; numInitialStrings += 2;
} }
currentLen++; currentLen = safeIncrement(currentLen, 1);
atStart = FALSE; atStart = FALSE;
break; break;
@ -2879,7 +2889,7 @@ void RegexCompile::matchStartType() {
} }
numInitialStrings += 2; numInitialStrings += 2;
} }
currentLen++; currentLen = safeIncrement(currentLen, 1);
atStart = FALSE; atStart = FALSE;
break; break;
@ -2895,7 +2905,7 @@ void RegexCompile::matchStartType() {
fRXPat->fInitialChars->complement(); fRXPat->fInitialChars->complement();
numInitialStrings += 2; numInitialStrings += 2;
} }
currentLen++; currentLen = safeIncrement(currentLen, 1);
atStart = FALSE; atStart = FALSE;
break; break;
@ -2975,7 +2985,7 @@ void RegexCompile::matchStartType() {
fRXPat->fInitialStringLen = stringLen; fRXPat->fInitialStringLen = stringLen;
} }
currentLen += stringLen; currentLen = safeIncrement(currentLen, stringLen);
atStart = FALSE; atStart = FALSE;
} }
break; break;
@ -3000,7 +3010,7 @@ void RegexCompile::matchStartType() {
fRXPat->fInitialChars->addAll(s); fRXPat->fInitialChars->addAll(s);
numInitialStrings += 2; // Matching on an initial string not possible. numInitialStrings += 2; // Matching on an initial string not possible.
} }
currentLen += stringLen; currentLen = safeIncrement(currentLen, stringLen);
atStart = FALSE; atStart = FALSE;
} }
break; break;
@ -3258,7 +3268,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
case URX_DOTANY_ALL: // . matches one or two. case URX_DOTANY_ALL: // . matches one or two.
case URX_DOTANY: case URX_DOTANY:
case URX_DOTANY_UNIX: case URX_DOTANY_UNIX:
currentLen++; currentLen = safeIncrement(currentLen, 1);
break; break;
@ -3310,7 +3320,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
{ {
loc++; loc++;
int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc); int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
currentLen += URX_VAL(stringLenOp); currentLen = safeIncrement(currentLen, URX_VAL(stringLenOp));
} }
break; break;
@ -3323,7 +3333,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
// Assume a min length of one for now. A min length of zero causes // Assume a min length of one for now. A min length of zero causes
// optimization failures for a pattern like "string"+ // optimization failures for a pattern like "string"+
// currentLen += URX_VAL(stringLenOp); // currentLen += URX_VAL(stringLenOp);
currentLen += 1; currentLen = safeIncrement(currentLen, 1);
} }
break; break;
@ -3433,18 +3443,6 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
return currentLen; return currentLen;
} }
// Saturating add for match-length bookkeeping.
//   Returns val + delta, except that the result is pinned at INT32_MAX
//   whenever the sum would reach or exceed INT32_MAX.
//   val and delta are assumed to be positive; negative inputs are not handled.
static int32_t safeIncrement(int32_t val, int32_t delta) {
    // Room left before hitting INT32_MAX; computed by subtraction so the
    // check itself cannot overflow for positive val.
    const int32_t headroom = INT32_MAX - val;
    if (delta >= headroom) {
        return INT32_MAX;
    }
    return val + delta;
}
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// //
// maxMatchLength Calculate the length of the longest string that could // maxMatchLength Calculate the length of the longest string that could

View File

@ -1346,6 +1346,16 @@
"(?<!aą)spam" "**bą<0>spam</0>**" "(?<!aą)spam" "**bą<0>spam</0>**"
"(?<!ąabc)spam" "**ąabx<0>spam</0>**" "(?<!ąabc)spam" "**ąabx<0>spam</0>**"
# Bug #12930
#
# Minimum match length computation: int32_t overflow on an empty set in the pattern.
# The empty set, with no match possible, has a min match length of INT32_MAX.
# That value was subsequently incremented, which caused an assertion failure on pattern compile.
"[^\u0000-\U0010ffff]bc?" "bc no match"
"[^\u0000-\U0010ffff]?bc?" "<0>bc</0> has a match"
# Random debugging, Temporary # Random debugging, Temporary
# #