ICU-12930 Fix assertion failure in regex compile.
X-SVN-Rev: 39663
This commit is contained in:
parent
870b039296
commit
8e5c017e03
@ -2637,6 +2637,16 @@ void RegexCompile::findCaseInsensitiveStarters(UChar32 c, UnicodeSet *starterCh
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Saturating 32-bit addition used for match-length bookkeeping.
//
// Returns val + delta, clamped to the int32_t range instead of overflowing.
//
//   val    the running length; callers pass a non-negative value.
//   delta  the amount to add; callers pass a non-negative value.
//
// A result that would reach or exceed INT32_MAX is reported as INT32_MAX, so
// a length that is already INT32_MAX stays there on further increments
// rather than wrapping to a negative number.
//
// Both arguments are expected to be non-negative, but the function is
// well-defined for any int32_t inputs: the sum is formed in 64 bits, so
// neither the range check nor the addition can hit signed-overflow
// undefined behaviour.
static int32_t safeIncrement(int32_t val, int32_t delta) {
    // Widen first: two int32_t values always sum to something an int64_t holds.
    const int64_t sum = static_cast<int64_t>(val) + static_cast<int64_t>(delta);
    if (sum >= INT32_MAX) {
        return INT32_MAX;
    }
    if (sum <= INT32_MIN) {
        // Only reachable with negative inputs, which callers do not pass.
        return INT32_MIN;
    }
    return static_cast<int32_t>(sum);
}
|
||||||
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
@ -2737,7 +2747,7 @@ void RegexCompile::matchStartType() {
|
|||||||
fRXPat->fInitialChars->add(URX_VAL(op));
|
fRXPat->fInitialChars->add(URX_VAL(op));
|
||||||
numInitialStrings += 2;
|
numInitialStrings += 2;
|
||||||
}
|
}
|
||||||
currentLen++;
|
currentLen = safeIncrement(currentLen, 1);
|
||||||
atStart = FALSE;
|
atStart = FALSE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -2750,7 +2760,7 @@ void RegexCompile::matchStartType() {
|
|||||||
fRXPat->fInitialChars->addAll(*s);
|
fRXPat->fInitialChars->addAll(*s);
|
||||||
numInitialStrings += 2;
|
numInitialStrings += 2;
|
||||||
}
|
}
|
||||||
currentLen++;
|
currentLen = safeIncrement(currentLen, 1);
|
||||||
atStart = FALSE;
|
atStart = FALSE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -2787,7 +2797,7 @@ void RegexCompile::matchStartType() {
|
|||||||
fRXPat->fInitialChars->addAll(*s);
|
fRXPat->fInitialChars->addAll(*s);
|
||||||
numInitialStrings += 2;
|
numInitialStrings += 2;
|
||||||
}
|
}
|
||||||
currentLen++;
|
currentLen = safeIncrement(currentLen, 1);
|
||||||
atStart = FALSE;
|
atStart = FALSE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -2802,7 +2812,7 @@ void RegexCompile::matchStartType() {
|
|||||||
fRXPat->fInitialChars->addAll(sc);
|
fRXPat->fInitialChars->addAll(sc);
|
||||||
numInitialStrings += 2;
|
numInitialStrings += 2;
|
||||||
}
|
}
|
||||||
currentLen++;
|
currentLen = safeIncrement(currentLen, 1);
|
||||||
atStart = FALSE;
|
atStart = FALSE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -2819,7 +2829,7 @@ void RegexCompile::matchStartType() {
|
|||||||
fRXPat->fInitialChars->addAll(s);
|
fRXPat->fInitialChars->addAll(s);
|
||||||
numInitialStrings += 2;
|
numInitialStrings += 2;
|
||||||
}
|
}
|
||||||
currentLen++;
|
currentLen = safeIncrement(currentLen, 1);
|
||||||
atStart = FALSE;
|
atStart = FALSE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -2836,7 +2846,7 @@ void RegexCompile::matchStartType() {
|
|||||||
fRXPat->fInitialChars->addAll(s);
|
fRXPat->fInitialChars->addAll(s);
|
||||||
numInitialStrings += 2;
|
numInitialStrings += 2;
|
||||||
}
|
}
|
||||||
currentLen++;
|
currentLen = safeIncrement(currentLen, 1);
|
||||||
atStart = FALSE;
|
atStart = FALSE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -2855,7 +2865,7 @@ void RegexCompile::matchStartType() {
|
|||||||
fRXPat->fInitialChars->addAll(s);
|
fRXPat->fInitialChars->addAll(s);
|
||||||
numInitialStrings += 2;
|
numInitialStrings += 2;
|
||||||
}
|
}
|
||||||
currentLen++;
|
currentLen = safeIncrement(currentLen, 1);
|
||||||
atStart = FALSE;
|
atStart = FALSE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -2879,7 +2889,7 @@ void RegexCompile::matchStartType() {
|
|||||||
}
|
}
|
||||||
numInitialStrings += 2;
|
numInitialStrings += 2;
|
||||||
}
|
}
|
||||||
currentLen++;
|
currentLen = safeIncrement(currentLen, 1);
|
||||||
atStart = FALSE;
|
atStart = FALSE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -2895,7 +2905,7 @@ void RegexCompile::matchStartType() {
|
|||||||
fRXPat->fInitialChars->complement();
|
fRXPat->fInitialChars->complement();
|
||||||
numInitialStrings += 2;
|
numInitialStrings += 2;
|
||||||
}
|
}
|
||||||
currentLen++;
|
currentLen = safeIncrement(currentLen, 1);
|
||||||
atStart = FALSE;
|
atStart = FALSE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -2975,7 +2985,7 @@ void RegexCompile::matchStartType() {
|
|||||||
fRXPat->fInitialStringLen = stringLen;
|
fRXPat->fInitialStringLen = stringLen;
|
||||||
}
|
}
|
||||||
|
|
||||||
currentLen += stringLen;
|
currentLen = safeIncrement(currentLen, stringLen);
|
||||||
atStart = FALSE;
|
atStart = FALSE;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -3000,7 +3010,7 @@ void RegexCompile::matchStartType() {
|
|||||||
fRXPat->fInitialChars->addAll(s);
|
fRXPat->fInitialChars->addAll(s);
|
||||||
numInitialStrings += 2; // Matching on an initial string not possible.
|
numInitialStrings += 2; // Matching on an initial string not possible.
|
||||||
}
|
}
|
||||||
currentLen += stringLen;
|
currentLen = safeIncrement(currentLen, stringLen);
|
||||||
atStart = FALSE;
|
atStart = FALSE;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -3258,7 +3268,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
|
|||||||
case URX_DOTANY_ALL: // . matches one or two.
|
case URX_DOTANY_ALL: // . matches one or two.
|
||||||
case URX_DOTANY:
|
case URX_DOTANY:
|
||||||
case URX_DOTANY_UNIX:
|
case URX_DOTANY_UNIX:
|
||||||
currentLen++;
|
currentLen = safeIncrement(currentLen, 1);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
||||||
@ -3310,7 +3320,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
|
|||||||
{
|
{
|
||||||
loc++;
|
loc++;
|
||||||
int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
|
int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
|
||||||
currentLen += URX_VAL(stringLenOp);
|
currentLen = safeIncrement(currentLen, URX_VAL(stringLenOp));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -3323,7 +3333,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
|
|||||||
// Assume a min length of one for now. A min length of zero causes
|
// Assume a min length of one for now. A min length of zero causes
|
||||||
// optimization failures for a pattern like "string"+
|
// optimization failures for a pattern like "string"+
|
||||||
// currentLen += URX_VAL(stringLenOp);
|
// currentLen += URX_VAL(stringLenOp);
|
||||||
currentLen += 1;
|
currentLen = safeIncrement(currentLen, 1);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -3433,18 +3443,6 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
|
|||||||
return currentLen;
|
return currentLen;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Increment with overflow check: returns val + delta, or INT32_MAX when the
// sum would reach or exceed INT32_MAX.
|
|
||||||
// val and delta will both be positive.
// (The range check below subtracts val from INT32_MAX, so it relies on val
// not being negative.)
|
|
||||||
|
|
||||||
static int32_t safeIncrement(int32_t val, int32_t delta) {
|
|
||||||
// True only when val + delta is strictly below INT32_MAX.
if (INT32_MAX - val > delta) {
|
|
||||||
return val + delta;
|
|
||||||
} else {
|
|
||||||
// Saturate: the sum is INT32_MAX or larger, so pin the result at INT32_MAX.
return INT32_MAX;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// maxMatchLength Calculate the length of the longest string that could
|
// maxMatchLength Calculate the length of the longest string that could
|
||||||
|
10
icu4c/source/test/testdata/regextst.txt
vendored
10
icu4c/source/test/testdata/regextst.txt
vendored
@ -1346,6 +1346,16 @@
|
|||||||
"(?<!aą)spam" "**bą<0>spam</0>**"
|
"(?<!aą)spam" "**bą<0>spam</0>**"
|
||||||
"(?<!ąabc)spam" "**ąabx<0>spam</0>**"
|
"(?<!ąabc)spam" "**ąabx<0>spam</0>**"
|
||||||
|
|
||||||
|
# Bug #12930
|
||||||
|
#
|
||||||
|
# Minimum Match Length computation, int32_t overflow on an empty set in the pattern.
|
||||||
|
# The empty set, with no match possible, has a min match length of INT32_MAX.
|
||||||
|
# Was incremented subsequently. Caused assertion failure on pattern compile.
|
||||||
|
|
||||||
|
"[^\u0000-\U0010ffff]bc?" "bc no match"
|
||||||
|
"[^\u0000-\U0010ffff]?bc?" "<0>bc</0> has a match"
|
||||||
|
|
||||||
|
|
||||||
# Random debugging, Temporary
|
# Random debugging, Temporary
|
||||||
#
|
#
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user