ICU-20292 u_charFromName() prevent code point integer overflow, and limit to at most 8 hex digits

This commit is contained in:
Markus Scherer 2018-12-11 22:05:08 -08:00
parent 3db38553ad
commit 3b16ae86c6
4 changed files with 118 additions and 43 deletions

View File

@ -1526,7 +1526,7 @@ u_charFromName(UCharNameChoice nameChoice,
uint32_t i;
UChar32 cp = 0;
char c0;
UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */
static constexpr UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return error;
@ -1560,39 +1560,45 @@ u_charFromName(UCharNameChoice nameChoice,
/* try extended names first */
if (lower[0] == '<') {
if (nameChoice == U_EXTENDED_CHAR_NAME) {
if (nameChoice == U_EXTENDED_CHAR_NAME && lower[--i] == '>') {
// Parse a string like "<category-HHHH>" where HHHH is a hex code point.
if (lower[--i] == '>' && i >= 3 && lower[--i] != '-') {
while (i >= 3 && lower[--i] != '-') {}
uint32_t limit = i;
while (i >= 3 && lower[--i] != '-') {}
if (i >= 2 && lower[i] == '-') {
uint32_t cIdx;
// There should be 1 to 8 hex digits.
int32_t hexLength = limit - (i + 1);
if (i >= 2 && lower[i] == '-' && 1 <= hexLength && hexLength <= 8) {
uint32_t cIdx;
lower[i] = 0;
lower[i] = 0;
for (++i; lower[i] != '>'; ++i) {
if (lower[i] >= '0' && lower[i] <= '9') {
cp = (cp << 4) + lower[i] - '0';
} else if (lower[i] >= 'a' && lower[i] <= 'f') {
cp = (cp << 4) + lower[i] - 'a' + 10;
} else {
*pErrorCode = U_ILLEGAL_CHAR_FOUND;
return error;
}
for (++i; i < limit; ++i) {
if (lower[i] >= '0' && lower[i] <= '9') {
cp = (cp << 4) + lower[i] - '0';
} else if (lower[i] >= 'a' && lower[i] <= 'f') {
cp = (cp << 4) + lower[i] - 'a' + 10;
} else {
*pErrorCode = U_ILLEGAL_CHAR_FOUND;
return error;
}
// Prevent signed-integer overflow and out-of-range code points.
if (cp > UCHAR_MAX_VALUE) {
*pErrorCode = U_ILLEGAL_CHAR_FOUND;
return error;
}
}
/* Now validate the category name.
We could use a binary search, or a trie, if
we really wanted to. */
/* Now validate the category name.
We could use a binary search, or a trie, if
we really wanted to. */
uint8_t cat = getCharCat(cp);
for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
if (getCharCat(cp) == cIdx) {
return cp;
}
break;
if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
if (cat == cIdx) {
return cp;
}
break;
}
}
}

View File

@ -2005,29 +2005,62 @@ TestCharNames() {
static void
TestUCharFromNameUnderflow() {
// Ticket #10889: Underflow crash when there is no dash.
const char *name="<NO BREAK SPACE>";
UErrorCode errorCode=U_ZERO_ERROR;
UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<NO BREAK SPACE>", &errorCode);
UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
if(U_SUCCESS(errorCode)) {
log_err("u_charFromName(<NO BREAK SPACE>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
name, c, u_errorName(errorCode));
}
// Test related edge cases.
name="<-00a0>";
errorCode=U_ZERO_ERROR;
c=u_charFromName(U_EXTENDED_CHAR_NAME, "<-00a0>", &errorCode);
c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
if(U_SUCCESS(errorCode)) {
log_err("u_charFromName(<-00a0>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
name, c, u_errorName(errorCode));
}
errorCode=U_ZERO_ERROR;
c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control->", &errorCode);
name="<control->";
c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
if(U_SUCCESS(errorCode)) {
log_err("u_charFromName(<control->) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
name, c, u_errorName(errorCode));
}
errorCode=U_ZERO_ERROR;
c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control-111111>", &errorCode);
name="<control-111111>";
c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
if(U_SUCCESS(errorCode)) {
log_err("u_charFromName(<control-111111>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
name, c, u_errorName(errorCode));
}
// ICU-20292: integer overflow
errorCode=U_ZERO_ERROR;
name="<noncharacter-10010FFFF>";
c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
if(U_SUCCESS(errorCode)) {
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
name, c, u_errorName(errorCode));
}
errorCode=U_ZERO_ERROR;
name="<noncharacter-00010FFFF>"; // too many digits even if only leading 0s
c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
if(U_SUCCESS(errorCode)) {
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
name, c, u_errorName(errorCode));
}
errorCode=U_ZERO_ERROR;
name="<noncharacter-fFFf>>";
c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
if(U_SUCCESS(errorCode)) {
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
name, c, u_errorName(errorCode));
}
}

View File

@ -1350,6 +1350,12 @@ public final class UCharacterName
int startIndex = name.lastIndexOf('-');
if (startIndex >= 0) { // We've got a category.
startIndex ++;
// There should be 1 to 8 hex digits.
int hexLength = endIndex - startIndex;
if (hexLength < 1 || 8 < hexLength) {
return -1;
}
int result = -1;
try {
result = Integer.parseInt(
@ -1359,13 +1365,17 @@ public final class UCharacterName
catch (NumberFormatException e) {
return -1;
}
if (result < 0 || 0x10ffff < result) {
return -1;
}
// Now validate the category name. We could use a
// binary search, or a trie, if we really wanted to.
int charType = getType(result);
String type = name.substring(1, startIndex - 1);
int length = TYPE_NAMES_.length;
for (int i = 0; i < length; ++ i) {
if (type.compareTo(TYPE_NAMES_[i]) == 0) {
if (getType(result) == i) {
if (charType == i) {
return result;
}
break;

View File

@ -1232,28 +1232,54 @@ public final class UCharacterTest extends TestFmwk
@Test
public void TestUCharFromNameUnderflow() {
// Ticket #10889: Underflow crash when there is no dash.
int c = UCharacter.getCharFromExtendedName("<NO BREAK SPACE>");
String name = "<NO BREAK SPACE>";
int c = UCharacter.getCharFromExtendedName(name);
if(c >= 0) {
errln("UCharacter.getCharFromExtendedName(<NO BREAK SPACE>) = U+" + hex(c) +
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
" but should fail (-1)");
}
// Test related edge cases.
c = UCharacter.getCharFromExtendedName("<-00a0>");
name = "<-00a0>";
c = UCharacter.getCharFromExtendedName(name);
if(c >= 0) {
errln("UCharacter.getCharFromExtendedName(<-00a0>) = U+" + hex(c) +
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
" but should fail (-1)");
}
c = UCharacter.getCharFromExtendedName("<control->");
name = "<control->";
c = UCharacter.getCharFromExtendedName(name);
if(c >= 0) {
errln("UCharacter.getCharFromExtendedName(<control->) = U+" + hex(c) +
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
" but should fail (-1)");
}
c = UCharacter.getCharFromExtendedName("<control-111111>");
name = "<control-111111>";
c = UCharacter.getCharFromExtendedName(name);
if(c >= 0) {
errln("UCharacter.getCharFromExtendedName(<control-111111>) = U+" + hex(c) +
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
" but should fail (-1)");
}
// ICU-20292: integer overflow
name = "<noncharacter-10010FFFF>";
c = UCharacter.getCharFromExtendedName(name);
if(c >= 0) {
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
" but should fail (-1)");
}
name = "<noncharacter-00010FFFF>"; // too many digits even if only leading 0s
c = UCharacter.getCharFromExtendedName(name);
if(c >= 0) {
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
" but should fail (-1)");
}
name = "<noncharacter-fFFf>>";
c = UCharacter.getCharFromExtendedName(name);
if(c >= 0) {
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
" but should fail (-1)");
}
}