ICU-20292 u_charFromName() prevent code point integer overflow, and limit to at most 8 hex digits
This commit is contained in:
parent
3db38553ad
commit
3b16ae86c6
@ -1526,7 +1526,7 @@ u_charFromName(UCharNameChoice nameChoice,
|
||||
uint32_t i;
|
||||
UChar32 cp = 0;
|
||||
char c0;
|
||||
UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */
|
||||
static constexpr UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return error;
|
||||
@ -1560,39 +1560,45 @@ u_charFromName(UCharNameChoice nameChoice,
|
||||
|
||||
/* try extended names first */
|
||||
if (lower[0] == '<') {
|
||||
if (nameChoice == U_EXTENDED_CHAR_NAME) {
|
||||
if (nameChoice == U_EXTENDED_CHAR_NAME && lower[--i] == '>') {
|
||||
// Parse a string like "<category-HHHH>" where HHHH is a hex code point.
|
||||
if (lower[--i] == '>' && i >= 3 && lower[--i] != '-') {
|
||||
while (i >= 3 && lower[--i] != '-') {}
|
||||
uint32_t limit = i;
|
||||
while (i >= 3 && lower[--i] != '-') {}
|
||||
|
||||
if (i >= 2 && lower[i] == '-') {
|
||||
uint32_t cIdx;
|
||||
// There should be 1 to 8 hex digits.
|
||||
int32_t hexLength = limit - (i + 1);
|
||||
if (i >= 2 && lower[i] == '-' && 1 <= hexLength && hexLength <= 8) {
|
||||
uint32_t cIdx;
|
||||
|
||||
lower[i] = 0;
|
||||
lower[i] = 0;
|
||||
|
||||
for (++i; lower[i] != '>'; ++i) {
|
||||
if (lower[i] >= '0' && lower[i] <= '9') {
|
||||
cp = (cp << 4) + lower[i] - '0';
|
||||
} else if (lower[i] >= 'a' && lower[i] <= 'f') {
|
||||
cp = (cp << 4) + lower[i] - 'a' + 10;
|
||||
} else {
|
||||
*pErrorCode = U_ILLEGAL_CHAR_FOUND;
|
||||
return error;
|
||||
}
|
||||
for (++i; i < limit; ++i) {
|
||||
if (lower[i] >= '0' && lower[i] <= '9') {
|
||||
cp = (cp << 4) + lower[i] - '0';
|
||||
} else if (lower[i] >= 'a' && lower[i] <= 'f') {
|
||||
cp = (cp << 4) + lower[i] - 'a' + 10;
|
||||
} else {
|
||||
*pErrorCode = U_ILLEGAL_CHAR_FOUND;
|
||||
return error;
|
||||
}
|
||||
// Prevent signed-integer overflow and out-of-range code points.
|
||||
if (cp > UCHAR_MAX_VALUE) {
|
||||
*pErrorCode = U_ILLEGAL_CHAR_FOUND;
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
/* Now validate the category name.
|
||||
We could use a binary search, or a trie, if
|
||||
we really wanted to. */
|
||||
/* Now validate the category name.
|
||||
We could use a binary search, or a trie, if
|
||||
we really wanted to. */
|
||||
uint8_t cat = getCharCat(cp);
|
||||
for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
|
||||
|
||||
for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
|
||||
|
||||
if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
|
||||
if (getCharCat(cp) == cIdx) {
|
||||
return cp;
|
||||
}
|
||||
break;
|
||||
if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
|
||||
if (cat == cIdx) {
|
||||
return cp;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2005,29 +2005,62 @@ TestCharNames() {
|
||||
static void
|
||||
TestUCharFromNameUnderflow() {
|
||||
// Ticket #10889: Underflow crash when there is no dash.
|
||||
const char *name="<NO BREAK SPACE>";
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<NO BREAK SPACE>", &errorCode);
|
||||
UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
log_err("u_charFromName(<NO BREAK SPACE>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
|
||||
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
|
||||
name, c, u_errorName(errorCode));
|
||||
}
|
||||
|
||||
// Test related edge cases.
|
||||
name="<-00a0>";
|
||||
errorCode=U_ZERO_ERROR;
|
||||
c=u_charFromName(U_EXTENDED_CHAR_NAME, "<-00a0>", &errorCode);
|
||||
c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
log_err("u_charFromName(<-00a0>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
|
||||
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
|
||||
name, c, u_errorName(errorCode));
|
||||
}
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control->", &errorCode);
|
||||
name="<control->";
|
||||
c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
log_err("u_charFromName(<control->) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
|
||||
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
|
||||
name, c, u_errorName(errorCode));
|
||||
}
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control-111111>", &errorCode);
|
||||
name="<control-111111>";
|
||||
c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
log_err("u_charFromName(<control-111111>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
|
||||
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
|
||||
name, c, u_errorName(errorCode));
|
||||
}
|
||||
|
||||
// ICU-20292: integer overflow
|
||||
errorCode=U_ZERO_ERROR;
|
||||
name="<noncharacter-10010FFFF>";
|
||||
c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
|
||||
name, c, u_errorName(errorCode));
|
||||
}
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
name="<noncharacter-00010FFFF>"; // too many digits even if only leading 0s
|
||||
c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
|
||||
name, c, u_errorName(errorCode));
|
||||
}
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
name="<noncharacter-fFFf>>";
|
||||
c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
|
||||
name, c, u_errorName(errorCode));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1350,6 +1350,12 @@ public final class UCharacterName
|
||||
int startIndex = name.lastIndexOf('-');
|
||||
if (startIndex >= 0) { // We've got a category.
|
||||
startIndex ++;
|
||||
|
||||
// There should be 1 to 8 hex digits.
|
||||
int hexLength = endIndex - startIndex;
|
||||
if (hexLength < 1 || 8 < hexLength) {
|
||||
return -1;
|
||||
}
|
||||
int result = -1;
|
||||
try {
|
||||
result = Integer.parseInt(
|
||||
@ -1359,13 +1365,17 @@ public final class UCharacterName
|
||||
catch (NumberFormatException e) {
|
||||
return -1;
|
||||
}
|
||||
if (result < 0 || 0x10ffff < result) {
|
||||
return -1;
|
||||
}
|
||||
// Now validate the category name. We could use a
|
||||
// binary search, or a trie, if we really wanted to.
|
||||
int charType = getType(result);
|
||||
String type = name.substring(1, startIndex - 1);
|
||||
int length = TYPE_NAMES_.length;
|
||||
for (int i = 0; i < length; ++ i) {
|
||||
if (type.compareTo(TYPE_NAMES_[i]) == 0) {
|
||||
if (getType(result) == i) {
|
||||
if (charType == i) {
|
||||
return result;
|
||||
}
|
||||
break;
|
||||
|
@ -1232,28 +1232,54 @@ public final class UCharacterTest extends TestFmwk
|
||||
@Test
|
||||
public void TestUCharFromNameUnderflow() {
|
||||
// Ticket #10889: Underflow crash when there is no dash.
|
||||
int c = UCharacter.getCharFromExtendedName("<NO BREAK SPACE>");
|
||||
String name = "<NO BREAK SPACE>";
|
||||
int c = UCharacter.getCharFromExtendedName(name);
|
||||
if(c >= 0) {
|
||||
errln("UCharacter.getCharFromExtendedName(<NO BREAK SPACE>) = U+" + hex(c) +
|
||||
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
|
||||
" but should fail (-1)");
|
||||
}
|
||||
|
||||
// Test related edge cases.
|
||||
c = UCharacter.getCharFromExtendedName("<-00a0>");
|
||||
name = "<-00a0>";
|
||||
c = UCharacter.getCharFromExtendedName(name);
|
||||
if(c >= 0) {
|
||||
errln("UCharacter.getCharFromExtendedName(<-00a0>) = U+" + hex(c) +
|
||||
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
|
||||
" but should fail (-1)");
|
||||
}
|
||||
|
||||
c = UCharacter.getCharFromExtendedName("<control->");
|
||||
name = "<control->";
|
||||
c = UCharacter.getCharFromExtendedName(name);
|
||||
if(c >= 0) {
|
||||
errln("UCharacter.getCharFromExtendedName(<control->) = U+" + hex(c) +
|
||||
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
|
||||
" but should fail (-1)");
|
||||
}
|
||||
|
||||
c = UCharacter.getCharFromExtendedName("<control-111111>");
|
||||
name = "<control-111111>";
|
||||
c = UCharacter.getCharFromExtendedName(name);
|
||||
if(c >= 0) {
|
||||
errln("UCharacter.getCharFromExtendedName(<control-111111>) = U+" + hex(c) +
|
||||
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
|
||||
" but should fail (-1)");
|
||||
}
|
||||
|
||||
// ICU-20292: integer overflow
|
||||
name = "<noncharacter-10010FFFF>";
|
||||
c = UCharacter.getCharFromExtendedName(name);
|
||||
if(c >= 0) {
|
||||
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
|
||||
" but should fail (-1)");
|
||||
}
|
||||
|
||||
name = "<noncharacter-00010FFFF>"; // too many digits even if only leading 0s
|
||||
c = UCharacter.getCharFromExtendedName(name);
|
||||
if(c >= 0) {
|
||||
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
|
||||
" but should fail (-1)");
|
||||
}
|
||||
|
||||
name = "<noncharacter-fFFf>>";
|
||||
c = UCharacter.getCharFromExtendedName(name);
|
||||
if(c >= 0) {
|
||||
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
|
||||
" but should fail (-1)");
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user