ICU-20292 u_charFromName() prevent code point integer overflow, and limit to at most 8 hex digits
This commit is contained in:
parent
3db38553ad
commit
3b16ae86c6
@ -1526,7 +1526,7 @@ u_charFromName(UCharNameChoice nameChoice,
|
|||||||
uint32_t i;
|
uint32_t i;
|
||||||
UChar32 cp = 0;
|
UChar32 cp = 0;
|
||||||
char c0;
|
char c0;
|
||||||
UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */
|
static constexpr UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */
|
||||||
|
|
||||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||||
return error;
|
return error;
|
||||||
@ -1560,39 +1560,45 @@ u_charFromName(UCharNameChoice nameChoice,
|
|||||||
|
|
||||||
/* try extended names first */
|
/* try extended names first */
|
||||||
if (lower[0] == '<') {
|
if (lower[0] == '<') {
|
||||||
if (nameChoice == U_EXTENDED_CHAR_NAME) {
|
if (nameChoice == U_EXTENDED_CHAR_NAME && lower[--i] == '>') {
|
||||||
// Parse a string like "<category-HHHH>" where HHHH is a hex code point.
|
// Parse a string like "<category-HHHH>" where HHHH is a hex code point.
|
||||||
if (lower[--i] == '>' && i >= 3 && lower[--i] != '-') {
|
uint32_t limit = i;
|
||||||
while (i >= 3 && lower[--i] != '-') {}
|
while (i >= 3 && lower[--i] != '-') {}
|
||||||
|
|
||||||
if (i >= 2 && lower[i] == '-') {
|
// There should be 1 to 8 hex digits.
|
||||||
uint32_t cIdx;
|
int32_t hexLength = limit - (i + 1);
|
||||||
|
if (i >= 2 && lower[i] == '-' && 1 <= hexLength && hexLength <= 8) {
|
||||||
|
uint32_t cIdx;
|
||||||
|
|
||||||
lower[i] = 0;
|
lower[i] = 0;
|
||||||
|
|
||||||
for (++i; lower[i] != '>'; ++i) {
|
for (++i; i < limit; ++i) {
|
||||||
if (lower[i] >= '0' && lower[i] <= '9') {
|
if (lower[i] >= '0' && lower[i] <= '9') {
|
||||||
cp = (cp << 4) + lower[i] - '0';
|
cp = (cp << 4) + lower[i] - '0';
|
||||||
} else if (lower[i] >= 'a' && lower[i] <= 'f') {
|
} else if (lower[i] >= 'a' && lower[i] <= 'f') {
|
||||||
cp = (cp << 4) + lower[i] - 'a' + 10;
|
cp = (cp << 4) + lower[i] - 'a' + 10;
|
||||||
} else {
|
} else {
|
||||||
*pErrorCode = U_ILLEGAL_CHAR_FOUND;
|
*pErrorCode = U_ILLEGAL_CHAR_FOUND;
|
||||||
return error;
|
return error;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
// Prevent signed-integer overflow and out-of-range code points.
|
||||||
|
if (cp > UCHAR_MAX_VALUE) {
|
||||||
|
*pErrorCode = U_ILLEGAL_CHAR_FOUND;
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Now validate the category name.
|
/* Now validate the category name.
|
||||||
We could use a binary search, or a trie, if
|
We could use a binary search, or a trie, if
|
||||||
we really wanted to. */
|
we really wanted to. */
|
||||||
|
uint8_t cat = getCharCat(cp);
|
||||||
|
for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
|
||||||
|
|
||||||
for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
|
if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
|
||||||
|
if (cat == cIdx) {
|
||||||
if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
|
return cp;
|
||||||
if (getCharCat(cp) == cIdx) {
|
|
||||||
return cp;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2005,29 +2005,62 @@ TestCharNames() {
|
|||||||
static void
|
static void
|
||||||
TestUCharFromNameUnderflow() {
|
TestUCharFromNameUnderflow() {
|
||||||
// Ticket #10889: Underflow crash when there is no dash.
|
// Ticket #10889: Underflow crash when there is no dash.
|
||||||
|
const char *name="<NO BREAK SPACE>";
|
||||||
UErrorCode errorCode=U_ZERO_ERROR;
|
UErrorCode errorCode=U_ZERO_ERROR;
|
||||||
UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<NO BREAK SPACE>", &errorCode);
|
UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
|
||||||
if(U_SUCCESS(errorCode)) {
|
if(U_SUCCESS(errorCode)) {
|
||||||
log_err("u_charFromName(<NO BREAK SPACE>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
|
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
|
||||||
|
name, c, u_errorName(errorCode));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test related edge cases.
|
// Test related edge cases.
|
||||||
|
name="<-00a0>";
|
||||||
errorCode=U_ZERO_ERROR;
|
errorCode=U_ZERO_ERROR;
|
||||||
c=u_charFromName(U_EXTENDED_CHAR_NAME, "<-00a0>", &errorCode);
|
c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
|
||||||
if(U_SUCCESS(errorCode)) {
|
if(U_SUCCESS(errorCode)) {
|
||||||
log_err("u_charFromName(<-00a0>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
|
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
|
||||||
|
name, c, u_errorName(errorCode));
|
||||||
}
|
}
|
||||||
|
|
||||||
errorCode=U_ZERO_ERROR;
|
errorCode=U_ZERO_ERROR;
|
||||||
c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control->", &errorCode);
|
name="<control->";
|
||||||
|
c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
|
||||||
if(U_SUCCESS(errorCode)) {
|
if(U_SUCCESS(errorCode)) {
|
||||||
log_err("u_charFromName(<control->) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
|
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
|
||||||
|
name, c, u_errorName(errorCode));
|
||||||
}
|
}
|
||||||
|
|
||||||
errorCode=U_ZERO_ERROR;
|
errorCode=U_ZERO_ERROR;
|
||||||
c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control-111111>", &errorCode);
|
name="<control-111111>";
|
||||||
|
c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
|
||||||
if(U_SUCCESS(errorCode)) {
|
if(U_SUCCESS(errorCode)) {
|
||||||
log_err("u_charFromName(<control-111111>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
|
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
|
||||||
|
name, c, u_errorName(errorCode));
|
||||||
|
}
|
||||||
|
|
||||||
|
// ICU-20292: integer overflow
|
||||||
|
errorCode=U_ZERO_ERROR;
|
||||||
|
name="<noncharacter-10010FFFF>";
|
||||||
|
c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
|
||||||
|
if(U_SUCCESS(errorCode)) {
|
||||||
|
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
|
||||||
|
name, c, u_errorName(errorCode));
|
||||||
|
}
|
||||||
|
|
||||||
|
errorCode=U_ZERO_ERROR;
|
||||||
|
name="<noncharacter-00010FFFF>"; // too many digits even if only leading 0s
|
||||||
|
c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
|
||||||
|
if(U_SUCCESS(errorCode)) {
|
||||||
|
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
|
||||||
|
name, c, u_errorName(errorCode));
|
||||||
|
}
|
||||||
|
|
||||||
|
errorCode=U_ZERO_ERROR;
|
||||||
|
name="<noncharacter-fFFf>>";
|
||||||
|
c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
|
||||||
|
if(U_SUCCESS(errorCode)) {
|
||||||
|
log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
|
||||||
|
name, c, u_errorName(errorCode));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1350,6 +1350,12 @@ public final class UCharacterName
|
|||||||
int startIndex = name.lastIndexOf('-');
|
int startIndex = name.lastIndexOf('-');
|
||||||
if (startIndex >= 0) { // We've got a category.
|
if (startIndex >= 0) { // We've got a category.
|
||||||
startIndex ++;
|
startIndex ++;
|
||||||
|
|
||||||
|
// There should be 1 to 8 hex digits.
|
||||||
|
int hexLength = endIndex - startIndex;
|
||||||
|
if (hexLength < 1 || 8 < hexLength) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
int result = -1;
|
int result = -1;
|
||||||
try {
|
try {
|
||||||
result = Integer.parseInt(
|
result = Integer.parseInt(
|
||||||
@ -1359,13 +1365,17 @@ public final class UCharacterName
|
|||||||
catch (NumberFormatException e) {
|
catch (NumberFormatException e) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
if (result < 0 || 0x10ffff < result) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
// Now validate the category name. We could use a
|
// Now validate the category name. We could use a
|
||||||
// binary search, or a trie, if we really wanted to.
|
// binary search, or a trie, if we really wanted to.
|
||||||
|
int charType = getType(result);
|
||||||
String type = name.substring(1, startIndex - 1);
|
String type = name.substring(1, startIndex - 1);
|
||||||
int length = TYPE_NAMES_.length;
|
int length = TYPE_NAMES_.length;
|
||||||
for (int i = 0; i < length; ++ i) {
|
for (int i = 0; i < length; ++ i) {
|
||||||
if (type.compareTo(TYPE_NAMES_[i]) == 0) {
|
if (type.compareTo(TYPE_NAMES_[i]) == 0) {
|
||||||
if (getType(result) == i) {
|
if (charType == i) {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -1232,28 +1232,54 @@ public final class UCharacterTest extends TestFmwk
|
|||||||
@Test
|
@Test
|
||||||
public void TestUCharFromNameUnderflow() {
|
public void TestUCharFromNameUnderflow() {
|
||||||
// Ticket #10889: Underflow crash when there is no dash.
|
// Ticket #10889: Underflow crash when there is no dash.
|
||||||
int c = UCharacter.getCharFromExtendedName("<NO BREAK SPACE>");
|
String name = "<NO BREAK SPACE>";
|
||||||
|
int c = UCharacter.getCharFromExtendedName(name);
|
||||||
if(c >= 0) {
|
if(c >= 0) {
|
||||||
errln("UCharacter.getCharFromExtendedName(<NO BREAK SPACE>) = U+" + hex(c) +
|
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
|
||||||
" but should fail (-1)");
|
" but should fail (-1)");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test related edge cases.
|
// Test related edge cases.
|
||||||
c = UCharacter.getCharFromExtendedName("<-00a0>");
|
name = "<-00a0>";
|
||||||
|
c = UCharacter.getCharFromExtendedName(name);
|
||||||
if(c >= 0) {
|
if(c >= 0) {
|
||||||
errln("UCharacter.getCharFromExtendedName(<-00a0>) = U+" + hex(c) +
|
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
|
||||||
" but should fail (-1)");
|
" but should fail (-1)");
|
||||||
}
|
}
|
||||||
|
|
||||||
c = UCharacter.getCharFromExtendedName("<control->");
|
name = "<control->";
|
||||||
|
c = UCharacter.getCharFromExtendedName(name);
|
||||||
if(c >= 0) {
|
if(c >= 0) {
|
||||||
errln("UCharacter.getCharFromExtendedName(<control->) = U+" + hex(c) +
|
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
|
||||||
" but should fail (-1)");
|
" but should fail (-1)");
|
||||||
}
|
}
|
||||||
|
|
||||||
c = UCharacter.getCharFromExtendedName("<control-111111>");
|
name = "<control-111111>";
|
||||||
|
c = UCharacter.getCharFromExtendedName(name);
|
||||||
if(c >= 0) {
|
if(c >= 0) {
|
||||||
errln("UCharacter.getCharFromExtendedName(<control-111111>) = U+" + hex(c) +
|
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
|
||||||
|
" but should fail (-1)");
|
||||||
|
}
|
||||||
|
|
||||||
|
// ICU-20292: integer overflow
|
||||||
|
name = "<noncharacter-10010FFFF>";
|
||||||
|
c = UCharacter.getCharFromExtendedName(name);
|
||||||
|
if(c >= 0) {
|
||||||
|
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
|
||||||
|
" but should fail (-1)");
|
||||||
|
}
|
||||||
|
|
||||||
|
name = "<noncharacter-00010FFFF>"; // too many digits even if only leading 0s
|
||||||
|
c = UCharacter.getCharFromExtendedName(name);
|
||||||
|
if(c >= 0) {
|
||||||
|
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
|
||||||
|
" but should fail (-1)");
|
||||||
|
}
|
||||||
|
|
||||||
|
name = "<noncharacter-fFFf>>";
|
||||||
|
c = UCharacter.getCharFromExtendedName(name);
|
||||||
|
if(c >= 0) {
|
||||||
|
errln("UCharacter.getCharFromExtendedName(" + name + ") = U+" + hex(c) +
|
||||||
" but should fail (-1)");
|
" but should fail (-1)");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user