ICU-21212 full range check for Punycode digits
This commit is contained in:
parent
e3f2c0dd70
commit
e19d12997b
@ -107,36 +107,26 @@ digitToBasic(int32_t digit, UBool uppercase) {
|
||||
}
|
||||
|
||||
/**
|
||||
* basicToDigit[] contains the numeric value of a basic code
|
||||
* point (for use in representing integers) in the range 0 to
|
||||
* BASE-1, or -1 if b does not represent a value.
|
||||
* @return the numeric value of a basic code point (for use in representing integers)
|
||||
* in the range 0 to BASE-1, or a negative value if cp is invalid.
|
||||
*/
|
||||
// Lookup table from a byte value (index 0..255) to its Punycode digit value:
// '0'..'9' (0x30..0x39) hold 26..35, 'A'..'Z' (0x41..0x5A) and
// 'a'..'z' (0x61..0x7A) hold 0..25, and every other entry is -1.
// The table has only 256 entries; indexing it with a value truncated to
// 8 bits makes larger code units alias the ASCII entries.
static const int8_t
|
||||
basicToDigit[256]={
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
|
||||
};
|
||||
/**
 * Decodes a single Punycode digit.
 *
 * The whole int32_t range is examined, so a caller can pass a UTF-16 code
 * unit directly without narrowing it first.
 *
 * @param cp the code point (or code unit) to decode
 * @return 0..25 for 'A'..'Z' and 'a'..'z', 26..35 for '0'..'9',
 *         or a negative value if cp is not a Punycode digit
 */
static int32_t decodeDigit(int32_t cp) {
    if(cp<u'0' || cp>u'z') {
        return -1;  // outside the span '0'..'z'
    }
    if(cp<=u'9') {
        return cp-u'0'+26;  // 0..9 -> 26..35
    }
    if(cp<=u'Z') {
        // ':'..'@' sit just below 'A', so the difference is negative (invalid).
        return cp-u'A';  // A-Z -> 0..25
    }
    // '['..'`' sit just below 'a', so the difference is negative (invalid).
    return cp-u'a';  // a..z -> 0..25
}
|
||||
|
||||
static inline char
|
||||
asciiCaseMap(char b, UBool uppercase) {
|
||||
@ -455,7 +445,7 @@ u_strFromPunycode(const UChar *src, int32_t srcLength,
|
||||
return 0;
|
||||
}
|
||||
|
||||
digit=basicToDigit[(uint8_t)src[in++]];
|
||||
digit=decodeDigit(src[in++]);
|
||||
if(digit<0) {
|
||||
*pErrorCode=U_INVALID_CHAR_FOUND;
|
||||
return 0;
|
||||
|
@ -39,6 +39,7 @@ public:
|
||||
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
|
||||
void TestAPI();
|
||||
void TestNotSTD3();
|
||||
void TestInvalidPunycodeDigits();
|
||||
void TestSomeCases();
|
||||
void IdnaTest();
|
||||
|
||||
@ -82,6 +83,7 @@ void UTS46Test::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
|
||||
TESTCASE_AUTO_BEGIN;
|
||||
TESTCASE_AUTO(TestAPI);
|
||||
TESTCASE_AUTO(TestNotSTD3);
|
||||
TESTCASE_AUTO(TestInvalidPunycodeDigits);
|
||||
TESTCASE_AUTO(TestSomeCases);
|
||||
TESTCASE_AUTO(IdnaTest);
|
||||
TESTCASE_AUTO_END;
|
||||
@ -245,6 +247,71 @@ void UTS46Test::TestNotSTD3() {
|
||||
}
|
||||
}
|
||||
|
||||
void UTS46Test::TestInvalidPunycodeDigits() {
|
||||
IcuTestErrorCode errorCode(*this, "TestInvalidPunycodeDigits()");
|
||||
LocalPointer<IDNA> idna(IDNA::createUTS46Instance(0, errorCode));
|
||||
if(errorCode.isFailure()) {
|
||||
return;
|
||||
}
|
||||
UnicodeString result;
|
||||
{
|
||||
IDNAInfo info;
|
||||
idna->nameToUnicode(u"xn--pleP", result, info, errorCode); // P=U+0050
|
||||
assertFalse("nameToUnicode() should succeed",
|
||||
(info.getErrors()&UIDNA_ERROR_PUNYCODE)!=0);
|
||||
assertEquals("normal result", u"ᔼᔴ", result);
|
||||
}
|
||||
{
|
||||
IDNAInfo info;
|
||||
idna->nameToUnicode(u"xn--pleѐ", result, info, errorCode); // ends with non-ASCII U+0450
|
||||
assertTrue("nameToUnicode() should detect non-ASCII",
|
||||
(info.getErrors()&UIDNA_ERROR_PUNYCODE)!=0);
|
||||
}
|
||||
|
||||
// Test with ASCII characters adjacent to LDH.
|
||||
{
|
||||
IDNAInfo info;
|
||||
idna->nameToUnicode(u"xn--ple/", result, info, errorCode);
|
||||
assertTrue("nameToUnicode() should detect '/'",
|
||||
(info.getErrors()&UIDNA_ERROR_PUNYCODE)!=0);
|
||||
}
|
||||
|
||||
{
|
||||
IDNAInfo info;
|
||||
idna->nameToUnicode(u"xn--ple:", result, info, errorCode);
|
||||
assertTrue("nameToUnicode() should detect ':'",
|
||||
(info.getErrors()&UIDNA_ERROR_PUNYCODE)!=0);
|
||||
}
|
||||
|
||||
{
|
||||
IDNAInfo info;
|
||||
idna->nameToUnicode(u"xn--ple@", result, info, errorCode);
|
||||
assertTrue("nameToUnicode() should detect '@'",
|
||||
(info.getErrors()&UIDNA_ERROR_PUNYCODE)!=0);
|
||||
}
|
||||
|
||||
{
|
||||
IDNAInfo info;
|
||||
idna->nameToUnicode(u"xn--ple[", result, info, errorCode);
|
||||
assertTrue("nameToUnicode() should detect '['",
|
||||
(info.getErrors()&UIDNA_ERROR_PUNYCODE)!=0);
|
||||
}
|
||||
|
||||
{
|
||||
IDNAInfo info;
|
||||
idna->nameToUnicode(u"xn--ple`", result, info, errorCode);
|
||||
assertTrue("nameToUnicode() should detect '`'",
|
||||
(info.getErrors()&UIDNA_ERROR_PUNYCODE)!=0);
|
||||
}
|
||||
|
||||
{
|
||||
IDNAInfo info;
|
||||
idna->nameToUnicode(u"xn--ple{", result, info, errorCode);
|
||||
assertTrue("nameToUnicode() should detect '{'",
|
||||
(info.getErrors()&UIDNA_ERROR_PUNYCODE)!=0);
|
||||
}
|
||||
}
|
||||
|
||||
struct TestCase {
|
||||
// Input string and options string (Nontransitional/Transitional/Both).
|
||||
const char *s, *o;
|
||||
|
@ -13,7 +13,7 @@ import com.ibm.icu.text.StringPrepParseException;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
|
||||
/**
|
||||
* Ported code from ICU punycode.c
|
||||
* Ported code from ICU punycode.c
|
||||
* @author ram
|
||||
*/
|
||||
public final class Punycode {
|
||||
@ -26,17 +26,17 @@ public final class Punycode {
|
||||
private static final int DAMP = 700;
|
||||
private static final int INITIAL_BIAS = 72;
|
||||
private static final int INITIAL_N = 0x80;
|
||||
|
||||
|
||||
/* "Basic" Unicode/ASCII code points */
|
||||
private static final char HYPHEN = 0x2d;
|
||||
private static final char DELIMITER = HYPHEN;
|
||||
|
||||
|
||||
private static final int ZERO = 0x30;
|
||||
//private static final int NINE = 0x39;
|
||||
|
||||
|
||||
private static final int SMALL_A = 0x61;
|
||||
private static final int SMALL_Z = 0x7a;
|
||||
|
||||
|
||||
private static final int CAPITAL_A = 0x41;
|
||||
private static final int CAPITAL_Z = 0x5a;
|
||||
|
||||
@ -53,39 +53,30 @@ public final class Punycode {
|
||||
delta/=(BASE-TMIN);
|
||||
}
|
||||
|
||||
return count+(((BASE-TMIN+1)*delta)/(delta+SKEW));
|
||||
return count+(((BASE-TMIN+1)*delta)/(delta+SKEW));
|
||||
}
|
||||
|
||||
/**
|
||||
* basicToDigit[] contains the numeric value of a basic code
|
||||
* point (for use in representing integers) in the range 0 to
|
||||
* BASE-1, or -1 if b does not represent a value.
|
||||
* @return the numeric value of a basic code point (for use in representing integers)
|
||||
* in the range 0 to BASE-1, or a negative value if cp is invalid.
|
||||
*/
|
||||
// Lookup table from a byte value (index 0..255) to its Punycode digit value:
// '0'..'9' (0x30..0x39) hold 26..35, 'A'..'Z' (0x41..0x5A) and
// 'a'..'z' (0x61..0x7A) hold 0..25, and every other entry is -1.
// The table has only 256 entries; indexing it with a value masked to
// 8 bits makes larger chars alias the ASCII entries.
static final int[] basicToDigit= new int[]{
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
|
||||
};
|
||||
private static final int decodeDigit(int cp) {
|
||||
if(cp<='Z') {
|
||||
if(cp<='9') {
|
||||
if(cp<'0') {
|
||||
return -1;
|
||||
} else {
|
||||
return cp-'0'+26; // 0..9 -> 26..35
|
||||
}
|
||||
} else {
|
||||
return cp-'A'; // A-Z -> 0..25
|
||||
}
|
||||
} else if(cp<='z') {
|
||||
return cp-'a'; // a..z -> 0..25
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
///CLOVER:OFF
|
||||
private static char asciiCaseMap(char b, boolean uppercase) {
|
||||
@ -99,7 +90,7 @@ public final class Punycode {
|
||||
}
|
||||
}
|
||||
return b;
|
||||
}
|
||||
}
|
||||
///CLOVER:ON
|
||||
/**
|
||||
* digitToBasic() returns the basic code point whose value
|
||||
@ -124,7 +115,7 @@ public final class Punycode {
|
||||
* Converts Unicode to Punycode.
|
||||
* The input string must not contain single, unpaired surrogates.
|
||||
* The output will be represented as an array of ASCII code points.
|
||||
*
|
||||
*
|
||||
* @param src The source of the String Buffer passed.
|
||||
* @param caseFlags The boolean array of case flags.
|
||||
* @return An array of ASCII code points.
|
||||
@ -140,7 +131,7 @@ public final class Punycode {
|
||||
* convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit):
|
||||
*/
|
||||
srcCPCount=0;
|
||||
|
||||
|
||||
for(j=0; j<srcLength; ++j) {
|
||||
c=src.charAt(j);
|
||||
if(isBasic(c)) {
|
||||
@ -152,7 +143,7 @@ public final class Punycode {
|
||||
n|=c;
|
||||
} else if(UTF16.isLeadSurrogate(c) && (j+1)<srcLength && UTF16.isTrailSurrogate(c2=src.charAt(j+1))) {
|
||||
++j;
|
||||
|
||||
|
||||
n|=UCharacter.getCodePoint(c, c2);
|
||||
} else {
|
||||
/* error: unmatched surrogate */
|
||||
@ -211,7 +202,7 @@ public final class Punycode {
|
||||
/* Represent delta as a generalized variable-length integer: */
|
||||
for(q=delta, k=BASE; /* no condition */; k+=BASE) {
|
||||
|
||||
/** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt
|
||||
/** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt
|
||||
|
||||
t=k-bias;
|
||||
if(t<TMIN) {
|
||||
@ -220,7 +211,7 @@ public final class Punycode {
|
||||
t=TMAX;
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
t=k-bias;
|
||||
if(t<TMIN) {
|
||||
t=TMIN;
|
||||
@ -249,7 +240,7 @@ public final class Punycode {
|
||||
|
||||
return dest;
|
||||
}
|
||||
|
||||
|
||||
private static boolean isBasic(int ch){
|
||||
return (ch < INITIAL_N);
|
||||
}
|
||||
@ -264,12 +255,12 @@ public final class Punycode {
|
||||
/**
|
||||
* Converts Punycode to Unicode.
|
||||
* The Unicode string will be at most as long as the Punycode string.
|
||||
*
|
||||
*
|
||||
* @param src The source of the string buffer being passed.
|
||||
* @param caseFlags The array of boolean case flags.
|
||||
* @return StringBuilder string.
|
||||
*/
|
||||
public static StringBuilder decode(CharSequence src, boolean[] caseFlags)
|
||||
public static StringBuilder decode(CharSequence src, boolean[] caseFlags)
|
||||
throws StringPrepParseException{
|
||||
int srcLength = src.length();
|
||||
StringBuilder dest = new StringBuilder(src.length());
|
||||
@ -330,7 +321,7 @@ public final class Punycode {
|
||||
throw new StringPrepParseException("Illegal char found", StringPrepParseException.ILLEGAL_CHAR_FOUND);
|
||||
}
|
||||
|
||||
digit=basicToDigit[src.charAt(in++) & 0xFF];
|
||||
digit=decodeDigit(src.charAt(in++));
|
||||
if(digit<0) {
|
||||
throw new StringPrepParseException("Invalid char found", StringPrepParseException.INVALID_CHAR_FOUND);
|
||||
}
|
||||
|
@ -105,9 +105,56 @@ public class UTS46Test extends TestFmwk {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestInvalidPunycodeDigits() {
|
||||
IDNA idna=IDNA.getUTS46Instance(0);
|
||||
StringBuilder result=new StringBuilder();
|
||||
IDNA.Info info=new IDNA.Info();
|
||||
idna.nameToUnicode("xn--pleP", result, info); // P=U+0050
|
||||
assertFalse("nameToUnicode() should succeed",
|
||||
info.getErrors().contains(IDNA.Error.PUNYCODE));
|
||||
assertEquals("normal result", "ᔼᔴ", result.toString());
|
||||
|
||||
info=new IDNA.Info();
|
||||
idna.nameToUnicode("xn--pleѐ", result, info); // ends with non-ASCII U+0450
|
||||
assertTrue("nameToUnicode() should detect non-ASCII",
|
||||
info.getErrors().contains(IDNA.Error.PUNYCODE));
|
||||
|
||||
// Test with ASCII characters adjacent to LDH.
|
||||
info=new IDNA.Info();
|
||||
idna.nameToUnicode("xn--PLE/", result, info);
|
||||
assertTrue("nameToUnicode() should detect '/'",
|
||||
info.getErrors().contains(IDNA.Error.PUNYCODE));
|
||||
|
||||
info=new IDNA.Info();
|
||||
idna.nameToUnicode("xn--ple:", result, info);
|
||||
assertTrue("nameToUnicode() should detect ':'",
|
||||
info.getErrors().contains(IDNA.Error.PUNYCODE));
|
||||
|
||||
info=new IDNA.Info();
|
||||
idna.nameToUnicode("xn--ple@", result, info);
|
||||
assertTrue("nameToUnicode() should detect '@'",
|
||||
info.getErrors().contains(IDNA.Error.PUNYCODE));
|
||||
|
||||
info=new IDNA.Info();
|
||||
idna.nameToUnicode("xn--ple[", result, info);
|
||||
assertTrue("nameToUnicode() should detect '['",
|
||||
info.getErrors().contains(IDNA.Error.PUNYCODE));
|
||||
|
||||
info=new IDNA.Info();
|
||||
idna.nameToUnicode("xn--ple`", result, info);
|
||||
assertTrue("nameToUnicode() should detect '`'",
|
||||
info.getErrors().contains(IDNA.Error.PUNYCODE));
|
||||
|
||||
info=new IDNA.Info();
|
||||
idna.nameToUnicode("xn--ple{", result, info);
|
||||
assertTrue("nameToUnicode() should detect '{'",
|
||||
info.getErrors().contains(IDNA.Error.PUNYCODE));
|
||||
}
|
||||
|
||||
private static final Map<String, IDNA.Error> errorNamesToErrors;
|
||||
static {
|
||||
errorNamesToErrors=new TreeMap<String, IDNA.Error>();
|
||||
errorNamesToErrors=new TreeMap<>();
|
||||
errorNamesToErrors.put("UIDNA_ERROR_EMPTY_LABEL", IDNA.Error.EMPTY_LABEL);
|
||||
errorNamesToErrors.put("UIDNA_ERROR_LABEL_TOO_LONG", IDNA.Error.LABEL_TOO_LONG);
|
||||
errorNamesToErrors.put("UIDNA_ERROR_DOMAIN_NAME_TOO_LONG", IDNA.Error.DOMAIN_NAME_TOO_LONG);
|
||||
|
Loading…
Reference in New Issue
Block a user