ICU-8130 UTS 46 conformance test using Unicode IdnaTest.txt
X-SVN-Rev: 40130
This commit is contained in:
parent
1b2cc7d1fb
commit
b2ead3e2e1
@ -1015,8 +1015,8 @@ UTS46::checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) c
|
|||||||
) {
|
) {
|
||||||
info.isOkBiDi=FALSE;
|
info.isOkBiDi=FALSE;
|
||||||
}
|
}
|
||||||
// Get the directionalities of the intervening characters.
|
// Add the directionalities of the intervening characters.
|
||||||
uint32_t mask=0;
|
uint32_t mask=firstMask|lastMask;
|
||||||
while(i<labelLength) {
|
while(i<labelLength) {
|
||||||
U16_NEXT_UNSAFE(label, i, c);
|
U16_NEXT_UNSAFE(label, i, c);
|
||||||
mask|=U_MASK(u_charDirection(c));
|
mask|=U_MASK(u_charDirection(c));
|
||||||
@ -1045,7 +1045,7 @@ UTS46::checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) c
|
|||||||
// label. [...]
|
// label. [...]
|
||||||
// The following rule, consisting of six conditions, applies to labels
|
// The following rule, consisting of six conditions, applies to labels
|
||||||
// in BIDI domain names.
|
// in BIDI domain names.
|
||||||
if(((firstMask|mask|lastMask)&R_AL_AN_MASK)!=0) {
|
if((mask&R_AL_AN_MASK)!=0) {
|
||||||
info.isBiDi=TRUE;
|
info.isBiDi=TRUE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -373,7 +373,7 @@ or
|
|||||||
cd $ICU_SRC/icu4c/source/data/unidata
|
cd $ICU_SRC/icu4c/source/data/unidata
|
||||||
cp confusables.txt confusablesWholeScript.txt NormalizationCorrections.txt NormalizationTest.txt SpecialCasing.txt UnicodeData.txt $ICU_SRC/icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode
|
cp confusables.txt confusablesWholeScript.txt NormalizationCorrections.txt NormalizationTest.txt SpecialCasing.txt UnicodeData.txt $ICU_SRC/icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode
|
||||||
cd ../../test/testdata
|
cd ../../test/testdata
|
||||||
cp BidiCharacterTest.txt BidiTest.txt $ICU_SRC/icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode
|
cp BidiCharacterTest.txt BidiTest.txt IdnaTest.txt $ICU_SRC/icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode
|
||||||
cp $UNICODE_DATA/ucd/CompositionExclusions.txt $ICU_SRC/icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode
|
cp $UNICODE_DATA/ucd/CompositionExclusions.txt $ICU_SRC/icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode
|
||||||
|
|
||||||
* run & fix ICU4J tests
|
* run & fix ICU4J tests
|
||||||
|
@ -26,8 +26,10 @@
|
|||||||
#include "unicode/stringpiece.h"
|
#include "unicode/stringpiece.h"
|
||||||
#include "unicode/uidna.h"
|
#include "unicode/uidna.h"
|
||||||
#include "unicode/unistr.h"
|
#include "unicode/unistr.h"
|
||||||
#include "intltest.h"
|
#include "charstr.h"
|
||||||
#include "cmemory.h"
|
#include "cmemory.h"
|
||||||
|
#include "intltest.h"
|
||||||
|
#include "uparse.h"
|
||||||
|
|
||||||
class UTS46Test : public IntlTest {
|
class UTS46Test : public IntlTest {
|
||||||
public:
|
public:
|
||||||
@ -38,6 +40,13 @@ public:
|
|||||||
void TestAPI();
|
void TestAPI();
|
||||||
void TestNotSTD3();
|
void TestNotSTD3();
|
||||||
void TestSomeCases();
|
void TestSomeCases();
|
||||||
|
void IdnaTest();
|
||||||
|
|
||||||
|
void checkIdnaTestResult(const char *line, const char *type,
|
||||||
|
const UnicodeString &expected, const UnicodeString &result,
|
||||||
|
const IDNAInfo &info);
|
||||||
|
void idnaTestOneLine(char *fields[][2], UErrorCode &errorCode);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
IDNA *trans, *nontrans;
|
IDNA *trans, *nontrans;
|
||||||
};
|
};
|
||||||
@ -74,6 +83,7 @@ void UTS46Test::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
|
|||||||
TESTCASE_AUTO(TestAPI);
|
TESTCASE_AUTO(TestAPI);
|
||||||
TESTCASE_AUTO(TestNotSTD3);
|
TESTCASE_AUTO(TestNotSTD3);
|
||||||
TESTCASE_AUTO(TestSomeCases);
|
TESTCASE_AUTO(TestSomeCases);
|
||||||
|
TESTCASE_AUTO(IdnaTest);
|
||||||
TESTCASE_AUTO_END;
|
TESTCASE_AUTO_END;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -517,8 +527,11 @@ static const TestCase testCases[]={
|
|||||||
{ "\\u05D07\\u05EA", "B", "\\u05D07\\u05EA", 0 },
|
{ "\\u05D07\\u05EA", "B", "\\u05D07\\u05EA", 0 },
|
||||||
{ "\\u05D0\\u0667\\u05EA", "B", "\\u05D0\\u0667\\u05EA", 0 }, // Arabic 7 in the middle
|
{ "\\u05D0\\u0667\\u05EA", "B", "\\u05D0\\u0667\\u05EA", 0 }, // Arabic 7 in the middle
|
||||||
{ "a7\\u0667z", "B", "a7\\u0667z", UIDNA_ERROR_BIDI }, // AN digit in LTR
|
{ "a7\\u0667z", "B", "a7\\u0667z", UIDNA_ERROR_BIDI }, // AN digit in LTR
|
||||||
|
{ "a7\\u0667", "B", "a7\\u0667", UIDNA_ERROR_BIDI }, // AN digit in LTR
|
||||||
{ "\\u05D07\\u0667\\u05EA", "B", // mixed EN/AN digits in RTL
|
{ "\\u05D07\\u0667\\u05EA", "B", // mixed EN/AN digits in RTL
|
||||||
"\\u05D07\\u0667\\u05EA", UIDNA_ERROR_BIDI },
|
"\\u05D07\\u0667\\u05EA", UIDNA_ERROR_BIDI },
|
||||||
|
{ "\\u05D07\\u0667", "B", // mixed EN/AN digits in RTL
|
||||||
|
"\\u05D07\\u0667", UIDNA_ERROR_BIDI },
|
||||||
// ZWJ
|
// ZWJ
|
||||||
{ "\\u0BB9\\u0BCD\\u200D", "N", "\\u0BB9\\u0BCD\\u200D", 0 }, // Virama+ZWJ
|
{ "\\u0BB9\\u0BCD\\u200D", "N", "\\u0BB9\\u0BCD\\u200D", 0 }, // Virama+ZWJ
|
||||||
{ "\\u0BB9\\u200D", "N", "\\u0BB9\\u200D", UIDNA_ERROR_CONTEXTJ }, // no Virama
|
{ "\\u0BB9\\u200D", "N", "\\u0BB9\\u200D", UIDNA_ERROR_CONTEXTJ }, // no Virama
|
||||||
@ -881,4 +894,117 @@ void UTS46Test::TestSomeCases() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
const int32_t kNumFields = 4; // Will need 5 when we read NV8 from the optional fifth column.
|
||||||
|
|
||||||
|
void U_CALLCONV
|
||||||
|
idnaTestLineFn(void *context,
|
||||||
|
char *fields[][2], int32_t /* fieldCount */,
|
||||||
|
UErrorCode *pErrorCode) {
|
||||||
|
reinterpret_cast<UTS46Test *>(context)->idnaTestOneLine(fields, *pErrorCode);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
void UTS46Test::checkIdnaTestResult(const char *line, const char *type,
|
||||||
|
const UnicodeString &expected, const UnicodeString &result,
|
||||||
|
const IDNAInfo &info) {
|
||||||
|
// An error in toUnicode or toASCII is indicated by a value in square brackets,
|
||||||
|
// such as "[B5 B6]".
|
||||||
|
UBool expectedHasErrors = !expected.isEmpty() && expected[0] == u'[';
|
||||||
|
if (expectedHasErrors != info.hasErrors()) {
|
||||||
|
errln("%s expected errors %d != %d = actual has errors: %04lx\n %s",
|
||||||
|
type, expectedHasErrors, info.hasErrors(), (long)info.getErrors(), line);
|
||||||
|
}
|
||||||
|
if (!expectedHasErrors && expected != result) {
|
||||||
|
errln("%s expected != actual\n %s", type, line);
|
||||||
|
errln(UnicodeString(u" ") + expected);
|
||||||
|
errln(UnicodeString(u" ") + result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void UTS46Test::idnaTestOneLine(char *fields[][2], UErrorCode &errorCode) {
|
||||||
|
// Column 1: type - T for transitional, N for nontransitional, B for both
|
||||||
|
const char *typePtr = u_skipWhitespace(fields[0][0]);
|
||||||
|
const char *limit;
|
||||||
|
char typeChar;
|
||||||
|
if (typePtr == fields[0][1] ||
|
||||||
|
((typeChar = *typePtr) != 'B' && typeChar != 'N' && typeChar != 'T') ||
|
||||||
|
(limit = u_skipWhitespace(typePtr + 1)) != fields[0][1]) {
|
||||||
|
errln("empty or unknown type field: %s", fields[0][0]);
|
||||||
|
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Column 2: source - the source string to be tested
|
||||||
|
int32_t length = (int32_t)(fields[1][1] - fields[1][0]);
|
||||||
|
UnicodeString source16 = UnicodeString::fromUTF8(StringPiece(fields[1][0], length)).
|
||||||
|
trim().unescape();
|
||||||
|
|
||||||
|
// Column 3: toUnicode - the result of applying toUnicode to the source.
|
||||||
|
// A blank value means the same as the source value.
|
||||||
|
length = (int32_t)(fields[2][1] - fields[2][0]);
|
||||||
|
UnicodeString unicode16 = UnicodeString::fromUTF8(StringPiece(fields[2][0], length)).
|
||||||
|
trim().unescape();
|
||||||
|
if (unicode16.isEmpty()) {
|
||||||
|
unicode16 = source16;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Column 4: toASCII - the result of applying toASCII to the source, using the specified type.
|
||||||
|
// A blank value means the same as the toUnicode value.
|
||||||
|
length = (int32_t)(fields[3][1] - fields[3][0]);
|
||||||
|
UnicodeString ascii16 = UnicodeString::fromUTF8(StringPiece(fields[3][0], length)).
|
||||||
|
trim().unescape();
|
||||||
|
if (ascii16.isEmpty()) {
|
||||||
|
ascii16 = unicode16;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Column 5: NV8 - present if the toUnicode value would not be a valid domain name under IDNA2008. Not a normative field.
|
||||||
|
// Ignored as long as we do not implement and test vanilla IDNA2008.
|
||||||
|
|
||||||
|
// ToASCII/ToUnicode, transitional/nontransitional
|
||||||
|
UnicodeString uN, aN, aT;
|
||||||
|
IDNAInfo uNInfo, aNInfo, aTInfo;
|
||||||
|
nontrans->nameToUnicode(source16, uN, uNInfo, errorCode);
|
||||||
|
checkIdnaTestResult(fields[0][0], "toUnicodeNontrans", unicode16, uN, uNInfo);
|
||||||
|
if (typeChar == 'T' || typeChar == 'B') {
|
||||||
|
trans->nameToASCII(source16, aT, aTInfo, errorCode);
|
||||||
|
checkIdnaTestResult(fields[0][0], "toASCIITrans", ascii16, aT, aTInfo);
|
||||||
|
}
|
||||||
|
if (typeChar == 'N' || typeChar == 'B') {
|
||||||
|
nontrans->nameToASCII(source16, aN, aNInfo, errorCode);
|
||||||
|
checkIdnaTestResult(fields[0][0], "toASCIINontrans", ascii16, aN, aNInfo);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: de-duplicate
|
||||||
|
U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
|
||||||
|
|
||||||
|
// http://www.unicode.org/Public/idna/latest/IdnaTest.txt
|
||||||
|
void UTS46Test::IdnaTest() {
|
||||||
|
IcuTestErrorCode errorCode(*this, "IdnaTest");
|
||||||
|
const char *sourceTestDataPath = getSourceTestData(errorCode);
|
||||||
|
if (errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
|
||||||
|
"folder (getSourceTestData())")) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
CharString path(sourceTestDataPath, errorCode);
|
||||||
|
path.appendPathPart("IdnaTest.txt", errorCode);
|
||||||
|
LocalStdioFilePointer idnaTestFile(fopen(path.data(), "r"));
|
||||||
|
if (idnaTestFile.isNull()) {
|
||||||
|
errln("unable to open %s", path.data());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Columns (c1, c2,...) are separated by semicolons.
|
||||||
|
// Leading and trailing spaces and tabs in each column are ignored.
|
||||||
|
// Comments are indicated with hash marks.
|
||||||
|
char *fields[kNumFields][2];
|
||||||
|
u_parseDelimitedFile(path.data(), ';', fields, kNumFields, idnaTestLineFn, this, errorCode);
|
||||||
|
if (errorCode.logIfFailureAndReset("error parsing IdnaTest.txt")) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#endif // UCONFIG_NO_IDNA
|
#endif // UCONFIG_NO_IDNA
|
||||||
|
7844
icu4c/source/test/testdata/IdnaTest.txt
vendored
Normal file
7844
icu4c/source/test/testdata/IdnaTest.txt
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@ -77,7 +77,7 @@ u_parseDelimitedFile(const char *filename, char delimiter,
|
|||||||
UParseLineFn *lineFn, void *context,
|
UParseLineFn *lineFn, void *context,
|
||||||
UErrorCode *pErrorCode) {
|
UErrorCode *pErrorCode) {
|
||||||
FileStream *file;
|
FileStream *file;
|
||||||
char line[300];
|
char line[10000];
|
||||||
char *start, *limit;
|
char *start, *limit;
|
||||||
int32_t i, length;
|
int32_t i, length;
|
||||||
|
|
||||||
@ -163,7 +163,7 @@ u_parseDelimitedFile(const char *filename, char delimiter,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* error in a field function? */
|
/* too few fields? */
|
||||||
if(U_FAILURE(*pErrorCode)) {
|
if(U_FAILURE(*pErrorCode)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -586,8 +586,8 @@ public final class UTS46 extends IDNA {
|
|||||||
) {
|
) {
|
||||||
setNotOkBiDi(info);
|
setNotOkBiDi(info);
|
||||||
}
|
}
|
||||||
// Get the directionalities of the intervening characters.
|
// Add the directionalities of the intervening characters.
|
||||||
int mask=0;
|
int mask=firstMask|lastMask;
|
||||||
while(i<labelLimit) {
|
while(i<labelLimit) {
|
||||||
c=Character.codePointAt(label, i);
|
c=Character.codePointAt(label, i);
|
||||||
i+=Character.charCount(c);
|
i+=Character.charCount(c);
|
||||||
@ -617,7 +617,7 @@ public final class UTS46 extends IDNA {
|
|||||||
// label. [...]
|
// label. [...]
|
||||||
// The following rule, consisting of six conditions, applies to labels
|
// The following rule, consisting of six conditions, applies to labels
|
||||||
// in BIDI domain names.
|
// in BIDI domain names.
|
||||||
if(((firstMask|mask|lastMask)&R_AL_AN_MASK)!=0) {
|
if((mask&R_AL_AN_MASK)!=0) {
|
||||||
setBiDi(info);
|
setBiDi(info);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
7844
icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/IdnaTest.txt
Normal file
7844
icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/IdnaTest.txt
Normal file
File diff suppressed because it is too large
Load Diff
@ -8,16 +8,21 @@
|
|||||||
*/
|
*/
|
||||||
package com.ibm.icu.dev.test.normalizer;
|
package com.ibm.icu.dev.test.normalizer;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import com.ibm.icu.dev.test.TestFmwk;
|
import com.ibm.icu.dev.test.TestFmwk;
|
||||||
|
import com.ibm.icu.dev.test.TestUtil;
|
||||||
import com.ibm.icu.impl.Normalizer2Impl.UTF16Plus;
|
import com.ibm.icu.impl.Normalizer2Impl.UTF16Plus;
|
||||||
|
import com.ibm.icu.impl.Utility;
|
||||||
import com.ibm.icu.text.IDNA;
|
import com.ibm.icu.text.IDNA;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -413,8 +418,11 @@ public class UTS46Test extends TestFmwk {
|
|||||||
{ "\u05D07\u05EA", "B", "\u05D07\u05EA", "" },
|
{ "\u05D07\u05EA", "B", "\u05D07\u05EA", "" },
|
||||||
{ "\u05D0\u0667\u05EA", "B", "\u05D0\u0667\u05EA", "" }, // Arabic 7 in the middle
|
{ "\u05D0\u0667\u05EA", "B", "\u05D0\u0667\u05EA", "" }, // Arabic 7 in the middle
|
||||||
{ "a7\u0667z", "B", "a7\u0667z", "UIDNA_ERROR_BIDI" }, // AN digit in LTR
|
{ "a7\u0667z", "B", "a7\u0667z", "UIDNA_ERROR_BIDI" }, // AN digit in LTR
|
||||||
|
{ "a7\u0667", "B", "a7\u0667", "UIDNA_ERROR_BIDI" }, // AN digit in LTR
|
||||||
{ "\u05D07\u0667\u05EA", "B", // mixed EN/AN digits in RTL
|
{ "\u05D07\u0667\u05EA", "B", // mixed EN/AN digits in RTL
|
||||||
"\u05D07\u0667\u05EA", "UIDNA_ERROR_BIDI" },
|
"\u05D07\u0667\u05EA", "UIDNA_ERROR_BIDI" },
|
||||||
|
{ "\u05D07\u0667", "B", // mixed EN/AN digits in RTL
|
||||||
|
"\u05D07\u0667", "UIDNA_ERROR_BIDI" },
|
||||||
// ZWJ
|
// ZWJ
|
||||||
{ "\u0BB9\u0BCD\u200D", "N", "\u0BB9\u0BCD\u200D", "" }, // Virama+ZWJ
|
{ "\u0BB9\u0BCD\u200D", "N", "\u0BB9\u0BCD\u200D", "" }, // Virama+ZWJ
|
||||||
{ "\u0BB9\u200D", "N", "\u0BB9\u200D", "UIDNA_ERROR_CONTEXTJ" }, // no Virama
|
{ "\u0BB9\u200D", "N", "\u0BB9\u200D", "UIDNA_ERROR_CONTEXTJ" }, // no Virama
|
||||||
@ -716,6 +724,88 @@ public class UTS46Test extends TestFmwk {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void checkIdnaTestResult(String line, String type,
|
||||||
|
String expected, CharSequence result, IDNA.Info info) {
|
||||||
|
// An error in toUnicode or toASCII is indicated by a value in square brackets,
|
||||||
|
// such as "[B5 B6]".
|
||||||
|
boolean expectedHasErrors = !expected.isEmpty() && expected.charAt(0) == '[';
|
||||||
|
if (expectedHasErrors != info.hasErrors()) {
|
||||||
|
errln(String.format(
|
||||||
|
"%s expected errors %b != %b = actual has errors: %s\n %s",
|
||||||
|
type, expectedHasErrors, info.hasErrors(), info.getErrors(), line));
|
||||||
|
}
|
||||||
|
if (!expectedHasErrors && !UTF16Plus.equal(expected, result)) {
|
||||||
|
errln(String.format("%s expected != actual\n %s", type, line));
|
||||||
|
errln(" " + expected);
|
||||||
|
errln(" " + result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void IdnaTest() throws IOException {
|
||||||
|
BufferedReader idnaTestFile = TestUtil.getDataReader("unicode/IdnaTest.txt");
|
||||||
|
Pattern semi = Pattern.compile(";");
|
||||||
|
try {
|
||||||
|
String line;
|
||||||
|
while ((line = idnaTestFile.readLine()) != null) {
|
||||||
|
// Remove trailing comments and whitespace.
|
||||||
|
int commentStart = line.indexOf('#');
|
||||||
|
if (commentStart >= 0) {
|
||||||
|
line = line.substring(0, commentStart);
|
||||||
|
}
|
||||||
|
String[] fields = semi.split(line, -1);
|
||||||
|
if (fields.length <= 1) {
|
||||||
|
continue; // Skip empty and comment-only lines.
|
||||||
|
}
|
||||||
|
|
||||||
|
// Column 1: type - T for transitional, N for nontransitional, B for both
|
||||||
|
String type = fields[0].trim();
|
||||||
|
char typeChar;
|
||||||
|
if (type.length() != 1 ||
|
||||||
|
((typeChar = type.charAt(0)) != 'B' && typeChar != 'N' && typeChar != 'T')) {
|
||||||
|
errln("empty or unknown type field: " + line);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Column 2: source - the source string to be tested
|
||||||
|
String source16 = Utility.unescape(fields[1].trim());
|
||||||
|
|
||||||
|
// Column 3: toUnicode - the result of applying toUnicode to the source.
|
||||||
|
// A blank value means the same as the source value.
|
||||||
|
String unicode16 = Utility.unescape(fields[2].trim());
|
||||||
|
if (unicode16.isEmpty()) {
|
||||||
|
unicode16 = source16;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Column 4: toASCII - the result of applying toASCII to the source, using the specified type.
|
||||||
|
// A blank value means the same as the toUnicode value.
|
||||||
|
String ascii16 = Utility.unescape(fields[3].trim());
|
||||||
|
if (ascii16.isEmpty()) {
|
||||||
|
ascii16 = unicode16;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Column 5: NV8 - present if the toUnicode value would not be a valid domain name under IDNA2008. Not a normative field.
|
||||||
|
// Ignored as long as we do not implement and test vanilla IDNA2008.
|
||||||
|
|
||||||
|
// ToASCII/ToUnicode, transitional/nontransitional
|
||||||
|
StringBuilder uN, aN, aT;
|
||||||
|
IDNA.Info uNInfo, aNInfo, aTInfo;
|
||||||
|
nontrans.nameToUnicode(source16, uN = new StringBuilder(), uNInfo = new IDNA.Info());
|
||||||
|
checkIdnaTestResult(line, "toUnicodeNontrans", unicode16, uN, uNInfo);
|
||||||
|
if (typeChar == 'T' || typeChar == 'B') {
|
||||||
|
trans.nameToASCII(source16, aT = new StringBuilder(), aTInfo = new IDNA.Info());
|
||||||
|
checkIdnaTestResult(line, "toASCIITrans", ascii16, aT, aTInfo);
|
||||||
|
}
|
||||||
|
if (typeChar == 'N' || typeChar == 'B') {
|
||||||
|
nontrans.nameToASCII(source16, aN = new StringBuilder(), aNInfo = new IDNA.Info());
|
||||||
|
checkIdnaTestResult(line, "toASCIINontrans", ascii16, aN, aNInfo);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
idnaTestFile.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private final IDNA trans, nontrans;
|
private final IDNA trans, nontrans;
|
||||||
|
|
||||||
private static final EnumSet<IDNA.Error> severeErrors=EnumSet.of(
|
private static final EnumSet<IDNA.Error> severeErrors=EnumSet.of(
|
||||||
|
@ -1493,65 +1493,63 @@ _code_point_re = re.compile("\s*([0-9a-fA-F]+)\s*;")
|
|||||||
|
|
||||||
def CopyAndStripWithOptionalMerge(s, t, do_merge):
|
def CopyAndStripWithOptionalMerge(s, t, do_merge):
|
||||||
# TODO: We do not seem to need the do_merge argument and logic any more.
|
# TODO: We do not seem to need the do_merge argument and logic any more.
|
||||||
# TODO: With Python 2.7+, combine the two with statements into one.
|
with open(s, "r") as in_file, open(t, "w") as out_file:
|
||||||
with open(s, "r") as in_file:
|
first = -1 # First code point with first_data.
|
||||||
with open(t, "w") as out_file:
|
last = -1 # Last code point with first_data.
|
||||||
first = -1 # First code point with first_data.
|
first_data = "" # Common data for code points [first..last].
|
||||||
last = -1 # Last code point with first_data.
|
for line in in_file:
|
||||||
first_data = "" # Common data for code points [first..last].
|
match = _strip_re.match(line)
|
||||||
for line in in_file:
|
if match:
|
||||||
match = _strip_re.match(line)
|
line = match.group(1)
|
||||||
|
else:
|
||||||
|
line = line.rstrip()
|
||||||
|
if do_merge:
|
||||||
|
match = _code_point_re.match(line)
|
||||||
if match:
|
if match:
|
||||||
line = match.group(1)
|
c = int(match.group(1), 16)
|
||||||
|
data = line[match.end() - 1:]
|
||||||
else:
|
else:
|
||||||
line = line.rstrip()
|
c = -1
|
||||||
if do_merge:
|
data = ""
|
||||||
match = _code_point_re.match(line)
|
if last >= 0 and (c != (last + 1) or data != first_data):
|
||||||
if match:
|
# output the current range
|
||||||
c = int(match.group(1), 16)
|
if first == last:
|
||||||
data = line[match.end() - 1:]
|
out_file.write("%04X%s\n" % (first, first_data))
|
||||||
else:
|
else:
|
||||||
c = -1
|
out_file.write("%04X..%04X%s\n" % (first, last, first_data))
|
||||||
data = ""
|
first = -1
|
||||||
if last >= 0 and (c != (last + 1) or data != first_data):
|
last = -1
|
||||||
# output the current range
|
first_data = ""
|
||||||
if first == last:
|
if c < 0:
|
||||||
out_file.write("%04X%s\n" % (first, first_data))
|
# no data on this line, output as is
|
||||||
else:
|
|
||||||
out_file.write("%04X..%04X%s\n" % (first, last, first_data))
|
|
||||||
first = -1
|
|
||||||
last = -1
|
|
||||||
first_data = ""
|
|
||||||
if c < 0:
|
|
||||||
# no data on this line, output as is
|
|
||||||
out_file.write(line)
|
|
||||||
out_file.write("\n")
|
|
||||||
else:
|
|
||||||
# data on this line, store for possible range compaction
|
|
||||||
if last < 0:
|
|
||||||
# set as the first line in a possible range
|
|
||||||
first = c
|
|
||||||
last = c
|
|
||||||
first_data = data
|
|
||||||
else:
|
|
||||||
# must be c == (last + 1) and data == first_data
|
|
||||||
# because of previous conditions
|
|
||||||
# continue with the current range
|
|
||||||
last = c
|
|
||||||
else:
|
|
||||||
# Only strip, don't merge: just output the stripped line.
|
|
||||||
out_file.write(line)
|
out_file.write(line)
|
||||||
out_file.write("\n")
|
out_file.write("\n")
|
||||||
if do_merge and last >= 0:
|
|
||||||
# output the last range in the file
|
|
||||||
if first == last:
|
|
||||||
out_file.write("%04X%s\n" % (first, first_data))
|
|
||||||
else:
|
else:
|
||||||
out_file.write("%04X..%04X%s\n" % (first, last, first_data))
|
# data on this line, store for possible range compaction
|
||||||
first = -1
|
if last < 0:
|
||||||
last = -1
|
# set as the first line in a possible range
|
||||||
first_data = ""
|
first = c
|
||||||
out_file.flush()
|
last = c
|
||||||
|
first_data = data
|
||||||
|
else:
|
||||||
|
# must be c == (last + 1) and data == first_data
|
||||||
|
# because of previous conditions
|
||||||
|
# continue with the current range
|
||||||
|
last = c
|
||||||
|
else:
|
||||||
|
# Only strip, don't merge: just output the stripped line.
|
||||||
|
out_file.write(line)
|
||||||
|
out_file.write("\n")
|
||||||
|
if do_merge and last >= 0:
|
||||||
|
# output the last range in the file
|
||||||
|
if first == last:
|
||||||
|
out_file.write("%04X%s\n" % (first, first_data))
|
||||||
|
else:
|
||||||
|
out_file.write("%04X..%04X%s\n" % (first, last, first_data))
|
||||||
|
first = -1
|
||||||
|
last = -1
|
||||||
|
first_data = ""
|
||||||
|
out_file.flush()
|
||||||
return t
|
return t
|
||||||
|
|
||||||
|
|
||||||
@ -1571,11 +1569,9 @@ def CopyAndStripAndMerge(s, t):
|
|||||||
|
|
||||||
|
|
||||||
def PrependBOM(s, t):
|
def PrependBOM(s, t):
|
||||||
# TODO: With Python 2.7+, combine the two with statements into one.
|
with open(s, "r") as in_file, open(t, "w") as out_file:
|
||||||
with open(s, "r") as in_file:
|
out_file.write("\xef\xbb\xbf") # UTF-8 BOM for ICU svn
|
||||||
with open(t, "w") as out_file:
|
shutil.copyfileobj(in_file, out_file)
|
||||||
out_file.write("\xef\xbb\xbf") # UTF-8 BOM for ICU svn
|
|
||||||
shutil.copyfileobj(in_file, out_file)
|
|
||||||
return t
|
return t
|
||||||
|
|
||||||
|
|
||||||
@ -1613,6 +1609,7 @@ _files = {
|
|||||||
"emoji-data.txt": (DontCopy, ParseNamedProperties),
|
"emoji-data.txt": (DontCopy, ParseNamedProperties),
|
||||||
"GraphemeBreakProperty.txt": (DontCopy, ParseGraphemeBreakProperty),
|
"GraphemeBreakProperty.txt": (DontCopy, ParseGraphemeBreakProperty),
|
||||||
"GraphemeBreakTest.txt": (PrependBOM, "testdata"),
|
"GraphemeBreakTest.txt": (PrependBOM, "testdata"),
|
||||||
|
"IdnaTest.txt": (CopyOnly, "testdata"),
|
||||||
"IndicPositionalCategory.txt": (DontCopy, ParseIndicPositionalCategory),
|
"IndicPositionalCategory.txt": (DontCopy, ParseIndicPositionalCategory),
|
||||||
"IndicSyllabicCategory.txt": (DontCopy, ParseIndicSyllabicCategory),
|
"IndicSyllabicCategory.txt": (DontCopy, ParseIndicSyllabicCategory),
|
||||||
"LineBreak.txt": (DontCopy, ParseLineBreak),
|
"LineBreak.txt": (DontCopy, ParseLineBreak),
|
||||||
|
Loading…
Reference in New Issue
Block a user