ICU-8130 UTS 46 conformance test using Unicode IdnaTest.txt
X-SVN-Rev: 40130
This commit is contained in:
parent
1b2cc7d1fb
commit
b2ead3e2e1
@ -1015,8 +1015,8 @@ UTS46::checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) c
|
||||
) {
|
||||
info.isOkBiDi=FALSE;
|
||||
}
|
||||
// Get the directionalities of the intervening characters.
|
||||
uint32_t mask=0;
|
||||
// Add the directionalities of the intervening characters.
|
||||
uint32_t mask=firstMask|lastMask;
|
||||
while(i<labelLength) {
|
||||
U16_NEXT_UNSAFE(label, i, c);
|
||||
mask|=U_MASK(u_charDirection(c));
|
||||
@ -1045,7 +1045,7 @@ UTS46::checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) c
|
||||
// label. [...]
|
||||
// The following rule, consisting of six conditions, applies to labels
|
||||
// in BIDI domain names.
|
||||
if(((firstMask|mask|lastMask)&R_AL_AN_MASK)!=0) {
|
||||
if((mask&R_AL_AN_MASK)!=0) {
|
||||
info.isBiDi=TRUE;
|
||||
}
|
||||
}
|
||||
|
@ -373,7 +373,7 @@ or
|
||||
cd $ICU_SRC/icu4c/source/data/unidata
|
||||
cp confusables.txt confusablesWholeScript.txt NormalizationCorrections.txt NormalizationTest.txt SpecialCasing.txt UnicodeData.txt $ICU_SRC/icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode
|
||||
cd ../../test/testdata
|
||||
cp BidiCharacterTest.txt BidiTest.txt $ICU_SRC/icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode
|
||||
cp BidiCharacterTest.txt BidiTest.txt IdnaTest.txt $ICU_SRC/icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode
|
||||
cp $UNICODE_DATA/ucd/CompositionExclusions.txt $ICU_SRC/icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode
|
||||
|
||||
* run & fix ICU4J tests
|
||||
|
@ -26,8 +26,10 @@
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/uidna.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "intltest.h"
|
||||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
#include "intltest.h"
|
||||
#include "uparse.h"
|
||||
|
||||
class UTS46Test : public IntlTest {
|
||||
public:
|
||||
@ -38,6 +40,13 @@ public:
|
||||
void TestAPI();
|
||||
void TestNotSTD3();
|
||||
void TestSomeCases();
|
||||
void IdnaTest();
|
||||
|
||||
void checkIdnaTestResult(const char *line, const char *type,
|
||||
const UnicodeString &expected, const UnicodeString &result,
|
||||
const IDNAInfo &info);
|
||||
void idnaTestOneLine(char *fields[][2], UErrorCode &errorCode);
|
||||
|
||||
private:
|
||||
IDNA *trans, *nontrans;
|
||||
};
|
||||
@ -74,6 +83,7 @@ void UTS46Test::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
|
||||
TESTCASE_AUTO(TestAPI);
|
||||
TESTCASE_AUTO(TestNotSTD3);
|
||||
TESTCASE_AUTO(TestSomeCases);
|
||||
TESTCASE_AUTO(IdnaTest);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
@ -517,8 +527,11 @@ static const TestCase testCases[]={
|
||||
{ "\\u05D07\\u05EA", "B", "\\u05D07\\u05EA", 0 },
|
||||
{ "\\u05D0\\u0667\\u05EA", "B", "\\u05D0\\u0667\\u05EA", 0 }, // Arabic 7 in the middle
|
||||
{ "a7\\u0667z", "B", "a7\\u0667z", UIDNA_ERROR_BIDI }, // AN digit in LTR
|
||||
{ "a7\\u0667", "B", "a7\\u0667", UIDNA_ERROR_BIDI }, // AN digit in LTR
|
||||
{ "\\u05D07\\u0667\\u05EA", "B", // mixed EN/AN digits in RTL
|
||||
"\\u05D07\\u0667\\u05EA", UIDNA_ERROR_BIDI },
|
||||
{ "\\u05D07\\u0667", "B", // mixed EN/AN digits in RTL
|
||||
"\\u05D07\\u0667", UIDNA_ERROR_BIDI },
|
||||
// ZWJ
|
||||
{ "\\u0BB9\\u0BCD\\u200D", "N", "\\u0BB9\\u0BCD\\u200D", 0 }, // Virama+ZWJ
|
||||
{ "\\u0BB9\\u200D", "N", "\\u0BB9\\u200D", UIDNA_ERROR_CONTEXTJ }, // no Virama
|
||||
@ -881,4 +894,117 @@ void UTS46Test::TestSomeCases() {
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
const int32_t kNumFields = 4; // Will need 5 when we read NV8 from the optional fifth column.
|
||||
|
||||
void U_CALLCONV
|
||||
idnaTestLineFn(void *context,
|
||||
char *fields[][2], int32_t /* fieldCount */,
|
||||
UErrorCode *pErrorCode) {
|
||||
reinterpret_cast<UTS46Test *>(context)->idnaTestOneLine(fields, *pErrorCode);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void UTS46Test::checkIdnaTestResult(const char *line, const char *type,
|
||||
const UnicodeString &expected, const UnicodeString &result,
|
||||
const IDNAInfo &info) {
|
||||
// An error in toUnicode or toASCII is indicated by a value in square brackets,
|
||||
// such as "[B5 B6]".
|
||||
UBool expectedHasErrors = !expected.isEmpty() && expected[0] == u'[';
|
||||
if (expectedHasErrors != info.hasErrors()) {
|
||||
errln("%s expected errors %d != %d = actual has errors: %04lx\n %s",
|
||||
type, expectedHasErrors, info.hasErrors(), (long)info.getErrors(), line);
|
||||
}
|
||||
if (!expectedHasErrors && expected != result) {
|
||||
errln("%s expected != actual\n %s", type, line);
|
||||
errln(UnicodeString(u" ") + expected);
|
||||
errln(UnicodeString(u" ") + result);
|
||||
}
|
||||
}
|
||||
|
||||
void UTS46Test::idnaTestOneLine(char *fields[][2], UErrorCode &errorCode) {
|
||||
// Column 1: type - T for transitional, N for nontransitional, B for both
|
||||
const char *typePtr = u_skipWhitespace(fields[0][0]);
|
||||
const char *limit;
|
||||
char typeChar;
|
||||
if (typePtr == fields[0][1] ||
|
||||
((typeChar = *typePtr) != 'B' && typeChar != 'N' && typeChar != 'T') ||
|
||||
(limit = u_skipWhitespace(typePtr + 1)) != fields[0][1]) {
|
||||
errln("empty or unknown type field: %s", fields[0][0]);
|
||||
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
// Column 2: source - the source string to be tested
|
||||
int32_t length = (int32_t)(fields[1][1] - fields[1][0]);
|
||||
UnicodeString source16 = UnicodeString::fromUTF8(StringPiece(fields[1][0], length)).
|
||||
trim().unescape();
|
||||
|
||||
// Column 3: toUnicode - the result of applying toUnicode to the source.
|
||||
// A blank value means the same as the source value.
|
||||
length = (int32_t)(fields[2][1] - fields[2][0]);
|
||||
UnicodeString unicode16 = UnicodeString::fromUTF8(StringPiece(fields[2][0], length)).
|
||||
trim().unescape();
|
||||
if (unicode16.isEmpty()) {
|
||||
unicode16 = source16;
|
||||
}
|
||||
|
||||
// Column 4: toASCII - the result of applying toASCII to the source, using the specified type.
|
||||
// A blank value means the same as the toUnicode value.
|
||||
length = (int32_t)(fields[3][1] - fields[3][0]);
|
||||
UnicodeString ascii16 = UnicodeString::fromUTF8(StringPiece(fields[3][0], length)).
|
||||
trim().unescape();
|
||||
if (ascii16.isEmpty()) {
|
||||
ascii16 = unicode16;
|
||||
}
|
||||
|
||||
// Column 5: NV8 - present if the toUnicode value would not be a valid domain name under IDNA2008. Not a normative field.
|
||||
// Ignored as long as we do not implement and test vanilla IDNA2008.
|
||||
|
||||
// ToASCII/ToUnicode, transitional/nontransitional
|
||||
UnicodeString uN, aN, aT;
|
||||
IDNAInfo uNInfo, aNInfo, aTInfo;
|
||||
nontrans->nameToUnicode(source16, uN, uNInfo, errorCode);
|
||||
checkIdnaTestResult(fields[0][0], "toUnicodeNontrans", unicode16, uN, uNInfo);
|
||||
if (typeChar == 'T' || typeChar == 'B') {
|
||||
trans->nameToASCII(source16, aT, aTInfo, errorCode);
|
||||
checkIdnaTestResult(fields[0][0], "toASCIITrans", ascii16, aT, aTInfo);
|
||||
}
|
||||
if (typeChar == 'N' || typeChar == 'B') {
|
||||
nontrans->nameToASCII(source16, aN, aNInfo, errorCode);
|
||||
checkIdnaTestResult(fields[0][0], "toASCIINontrans", ascii16, aN, aNInfo);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: de-duplicate
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
|
||||
|
||||
// http://www.unicode.org/Public/idna/latest/IdnaTest.txt
|
||||
void UTS46Test::IdnaTest() {
|
||||
IcuTestErrorCode errorCode(*this, "IdnaTest");
|
||||
const char *sourceTestDataPath = getSourceTestData(errorCode);
|
||||
if (errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
|
||||
"folder (getSourceTestData())")) {
|
||||
return;
|
||||
}
|
||||
CharString path(sourceTestDataPath, errorCode);
|
||||
path.appendPathPart("IdnaTest.txt", errorCode);
|
||||
LocalStdioFilePointer idnaTestFile(fopen(path.data(), "r"));
|
||||
if (idnaTestFile.isNull()) {
|
||||
errln("unable to open %s", path.data());
|
||||
return;
|
||||
}
|
||||
|
||||
// Columns (c1, c2,...) are separated by semicolons.
|
||||
// Leading and trailing spaces and tabs in each column are ignored.
|
||||
// Comments are indicated with hash marks.
|
||||
char *fields[kNumFields][2];
|
||||
u_parseDelimitedFile(path.data(), ';', fields, kNumFields, idnaTestLineFn, this, errorCode);
|
||||
if (errorCode.logIfFailureAndReset("error parsing IdnaTest.txt")) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
#endif // UCONFIG_NO_IDNA
|
||||
|
7844
icu4c/source/test/testdata/IdnaTest.txt
vendored
Normal file
7844
icu4c/source/test/testdata/IdnaTest.txt
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@ -77,7 +77,7 @@ u_parseDelimitedFile(const char *filename, char delimiter,
|
||||
UParseLineFn *lineFn, void *context,
|
||||
UErrorCode *pErrorCode) {
|
||||
FileStream *file;
|
||||
char line[300];
|
||||
char line[10000];
|
||||
char *start, *limit;
|
||||
int32_t i, length;
|
||||
|
||||
@ -163,7 +163,7 @@ u_parseDelimitedFile(const char *filename, char delimiter,
|
||||
}
|
||||
}
|
||||
|
||||
/* error in a field function? */
|
||||
/* too few fields? */
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
break;
|
||||
}
|
||||
|
@ -586,8 +586,8 @@ public final class UTS46 extends IDNA {
|
||||
) {
|
||||
setNotOkBiDi(info);
|
||||
}
|
||||
// Get the directionalities of the intervening characters.
|
||||
int mask=0;
|
||||
// Add the directionalities of the intervening characters.
|
||||
int mask=firstMask|lastMask;
|
||||
while(i<labelLimit) {
|
||||
c=Character.codePointAt(label, i);
|
||||
i+=Character.charCount(c);
|
||||
@ -617,7 +617,7 @@ public final class UTS46 extends IDNA {
|
||||
// label. [...]
|
||||
// The following rule, consisting of six conditions, applies to labels
|
||||
// in BIDI domain names.
|
||||
if(((firstMask|mask|lastMask)&R_AL_AN_MASK)!=0) {
|
||||
if((mask&R_AL_AN_MASK)!=0) {
|
||||
setBiDi(info);
|
||||
}
|
||||
}
|
||||
|
7844
icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/IdnaTest.txt
Normal file
7844
icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/IdnaTest.txt
Normal file
File diff suppressed because it is too large
Load Diff
@ -8,16 +8,21 @@
|
||||
*/
|
||||
package com.ibm.icu.dev.test.normalizer;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.EnumSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.dev.test.TestUtil;
|
||||
import com.ibm.icu.impl.Normalizer2Impl.UTF16Plus;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.text.IDNA;
|
||||
|
||||
/**
|
||||
@ -413,8 +418,11 @@ public class UTS46Test extends TestFmwk {
|
||||
{ "\u05D07\u05EA", "B", "\u05D07\u05EA", "" },
|
||||
{ "\u05D0\u0667\u05EA", "B", "\u05D0\u0667\u05EA", "" }, // Arabic 7 in the middle
|
||||
{ "a7\u0667z", "B", "a7\u0667z", "UIDNA_ERROR_BIDI" }, // AN digit in LTR
|
||||
{ "a7\u0667", "B", "a7\u0667", "UIDNA_ERROR_BIDI" }, // AN digit in LTR
|
||||
{ "\u05D07\u0667\u05EA", "B", // mixed EN/AN digits in RTL
|
||||
"\u05D07\u0667\u05EA", "UIDNA_ERROR_BIDI" },
|
||||
{ "\u05D07\u0667", "B", // mixed EN/AN digits in RTL
|
||||
"\u05D07\u0667", "UIDNA_ERROR_BIDI" },
|
||||
// ZWJ
|
||||
{ "\u0BB9\u0BCD\u200D", "N", "\u0BB9\u0BCD\u200D", "" }, // Virama+ZWJ
|
||||
{ "\u0BB9\u200D", "N", "\u0BB9\u200D", "UIDNA_ERROR_CONTEXTJ" }, // no Virama
|
||||
@ -716,6 +724,88 @@ public class UTS46Test extends TestFmwk {
|
||||
}
|
||||
}
|
||||
|
||||
private void checkIdnaTestResult(String line, String type,
|
||||
String expected, CharSequence result, IDNA.Info info) {
|
||||
// An error in toUnicode or toASCII is indicated by a value in square brackets,
|
||||
// such as "[B5 B6]".
|
||||
boolean expectedHasErrors = !expected.isEmpty() && expected.charAt(0) == '[';
|
||||
if (expectedHasErrors != info.hasErrors()) {
|
||||
errln(String.format(
|
||||
"%s expected errors %b != %b = actual has errors: %s\n %s",
|
||||
type, expectedHasErrors, info.hasErrors(), info.getErrors(), line));
|
||||
}
|
||||
if (!expectedHasErrors && !UTF16Plus.equal(expected, result)) {
|
||||
errln(String.format("%s expected != actual\n %s", type, line));
|
||||
errln(" " + expected);
|
||||
errln(" " + result);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void IdnaTest() throws IOException {
|
||||
BufferedReader idnaTestFile = TestUtil.getDataReader("unicode/IdnaTest.txt");
|
||||
Pattern semi = Pattern.compile(";");
|
||||
try {
|
||||
String line;
|
||||
while ((line = idnaTestFile.readLine()) != null) {
|
||||
// Remove trailing comments and whitespace.
|
||||
int commentStart = line.indexOf('#');
|
||||
if (commentStart >= 0) {
|
||||
line = line.substring(0, commentStart);
|
||||
}
|
||||
String[] fields = semi.split(line, -1);
|
||||
if (fields.length <= 1) {
|
||||
continue; // Skip empty and comment-only lines.
|
||||
}
|
||||
|
||||
// Column 1: type - T for transitional, N for nontransitional, B for both
|
||||
String type = fields[0].trim();
|
||||
char typeChar;
|
||||
if (type.length() != 1 ||
|
||||
((typeChar = type.charAt(0)) != 'B' && typeChar != 'N' && typeChar != 'T')) {
|
||||
errln("empty or unknown type field: " + line);
|
||||
return;
|
||||
}
|
||||
|
||||
// Column 2: source - the source string to be tested
|
||||
String source16 = Utility.unescape(fields[1].trim());
|
||||
|
||||
// Column 3: toUnicode - the result of applying toUnicode to the source.
|
||||
// A blank value means the same as the source value.
|
||||
String unicode16 = Utility.unescape(fields[2].trim());
|
||||
if (unicode16.isEmpty()) {
|
||||
unicode16 = source16;
|
||||
}
|
||||
|
||||
// Column 4: toASCII - the result of applying toASCII to the source, using the specified type.
|
||||
// A blank value means the same as the toUnicode value.
|
||||
String ascii16 = Utility.unescape(fields[3].trim());
|
||||
if (ascii16.isEmpty()) {
|
||||
ascii16 = unicode16;
|
||||
}
|
||||
|
||||
// Column 5: NV8 - present if the toUnicode value would not be a valid domain name under IDNA2008. Not a normative field.
|
||||
// Ignored as long as we do not implement and test vanilla IDNA2008.
|
||||
|
||||
// ToASCII/ToUnicode, transitional/nontransitional
|
||||
StringBuilder uN, aN, aT;
|
||||
IDNA.Info uNInfo, aNInfo, aTInfo;
|
||||
nontrans.nameToUnicode(source16, uN = new StringBuilder(), uNInfo = new IDNA.Info());
|
||||
checkIdnaTestResult(line, "toUnicodeNontrans", unicode16, uN, uNInfo);
|
||||
if (typeChar == 'T' || typeChar == 'B') {
|
||||
trans.nameToASCII(source16, aT = new StringBuilder(), aTInfo = new IDNA.Info());
|
||||
checkIdnaTestResult(line, "toASCIITrans", ascii16, aT, aTInfo);
|
||||
}
|
||||
if (typeChar == 'N' || typeChar == 'B') {
|
||||
nontrans.nameToASCII(source16, aN = new StringBuilder(), aNInfo = new IDNA.Info());
|
||||
checkIdnaTestResult(line, "toASCIINontrans", ascii16, aN, aNInfo);
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
idnaTestFile.close();
|
||||
}
|
||||
}
|
||||
|
||||
private final IDNA trans, nontrans;
|
||||
|
||||
private static final EnumSet<IDNA.Error> severeErrors=EnumSet.of(
|
||||
|
@ -1493,65 +1493,63 @@ _code_point_re = re.compile("\s*([0-9a-fA-F]+)\s*;")
|
||||
|
||||
def CopyAndStripWithOptionalMerge(s, t, do_merge):
|
||||
# TODO: We do not seem to need the do_merge argument and logic any more.
|
||||
# TODO: With Python 2.7+, combine the two with statements into one.
|
||||
with open(s, "r") as in_file:
|
||||
with open(t, "w") as out_file:
|
||||
first = -1 # First code point with first_data.
|
||||
last = -1 # Last code point with first_data.
|
||||
first_data = "" # Common data for code points [first..last].
|
||||
for line in in_file:
|
||||
match = _strip_re.match(line)
|
||||
with open(s, "r") as in_file, open(t, "w") as out_file:
|
||||
first = -1 # First code point with first_data.
|
||||
last = -1 # Last code point with first_data.
|
||||
first_data = "" # Common data for code points [first..last].
|
||||
for line in in_file:
|
||||
match = _strip_re.match(line)
|
||||
if match:
|
||||
line = match.group(1)
|
||||
else:
|
||||
line = line.rstrip()
|
||||
if do_merge:
|
||||
match = _code_point_re.match(line)
|
||||
if match:
|
||||
line = match.group(1)
|
||||
c = int(match.group(1), 16)
|
||||
data = line[match.end() - 1:]
|
||||
else:
|
||||
line = line.rstrip()
|
||||
if do_merge:
|
||||
match = _code_point_re.match(line)
|
||||
if match:
|
||||
c = int(match.group(1), 16)
|
||||
data = line[match.end() - 1:]
|
||||
c = -1
|
||||
data = ""
|
||||
if last >= 0 and (c != (last + 1) or data != first_data):
|
||||
# output the current range
|
||||
if first == last:
|
||||
out_file.write("%04X%s\n" % (first, first_data))
|
||||
else:
|
||||
c = -1
|
||||
data = ""
|
||||
if last >= 0 and (c != (last + 1) or data != first_data):
|
||||
# output the current range
|
||||
if first == last:
|
||||
out_file.write("%04X%s\n" % (first, first_data))
|
||||
else:
|
||||
out_file.write("%04X..%04X%s\n" % (first, last, first_data))
|
||||
first = -1
|
||||
last = -1
|
||||
first_data = ""
|
||||
if c < 0:
|
||||
# no data on this line, output as is
|
||||
out_file.write(line)
|
||||
out_file.write("\n")
|
||||
else:
|
||||
# data on this line, store for possible range compaction
|
||||
if last < 0:
|
||||
# set as the first line in a possible range
|
||||
first = c
|
||||
last = c
|
||||
first_data = data
|
||||
else:
|
||||
# must be c == (last + 1) and data == first_data
|
||||
# because of previous conditions
|
||||
# continue with the current range
|
||||
last = c
|
||||
else:
|
||||
# Only strip, don't merge: just output the stripped line.
|
||||
out_file.write("%04X..%04X%s\n" % (first, last, first_data))
|
||||
first = -1
|
||||
last = -1
|
||||
first_data = ""
|
||||
if c < 0:
|
||||
# no data on this line, output as is
|
||||
out_file.write(line)
|
||||
out_file.write("\n")
|
||||
if do_merge and last >= 0:
|
||||
# output the last range in the file
|
||||
if first == last:
|
||||
out_file.write("%04X%s\n" % (first, first_data))
|
||||
else:
|
||||
out_file.write("%04X..%04X%s\n" % (first, last, first_data))
|
||||
first = -1
|
||||
last = -1
|
||||
first_data = ""
|
||||
out_file.flush()
|
||||
# data on this line, store for possible range compaction
|
||||
if last < 0:
|
||||
# set as the first line in a possible range
|
||||
first = c
|
||||
last = c
|
||||
first_data = data
|
||||
else:
|
||||
# must be c == (last + 1) and data == first_data
|
||||
# because of previous conditions
|
||||
# continue with the current range
|
||||
last = c
|
||||
else:
|
||||
# Only strip, don't merge: just output the stripped line.
|
||||
out_file.write(line)
|
||||
out_file.write("\n")
|
||||
if do_merge and last >= 0:
|
||||
# output the last range in the file
|
||||
if first == last:
|
||||
out_file.write("%04X%s\n" % (first, first_data))
|
||||
else:
|
||||
out_file.write("%04X..%04X%s\n" % (first, last, first_data))
|
||||
first = -1
|
||||
last = -1
|
||||
first_data = ""
|
||||
out_file.flush()
|
||||
return t
|
||||
|
||||
|
||||
@ -1571,11 +1569,9 @@ def CopyAndStripAndMerge(s, t):
|
||||
|
||||
|
||||
def PrependBOM(s, t):
|
||||
# TODO: With Python 2.7+, combine the two with statements into one.
|
||||
with open(s, "r") as in_file:
|
||||
with open(t, "w") as out_file:
|
||||
out_file.write("\xef\xbb\xbf") # UTF-8 BOM for ICU svn
|
||||
shutil.copyfileobj(in_file, out_file)
|
||||
with open(s, "r") as in_file, open(t, "w") as out_file:
|
||||
out_file.write("\xef\xbb\xbf") # UTF-8 BOM for ICU svn
|
||||
shutil.copyfileobj(in_file, out_file)
|
||||
return t
|
||||
|
||||
|
||||
@ -1613,6 +1609,7 @@ _files = {
|
||||
"emoji-data.txt": (DontCopy, ParseNamedProperties),
|
||||
"GraphemeBreakProperty.txt": (DontCopy, ParseGraphemeBreakProperty),
|
||||
"GraphemeBreakTest.txt": (PrependBOM, "testdata"),
|
||||
"IdnaTest.txt": (CopyOnly, "testdata"),
|
||||
"IndicPositionalCategory.txt": (DontCopy, ParseIndicPositionalCategory),
|
||||
"IndicSyllabicCategory.txt": (DontCopy, ParseIndicSyllabicCategory),
|
||||
"LineBreak.txt": (DontCopy, ParseLineBreak),
|
||||
|
Loading…
Reference in New Issue
Block a user