ICU-13186 stop prepending UTF-8 BOM to some Unicode files

X-SVN-Rev: 40149
This commit is contained in:
Markus Scherer 2017-06-02 22:52:19 +00:00
parent f3b00dc8ff
commit acf2b4cc82
6 changed files with 11 additions and 18 deletions

View File

@@ -52,10 +52,10 @@ http://www.unicode.org/reports/tr44/tr44-19.html
* Command-line environment setup
UNICODE_DATA=~/unidata/uni10/20170503
CLDR_SRC=~/svn.cldr/uni10
ICU_ROOT=~/svn.icu/uni10
CLDR_SRC=~/svn.cldr/trunk
ICU_ROOT=~/svn.icu/trunk
ICU_SRC=$ICU_ROOT/src
ICUDT=icudt59b
ICUDT=icudt60b
ICU4C_DATA_IN=$ICU_SRC/icu4c/source/data/in
ICU4C_UNIDATA=$ICU_SRC/icu4c/source/data/unidata
export LD_LIBRARY_PATH=$ICU_ROOT/dbg/icu4c/lib

View File

@@ -1,4 +1,4 @@
# GraphemeBreakTest-10.0.0.txt
# GraphemeBreakTest-10.0.0.txt
# Date: 2017-04-14, 05:40:29 GMT
# © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.

View File

@@ -1,4 +1,4 @@
# LineBreakTest-10.0.0.txt
# LineBreakTest-10.0.0.txt
# Date: 2017-04-14, 05:40:30 GMT
# © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.

View File

@@ -1,4 +1,4 @@
# SentenceBreakTest-10.0.0.txt
# SentenceBreakTest-10.0.0.txt
# Date: 2017-04-14, 05:40:43 GMT
# © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.

View File

@@ -1,4 +1,4 @@
# WordBreakTest-10.0.0.txt
# WordBreakTest-10.0.0.txt
# Date: 2017-04-14, 05:40:44 GMT
# © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.

View File

@@ -1568,13 +1568,6 @@ def CopyAndStripAndMerge(s, t):
return CopyAndStripWithOptionalMerge(s, t, True)
def PrependBOM(s, t):
with open(s, "r") as in_file, open(t, "w") as out_file:
out_file.write("\xef\xbb\xbf") # UTF-8 BOM for ICU svn
shutil.copyfileobj(in_file, out_file)
return t
def CopyOnly(s, t):
shutil.copy(s, t)
return t
@@ -1608,12 +1601,12 @@ _files = {
"EastAsianWidth.txt": (DontCopy, ParseEastAsianWidth),
"emoji-data.txt": (DontCopy, ParseNamedProperties),
"GraphemeBreakProperty.txt": (DontCopy, ParseGraphemeBreakProperty),
"GraphemeBreakTest.txt": (PrependBOM, "testdata"),
"GraphemeBreakTest.txt": (CopyOnly, "testdata"),
"IdnaTest.txt": (CopyOnly, "testdata"),
"IndicPositionalCategory.txt": (DontCopy, ParseIndicPositionalCategory),
"IndicSyllabicCategory.txt": (DontCopy, ParseIndicSyllabicCategory),
"LineBreak.txt": (DontCopy, ParseLineBreak),
"LineBreakTest.txt": (PrependBOM, "testdata"),
"LineBreakTest.txt": (CopyOnly, "testdata"),
"NameAliases.txt": (DontCopy, ParseNameAliases),
"NamesList.txt": (DontCopy, ParseNamesList),
"NormalizationCorrections.txt": (CopyOnly,), # Only used in gensprep.
@@ -1622,14 +1615,14 @@ _files = {
"PropertyValueAliases.txt": (DontCopy, ParsePropertyValueAliases, 1),
"PropList.txt": (DontCopy, ParseNamedProperties),
"SentenceBreakProperty.txt": (DontCopy, ParseSentenceBreak),
"SentenceBreakTest.txt": (PrependBOM, "testdata"),
"SentenceBreakTest.txt": (CopyOnly, "testdata"),
"Scripts.txt": (DontCopy, ParseScripts),
"ScriptExtensions.txt": (DontCopy, ParseScriptExtensions),
"SpecialCasing.txt": (CopyOnly, ParseSpecialCasing),
"UnicodeData.txt": (CopyOnly, ParseUnicodeData, 2),
"VerticalOrientation.txt": (DontCopy, ParseVerticalOrientation),
"WordBreakProperty.txt": (DontCopy, ParseWordBreak),
"WordBreakTest.txt": (PrependBOM, "testdata"),
"WordBreakTest.txt": (CopyOnly, "testdata"),
# From www.unicode.org/Public/idna/<version>/
"IdnaMappingTable.txt": (IdnaToUTS46TextFile, "norm2")
}