ICU-13186 stop prepending UTF-8 BOM to some Unicode files

X-SVN-Rev: 40149
This commit is contained in:
Markus Scherer 2017-06-02 22:52:19 +00:00
parent f3b00dc8ff
commit acf2b4cc82
6 changed files with 11 additions and 18 deletions

View File

@@ -52,10 +52,10 @@ http://www.unicode.org/reports/tr44/tr44-19.html
* Command-line environment setup * Command-line environment setup
UNICODE_DATA=~/unidata/uni10/20170503 UNICODE_DATA=~/unidata/uni10/20170503
CLDR_SRC=~/svn.cldr/uni10 CLDR_SRC=~/svn.cldr/trunk
ICU_ROOT=~/svn.icu/uni10 ICU_ROOT=~/svn.icu/trunk
ICU_SRC=$ICU_ROOT/src ICU_SRC=$ICU_ROOT/src
ICUDT=icudt59b ICUDT=icudt60b
ICU4C_DATA_IN=$ICU_SRC/icu4c/source/data/in ICU4C_DATA_IN=$ICU_SRC/icu4c/source/data/in
ICU4C_UNIDATA=$ICU_SRC/icu4c/source/data/unidata ICU4C_UNIDATA=$ICU_SRC/icu4c/source/data/unidata
export LD_LIBRARY_PATH=$ICU_ROOT/dbg/icu4c/lib export LD_LIBRARY_PATH=$ICU_ROOT/dbg/icu4c/lib

View File

@@ -1,4 +1,4 @@
# GraphemeBreakTest-10.0.0.txt # GraphemeBreakTest-10.0.0.txt
# Date: 2017-04-14, 05:40:29 GMT # Date: 2017-04-14, 05:40:29 GMT
# © 2017 Unicode®, Inc. # © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.

View File

@@ -1,4 +1,4 @@
# LineBreakTest-10.0.0.txt # LineBreakTest-10.0.0.txt
# Date: 2017-04-14, 05:40:30 GMT # Date: 2017-04-14, 05:40:30 GMT
# © 2017 Unicode®, Inc. # © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.

View File

@@ -1,4 +1,4 @@
# SentenceBreakTest-10.0.0.txt # SentenceBreakTest-10.0.0.txt
# Date: 2017-04-14, 05:40:43 GMT # Date: 2017-04-14, 05:40:43 GMT
# © 2017 Unicode®, Inc. # © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.

View File

@@ -1,4 +1,4 @@
# WordBreakTest-10.0.0.txt # WordBreakTest-10.0.0.txt
# Date: 2017-04-14, 05:40:44 GMT # Date: 2017-04-14, 05:40:44 GMT
# © 2017 Unicode®, Inc. # © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.

View File

@@ -1568,13 +1568,6 @@ def CopyAndStripAndMerge(s, t):
return CopyAndStripWithOptionalMerge(s, t, True) return CopyAndStripWithOptionalMerge(s, t, True)
def PrependBOM(s, t):
with open(s, "r") as in_file, open(t, "w") as out_file:
out_file.write("\xef\xbb\xbf") # UTF-8 BOM for ICU svn
shutil.copyfileobj(in_file, out_file)
return t
def CopyOnly(s, t): def CopyOnly(s, t):
shutil.copy(s, t) shutil.copy(s, t)
return t return t
@@ -1608,12 +1601,12 @@ _files = {
"EastAsianWidth.txt": (DontCopy, ParseEastAsianWidth), "EastAsianWidth.txt": (DontCopy, ParseEastAsianWidth),
"emoji-data.txt": (DontCopy, ParseNamedProperties), "emoji-data.txt": (DontCopy, ParseNamedProperties),
"GraphemeBreakProperty.txt": (DontCopy, ParseGraphemeBreakProperty), "GraphemeBreakProperty.txt": (DontCopy, ParseGraphemeBreakProperty),
"GraphemeBreakTest.txt": (PrependBOM, "testdata"), "GraphemeBreakTest.txt": (CopyOnly, "testdata"),
"IdnaTest.txt": (CopyOnly, "testdata"), "IdnaTest.txt": (CopyOnly, "testdata"),
"IndicPositionalCategory.txt": (DontCopy, ParseIndicPositionalCategory), "IndicPositionalCategory.txt": (DontCopy, ParseIndicPositionalCategory),
"IndicSyllabicCategory.txt": (DontCopy, ParseIndicSyllabicCategory), "IndicSyllabicCategory.txt": (DontCopy, ParseIndicSyllabicCategory),
"LineBreak.txt": (DontCopy, ParseLineBreak), "LineBreak.txt": (DontCopy, ParseLineBreak),
"LineBreakTest.txt": (PrependBOM, "testdata"), "LineBreakTest.txt": (CopyOnly, "testdata"),
"NameAliases.txt": (DontCopy, ParseNameAliases), "NameAliases.txt": (DontCopy, ParseNameAliases),
"NamesList.txt": (DontCopy, ParseNamesList), "NamesList.txt": (DontCopy, ParseNamesList),
"NormalizationCorrections.txt": (CopyOnly,), # Only used in gensprep. "NormalizationCorrections.txt": (CopyOnly,), # Only used in gensprep.
@@ -1622,14 +1615,14 @@ _files = {
"PropertyValueAliases.txt": (DontCopy, ParsePropertyValueAliases, 1), "PropertyValueAliases.txt": (DontCopy, ParsePropertyValueAliases, 1),
"PropList.txt": (DontCopy, ParseNamedProperties), "PropList.txt": (DontCopy, ParseNamedProperties),
"SentenceBreakProperty.txt": (DontCopy, ParseSentenceBreak), "SentenceBreakProperty.txt": (DontCopy, ParseSentenceBreak),
"SentenceBreakTest.txt": (PrependBOM, "testdata"), "SentenceBreakTest.txt": (CopyOnly, "testdata"),
"Scripts.txt": (DontCopy, ParseScripts), "Scripts.txt": (DontCopy, ParseScripts),
"ScriptExtensions.txt": (DontCopy, ParseScriptExtensions), "ScriptExtensions.txt": (DontCopy, ParseScriptExtensions),
"SpecialCasing.txt": (CopyOnly, ParseSpecialCasing), "SpecialCasing.txt": (CopyOnly, ParseSpecialCasing),
"UnicodeData.txt": (CopyOnly, ParseUnicodeData, 2), "UnicodeData.txt": (CopyOnly, ParseUnicodeData, 2),
"VerticalOrientation.txt": (DontCopy, ParseVerticalOrientation), "VerticalOrientation.txt": (DontCopy, ParseVerticalOrientation),
"WordBreakProperty.txt": (DontCopy, ParseWordBreak), "WordBreakProperty.txt": (DontCopy, ParseWordBreak),
"WordBreakTest.txt": (PrependBOM, "testdata"), "WordBreakTest.txt": (CopyOnly, "testdata"),
# From www.unicode.org/Public/idna/<version>/ # From www.unicode.org/Public/idna/<version>/
"IdnaMappingTable.txt": (IdnaToUTS46TextFile, "norm2") "IdnaMappingTable.txt": (IdnaToUTS46TextFile, "norm2")
} }