ICU-13186 stop prepending UTF-8 BOM to some Unicode files

X-SVN-Rev: 40149
2017-06-02 22:52:19 +00:00 · 2017-06-02 22:52:19 +00:00 · acf2b4cc82
commit acf2b4cc82
parent f3b00dc8ff
6 changed files with 11 additions and 18 deletions
--- a/icu4c/source/data/unidata/changes.txt
+++ b/icu4c/source/data/unidata/changes.txt
@@ -52,10 +52,10 @@ http://www.unicode.org/reports/tr44/tr44-19.html
 * Command-line environment setup

 UNICODE_DATA=~/unidata/uni10/20170503
-CLDR_SRC=~/svn.cldr/uni10
-ICU_ROOT=~/svn.icu/uni10
+CLDR_SRC=~/svn.cldr/trunk
+ICU_ROOT=~/svn.icu/trunk
 ICU_SRC=$ICU_ROOT/src
-ICUDT=icudt59b
+ICUDT=icudt60b
 ICU4C_DATA_IN=$ICU_SRC/icu4c/source/data/in
 ICU4C_UNIDATA=$ICU_SRC/icu4c/source/data/unidata
 export LD_LIBRARY_PATH=$ICU_ROOT/dbg/icu4c/lib
--- a/icu4c/source/test/testdata/GraphemeBreakTest.txt
+++ b/icu4c/source/test/testdata/GraphemeBreakTest.txt
@@ -1,4 +1,4 @@
-# GraphemeBreakTest-10.0.0.txt
+# GraphemeBreakTest-10.0.0.txt
 # Date: 2017-04-14, 05:40:29 GMT
 # © 2017 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
--- a/icu4c/source/test/testdata/LineBreakTest.txt
+++ b/icu4c/source/test/testdata/LineBreakTest.txt
@@ -1,4 +1,4 @@
-# LineBreakTest-10.0.0.txt
+# LineBreakTest-10.0.0.txt
 # Date: 2017-04-14, 05:40:30 GMT
 # © 2017 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
--- a/icu4c/source/test/testdata/SentenceBreakTest.txt
+++ b/icu4c/source/test/testdata/SentenceBreakTest.txt
@@ -1,4 +1,4 @@
-# SentenceBreakTest-10.0.0.txt
+# SentenceBreakTest-10.0.0.txt
 # Date: 2017-04-14, 05:40:43 GMT
 # © 2017 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
--- a/icu4c/source/test/testdata/WordBreakTest.txt
+++ b/icu4c/source/test/testdata/WordBreakTest.txt
@@ -1,4 +1,4 @@
-# WordBreakTest-10.0.0.txt
+# WordBreakTest-10.0.0.txt
 # Date: 2017-04-14, 05:40:44 GMT
 # © 2017 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
--- a/tools/unicode/py/preparseucd.py
+++ b/tools/unicode/py/preparseucd.py
@@ -1568,13 +1568,6 @@ def CopyAndStripAndMerge(s, t):
  return CopyAndStripWithOptionalMerge(s, t, True)


-def PrependBOM(s, t):
-  with open(s, "r") as in_file, open(t, "w") as out_file:
-    out_file.write("\xef\xbb\xbf")  # UTF-8 BOM for ICU svn
-    shutil.copyfileobj(in_file, out_file)
-  return t
-
-
 def CopyOnly(s, t):
  shutil.copy(s, t)
  return t
@@ -1608,12 +1601,12 @@ _files = {
  "EastAsianWidth.txt": (DontCopy, ParseEastAsianWidth),
  "emoji-data.txt": (DontCopy, ParseNamedProperties),
  "GraphemeBreakProperty.txt": (DontCopy, ParseGraphemeBreakProperty),
-  "GraphemeBreakTest.txt": (PrependBOM, "testdata"),
+  "GraphemeBreakTest.txt": (CopyOnly, "testdata"),
  "IdnaTest.txt": (CopyOnly, "testdata"),
  "IndicPositionalCategory.txt": (DontCopy, ParseIndicPositionalCategory),
  "IndicSyllabicCategory.txt": (DontCopy, ParseIndicSyllabicCategory),
  "LineBreak.txt": (DontCopy, ParseLineBreak),
-  "LineBreakTest.txt": (PrependBOM, "testdata"),
+  "LineBreakTest.txt": (CopyOnly, "testdata"),
  "NameAliases.txt": (DontCopy, ParseNameAliases),
  "NamesList.txt": (DontCopy, ParseNamesList),
  "NormalizationCorrections.txt": (CopyOnly,),  # Only used in gensprep.
@@ -1622,14 +1615,14 @@ _files = {
  "PropertyValueAliases.txt": (DontCopy, ParsePropertyValueAliases, 1),
  "PropList.txt": (DontCopy, ParseNamedProperties),
  "SentenceBreakProperty.txt": (DontCopy, ParseSentenceBreak),
-  "SentenceBreakTest.txt": (PrependBOM, "testdata"),
+  "SentenceBreakTest.txt": (CopyOnly, "testdata"),
  "Scripts.txt": (DontCopy, ParseScripts),
  "ScriptExtensions.txt": (DontCopy, ParseScriptExtensions),
  "SpecialCasing.txt": (CopyOnly, ParseSpecialCasing),
  "UnicodeData.txt": (CopyOnly, ParseUnicodeData, 2),
  "VerticalOrientation.txt": (DontCopy, ParseVerticalOrientation),
  "WordBreakProperty.txt": (DontCopy, ParseWordBreak),
-  "WordBreakTest.txt": (PrependBOM, "testdata"),
+  "WordBreakTest.txt": (CopyOnly, "testdata"),
  # From www.unicode.org/Public/idna/<version>/
  "IdnaMappingTable.txt": (IdnaToUTS46TextFile, "norm2")
 }