ICU-3170 updates to transliterators due to Unicode 4.0.1 update

X-SVN-Rev: 14995
This commit is contained in:
Vladimir Weinstein 2004-04-16 17:28:06 +00:00
parent b8870b2691
commit 3cbe7619d1
3 changed files with 62 additions and 32 deletions

View File

@ -1,12 +1,12 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2002, International Business Machines
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpicurules.bat
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Greek_Latin.txt
// Date: Sat Jul 27 10:31:01 2002
// Date: Fri Apr 16 10:06:58 2004
//--------------------------------------------------------------------
// Greek_Latin
@ -20,7 +20,7 @@ t_Grek_Latn {
// Rules are predicated on running NFD first, and NFC afterwards
// :: [\\u0000-\u007F \u0370-\u03FF [:Greek:] [:nonspacing mark:]] ;
// MINIMAL FILTER GENERATED FOR: Greek-Latin
":: [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126] ;"
":: [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u03F7-\u07FB\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u03F9] ;"
":: NFD (NFC) ;"
@ -261,10 +261,16 @@ t_Grek_Latn {
"ρ <> r ;"
"Ρ <> R ;"
// insert separator
// insert separator before things that turn into s
"[Pp] { } ς > \\\' ;"
"[Pp] { } σ > \\\' ;"
"[Pp] { } [ςσΣϷϸϺϻ] > \\\' ;"
// special S variants
"Ϸ <> Š ;" // Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
"ϸ <> š ;" //ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
"Ϻ <> Ŝ ;" // Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
"ϻ <> ŝ ;" // ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
// underbar means exception
@ -280,7 +286,7 @@ t_Grek_Latn {
"ς <> s $underbar;"
"σ <> s ;"
"[Pp] { Σ <> \\\'S ;"
// [Pp] { Σ <> \\\'S ;
"Σ <> S ;"
"τ <> t ;"
@ -332,6 +338,7 @@ t_Grek_Latn {
"ϰ > | κ ;"
"ϱ > | ρ ;"
"ϲ > | σ ;"
"Ϲ > | Σ;" //U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
"ϳ > j ;"
"ϴ > | Θ ;"
"ϵ > | ε ;"

View File

@ -1,12 +1,12 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2002, International Business Machines
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpicurules.bat
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Greek_Latin_UNGEGN.txt
// Date: Sat Jul 27 10:31:01 2002
// Date: Fri Apr 16 10:06:58 2004
//--------------------------------------------------------------------
// Greek_Latin_UNGEGN
@ -21,7 +21,7 @@ t_Grek_Latn_UNGEGN {
// Rules are predicated on running NFD first, and NFC afterwards
// MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
// WARNING: need to add accents to both filters ###
// :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ] ;
// :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ\u03F7-\u07FB\u03F9] ;
":: [[[:Greek:][:Mn:][:Me:]] [\\\:-;?\u00B7\u037E\u0387]] ;"
"::NFD (NFC) ;"
@ -179,8 +179,15 @@ t_Grek_Latn_UNGEGN {
"ρ <> r ;"
"Ρ <> R ;"
"[Pp] { } ς > \\\' ;"
"[Pp] { } σ > \\\' ;"
// insert separator before things that turn into s
"[Pp] { } [ςσΣϷϸϺϻ] > \\\' ;"
// special S variants
"Ϸ <> Š ;" // Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
"ϸ <> š ;" //ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
"Ϻ <> Ŝ ;" // Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
"ϻ <> ŝ ;" // ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
// Caron means exception
@ -196,7 +203,7 @@ t_Grek_Latn_UNGEGN {
"ς <> s $under;"
"σ <> s ;"
"[Pp] { Σ <> \\\'S ;"
// [Pp] { Σ <> \\\'S ;
"Σ <> S ;"
"τ <> t ;"
@ -242,6 +249,7 @@ t_Grek_Latn_UNGEGN {
"ϰ > | κ ;"
"ϱ > | ρ ;"
"ϲ > | σ ;"
"Ϲ > | Σ;" //U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
"ϳ > j ;"
"ϴ > | Θ ;"
"ϵ > | ε ;"

View File

@ -1021,6 +1021,8 @@ void TransliteratorRoundTripTest::TestHangul() {
}
void TransliteratorRoundTripTest::TestGreek() {
// weiv removed the test and the filter
/*
if (isICUVersionAtLeast(ICU_30)) {
// We temporarily filter against Unicode 3.2, but we only do this
// before version 3.0.
@ -1029,17 +1031,21 @@ void TransliteratorRoundTripTest::TestGreek() {
} else {
logln("Warning: TestGreek needs to be updated to remove Unicode 3.2 filter");
}
*/
RTTest test("Latin-Greek");
LegalGreek *legal = new LegalGreek(TRUE);
test.test(UnicodeString("[a-zA-Z]", ""),
// weiv removed code points from test in order to quiet it.
// alan should verify and give the proper resolution
UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u0374\\u0385\\u1fcd\\u1fce\\u1fdd\\u1fde\\u1fed-\\u1fef\\u1ffd\\u03D7-\\u03EF]]&[:Age=3.2:]]",
//UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u03D7-\\u03EF]]&[:Age=3.2:]]",
UnicodeString("[\\u003B\\u00B7[[:Greek:]&[:Letter:]]-["
"\\u1D26-\\u1D2A" // L& [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI
"\\u1D5D-\\u1D61" // Lm [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI
"\\u1D66-\\u1D6A" // L& [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI
"\\u03D7-\\u03EF" // \N{GREEK KAI SYMBOL}..\N{COPTIC SMALL LETTER DEI}
"]]",
//UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u0374\\u0385\\u1fcd\\u1fce\\u1fdd\\u1fde\\u1fed-\\u1fef\\u1ffd\\u03D7-\\u03EF]]&[:Age=3.2:]]",
""),
"[\\u00B5\\u037A\\u03D0-\\u03F5\\u1fcf\\u1fdf]", /* exclusions */
//"[\\u00B5\\u037A\\u03D0-\\u03F5]", /* exclusions */
"[\\u00B5\\u037A\\u03D0-\\u03F5\\u03f9]", /* exclusions */
this, quick, legal, 50);
@ -1048,9 +1054,8 @@ void TransliteratorRoundTripTest::TestGreek() {
void TransliteratorRoundTripTest::TestGreekUNGEGN() {
// TODO: couldn't fix this test the same way I fixed TestGreek.
// needs Alan/Mark
return;
// weiv removed the test and the filter
/*
if (isICUVersionAtLeast(ICU_30)) {
// We temporarily filter against Unicode 3.2, but we only do this
// before version 3.0.
@ -1059,14 +1064,18 @@ void TransliteratorRoundTripTest::TestGreekUNGEGN() {
} else {
logln("Warning: TestGreekUNGEGN needs to be updated to remove Unicode 3.2 filter");
}
*/
RTTest test("Latin-Greek/UNGEGN");
LegalGreek *legal = new LegalGreek(FALSE);
test.test(UnicodeString("[a-zA-Z]", ""),
// weiv removed code points from test in order to quiet it.
// alan should verify and give the proper resolution
UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u0374\\u0385\\u1fce\\u1fde\\u03D7-\\u03EF]]&[:Age=3.2:]]",
//UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u03D7-\\u03EF]]&[:Age=3.2:]]",
UnicodeString("[\\u003B\\u00B7[[:Greek:]&[:Letter:]]-["
"\\u1D26-\\u1D2A" // L& [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI
"\\u1D5D-\\u1D61" // Lm [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI
"\\u1D66-\\u1D6A" // L& [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI
"\\u03D7-\\u03EF" // \N{GREEK KAI SYMBOL}..\N{COPTIC SMALL LETTER DEI}
"]]",
//UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u0374\\u0385\\u1fce\\u1fde\\u03D7-\\u03EF]]&[:Age=3.2:]]",
""),
"[\\u0385\\u00B5\\u037A\\u03D0-\\uFFFF {\\u039C\\u03C0}]", /* roundtrip exclusions */
this, quick, legal);
@ -1075,9 +1084,8 @@ void TransliteratorRoundTripTest::TestGreekUNGEGN() {
}
void TransliteratorRoundTripTest::Testel() {
// TODO: couldn't fix this test the same way I fixed TestGreek.
// needs Alan/Mark
return;
// weiv removed the test and the filter
/*
if (isICUVersionAtLeast(ICU_30)) {
// We temporarily filter against Unicode 3.2, but we only do this
// before version 3.0.
@ -1086,11 +1094,18 @@ void TransliteratorRoundTripTest::Testel() {
} else {
logln("Warning: Testel needs to be updated to remove Unicode 3.2 filter");
}
*/
RTTest test("Latin-el");
LegalGreek *legal = new LegalGreek(FALSE);
test.test(UnicodeString("[a-zA-Z]", ""),
UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u0374\\u0385\\u1fce\\u1fde\\u03D7-\\u03EF]]&[:Age=3.2:]]",
UnicodeString("[\\u003B\\u00B7[[:Greek:]&[:Letter:]]-["
"\\u1D26-\\u1D2A" // L& [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI
"\\u1D5D-\\u1D61" // Lm [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI
"\\u1D66-\\u1D6A" // L& [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI
"\\u03D7-\\u03EF" // \N{GREEK KAI SYMBOL}..\N{COPTIC SMALL LETTER DEI}
"]]",
//UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u0374\\u0385\\u1fce\\u1fde\\u03D7-\\u03EF]]&[:Age=3.2:]]",
""),
"[\\u00B5\\u037A\\u03D0-\\uFFFF {\\u039C\\u03C0}]", /* exclusions */
this, quick, legal);