ICU-3170 updates to transliterators due to Unicode 4.0.1 update
X-SVN-Rev: 14995
This commit is contained in:
parent
b8870b2691
commit
3cbe7619d1
@ -1,12 +1,12 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2002, International Business Machines
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpicurules.bat
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Greek_Latin.txt
|
||||
// Date: Sat Jul 27 10:31:01 2002
|
||||
// Date: Fri Apr 16 10:06:58 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Greek_Latin
|
||||
@ -20,7 +20,7 @@ t_Grek_Latn {
|
||||
// Rules are predicated on running NFD first, and NFC afterwards
|
||||
// :: [\\u0000-\u007F \u0370-\u03FF [:Greek:] [:nonspacing mark:]] ;
|
||||
// MINIMAL FILTER GENERATED FOR: Greek-Latin
|
||||
":: [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126] ;"
|
||||
":: [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u03F7-\u07FB\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u03F9] ;"
|
||||
|
||||
":: NFD (NFC) ;"
|
||||
|
||||
@ -261,10 +261,16 @@ t_Grek_Latn {
|
||||
"ρ <> r ;"
|
||||
"Ρ <> R ;"
|
||||
|
||||
// insert separator
|
||||
// insert separator before things that turn into s
|
||||
|
||||
"[Pp] { } ς > \\\' ;"
|
||||
"[Pp] { } σ > \\\' ;"
|
||||
"[Pp] { } [ςσΣϷϸϺϻ] > \\\' ;"
|
||||
|
||||
// special S variants
|
||||
|
||||
"Ϸ <> Š ;" // Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
|
||||
"ϸ <> š ;" //ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
|
||||
"Ϻ <> Ŝ ;" // Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
|
||||
"ϻ <> ŝ ;" // ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
|
||||
|
||||
// underbar means exception
|
||||
|
||||
@ -280,7 +286,7 @@ t_Grek_Latn {
|
||||
"ς <> s $underbar;"
|
||||
"σ <> s ;"
|
||||
|
||||
"[Pp] { Σ <> \\\'S ;"
|
||||
// [Pp] { Σ <> \\\'S ;
|
||||
"Σ <> S ;"
|
||||
|
||||
"τ <> t ;"
|
||||
@ -332,6 +338,7 @@ t_Grek_Latn {
|
||||
"ϰ > | κ ;"
|
||||
"ϱ > | ρ ;"
|
||||
"ϲ > | σ ;"
|
||||
"Ϲ > | Σ;" //U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
|
||||
"ϳ > j ;"
|
||||
"ϴ > | Θ ;"
|
||||
"ϵ > | ε ;"
|
||||
|
@ -1,12 +1,12 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2002, International Business Machines
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpicurules.bat
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Greek_Latin_UNGEGN.txt
|
||||
// Date: Sat Jul 27 10:31:01 2002
|
||||
// Date: Fri Apr 16 10:06:58 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Greek_Latin_UNGEGN
|
||||
@ -21,7 +21,7 @@ t_Grek_Latn_UNGEGN {
|
||||
// Rules are predicated on running NFD first, and NFC afterwards
|
||||
// MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
|
||||
// WARNING: need to add accents to both filters ###
|
||||
// :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ] ;
|
||||
// :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ\u03F7-\u07FB\u03F9] ;
|
||||
|
||||
":: [[[:Greek:][:Mn:][:Me:]] [\\\:-;?\u00B7\u037E\u0387]] ;"
|
||||
"::NFD (NFC) ;"
|
||||
@ -179,8 +179,15 @@ t_Grek_Latn_UNGEGN {
|
||||
"ρ <> r ;"
|
||||
"Ρ <> R ;"
|
||||
|
||||
"[Pp] { } ς > \\\' ;"
|
||||
"[Pp] { } σ > \\\' ;"
|
||||
// insert separator before things that turn into s
|
||||
"[Pp] { } [ςσΣϷϸϺϻ] > \\\' ;"
|
||||
|
||||
// special S variants
|
||||
|
||||
"Ϸ <> Š ;" // Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
|
||||
"ϸ <> š ;" //ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
|
||||
"Ϻ <> Ŝ ;" // Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
|
||||
"ϻ <> ŝ ;" // ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
|
||||
|
||||
// Caron means exception
|
||||
|
||||
@ -196,7 +203,7 @@ t_Grek_Latn_UNGEGN {
|
||||
"ς <> s $under;"
|
||||
"σ <> s ;"
|
||||
|
||||
"[Pp] { Σ <> \\\'S ;"
|
||||
// [Pp] { Σ <> \\\'S ;
|
||||
"Σ <> S ;"
|
||||
|
||||
"τ <> t ;"
|
||||
@ -242,6 +249,7 @@ t_Grek_Latn_UNGEGN {
|
||||
"ϰ > | κ ;"
|
||||
"ϱ > | ρ ;"
|
||||
"ϲ > | σ ;"
|
||||
"Ϲ > | Σ;" //U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
|
||||
"ϳ > j ;"
|
||||
"ϴ > | Θ ;"
|
||||
"ϵ > | ε ;"
|
||||
|
@ -1021,6 +1021,8 @@ void TransliteratorRoundTripTest::TestHangul() {
|
||||
}
|
||||
|
||||
void TransliteratorRoundTripTest::TestGreek() {
|
||||
// weiv removed the test and the filter
|
||||
/*
|
||||
if (isICUVersionAtLeast(ICU_30)) {
|
||||
// We temporarily filter against Unicode 3.2, but we only do this
|
||||
// before version 3.0.
|
||||
@ -1029,17 +1031,21 @@ void TransliteratorRoundTripTest::TestGreek() {
|
||||
} else {
|
||||
logln("Warning: TestGreek needs to be updated to remove Unicode 3.2 filter");
|
||||
}
|
||||
*/
|
||||
RTTest test("Latin-Greek");
|
||||
LegalGreek *legal = new LegalGreek(TRUE);
|
||||
|
||||
test.test(UnicodeString("[a-zA-Z]", ""),
|
||||
// weiv removed code points from test in order to quiet it.
|
||||
// alan should verify and give the proper resolution
|
||||
UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u0374\\u0385\\u1fcd\\u1fce\\u1fdd\\u1fde\\u1fed-\\u1fef\\u1ffd\\u03D7-\\u03EF]]&[:Age=3.2:]]",
|
||||
//UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u03D7-\\u03EF]]&[:Age=3.2:]]",
|
||||
UnicodeString("[\\u003B\\u00B7[[:Greek:]&[:Letter:]]-["
|
||||
"\\u1D26-\\u1D2A" // L& [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI
|
||||
"\\u1D5D-\\u1D61" // Lm [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI
|
||||
"\\u1D66-\\u1D6A" // L& [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI
|
||||
"\\u03D7-\\u03EF" // \N{GREEK KAI SYMBOL}..\N{COPTIC SMALL LETTER DEI}
|
||||
"]]",
|
||||
|
||||
//UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u0374\\u0385\\u1fcd\\u1fce\\u1fdd\\u1fde\\u1fed-\\u1fef\\u1ffd\\u03D7-\\u03EF]]&[:Age=3.2:]]",
|
||||
""),
|
||||
"[\\u00B5\\u037A\\u03D0-\\u03F5\\u1fcf\\u1fdf]", /* exclusions */
|
||||
//"[\\u00B5\\u037A\\u03D0-\\u03F5]", /* exclusions */
|
||||
"[\\u00B5\\u037A\\u03D0-\\u03F5\\u03f9]", /* exclusions */
|
||||
this, quick, legal, 50);
|
||||
|
||||
|
||||
@ -1048,9 +1054,8 @@ void TransliteratorRoundTripTest::TestGreek() {
|
||||
|
||||
|
||||
void TransliteratorRoundTripTest::TestGreekUNGEGN() {
|
||||
// TODO: couldn't fix this test the same way I fixed TestGreek.
|
||||
// needs Alan/Mark
|
||||
return;
|
||||
// weiv removed the test and the filter
|
||||
/*
|
||||
if (isICUVersionAtLeast(ICU_30)) {
|
||||
// We temporarily filter against Unicode 3.2, but we only do this
|
||||
// before version 3.0.
|
||||
@ -1059,14 +1064,18 @@ void TransliteratorRoundTripTest::TestGreekUNGEGN() {
|
||||
} else {
|
||||
logln("Warning: TestGreekUNGEGN needs to be updated to remove Unicode 3.2 filter");
|
||||
}
|
||||
*/
|
||||
RTTest test("Latin-Greek/UNGEGN");
|
||||
LegalGreek *legal = new LegalGreek(FALSE);
|
||||
|
||||
test.test(UnicodeString("[a-zA-Z]", ""),
|
||||
// weiv removed code points from test in order to quiet it.
|
||||
// alan should verify and give the proper resolution
|
||||
UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u0374\\u0385\\u1fce\\u1fde\\u03D7-\\u03EF]]&[:Age=3.2:]]",
|
||||
//UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u03D7-\\u03EF]]&[:Age=3.2:]]",
|
||||
UnicodeString("[\\u003B\\u00B7[[:Greek:]&[:Letter:]]-["
|
||||
"\\u1D26-\\u1D2A" // L& [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI
|
||||
"\\u1D5D-\\u1D61" // Lm [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI
|
||||
"\\u1D66-\\u1D6A" // L& [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI
|
||||
"\\u03D7-\\u03EF" // \N{GREEK KAI SYMBOL}..\N{COPTIC SMALL LETTER DEI}
|
||||
"]]",
|
||||
//UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u0374\\u0385\\u1fce\\u1fde\\u03D7-\\u03EF]]&[:Age=3.2:]]",
|
||||
""),
|
||||
"[\\u0385\\u00B5\\u037A\\u03D0-\\uFFFF {\\u039C\\u03C0}]", /* roundtrip exclusions */
|
||||
this, quick, legal);
|
||||
@ -1075,9 +1084,8 @@ void TransliteratorRoundTripTest::TestGreekUNGEGN() {
|
||||
}
|
||||
|
||||
void TransliteratorRoundTripTest::Testel() {
|
||||
// TODO: couldn't fix this test the same way I fixed TestGreek.
|
||||
// needs Alan/Mark
|
||||
return;
|
||||
// weiv removed the test and the filter
|
||||
/*
|
||||
if (isICUVersionAtLeast(ICU_30)) {
|
||||
// We temporarily filter against Unicode 3.2, but we only do this
|
||||
// before version 3.0.
|
||||
@ -1086,11 +1094,18 @@ void TransliteratorRoundTripTest::Testel() {
|
||||
} else {
|
||||
logln("Warning: Testel needs to be updated to remove Unicode 3.2 filter");
|
||||
}
|
||||
*/
|
||||
RTTest test("Latin-el");
|
||||
LegalGreek *legal = new LegalGreek(FALSE);
|
||||
|
||||
test.test(UnicodeString("[a-zA-Z]", ""),
|
||||
UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u0374\\u0385\\u1fce\\u1fde\\u03D7-\\u03EF]]&[:Age=3.2:]]",
|
||||
UnicodeString("[\\u003B\\u00B7[[:Greek:]&[:Letter:]]-["
|
||||
"\\u1D26-\\u1D2A" // L& [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI
|
||||
"\\u1D5D-\\u1D61" // Lm [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI
|
||||
"\\u1D66-\\u1D6A" // L& [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI
|
||||
"\\u03D7-\\u03EF" // \N{GREEK KAI SYMBOL}..\N{COPTIC SMALL LETTER DEI}
|
||||
"]]",
|
||||
//UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u0374\\u0385\\u1fce\\u1fde\\u03D7-\\u03EF]]&[:Age=3.2:]]",
|
||||
""),
|
||||
"[\\u00B5\\u037A\\u03D0-\\uFFFF {\\u039C\\u03C0}]", /* exclusions */
|
||||
this, quick, legal);
|
||||
|
Loading…
Reference in New Issue
Block a user