ICU-1426 add el-Latin and Latin-el rules to locale resource

X-SVN-Rev: 6599
This commit is contained in:
Alan Liu 2001-11-03 02:04:37 +00:00
parent 2820286119
commit 8886275ce8
6 changed files with 473 additions and 0 deletions

View File

@ -164,4 +164,220 @@ el {
"1,000,000,000: << \u03b4\u03b9\u03c3\u03b5\u03ba\u03b1\u03c4\u03bf\u03bc\u03bc\u03b9\u03cc\u03c1\u03b9\u03bf[ >>];\n"
"1,000,000,000,000: =#,##0="
}
TransliterateLATIN {
"UNGEGN",
"# Rules are predicated on running NFD first, and NFC afterwards\n"
"::NFD (NFC) ; "
"# For modern Greek.\n"
"# Useful variables\n"
"$lower = [:Ll:] ; "
"$upper = [:Lu:] ; "
"$accent = [:M:] ; "
"$macron = \u0304 ;"
"$ddot = \u0308 ;"
"$lcgvowel = [\u03b1\u03b5\u03b7\u03b9\u03bf\u03c5\u03c9] ; "
"$ucgvowel = [\u0391\u0395\u0397\u0399\u039f\u03a5\u03a9] ; "
"$gvowel = [$lcgvowel $ucgvowel] ; "
"$lcgvowelC = [$lcgvowel $accent] ; "
"$evowel = [aeiouyAEIOUY];"
"$vowel = [ $evowel $gvowel] ; "
"$beforeLower = $accent * $lower ; "
"$gammaLike = [\u0393\u039a\u039e\u03a7\u03b3\u03ba\u03be\u03c7\u03f0] ; "
"$egammaLike = [GKXCgkxc] ; "
"$smooth = \u0313 ; "
"$rough = \u0314 ; "
"$iotasub = \u0345 ; "
"$softener = [\u03b2\u0392\u03b3\u0393\u03b4\u0394\u03b6\u0396\u03bb\u039b\u03bc\u039c\u03bd\u039d\u03c1\u03a1$gvowel] ;"
"$under = \u0331;"
"$caron = \u030C;"
"$afterLetter = [:^L:] [\'[:M:]]* ;"
"$beforeLetter = [\'[:M:]]* [:^L:] ;"
"# Fix punctuation\n"
"\; <> \? ;"
"\u00b7 <> \: ;"
"# Fix any ancient characters that creep in\n"
"\u0342 > \u0301 ;"
"\u0302 > \u0301 ;"
"\u0300 > \u0301 ;"
"$smooth > ;"
"$rough > ;"
"$iotasub > ;"
"\u037A > ;"
"# need to have these up here so the rules don't mask\n"
"\u03b7 <> i $under ;"
"\u0397 <> I $under ;"
"\u03a8 } $beforeLower <> Ps ; "
"\u03a8 <> PS ; "
"\u03c8 <> ps ; "
"\u03c9 <> o $under ;"
"\u03a9 <> O $under;"
"# at begining or end of word, convert mp to b\n"
"[^[:L:][:M:]] } \u03bc\u03c0 > b ; "
"\u03bc\u03c0 } [^[:L:][:M:]] > b ; "
"[^[:L:][:M:]] } [\u039c\u03bc][\u03a0\u03c0] > B ; "
"[\u039c\u03bc][\u03a0\u03c0] } [^[:L:][:M:]] > B ;"
"\u03bc\u03c0 < b ; "
"\u039c\u03c0 < B { $beforeLower ; "
"\u039c\u03a0 < B ; "
"# handle diphthongs ending with upsilon\n"
"$vowel { \u03c5 } $softener <> v $under ; "
"$vowel { \u03c5 } <> f $under; "
"\u03c5 <> y ; "
"$vowel { \u03a5 } $softener <> V $under ; "
"$vowel { \u03a5 <> U $under ; "
"\u03a5 <> Y ; "
"# NORMAL\n"
"\u03b1 <> a ; "
"\u0391 <> A ; "
"\u03b2 <> v ; "
"\u0392 <> V ; "
"\u03b3 } $gammaLike <> n } $egammaLike ; "
"\u03b3 <> g ; "
"\u0393 } $gammaLike <> N } $egammaLike ; "
"\u0393 <> G ; "
"\u03b4 <> d ; "
"\u0394 <> D ; "
"\u03b5 <> e ; "
"\u0395 <> E ; "
"\u03b6 <> z ; "
"\u0396 <> Z ; "
"\u03b8 <> th ; "
"\u0398 } $beforeLower <> Th ; "
"\u0398 <> TH ; "
"\u03b9 <> i ; "
"\u0399 <> I ; "
"\u03ba <> k ;"
"\u039a <> K ; "
"\u03bb <> l ; "
"\u039b <> L ; "
"\u03bc <> m ; "
"\u039c <> M ; "
"\u03bd } $gammaLike > n\' ; "
"\u03bd <> n ; "
"\u039d } $gammaLike <> N\' ; "
"\u039d <> N ; "
"\u03be <> x ; "
"\u039e <> X ; "
"\u03bf <> o ; "
"\u039f <> O ; "
"\u03c0 <> p ; "
"\u03a0 <> P ; "
"\u03c1 <> r ; "
"\u03a1 <> R ; "
"[Pp] { } \u03c2 > \' ; "
"[Pp] { } \u03c3 > \' ;"
"# Caron means exception\n"
"# before a letter, initial\n"
"\u03c2 } $beforeLetter <> s $caron } $beforeLetter;"
"\u03c3 } $beforeLetter <> s } $beforeLetter;"
"# otherwise, after a letter = final\n"
"$afterLetter { \u03c3 <> $afterLetter { s $caron;"
"$afterLetter { \u03c2 <> $afterLetter { s ;"
"# otherwise (isolated) = initial\n"
"\u03c2 <> s $caron;"
"\u03c3 <> s ;"
"[Pp] { \u03a3 <> \'S ; "
"\u03a3 <> S ; "
"\u03c4 <> t ; "
"\u03a4 <> T ; "
"\u03c6 <> f ; "
"\u03a6 <> F ;"
"\u03c7 <> ch ; "
"\u03a7 } $beforeLower <> Ch ; "
"\u03a7 <> CH ; "
"# Completeness for ASCII\n"
"$ignore = [[:Mark:]''] * ;"
"| ch < h ;"
"| k < c ;"
"| i < j ;"
"| k < q ;"
"| y < u ;"
"| y < w ;"
"| Ch < H ;"
"| K < C ;"
"| I < J ;"
"| K < Q ;"
"| Y < W ;"
"| Y < U ;"
"# Completeness for Greek\n"
"\u03d0 > | \u03b2 ;"
"\u03d1 > | \u03b8 ;"
"\u03d2 > | \u03a5 ;"
"\u03d5 > | \u03c6 ;"
"\u03d6 > | \u03c0 ;"
"\u03f0 > | \u03ba ;"
"\u03f1 > | \u03c1 ;"
"\u03f2 > | \u03c3 ;"
"\u03f3 > j ;"
"\u03f4 > | \u0398 ;"
"\u03f5 > | \u03b5 ;"
"# delete any trailing ' marks used for roundtripping\n"
" < [\u03a0\u03c0] { \' } [Ss] ;"
" < [\u039d\u03bd] { \' } $egammaLike ;"
"::NFC (NFD) ; "
}
}

View File

@ -70,6 +70,9 @@ translit_index {
{ "Latin-Greek", "file", "translit_Greek_Latin", "REVERSE" },
{ "Greek-Latin", "file", "translit_Greek_Latin", "FORWARD" },
{ "Latin-Greek/UNGEGN", "alias", "Latin-el/UNGEGN", "" },
{ "Greek-Latin/UNGEGN", "alias", "el-Latin/UNGEGN", "" },
{ "LowerLatin-Jamo", "internal", "translit_Latin_Jamo", "FORWARD" },
{ "Latin-Jamo", "alias", "Any-Lower;LowerLatin-Jamo", "" },
{ "Jamo-Latin", "file", "translit_Latin_Jamo", "REVERSE" },

View File

@ -164,4 +164,220 @@ el {
"1,000,000,000: << \u03b4\u03b9\u03c3\u03b5\u03ba\u03b1\u03c4\u03bf\u03bc\u03bc\u03b9\u03cc\u03c1\u03b9\u03bf[ >>];\n"
"1,000,000,000,000: =#,##0="
}
TransliterateLATIN {
"UNGEGN",
"# Rules are predicated on running NFD first, and NFC afterwards\n"
"::NFD (NFC) ; "
"# For modern Greek.\n"
"# Useful variables\n"
"$lower = [:Ll:] ; "
"$upper = [:Lu:] ; "
"$accent = [:M:] ; "
"$macron = \u0304 ;"
"$ddot = \u0308 ;"
"$lcgvowel = [\u03b1\u03b5\u03b7\u03b9\u03bf\u03c5\u03c9] ; "
"$ucgvowel = [\u0391\u0395\u0397\u0399\u039f\u03a5\u03a9] ; "
"$gvowel = [$lcgvowel $ucgvowel] ; "
"$lcgvowelC = [$lcgvowel $accent] ; "
"$evowel = [aeiouyAEIOUY];"
"$vowel = [ $evowel $gvowel] ; "
"$beforeLower = $accent * $lower ; "
"$gammaLike = [\u0393\u039a\u039e\u03a7\u03b3\u03ba\u03be\u03c7\u03f0] ; "
"$egammaLike = [GKXCgkxc] ; "
"$smooth = \u0313 ; "
"$rough = \u0314 ; "
"$iotasub = \u0345 ; "
"$softener = [\u03b2\u0392\u03b3\u0393\u03b4\u0394\u03b6\u0396\u03bb\u039b\u03bc\u039c\u03bd\u039d\u03c1\u03a1$gvowel] ;"
"$under = \u0331;"
"$caron = \u030C;"
"$afterLetter = [:^L:] [\'[:M:]]* ;"
"$beforeLetter = [\'[:M:]]* [:^L:] ;"
"# Fix punctuation\n"
"\; <> \? ;"
"\u00b7 <> \: ;"
"# Fix any ancient characters that creep in\n"
"\u0342 > \u0301 ;"
"\u0302 > \u0301 ;"
"\u0300 > \u0301 ;"
"$smooth > ;"
"$rough > ;"
"$iotasub > ;"
"\u037A > ;"
"# need to have these up here so the rules don't mask\n"
"\u03b7 <> i $under ;"
"\u0397 <> I $under ;"
"\u03a8 } $beforeLower <> Ps ; "
"\u03a8 <> PS ; "
"\u03c8 <> ps ; "
"\u03c9 <> o $under ;"
"\u03a9 <> O $under;"
"# at begining or end of word, convert mp to b\n"
"[^[:L:][:M:]] } \u03bc\u03c0 > b ; "
"\u03bc\u03c0 } [^[:L:][:M:]] > b ; "
"[^[:L:][:M:]] } [\u039c\u03bc][\u03a0\u03c0] > B ; "
"[\u039c\u03bc][\u03a0\u03c0] } [^[:L:][:M:]] > B ;"
"\u03bc\u03c0 < b ; "
"\u039c\u03c0 < B { $beforeLower ; "
"\u039c\u03a0 < B ; "
"# handle diphthongs ending with upsilon\n"
"$vowel { \u03c5 } $softener <> v $under ; "
"$vowel { \u03c5 } <> f $under; "
"\u03c5 <> y ; "
"$vowel { \u03a5 } $softener <> V $under ; "
"$vowel { \u03a5 <> U $under ; "
"\u03a5 <> Y ; "
"# NORMAL\n"
"\u03b1 <> a ; "
"\u0391 <> A ; "
"\u03b2 <> v ; "
"\u0392 <> V ; "
"\u03b3 } $gammaLike <> n } $egammaLike ; "
"\u03b3 <> g ; "
"\u0393 } $gammaLike <> N } $egammaLike ; "
"\u0393 <> G ; "
"\u03b4 <> d ; "
"\u0394 <> D ; "
"\u03b5 <> e ; "
"\u0395 <> E ; "
"\u03b6 <> z ; "
"\u0396 <> Z ; "
"\u03b8 <> th ; "
"\u0398 } $beforeLower <> Th ; "
"\u0398 <> TH ; "
"\u03b9 <> i ; "
"\u0399 <> I ; "
"\u03ba <> k ;"
"\u039a <> K ; "
"\u03bb <> l ; "
"\u039b <> L ; "
"\u03bc <> m ; "
"\u039c <> M ; "
"\u03bd } $gammaLike > n\' ; "
"\u03bd <> n ; "
"\u039d } $gammaLike <> N\' ; "
"\u039d <> N ; "
"\u03be <> x ; "
"\u039e <> X ; "
"\u03bf <> o ; "
"\u039f <> O ; "
"\u03c0 <> p ; "
"\u03a0 <> P ; "
"\u03c1 <> r ; "
"\u03a1 <> R ; "
"[Pp] { } \u03c2 > \' ; "
"[Pp] { } \u03c3 > \' ;"
"# Caron means exception\n"
"# before a letter, initial\n"
"\u03c2 } $beforeLetter <> s $caron } $beforeLetter;"
"\u03c3 } $beforeLetter <> s } $beforeLetter;"
"# otherwise, after a letter = final\n"
"$afterLetter { \u03c3 <> $afterLetter { s $caron;"
"$afterLetter { \u03c2 <> $afterLetter { s ;"
"# otherwise (isolated) = initial\n"
"\u03c2 <> s $caron;"
"\u03c3 <> s ;"
"[Pp] { \u03a3 <> \'S ; "
"\u03a3 <> S ; "
"\u03c4 <> t ; "
"\u03a4 <> T ; "
"\u03c6 <> f ; "
"\u03a6 <> F ;"
"\u03c7 <> ch ; "
"\u03a7 } $beforeLower <> Ch ; "
"\u03a7 <> CH ; "
"# Completeness for ASCII\n"
"$ignore = [[:Mark:]''] * ;"
"| ch < h ;"
"| k < c ;"
"| i < j ;"
"| k < q ;"
"| y < u ;"
"| y < w ;"
"| Ch < H ;"
"| K < C ;"
"| I < J ;"
"| K < Q ;"
"| Y < W ;"
"| Y < U ;"
"# Completeness for Greek\n"
"\u03d0 > | \u03b2 ;"
"\u03d1 > | \u03b8 ;"
"\u03d2 > | \u03a5 ;"
"\u03d5 > | \u03c6 ;"
"\u03d6 > | \u03c0 ;"
"\u03f0 > | \u03ba ;"
"\u03f1 > | \u03c1 ;"
"\u03f2 > | \u03c3 ;"
"\u03f3 > j ;"
"\u03f4 > | \u0398 ;"
"\u03f5 > | \u03b5 ;"
"# delete any trailing ' marks used for roundtripping\n"
" < [\u03a0\u03c0] { \' } [Ss] ;"
" < [\u039d\u03bd] { \' } $egammaLike ;"
"::NFC (NFD) ; "
}
}

View File

@ -70,6 +70,9 @@ translit_index {
{ "Latin-Greek", "file", "translit_Greek_Latin", "REVERSE" },
{ "Greek-Latin", "file", "translit_Greek_Latin", "FORWARD" },
{ "Latin-Greek/UNGEGN", "alias", "Latin-el/UNGEGN", "" },
{ "Greek-Latin/UNGEGN", "alias", "el-Latin/UNGEGN", "" },
{ "LowerLatin-Jamo", "internal", "translit_Latin_Jamo", "FORWARD" },
{ "Latin-Jamo", "alias", "Any-Lower;LowerLatin-Jamo", "" },
{ "Jamo-Latin", "file", "translit_Latin_Jamo", "REVERSE" },

View File

@ -133,6 +133,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
TESTCASE(51,TestSanskritLatinRT);
TESTCASE(52,TestLocaleInstantiation);
TESTCASE(53,TestTitleAccents);
TESTCASE(54,TestLocaleResource);
default: name = ""; break;
}
}
@ -2570,6 +2571,35 @@ void TransliteratorTest::TestTitleAccents(void) {
delete t;
}
/**
* Basic test of a locale resource based rule.
*/
void TransliteratorTest::TestLocaleResource() {
const char* DATA[] = {
// id from to
//"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
"Latin-el", "b", "\\u03bc\\u03c0",
"Latin-Greek", "b", "\\u03B2",
"Greek-Latin/UNGEGN", "\\u03bc\\u03c0", "b",
"el-Latin", "\\u03bc\\u03c0", "b",
"Greek-Latin", "\\u03B2", "b",
};
const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
for (int32_t i=0; i<DATA_length; i+=3) {
UParseError pe;
UErrorCode ec = U_ZERO_ERROR;
Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
if (U_FAILURE(ec)) {
errln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ")");
delete t;
continue;
}
expect(*t, CharsToUnicodeString(DATA[i+1]),
CharsToUnicodeString(DATA[i+2]));
delete t;
}
}
//======================================================================
// icu4c ONLY
// These tests are not mirrored (yet) in icu4j at

View File

@ -251,6 +251,11 @@ class TransliteratorTest : public IntlTest {
*/
void TestTitleAccents(void);
/**
* Basic test of a locale resource based rule.
*/
void TestLocaleResource(void);
//======================================================================
// Support methods
//======================================================================