3cbe7619d1
X-SVN-Rev: 14995
269 lines
5.5 KiB
Plaintext
269 lines
5.5 KiB
Plaintext
// -*- Coding: utf-8; -*-
|
||
//--------------------------------------------------------------------
|
||
// Copyright (c) 1999-2004, International Business Machines
|
||
// Corporation and others. All Rights Reserved.
|
||
//--------------------------------------------------------------------
|
||
// THIS IS A MACHINE-GENERATED FILE
|
||
// Tool: dumpICUrules.bat
|
||
// Source: ../../../impl/data/Transliterator_Greek_Latin_UNGEGN.txt
|
||
// Date: Fri Apr 16 10:06:58 2004
|
||
//--------------------------------------------------------------------
|
||
|
||
// Greek_Latin_UNGEGN
|
||
|
||
t_Grek_Latn_UNGEGN {
|
||
Rule {
|
||
//--------------------------------------------------------------------
|
||
//--------------------------------------------------------------------
|
||
//--------------------------------------------------------------------
|
||
// For modern Greek, based on UNGEGN rules.
|
||
|
||
// Rules are predicated on running NFD first, and NFC afterwards
|
||
// MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
|
||
// WARNING: need to add accents to both filters ###
|
||
// :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ\u03F7-\u07FB\u03F9] ;
|
||
|
||
":: [[[:Greek:][:Mn:][:Me:]] [\\\:-;?\u00B7\u037E\u0387]] ;"
|
||
"::NFD (NFC) ;"
|
||
|
||
// Useful variables
|
||
|
||
"$lower = [[:latin:][:greek:] & [:Ll:]] ;"
|
||
"$upper = [[:latin:][:greek:] & [:Lu:]] ;"
|
||
"$accent = [[:Mn:][:Me:]] ;"
|
||
|
||
"$macron = ̄ ;"
|
||
"$ddot = ̈ ;"
|
||
|
||
"$lcgvowel = [αεηιουω] ;"
|
||
"$ucgvowel = [ΑΕΗΙΟΥΩ] ;"
|
||
"$gvowel = [$lcgvowel $ucgvowel] ;"
|
||
"$lcgvowelC = [$lcgvowel $accent] ;"
|
||
|
||
"$evowel = [aeiouyAEIOUY];"
|
||
"$vowel = [ $evowel $gvowel] ;"
|
||
|
||
"$beforeLower = $accent * $lower ;"
|
||
|
||
"$gammaLike = [ΓΚΞΧγκξχϰ] ;"
|
||
"$egammaLike = [GKXCgkxc] ;"
|
||
"$smooth = ̓ ;"
|
||
"$rough = ̔ ;"
|
||
"$iotasub = ͅ ;"
|
||
|
||
"$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;"
|
||
|
||
"$under = ̱;"
|
||
|
||
"$caron = ̌;"
|
||
|
||
"$afterLetter = [:L:] [\\\'$accent]* ;"
|
||
"$beforeLetter = [\\\'$accent]* [:L:] ;"
|
||
|
||
// Fix punctuation
|
||
|
||
// preserve original
|
||
"\\\: <> \\\: $under ;"
|
||
"\\\? <> \\\? $under ;"
|
||
|
||
"\\\; <> \\\? ;"
|
||
"· <> \\\: ;"
|
||
|
||
// Fix any ancient characters that creep in
|
||
|
||
"͂ > ́ ;"
|
||
"̂ > ́ ;"
|
||
"̀ > ́ ;"
|
||
"$smooth > ;"
|
||
"$rough > ;"
|
||
"$iotasub > ;"
|
||
"ͺ > ;"
|
||
|
||
// these need to come first so that the shorter single-letter rules further down (i, o, p) don't mask them
|
||
|
||
"η <> i $under ;"
|
||
"Η <> I $under ;"
|
||
|
||
"Ψ } $beforeLower <> Ps ;"
|
||
"Ψ <> PS ;"
|
||
"ψ <> ps ;"
|
||
|
||
"ω <> o $under ;"
|
||
"Ω <> O $under;"
|
||
|
||
// at beginning or end of word, convert mp to b
|
||
|
||
"[^[:L:]$accent] { μπ > b ;"
|
||
"μπ } [^[:L:]$accent] > b ;"
|
||
"[^[:L:]$accent] { [Μμ][Ππ] > B ;"
|
||
"[Μμ][Ππ] } [^[:L:]$accent] > B ;"
|
||
|
||
"μπ < b ;"
|
||
"Μπ < B } $beforeLower ;"
|
||
"ΜΠ < B ;"
|
||
|
||
// handle diphthongs ending with upsilon
|
||
|
||
"ου <> ou ;"
|
||
"ΟΥ <> OU ;"
|
||
"Ου <> Ou ;"
|
||
"οΥ <> oU ;"
|
||
|
||
"$fmaker = [aeiAEI] $under ? ;"
|
||
"$shiftForwardVowels = [[:Mn:]-[\u0308]];" // note: a diaeresis keeps the items separate
|
||
|
||
"$fmaker { υ ( $shiftForwardVowels )* } $softener > $1 v $under ;"
|
||
"υ $1 < ( $shiftForwardVowels )* v $under ;"
|
||
|
||
"$fmaker { υ ( $shiftForwardVowels )* } > $1 f $under;"
|
||
"υ $1 < ( $shiftForwardVowels )* f $under ;"
|
||
|
||
"$fmaker { Υ } $softener <> V $under ;"
|
||
"$fmaker { Υ <> U $under ;"
|
||
|
||
"υ <> y ;"
|
||
"Υ <> Y ;"
|
||
|
||
// NORMAL
|
||
|
||
"α <> a ;"
|
||
"Α <> A ;"
|
||
|
||
"β <> v ;"
|
||
"Β <> V ;"
|
||
|
||
"γ } $gammaLike <> n } $egammaLike ;"
|
||
"γ <> g ;"
|
||
"Γ } $gammaLike <> N } $egammaLike ;"
|
||
"Γ <> G ;"
|
||
|
||
"δ <> d ;"
|
||
"Δ <> D ;"
|
||
|
||
"ε <> e ;"
|
||
"Ε <> E ;"
|
||
|
||
"ζ <> z ;"
|
||
"Ζ <> Z ;"
|
||
|
||
"θ <> th ;"
|
||
"Θ } $beforeLower <> Th ;"
|
||
"Θ <> TH ;"
|
||
|
||
"ι <> i ;"
|
||
"Ι <> I ;"
|
||
|
||
"κ <> k ;"
|
||
"Κ <> K ;"
|
||
|
||
"λ <> l ;"
|
||
"Λ <> L ;"
|
||
|
||
"μ <> m ;"
|
||
"Μ <> M ;"
|
||
|
||
"ν } $gammaLike > n\\\' ;"
|
||
"ν <> n ;"
|
||
"Ν } $gammaLike <> N\\\' ;"
|
||
"Ν <> N ;"
|
||
|
||
"ξ <> x ;"
|
||
"Ξ <> X ;"
|
||
|
||
"ο <> o ;"
|
||
"Ο <> O ;"
|
||
|
||
"π <> p ;"
|
||
"Π <> P ;"
|
||
|
||
"ρ <> r ;"
|
||
"Ρ <> R ;"
|
||
|
||
// insert separator before things that turn into s
|
||
"[Pp] { } [ςσΣϷϸϺϻ] > \\\' ;"
|
||
|
||
// special S variants
|
||
|
||
"Ϸ <> Š ;" // Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
|
||
"ϸ <> š ;" //ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
|
||
"Ϻ <> Ŝ ;" // Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
|
||
"ϻ <> ŝ ;" // ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
|
||
|
||
// The $under mark (not a caron) flags the exceptional sigma form
|
||
|
||
// before a letter, initial
|
||
"ς } $beforeLetter <> s $under } $beforeLetter;"
|
||
"σ } $beforeLetter <> s } $beforeLetter;"
|
||
|
||
// otherwise, after a letter = final
|
||
"$afterLetter { σ <> $afterLetter { s $under;"
|
||
"$afterLetter { ς <> $afterLetter { s ;"
|
||
|
||
// otherwise (isolated) = initial
|
||
"ς <> s $under;"
|
||
"σ <> s ;"
|
||
|
||
// [Pp] { Σ <> \\\'S ;
|
||
"Σ <> S ;"
|
||
|
||
"τ <> t ;"
|
||
"Τ <> T ;"
|
||
|
||
"φ <> f ;"
|
||
"Φ <> F ;"
|
||
|
||
"χ <> ch ;"
|
||
"Χ } $beforeLower <> Ch ;"
|
||
"Χ <> CH ;"
|
||
|
||
// Completeness for ASCII
// Reverse-only rules (each uses "<"): Latin letters that have no standalone
// rule of their own earlier in this table (h, c, j, q, u, w and their
// capitals) are respelled with Latin letters that the earlier rules do handle.
// NOTE(review): the leading "|" in each rule is presumably the ICU cursor
// marker, placed so the respelled text is scanned again by the other rules --
// confirm against the ICU transform rule syntax.
|
||
|
||
// $ignore = [[:Mark:]''] * ;
|
||
|
||
"| ch < h ;"
|
||
"| k < c ;"
|
||
"| i < j ;"
|
||
"| k < q ;"
|
||
// u and w are respelled as b when followed by a $vowel, otherwise as y;
// the context-restricted rules are listed before the unconditional ones.
"| b < u } $vowel ;"
|
||
"| b < w } $vowel ;"
|
||
"| y < u ;"
|
||
"| y < w ;"
|
||
|
||
// Upper-case counterparts of the rules above.
"| Ch < H ;"
|
||
"| K < C ;"
|
||
"| I < J ;"
|
||
"| K < Q ;"
|
||
"| B < W } $vowel ;"
|
||
"| B < U } $vowel ;"
|
||
"| Y < W ;"
|
||
"| Y < U ;"
|
||
|
||
// Completeness for Greek
|
||
|
||
"ϐ > | β ;"
|
||
"ϑ > | θ ;"
|
||
"ϒ > | Υ ;"
|
||
"ϕ > | φ ;"
|
||
"ϖ > | π ;"
|
||
|
||
"ϰ > | κ ;"
|
||
"ϱ > | ρ ;"
|
||
"ϲ > | σ ;"
|
||
"Ϲ > | Σ;" //U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
|
||
"ϳ > j ;"
|
||
"ϴ > | Θ ;"
|
||
"ϵ > | ε ;"
|
||
"µ > | μ ;"
|
||
|
||
// delete any trailing ' marks used for roundtripping
|
||
|
||
"< [Ππ] { \\\' } [Ss] ;"
|
||
"< [Νν] { \\\' } $egammaLike ;"
|
||
|
||
"::NFC (NFD) ;"
|
||
|
||
// MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
|
||
":: ([[[:Latin:][:Mn:][:Me:]] ['\\\:?]]) ;"
|
||
}
|
||
}
|