// -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Greek_Latin_UNGEGN.txt
// Date: Tue May 18 17:24:48 2004
//--------------------------------------------------------------------

// Greek_Latin_UNGEGN
//
// Transliteration rules between Greek and Latin script for modern Greek,
// based on the UNGEGN rules.  Each quoted string below is one statement of
// the rule set.  The statements are order-sensitive: several groups are
// deliberately placed ahead of the general letter table so that the more
// specific rule is reached first (see "need to have these up here" below).
//
// Notation used inside the rule strings (ICU transform rule syntax):
//   a <> b       two-way rule
//   a > b        forward (Greek to Latin) only
//   a < b        reverse (Latin to Greek) only
//   x { a } y    match a only with x before it and y after it
//   |            where the cursor is left after the replacement
//   $under       combining mark appended on the Latin side to tell apart
//                Greek letters that share a Latin letter (η vs ι, ω vs ο, ...)

t_Grek_Latn_UNGEGN {
  Rule {
    //--------------------------------------------------------------------
    //--------------------------------------------------------------------
    //--------------------------------------------------------------------
    // For modern Greek, based on UNGEGN rules.
    // Rules are predicated on running NFD first, and NFC afterwards
    // MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
    // WARNING: need to add accents to both filters ###
    // :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ\u03F7-\u07FB\u03F9] ;

    // Forward filter, then decompose (NFD) going forward / compose (NFC)
    // going backward.
    ":: [[[:Greek:][:Mn:][:Me:]] [\\\:-;?\u00B7\u037E\u0387]] ;"
    "::NFD (NFC) ;"

    // Useful variables
    // NOTE(review): $upper, $macron, $ddot, $lcgvowelC and $caron are defined
    // here but not referenced by any rule in this file.
    "$lower = [[:latin:][:greek:] & [:Ll:]] ;"
    "$upper = [[:latin:][:greek:] & [:Lu:]] ;"
    "$accent = [[:Mn:][:Me:]] ;"
    "$macron = ̄ ;"
    "$ddot = ̈ ;"
    "$lcgvowel = [αεηιουω] ;"
    "$ucgvowel = [ΑΕΗΙΟΥΩ] ;"
    "$gvowel = [$lcgvowel $ucgvowel] ;"
    "$lcgvowelC = [$lcgvowel $accent] ;"
    "$evowel = [aeiouyAEIOUY];"
    "$vowel = [ $evowel $gvowel] ;"
    "$beforeLower = $accent * $lower ;"
    "$gammaLike = [ΓΚΞΧγκξχϰ] ;"
    "$egammaLike = [GKXCgkxc] ;"
    "$smooth = ̓ ;"
    "$rough = ̔ ;"
    "$iotasub = ͅ ;"
    "$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;"
    "$under = ̱;"
    "$caron = ̌;"
    "$afterLetter = [:L:] [\\\'$accent]* ;"
    "$beforeLetter = [\\\'$accent]* [:L:] ;"

    // Fix punctuation
    // preserve original
    "\\\: <> \\\: $under ;"
    "\\\? <> \\\? $under ;"
    "\\\; <> \\\? ;"
    "· <> \\\: ;"

    // Fix any ancient characters that creep in
    "͂ > ́ ;"
    "̂ > ́ ;"
    "̀ > ́ ;"
    "$smooth > ;"
    "$rough > ;"
    "$iotasub > ;"
    "ͺ > ;"

    // need to have these up here so the rules don't mask
    "η <> i $under ;"
    "Η <> I $under ;"
    "Ψ } $beforeLower <> Ps ;"
    "Ψ <> PS ;"
    "ψ <> ps ;"
    "ω <> o $under ;"
    "Ω <> O $under;"

    // at beginning or end of word, convert mp to b
    "[^[:L:]$accent] { μπ > b ;"
    "μπ } [^[:L:]$accent] > b ;"
    "[^[:L:]$accent] { [Μμ][Ππ] > B ;"
    "[Μμ][Ππ] } [^[:L:]$accent] > B ;"
    "μπ < b ;"
    "Μπ < B } $beforeLower ;"
    "ΜΠ < B ;"

    // handle diphthongs ending with upsilon
    "ου <> ou ;"
    "ΟΥ <> OU ;"
    "Ου <> Ou ;"
    "οΥ <> oU ;"

    // After a/e/i (optionally carrying $under) an upsilon becomes v before a
    // $softener and f otherwise; marks captured as $1 are moved in front of
    // the v/f.
    "$fmaker = [aeiAEI] $under ? ;"
    "$shiftForwardVowels = [[:Mn:]-[\u0308]];"
    // note: a diaeresis keeps the items separate
    "$fmaker { υ ( $shiftForwardVowels )* } $softener > $1 v $under ;"
    "υ $1 < ( $shiftForwardVowels )* v $under ;"
    "$fmaker { υ ( $shiftForwardVowels )* } > $1 f $under;"
    "υ $1 < ( $shiftForwardVowels )* f $under ;"
    "$fmaker { Υ } $softener <> V $under ;"
    "$fmaker { Υ <> U $under ;"
    "υ <> y ;"
    "Υ <> Y ;"

    // NORMAL
    "α <> a ;"
    "Α <> A ;"
    "β <> v ;"
    "Β <> V ;"
    "γ } $gammaLike <> n } $egammaLike ;"
    "γ <> g ;"
    "Γ } $gammaLike <> N } $egammaLike ;"
    "Γ <> G ;"
    "δ <> d ;"
    "Δ <> D ;"
    "ε <> e ;"
    "Ε <> E ;"
    "ζ <> z ;"
    "Ζ <> Z ;"
    "θ <> th ;"
    "Θ } $beforeLower <> Th ;"
    "Θ <> TH ;"
    "ι <> i ;"
    "Ι <> I ;"
    "κ <> k ;"
    "Κ <> K ;"
    "λ <> l ;"
    "Λ <> L ;"
    "μ <> m ;"
    "Μ <> M ;"
    // NOTE(review): the lowercase rule is forward-only (>) while the uppercase
    // one below is two-way (<>); the trailing-' cleanup rule near the end of
    // this rule set appears to cover the reverse case -- confirm this is
    // intended.
    "ν } $gammaLike > n\\\' ;"
    "ν <> n ;"
    "Ν } $gammaLike <> N\\\' ;"
    "Ν <> N ;"
    "ξ <> x ;"
    "Ξ <> X ;"
    "ο <> o ;"
    "Ο <> O ;"
    "π <> p ;"
    "Π <> P ;"
    "ρ <> r ;"
    "Ρ <> R ;"

    // insert separator before things that turn into s
    "[Pp] { } [ςσΣϷϸϺϻ] > \\\' ;"

    // special S variants
    "Ϸ <> Š ;" // Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
    "ϸ <> š ;" // ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
    "Ϻ <> Ŝ ;" // Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
    "ϻ <> ŝ ;" // ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L

    // Caron means exception
    // NOTE(review): the sigma rules below mark the exceptional form with
    // $under; $caron is not referenced.
    // before a letter, initial
    "ς } $beforeLetter <> s $under } $beforeLetter;"
    "σ } $beforeLetter <> s } $beforeLetter;"
    // otherwise, after a letter = final
    "$afterLetter { σ <> $afterLetter { s $under;"
    "$afterLetter { ς <> $afterLetter { s ;"
    // otherwise (isolated) = initial
    "ς <> s $under;"
    "σ <> s ;"
    // [Pp] { Σ <> \\\'S ;
    "Σ <> S ;"
    "τ <> t ;"
    "Τ <> T ;"
    "φ <> f ;"
    "Φ <> F ;"
    "χ <> ch ;"
    "Χ } $beforeLower <> Ch ;"
    "Χ <> CH ;"

    // Completeness for ASCII
    // Reverse-only rules: Latin letters that no rule above produces are
    // rewritten to a letter that has a rule, with the cursor (|) left in front
    // of the replacement.
    // $ignore = [[:Mark:]''] * ;
    "| ch < h ;"
    "| k < c ;"
    "| i < j ;"
    "| k < q ;"
    "| b < u } $vowel ;"
    "| b < w } $vowel ;"
    "| y < u ;"
    "| y < w ;"
    "| Ch < H ;"
    "| K < C ;"
    "| I < J ;"
    "| K < Q ;"
    "| B < W } $vowel ;"
    "| B < U } $vowel ;"
    "| Y < W ;"
    "| Y < U ;"

    // Completeness for Greek
    // Forward-only rules: variant Greek letter forms are rewritten to the
    // basic letter, with the cursor (|) left in front of it.
    "ϐ > | β ;"
    "ϑ > | θ ;"
    "ϒ > | Υ ;"
    "ϕ > | φ ;"
    "ϖ > | π ;"
    "ϰ > | κ ;"
    "ϱ > | ρ ;"
    "ϲ > | σ ;"
    "Ϲ > | Σ;" // U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
    "ϳ > j ;"
    "ϴ > | Θ ;"
    "ϵ > | ε ;"
    "µ > | μ ;"

    // delete any trailing ' marks used for roundtripping
    "< [Ππ] { \\\' } [Ss] ;"
    "< [Νν] { \\\' } $egammaLike ;"

    "::NFC (NFD) ;"

    // MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
    ":: ([[[:Latin:][:Mn:][:Me:]] ['\\\:?]]) ;"
  }
}