scuffed-code/icu4c/source/data/translit/t_Grek_Latn_UNGEGN.txt

261 lines
5.1 KiB
Plaintext
Raw Normal View History

 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2002, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpicurules.bat
// Source: ../../../impl/data/Transliterator_Greek_Latin_UNGEGN.txt
// Date: Sat Jul 27 10:31:01 2002
//--------------------------------------------------------------------
// Greek_Latin_UNGEGN
t_Grek_Latn_UNGEGN {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// For modern Greek, based on UNGEGN rules.
// Rules are predicated on running NFD first, and NFC afterwards
// MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
// WARNING: need to add accents to both filters ###
// The generated filter on the next line is commented out; the filter string
// that follows it is the one in effect.
// :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ] ;
// Forward global filter: only Greek-script characters, combining marks
// (Mn/Me) and the listed punctuation characters are offered to the rules.
":: [[[:Greek:][:Mn:][:Me:]] [\\\:-;?\u00B7\u037E\u0387]] ;"
// Normalization step: NFD when running forward (Greek -> Latin); the
// parenthesized NFC is the step used when running in reverse.
"::NFD (NFC) ;"
// Useful variables
// NOTE(review): $upper, $macron, $ddot, $lcgvowelC and $caron are defined here
// but not referenced by any rule visible in this file.
// Lowercase / uppercase letters restricted to the Latin and Greek scripts.
"$lower = [[:latin:][:greek:] & [:Ll:]] ;"
"$upper = [[:latin:][:greek:] & [:Lu:]] ;"
// Any nonspacing (Mn) or enclosing (Me) combining mark.
"$accent = [[:Mn:][:Me:]] ;"
// Individual combining marks, named for readability.
"$macron = ̄ ;"
"$ddot = ̈ ;"
// Greek vowels (lowercase, uppercase, both) and lowercase vowels plus accents.
"$lcgvowel = [αεηιουω] ;"
"$ucgvowel = [ΑΕΗΙΟΥΩ] ;"
"$gvowel = [$lcgvowel $ucgvowel] ;"
"$lcgvowelC = [$lcgvowel $accent] ;"
// Latin vowels, and the union of Latin and Greek vowels.
"$evowel = [aeiouyAEIOUY];"
"$vowel = [ $evowel $gvowel] ;"
// Zero or more accents followed by a lowercase letter; used as an after-context
// to choose title-case output (Ps, Th, Ch) over all-caps output.
"$beforeLower = $accent * $lower ;"
// Greek letters before which gamma is written "n", and the Latin letters used
// as the matching context on the Latin side.
"$gammaLike = [ΓΚΞΧγκξχϰ] ;"
"$egammaLike = [GKXCgkxc] ;"
// Combining marks that the "ancient characters" rules remove.
"$smooth = ̓ ;"
"$rough = ̔ ;"
"$iotasub = ͅ ;"
// Greek letters (a set of consonants plus all vowels) before which the upsilon
// of a diphthong is written "v" rather than "f".
"$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;"
// $under is the marker appended to Latin output to keep otherwise identical
// results distinguishable for round-tripping (e.g. "i $under" vs. plain "i").
"$under = ̱;"
"$caron = ̌;"
// A letter followed by any apostrophes/accents (used as a before-context), and
// any apostrophes/accents followed by a letter (used as an after-context).
"$afterLetter = [:L:] [\\\'$accent]* ;"
"$beforeLetter = [\\\'$accent]* [:L:] ;"
// Fix punctuation
// preserve original: a ':' or '?' already present in the source is tagged with
// $under so it stays distinct from the converted Greek punctuation below and
// can be restored when running in reverse
"\\\: <> \\\: $under ;"
"\\\? <> \\\? $under ;"
// ';' (used as the Greek question mark) <-> '?'
"\\\; <> \\\? ;"
// raised dot <-> ':'
"· <> \\\: ;"
// Fix any ancient characters that creep in
// All rules in this group are forward-only ('>').
// Fold three other accent marks into the single accent on the right-hand side.
"͂ > ́ ;"
"̂ > ́ ;"
"̀ > ́ ;"
// Remove the $smooth, $rough and $iotasub marks, and the standalone character
// in the last rule, entirely (empty replacement).
"$smooth > ;"
"$rough > ;"
"$iotasub > ;"
"ͺ > ;"
// These rules need to be up here, ahead of the general single-letter rules
// further down, so that those later rules don't mask them (rules are tried in
// file order).
// eta and omega share their base Latin letter with iota and omicron; $under
// keeps them distinguishable for the reverse direction.
"η <> i $under ;"
"Η <> I $under ;"
// Capital psi: title-case "Ps" when a lowercase letter follows, otherwise "PS".
"Ψ } $beforeLower <> Ps ;"
"Ψ <> PS ;"
"ψ <> ps ;"
"ω <> o $under ;"
"Ω <> O $under;"
// at beginning or end of word, convert mp to b
// Forward-only: "word edge" is expressed as an adjacent character that is
// neither a letter nor an accent.
"[^[:L:]$accent] { μπ > b ;"
"μπ } [^[:L:]$accent] > b ;"
// Same, for the remaining upper/lower-case combinations; these all yield "B".
"[^[:L:]$accent] { [Μμ][Ππ] > B ;"
"[Μμ][Ππ] } [^[:L:]$accent] > B ;"
// Reverse-only: Latin b/B always maps back to mu + pi; capital B becomes
// title-case when a lowercase letter follows, otherwise all-caps.
"μπ < b ;"
"Μπ < B } $beforeLower ;"
"ΜΠ < B ;"
// handle diphthongs ending with upsilon
// omicron + upsilon <-> "ou", in each of the four case combinations
"ου <> ou ;"
"ΟΥ <> OU ;"
"Ου <> Ou ;"
"οΥ <> oU ;"
// $fmaker: a Latin a/e/i (either case), optionally followed by $under. It is
// used as the before-context of upsilon, written in Latin letters (presumably
// because the preceding vowel has already been transliterated at that point).
"$fmaker = [aeiAEI] $under ? ;"
// Combining marks that may be moved in front of the v/f; U+0308 is excluded.
"$shiftForwardVowels = [[:Mn:]-[\u0308]];" // note: a diaeresis keeps the items separate
// After an $fmaker vowel and before a $softener letter, upsilon becomes
// "v $under"; any marks that followed the upsilon ($1) are emitted first.
"$fmaker { υ ( $shiftForwardVowels )* } $softener > $1 v $under ;"
// Reverse: marks followed by "v $under" become upsilon followed by the marks.
"υ $1 < ( $shiftForwardVowels )* v $under ;"
// Otherwise (after $fmaker, no softener following) upsilon becomes "f $under".
"$fmaker { υ ( $shiftForwardVowels )* } > $1 f $under;"
"υ $1 < ( $shiftForwardVowels )* f $under ;"
// Capital upsilon after an $fmaker vowel.
// NOTE(review): the non-softener case yields "U $under" here, whereas the
// lowercase counterpart above yields "f $under" — confirm this is intended.
"$fmaker { Υ } $softener <> V $under ;"
"$fmaker { Υ <> U $under ;"
// Any other upsilon <-> y
"υ <> y ;"
"Υ <> Y ;"
// NORMAL
// One-to-one letter rules, alpha through rho. '<>' rules apply in both
// directions; a '}' introduces an after-context that must follow the match.
"α <> a ;"
"Α <> A ;"
"β <> v ;"
"Β <> V ;"
// gamma before a gamma-like letter is written "n" (Latin side: n before g/k/x/c)
"γ } $gammaLike <> n } $egammaLike ;"
"γ <> g ;"
"Γ } $gammaLike <> N } $egammaLike ;"
"Γ <> G ;"
"δ <> d ;"
"Δ <> D ;"
"ε <> e ;"
"Ε <> E ;"
"ζ <> z ;"
"Ζ <> Z ;"
// Capital theta: title-case "Th" when a lowercase letter follows, else "TH".
"θ <> th ;"
"Θ } $beforeLower <> Th ;"
"Θ <> TH ;"
"ι <> i ;"
"Ι <> I ;"
"κ <> k ;"
"Κ <> K ;"
"λ <> l ;"
"Λ <> L ;"
"μ <> m ;"
"Μ <> M ;"
// nu before a gamma-like letter gets a trailing apostrophe so that it is not
// confused with the "n" produced from gamma above.
// NOTE(review): the lowercase rule is forward-only ('>') while the uppercase
// rule is bidirectional ('<>') — confirm the asymmetry is intended.
"ν } $gammaLike > n\\\' ;"
"ν <> n ;"
"Ν } $gammaLike <> N\\\' ;"
"Ν <> N ;"
"ξ <> x ;"
"Ξ <> X ;"
"ο <> o ;"
"Ο <> O ;"
"π <> p ;"
"Π <> P ;"
"ρ <> r ;"
"Ρ <> R ;"
// Forward-only: insert an apostrophe between an already-converted P/p and a
// following sigma (the match itself is empty), so pi + sigma does not produce
// the same "ps" as psi.
"[Pp] { } ς > \\\' ;"
"[Pp] { } σ > \\\' ;"
// $under marks the exceptional sigma form in the rules below (this note
// originally said "Caron", but the rules append $under, not $caron).
// before a letter, initial: the non-final form σ is the expected one, so a
// final-form ς found here is the exception and gets $under
"ς } $beforeLetter <> s $under } $beforeLetter;"
"σ } $beforeLetter <> s } $beforeLetter;"
// otherwise, after a letter = final: ς is expected, σ is the exception
"$afterLetter { σ <> $afterLetter { s $under;"
"$afterLetter { ς <> $afterLetter { s ;"
// otherwise (isolated) = initial: σ is expected, ς is the exception
"ς <> s $under;"
"σ <> s ;"
// Capital sigma after an already-converted P/p carries a leading apostrophe.
"[Pp] { Σ <> \\\'S ;"
"Σ <> S ;"
"τ <> t ;"
"Τ <> T ;"
"φ <> f ;"
"Φ <> F ;"
// Capital chi: title-case "Ch" when a lowercase letter follows, else "CH".
"χ <> ch ;"
"Χ } $beforeLower <> Ch ;"
"Χ <> CH ;"
// Completeness for ASCII
// Reverse-only ('<') rules for Latin letters that no rule above produces.
// Each one rewrites the letter to a Latin spelling that is handled above; the
// '|' places the cursor before the rewritten text so it is processed again.
// $ignore = [[:Mark:]''] * ;
"| ch < h ;"
"| k < c ;"
"| i < j ;"
"| k < q ;"
// u / w before a vowel are treated as b; otherwise as y.
"| b < u } $vowel ;"
"| b < w } $vowel ;"
"| y < u ;"
"| y < w ;"
// Uppercase counterparts of the rules above.
"| Ch < H ;"
"| K < C ;"
"| I < J ;"
"| K < Q ;"
"| B < W } $vowel ;"
"| B < U } $vowel ;"
"| Y < W ;"
"| Y < U ;"
// Completeness for Greek
// Forward-only ('>') rules for Greek symbol/variant characters. Each is
// replaced by the ordinary letter, with '|' placing the cursor before the
// replacement so the letter rules above then convert it.
"ϐ > | β ;"
"ϑ > | θ ;"
"ϒ > | Υ ;"
"ϕ > | φ ;"
"ϖ > | π ;"
"ϰ > | κ ;"
"ϱ > | ρ ;"
"ϲ > | σ ;"
// This one maps straight to Latin j, with no reprocessing.
"ϳ > j ;"
"ϴ > | Θ ;"
"ϵ > | ε ;"
// The micro sign is treated as Greek mu.
"µ > | μ ;"
// delete any trailing ' marks used for roundtripping
// Reverse-only rules with an empty result: drop the apostrophe that sits
// between a pi and a following S/s, or between a nu and a following
// $egammaLike letter.
"< [Ππ] { \\\' } [Ss] ;"
"< [Νν] { \\\' } $egammaLike ;"
// Normalization step: NFC when running forward (Greek -> Latin); the
// parenthesized NFD is the step used when running in reverse.
"::NFC (NFD) ;"
// MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
// Reverse global filter (parenthesized form): only Latin-script characters,
// combining marks (Mn/Me) and the characters ' : ? are offered to the rules.
":: ([[[:Latin:][:Mn:][:Me:]] ['\\\:?]]) ;"
}
}