261 lines
5.1 KiB
Plaintext
261 lines
5.1 KiB
Plaintext
|
// -*- Coding: utf-8; -*-
|
|||
|
//--------------------------------------------------------------------
|
|||
|
// Copyright (c) 1999-2002, International Business Machines
|
|||
|
// Corporation and others. All Rights Reserved.
|
|||
|
//--------------------------------------------------------------------
|
|||
|
// THIS IS A MACHINE-GENERATED FILE
|
|||
|
// Tool: dumpicurules.bat
|
|||
|
// Source: ../../../impl/data/Transliterator_Greek_Latin_UNGEGN.txt
|
|||
|
// Date: Sat Jul 27 10:31:01 2002
|
|||
|
//--------------------------------------------------------------------
|
|||
|
|
|||
|
// Greek_Latin_UNGEGN
|
|||
|
|
|||
|
t_Grek_Latn_UNGEGN {
|
|||
|
Rule {
|
|||
|
//--------------------------------------------------------------------
|
|||
|
//--------------------------------------------------------------------
|
|||
|
//--------------------------------------------------------------------
|
|||
|
// For modern Greek, based on UNGEGN rules.
|
|||
|
|
|||
|
// Rules are predicated on running NFD first, and NFC afterwards
|
|||
|
// MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
|
|||
|
// WARNING: need to add accents to both filters ###
|
|||
|
// :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ] ;
|
|||
|
|
|||
|
// Forward-direction filter: only Greek-script characters, combining marks
// (Mn/Me) and the listed punctuation (':' through ';', '?', U+00B7, U+037E,
// U+0387) are handed to the rules below.
":: [[[:Greek:][:Mn:][:Me:]] [\\\:-;?\u00B7\u037E\u0387]] ;"
|
|||
|
// Decompose to NFD before the forward rules run; the parenthesized NFC is the
// counterpart applied when the rule set is used in the reverse direction.
"::NFD (NFC) ;"
|
|||
|
|
|||
|
// Useful variables
// Named character sets and marks referenced by the rules that follow.
// NOTE(review): $upper, $macron, $ddot, $lcgvowelC and $caron are defined here
// but not referenced by any rule visible in this file — confirm before removing.
|
|||
|
|
|||
|
// Lowercase / uppercase letters of the Latin and Greek scripts, and any
// nonspacing or enclosing combining mark.
"$lower = [[:latin:][:greek:] & [:Ll:]] ;"
|
|||
|
"$upper = [[:latin:][:greek:] & [:Lu:]] ;"
|
|||
|
"$accent = [[:Mn:][:Me:]] ;"
|
|||
|
|
|||
|
// Single combining marks; the character after '=' is a combining mark and may
// render attached to the preceding space.
"$macron = ̄ ;"
|
|||
|
"$ddot = ̈ ;"
|
|||
|
|
|||
|
// Greek vowels by case, their union, and lowercase vowels plus accents.
"$lcgvowel = [αεηιουω] ;"
|
|||
|
"$ucgvowel = [ΑΕΗΙΟΥΩ] ;"
|
|||
|
"$gvowel = [$lcgvowel $ucgvowel] ;"
|
|||
|
"$lcgvowelC = [$lcgvowel $accent] ;"
|
|||
|
|
|||
|
// Latin vowels (y included), then the union of Latin and Greek vowels.
"$evowel = [aeiouyAEIOUY];"
|
|||
|
"$vowel = [ $evowel $gvowel] ;"
|
|||
|
|
|||
|
// Right-context pattern: any run of accents followed by a lowercase letter.
// The rules use it to pick title-case digraphs (Ps, Th, Ch) over all-caps ones.
"$beforeLower = $accent * $lower ;"
|
|||
|
|
|||
|
// Greek letters before which gamma is written n, and the Latin letters used for
// the corresponding test in the reverse direction.
"$gammaLike = [ΓΚΞΧγκξχϰ] ;"
|
|||
|
"$egammaLike = [GKXCgkxc] ;"
|
|||
|
// Combining marks that the "ancient characters" rules delete.
"$smooth = ̓ ;"
|
|||
|
"$rough = ̔ ;"
|
|||
|
"$iotasub = ͅ ;"
|
|||
|
|
|||
|
// Greek letters before which an upsilon following $fmaker is written v rather
// than f (see the upsilon diphthong rules).
"$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;"
|
|||
|
|
|||
|
// Combining marker appended to Latin output so that otherwise identical
// spellings stay distinguishable on the way back (e.g. η vs ι, ω vs ο).
"$under = ̱;"
|
|||
|
|
|||
|
"$caron = ̌;"
|
|||
|
|
|||
|
// A letter followed by (resp. preceded by) any run of apostrophes and accents;
// used by the sigma rules to test word position.
"$afterLetter = [:L:] [\\\'$accent]* ;"
|
|||
|
"$beforeLetter = [\\\'$accent]* [:L:] ;"
|
|||
|
|
|||
|
// Fix punctuation
|
|||
|
|
|||
|
// preserve original
|
|||
|
// A ':' or '?' already present in the Greek text is tagged with $under so it
// stays distinct from the ':' and '?' produced by the two rules further down.
"\\\: <> \\\: $under ;"
|
|||
|
"\\\? <> \\\? $under ;"
|
|||
|
|
|||
|
// Greek ';' corresponds to Latin '?', and '·' corresponds to Latin ':'.
"\\\; <> \\\? ;"
|
|||
|
"· <> \\\: ;"
|
|||
|
|
|||
|
// Fix any ancient characters that creep in
// All rules in this group are forward-only ('>'): three different combining
// accents are replaced by the one accent written on the right-hand side, and
// $smooth, $rough, $iotasub and the spacing 'ͺ' are deleted (empty right side).
|
|||
|
|
|||
|
"͂ > ́ ;"
|
|||
|
"̂ > ́ ;"
|
|||
|
"̀ > ́ ;"
|
|||
|
"$smooth > ;"
|
|||
|
"$rough > ;"
|
|||
|
"$iotasub > ;"
|
|||
|
"ͺ > ;"
|
|||
|
|
|||
|
// need to have these up here so the rules don't mask
// Rules are tried in file order, so these marked or multi-letter mappings are
// placed ahead of the plain single-letter rules ('ι <> i', 'π <> p', 'ο <> o')
// that would otherwise match the same Latin text first in the reverse direction.
|
|||
|
|
|||
|
// Eta shares the base letter i with iota; $under tells them apart.
"η <> i $under ;"
|
|||
|
"Η <> I $under ;"
|
|||
|
|
|||
|
// Capital psi is title-cased (Ps) when a lowercase letter follows, else PS.
"Ψ } $beforeLower <> Ps ;"
|
|||
|
"Ψ <> PS ;"
|
|||
|
"ψ <> ps ;"
|
|||
|
|
|||
|
// Omega shares the base letter o with omicron; $under tells them apart.
"ω <> o $under ;"
|
|||
|
"Ω <> O $under;"
|
|||
|
|
|||
|
// at beginning or end of word, convert mp to b
|
|||
|
|
|||
|
// Forward-only: μπ becomes b when the character before it, or the character
// after it, is neither a letter nor an accent. The [Μμ][Ππ] variants come after
// the all-lowercase rules, so they only fire for spellings containing a capital.
"[^[:L:]$accent] { μπ > b ;"
|
|||
|
"μπ } [^[:L:]$accent] > b ;"
|
|||
|
"[^[:L:]$accent] { [Μμ][Ππ] > B ;"
|
|||
|
"[Μμ][Ππ] } [^[:L:]$accent] > B ;"
|
|||
|
|
|||
|
// Reverse-only: b always becomes μπ; B becomes Μπ before a lowercase letter
// and ΜΠ otherwise.
"μπ < b ;"
|
|||
|
"Μπ < B } $beforeLower ;"
|
|||
|
"ΜΠ < B ;"
|
|||
|
|
|||
|
// handle diphthongs ending with upsilon
// Order matters in this group: the ου digraph is matched first, then upsilon
// after an already-transliterated a/e/i ($fmaker), and only then bare upsilon.
|
|||
|
|
|||
|
// ου in all four case combinations maps to ou with the same casing.
"ου <> ou ;"
|
|||
|
"ΟΥ <> OU ;"
|
|||
|
"Ου <> Ou ;"
|
|||
|
"οΥ <> oU ;"
|
|||
|
|
|||
|
// $fmaker is a left context written in Latin letters: a, e or i (either case),
// optionally followed by $under.
"$fmaker = [aeiAEI] $under ? ;"
|
|||
|
"$shiftForwardVowels = [[:Mn:]-[\u0308]];" // note: a diaeresis keeps the items separate
|
|||
|
|
|||
|
// Lowercase upsilon after $fmaker and before a $softener letter becomes
// v $under. Any marks captured after the upsilon ($1) are emitted before the
// consonant; the reverse rule moves them back after the upsilon.
"$fmaker { υ ( $shiftForwardVowels )* } $softener > $1 v $under ;"
|
|||
|
"υ $1 < ( $shiftForwardVowels )* v $under ;"
|
|||
|
|
|||
|
// Same as above without a following $softener: the upsilon becomes f $under.
"$fmaker { υ ( $shiftForwardVowels )* } > $1 f $under;"
|
|||
|
"υ $1 < ( $shiftForwardVowels )* f $under ;"
|
|||
|
|
|||
|
// Capital upsilon after $fmaker.
// NOTE(review): the lowercase fallback above produces 'f $under' while the
// capital fallback below produces 'U $under' — confirm the asymmetry is intended.
"$fmaker { Υ } $softener <> V $under ;"
|
|||
|
"$fmaker { Υ <> U $under ;"
|
|||
|
|
|||
|
// Any remaining upsilon maps to y.
"υ <> y ;"
|
|||
|
"Υ <> Y ;"
|
|||
|
|
|||
|
// NORMAL
// Letter-by-letter mappings from alpha to rho, lowercase then uppercase.
// Every rule is bidirectional ('<>') unless a comment says otherwise.
|
|||
|
|
|||
|
"α <> a ;"
|
|||
|
"Α <> A ;"
|
|||
|
|
|||
|
"β <> v ;"
|
|||
|
"Β <> V ;"
|
|||
|
|
|||
|
// Gamma before a $gammaLike letter is written n; in reverse, n before an
// $egammaLike Latin letter is read back as gamma. Otherwise gamma is g.
"γ } $gammaLike <> n } $egammaLike ;"
|
|||
|
"γ <> g ;"
|
|||
|
"Γ } $gammaLike <> N } $egammaLike ;"
|
|||
|
"Γ <> G ;"
|
|||
|
|
|||
|
"δ <> d ;"
|
|||
|
"Δ <> D ;"
|
|||
|
|
|||
|
"ε <> e ;"
|
|||
|
"Ε <> E ;"
|
|||
|
|
|||
|
"ζ <> z ;"
|
|||
|
"Ζ <> Z ;"
|
|||
|
|
|||
|
// Capital theta is title-cased (Th) when a lowercase letter follows, else TH.
"θ <> th ;"
|
|||
|
"Θ } $beforeLower <> Th ;"
|
|||
|
"Θ <> TH ;"
|
|||
|
|
|||
|
"ι <> i ;"
|
|||
|
"Ι <> I ;"
|
|||
|
|
|||
|
"κ <> k ;"
|
|||
|
"Κ <> K ;"
|
|||
|
|
|||
|
"λ <> l ;"
|
|||
|
"Λ <> L ;"
|
|||
|
|
|||
|
"μ <> m ;"
|
|||
|
"Μ <> M ;"
|
|||
|
|
|||
|
// Nu before a $gammaLike letter gets a trailing apostrophe, which keeps it
// apart from the n that the gamma rules produce in the same position.
// NOTE(review): the lowercase rule is forward-only ('>') while the uppercase
// rule is bidirectional ('<>') — confirm whether the difference is intended.
"ν } $gammaLike > n\\\' ;"
|
|||
|
"ν <> n ;"
|
|||
|
"Ν } $gammaLike <> N\\\' ;"
|
|||
|
"Ν <> N ;"
|
|||
|
|
|||
|
"ξ <> x ;"
|
|||
|
"Ξ <> X ;"
|
|||
|
|
|||
|
"ο <> o ;"
|
|||
|
"Ο <> O ;"
|
|||
|
|
|||
|
"π <> p ;"
|
|||
|
"Π <> P ;"
|
|||
|
|
|||
|
"ρ <> r ;"
|
|||
|
"Ρ <> R ;"
|
|||
|
|
|||
|
// Forward-only insertions (nothing between '{' and '}' is consumed): an
// apostrophe is emitted between an already-produced P/p and a following sigma.
// Presumably this keeps pi+sigma apart from psi, which is also written "ps" —
// NOTE(review): confirm.
"[Pp] { } ς > \\\' ;"
|
|||
|
"[Pp] { } σ > \\\' ;"
|
|||
|
|
|||
|
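// Caron means exception (NOTE(review): the sigma rules below mark the exceptional form with $under; $caron is defined but not referenced by any rule in this file — confirm which mark is intended)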
|
|||
|
|
|||
|
// before a letter, initial
// Plain 's' stands for the sigma form expected at a position (σ before a
// letter, ς after one); 's $under' stands for the other form in that position.
|
|||
|
"ς } $beforeLetter <> s $under } $beforeLetter;"
|
|||
|
"σ } $beforeLetter <> s } $beforeLetter;"
|
|||
|
|
|||
|
// otherwise, after a letter = final
|
|||
|
"$afterLetter { σ <> $afterLetter { s $under;"
|
|||
|
"$afterLetter { ς <> $afterLetter { s ;"
|
|||
|
|
|||
|
// otherwise (isolated) = initial
|
|||
|
"ς <> s $under;"
|
|||
|
"σ <> s ;"
|
|||
|
|
|||
|
// Capital sigma directly after P/p is written with a leading apostrophe;
// any other capital sigma is S.
"[Pp] { Σ <> \\\'S ;"
|
|||
|
"Σ <> S ;"
|
|||
|
|
|||
|
// Remaining letter mappings (tau, phi, chi), all bidirectional.
"τ <> t ;"
|
|||
|
"Τ <> T ;"
|
|||
|
|
|||
|
"φ <> f ;"
|
|||
|
"Φ <> F ;"
|
|||
|
|
|||
|
// Capital chi is title-cased (Ch) when a lowercase letter follows, else CH.
"χ <> ch ;"
|
|||
|
"Χ } $beforeLower <> Ch ;"
|
|||
|
"Χ <> CH ;"
|
|||
|
|
|||
|
// Completeness for ASCII
// Reverse-only rules ('<') for Latin letters that the rules above do not handle
// on their own: h, c, j, q, u, w and their capitals. Each one is rewritten to a
// Latin spelling that an earlier rule does handle; the leading '|' puts the
// cursor in front of the replacement so that it is processed again.
|
|||
|
|
|||
|
// $ignore = [[:Mark:]''] * ;
|
|||
|
|
|||
|
// NOTE(review): u and w before a vowel are rewritten to b, which the earlier
// reverse rules turn into μπ — confirm that b (rather than v) is intended.
"| ch < h ;"
|
|||
|
"| k < c ;"
|
|||
|
"| i < j ;"
|
|||
|
"| k < q ;"
|
|||
|
"| b < u } $vowel ;"
|
|||
|
"| b < w } $vowel ;"
|
|||
|
"| y < u ;"
|
|||
|
"| y < w ;"
|
|||
|
|
|||
|
// Uppercase counterparts of the rules above.
"| Ch < H ;"
|
|||
|
"| K < C ;"
|
|||
|
"| I < J ;"
|
|||
|
"| K < Q ;"
|
|||
|
"| B < W } $vowel ;"
|
|||
|
"| B < U } $vowel ;"
|
|||
|
"| Y < W ;"
|
|||
|
"| Y < U ;"
|
|||
|
|
|||
|
// Completeness for Greek
// Forward-only rules ('>'): each Greek symbol variant is replaced by the
// ordinary Greek letter on the right, with the cursor ('|') placed in front of
// it so that the letter rules above then apply. 'ϳ' is the one exception and
// maps straight to Latin j.
|
|||
|
|
|||
|
"ϐ > | β ;"
|
|||
|
"ϑ > | θ ;"
|
|||
|
"ϒ > | Υ ;"
|
|||
|
"ϕ > | φ ;"
|
|||
|
"ϖ > | π ;"
|
|||
|
|
|||
|
"ϰ > | κ ;"
|
|||
|
"ϱ > | ρ ;"
|
|||
|
"ϲ > | σ ;"
|
|||
|
"ϳ > j ;"
|
|||
|
"ϴ > | Θ ;"
|
|||
|
"ϵ > | ε ;"
|
|||
|
"µ > | μ ;"
|
|||
|
|
|||
|
// delete any trailing ' marks used for roundtripping
// Reverse-only rules with an empty left side: the apostrophe is removed when it
// sits between Π/π and S/s, or between Ν/ν and an $egammaLike letter.
|
|||
|
|
|||
|
"< [Ππ] { \\\' } [Ss] ;"
|
|||
|
"< [Νν] { \\\' } $egammaLike ;"
|
|||
|
|
|||
|
// Recompose to NFC after the forward rules; the parenthesized NFD is the
// counterpart applied when the rule set is used in the reverse direction.
"::NFC (NFD) ;"
|
|||
|
|
|||
|
// MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
|
|||
|
// Reverse-direction filter (the whole set is parenthesized): Latin-script
// characters, combining marks (Mn/Me), and the characters ' : ?
":: ([[[:Latin:][:Mn:][:Me:]] ['\\\:?]]) ;"
|
|||
|
}
|
|||
|
}
|