// -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Greek_Latin.txt
// Date: Mon Nov 19 12:15:35 2001
//--------------------------------------------------------------------

// Greek_Latin
//
// Layout: every comment and every quoted rule string sits on its own line.
// A "//" comment runs to the end of the physical line, so a rule string must
// never share a line with a preceding comment.
//
// Rule operators used below: ">" forward (Greek to Latin) only,
// "<" reverse (Latin to Greek) only, "<>" both directions.

translit_Greek_Latin {
    Rule {
        //--------------------------------------------------------------------
        // Copyright (c) 1999-2001, International Business Machines
        // Corporation and others. All Rights Reserved.
        //--------------------------------------------------------------------
        // $Source: /xsrl/Nsvn/icu/icu/source/data/translit/Attic/t_Grek_Latn.txt,v $
        // $Date: 2001/11/19 22:23:33 $
        // $Revision: 1.4 $
        //--------------------------------------------------------------------

        // Rules are predicated on running NFD first, and NFC afterwards

        // Forward filter: ASCII, the middle dot U+00B7 (mapped to ':' by the
        // punctuation rules below), Greek letters and marks.
        ":: [\\u0000-\u007F \\u00B7 [:Greek:] [:Mark:]] ;"
        ":: NFD (NFC) ;"

        // TEST CASES
        // Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
        // ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
        // ᾳ ῃ ῳ ὃ ὄ
        // ὠς ὡς ὢς ὣς
        // Ὠς Ὡς Ὢς Ὣς
        // ὨΣ ὩΣ ὪΣ ὫΣ
        // Ạ, ạ, Ẹ, ẹ, Ọ, ọ

        // Useful variables

        "$lower = [:Ll:] ;"
        "$upper = [:Lu:] ;"
        "$accent = [:M:] ;"

        "$macron = \u0304 ;"
        "$ddot = \u0308 ;"
        "$ddotmac = [$ddot$macron];"

        "$lcgvowel = [αεηιουω] ;"
        "$ucgvowel = [ΑΕΗΙΟΥΩ] ;"
        "$gvowel = [$lcgvowel $ucgvowel] ;"
        "$lcgvowelC = [$lcgvowel $accent] ;"

        "$evowel = [aeiouyAEIOUY];"
        "$vowel = [ $evowel $gvowel] ;"

        "$beforeLower = $accent * $lower ;"

        "$gammaLike = [ΓΚΞΧγκξχϰ] ;"
        "$egammaLike = [GKXCgkxc] ;"

        "$smooth = ̓ ;"
        "$rough = ̔ ;"
        "$iotasub = ͅ ;"

        "$evowel_i = [$evowel-[iI]] ;"

        "$caron = \u030C;"

        // NOTE(review): both variables below are built on [:^L:], the negated
        // Letter class, although their names suggest a letter context.
        // Confirm this is intended before relying on the sigma rules.
        "$afterLetter = [:^L:] [\\\'[:M:]]* ;"
        "$beforeLetter = [\\\'[:M:]]* [:^L:] ;"

        // Fix punctuation

        "\\\; <> \\\? ;"
        "· <> \\\: ;"

        // CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve

        "\u0342 <> \u0302 ;"

        // IOTA: convert iota subscript to iota
        // first make previous alpha long!

        "$accent_minus = [[$accent]-[$iotasub$macron]];"
        "Α } $accent_minus * $iotasub > | Α $macron ;"
        "α } $accent_minus * $iotasub > | α $macron ;"

        // now convert to uppercase if after uppercase, otherwise to lowercase

        "$upper $accent * { $iotasub > I ;"
        "$iotasub > i ;"

        "| $1 $iotasub < ([:L:] $macron [:M:]*) i ;"
        "| $1 $iotasub < ([:L:] $macron [:M:]*) I ;"

        // BREATHING

        // Convert rough breathing to h, and move before letters.

        // Make A ` x = > H a x

        "Α ($macron?) $rough } $beforeLower > H | α $1;"
        "Ε $rough } $beforeLower > H | ε;"
        "Η $rough } $beforeLower > H | η ;"
        "Ι ($ddot?) $rough } $beforeLower > H | ι $1;"
        "Ο $rough } $beforeLower > H | ο ;"
        "Υ $rough } $beforeLower > H | υ ;"
        "Ω ($ddot?) $rough } $beforeLower > H | ω $1;"

        // Make A x ` = > H a x

        "Α ($lower $macron?) $rough > H | α $1 ;"
        "Ε ($lower) $rough > H | ε $1 ;"
        "Η ($lower) $rough > H | η $1 ;"
        "Ι ($lower $ddot?) $rough > H | ι $1 ;"
        "Ο ($lower) $rough > H | ο $1 ;"
        "Υ ($lower) $rough > H | υ $1 ;"
        "Ω ($lower $ddot?) $rough > H | ω $1 ;"

        //Otherwise, make x ` into h x and X ` into H X

        "($lcgvowel + $ddotmac? ) $rough > h | $1 ;"
        "($gvowel + $ddotmac? ) $rough > H | $1 ;"

        // Go backwards with H

        "| $1 $rough < h ($evowel $macron $ddot? $evowel_i $macron?) ;"
        "| $1 $rough < h ($evowel $ddot? $evowel $macron?) ;"
        "| $1 $rough < h ($evowel $macron? $ddot?) ;"

        "| $1 $rough < H ([AEIOUY] $macron $ddot? $evowel_i $macron?) ;"
        "| $1 $rough < H ([AEIOUY] $ddot? $evowel $macron?) ;"
        "| $1 $rough < H ([AEIOUY] $macron? $ddot?) ;"

        // titlecase, have to fix individually
        // in the future, we should add &uppercase() to make this easier

        "| A $1 $rough < H a ($macron $ddot? $evowel_i $macron?) ;"
        "| E $1 $rough < H e ($macron $ddot? $evowel_i $macron?) ;"
        "| I $1 $rough < H i ($macron $ddot? $evowel_i $macron?) ;"
        "| O $1 $rough < H o ($macron $ddot? $evowel_i $macron?) ;"
        "| U $1 $rough < H u ($macron $ddot? $evowel_i $macron?) ;"
        "| Y $1 $rough < H y ($macron $ddot? $evowel_i $macron?) ;"

        "| A $1 $rough < H a ($ddot? $evowel $macron?) ;"
        "| E $1 $rough < H e ($ddot? $evowel $macron?) ;"
        "| I $1 $rough < H i ($ddot? $evowel $macron?) ;"
        "| O $1 $rough < H o ($ddot? $evowel $macron?) ;"
        "| U $1 $rough < H u ($ddot? $evowel $macron?) ;"
        "| Y $1 $rough < H y ($ddot? $evowel $macron?) ;"

        "| A $1 $rough < H a ($macron? $ddot? ) ;"
        "| E $1 $rough < H e ($macron? $ddot? ) ;"
        "| I $1 $rough < H i ($macron? $ddot? ) ;"
        "| O $1 $rough < H o ($macron? $ddot? ) ;"
        "| U $1 $rough < H u ($macron? $ddot? ) ;"
        "| Y $1 $rough < H y ($macron? $ddot? ) ;"

        // Now do smooth

        //delete smooth breathing for Latin
        "$smooth > ;"

        // insert in Greek
        // the assumption is that all Marks are on letters.
        "| $1 $smooth < [^[:L:][:M:]] { ([rR]) } [^hH$smooth$rough] ;"
        "| $1 $smooth < [^[:L:][:M:]] { ($evowel $macron? $evowel $macron?) } [^$smooth$rough] ;"
        "| $1 $smooth < [^[:L:][:M:]] { ($evowel $macron?) } [^$evowel$smooth$rough] ;"

        // TODO: preserve smooth/rough breathing if not
        // on initial vowel sequence

        // need to have these up here so the rules don't mask

        // remove now superfluous macron when returning
        "Α < A $macron ;"
        "α < a $macron ;"

        "η <> e $macron ;"
        "Η <> E $macron ;"

        "φ <> ph ;"
        "Ψ } $beforeLower <> Ps ;"
        "Ψ <> PS ;"
        "Φ } $beforeLower <> Ph ;"
        "Φ <> PH ;"
        "ψ <> ps ;"

        "ω <> o $macron ;"
        "Ω <> O $macron;"

        // NORMAL

        "α <> a ;"
        "Α <> A ;"

        "β <> b ;"
        "Β <> B ;"

        "γ } $gammaLike <> n } $egammaLike ;"
        "γ <> g ;"
        "Γ } $gammaLike <> N } $egammaLike ;"
        "Γ <> G ;"

        "δ <> d ;"
        "Δ <> D ;"

        "ε <> e ;"
        "Ε <> E ;"

        "ζ <> z ;"
        "Ζ <> Z ;"

        "θ <> th ;"
        "Θ } $beforeLower <> Th ;"
        "Θ <> TH ;"

        "ι <> i ;"
        "Ι <> I ;"

        "κ <> k ;"
        "Κ <> K ;"

        "λ <> l ;"
        "Λ <> L ;"

        "μ <> m ;"
        "Μ <> M ;"

        "ν } $gammaLike > n\\\' ;"
        "ν <> n ;"
        "Ν } $gammaLike <> N\\\' ;"
        "Ν <> N ;"

        "ξ <> x ;"
        "Ξ <> X ;"

        "ο <> o ;"
        "Ο <> O ;"

        "π <> p ;"
        "Π <> P ;"

        "ρ $rough <> rh;"
        "Ρ $rough } $beforeLower <> Rh ;"
        "Ρ $rough <> RH ;"
        "ρ <> r ;"
        "Ρ <> R ;"

        // insert separator
        "[Pp] { } ς > \\\' ;"
        "[Pp] { } σ > \\\' ;"

        // Caron means exception

        // before a letter, initial
        "ς } $beforeLetter <> s $caron } $beforeLetter;"
        "σ } $beforeLetter <> s } $beforeLetter;"

        // otherwise, after a letter = final
        "$afterLetter { σ <> $afterLetter { s $caron;"
        "$afterLetter { ς <> $afterLetter { s ;"

        // otherwise (isolated) = initial
        "ς <> s $caron;"
        "σ <> s ;"

        "[Pp] { Σ <> \\\'S ;"
        "Σ <> S ;"

        "τ <> t ;"
        "Τ <> T ;"

        "$vowel {υ } <> u ;"
        "υ <> y ;"
        "$vowel { Υ <> U ;"
        "Υ <> Y ;"

        "χ <> ch ;"
        "Χ } $beforeLower <> Ch ;"
        "Χ <> CH ;"

        // Completeness for ASCII

        "$ignore = [[:Mark:]''] * ;"

        "| k < c ;"
        "| ph < f ;"
        "| i < j ;"
        "| k < q ;"
        "| u < v ;"
        "| u < w ;"

        "| K < C ;"
        "| PH < F } $ignore [:UppercaseLetter:] ;"
        "| PH < [:UppercaseLetter:] $ignore { F ;"
        "| PH < F ;"
        "| I < J ;"
        "| K < Q ;"
        "| U < V ;"
        "| U < W ;"

        "$rough } $ignore [:UppercaseLetter:] > H ;"
        "$ignore [:UppercaseLetter:] { $rough > H ;"
        "$rough < H ;"
        "$rough <> h ;"

        // Completeness for Greek

        "ϐ > | β ;"
        "ϑ > | θ ;"
        "ϒ > | Υ ;"
        "ϕ > | φ ;"
        "ϖ > | π ;"
        "ϰ > | κ ;"
        "ϱ > | ρ ;"
        "ϲ > | σ ;"
        "ϳ > j ;"
        "ϴ > | Θ ;"
        "ϵ > | ε ;"
        "µ > | μ ;"

        "ͺ > i;"

        // delete any trailing ' marks used for roundtripping

        "< [Ππ] { \\\' } [Ss] ;"
        "< [Νν] { \\\' } $egammaLike ;"

        "::NFC (NFD) ;"
        ":: ([\\u0000-\u007F [:Latin:] [:Mark:]]) ;"
    }
}