// -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: C:\work\DevICU4J\icu4j\src\com\ibm\icu\impl\data/Transliterator_Greek_Latin.txt
// Date: Fri Mar 1 16:15:45 2002
//--------------------------------------------------------------------

// Greek_Latin
//
// Resource entry holding the Greek <-> Latin transliteration rule set.
// Each quoted string below is one rule statement.  As used in this file
// (see the section comments), `>` rules produce Latin from Greek, `<` rules
// produce Greek from Latin, and `<>` rules serve both directions.
// Rule ORDER IS SIGNIFICANT: more specific rules are deliberately listed
// before the general ones so that they are not masked.  Do not re-sort.

t_Grek_Latn {
  Rule {
    //--------------------------------------------------------------------
    //--------------------------------------------------------------------
    //--------------------------------------------------------------------

    // Rules are predicated on running NFD first, and NFC afterwards
    // :: [\\u0000-\u007F \u0370-\u03FF [:Greek:] [:nonspacing mark:]] ;

    // MINIMAL FILTER GENERATED FOR: Greek-Latin
    // (generated character filter for the forward direction; the hand-written
    // filter it replaces is kept in the comment above for reference)
    ":: [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126] ;"

    // Decompose before the rules run (see "predicated on running NFD first" above).
    ":: NFD (NFC) ;"

    // TEST CASES
    // Ὀλίγοι ἔμφρονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
    // ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
    // ᾳ ῃ ῳ ὃ ὄ
    // ὠς ὡς ὢς ὣς
    // Ὠς Ὡς Ὢς Ὣς
    // ὨΣ ὩΣ ὪΣ ὫΣ
    // Ạ, ạ, Ẹ, ẹ, Ọ, ọ

    // Useful variables
    // Case classes over both scripts ($lower/$upper) and Greek only ($glower).
    "$lower = [[:latin:][:greek:] & [:Ll:]];"
    "$glower = [[:greek:] & [:Ll:]];"
    "$upper = [[:latin:][:greek:] & [:Lu:]] ;"
    "$accent = [:M:] ;"

    // NOTE: restrict to just the Greek & Latin accents that we care about
    // TODO: broaden out once iteration is fixed
    // NOTE(review): $accentMinus (here) and $accent_minus (defined in the IOTA
    // section) are two DIFFERENT variables with different contents.
    "$accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ;"

    "$macron = \u0304 ;"
    "$ddot = \u0308 ;"
    "$ddotmac = [$ddot$macron];"

    // Greek vowels (lower, upper, both).
    "$lcgvowel = [αεηιουω] ;"
    "$ucgvowel = [ΑΕΗΙΟΥΩ] ;"
    "$gvowel = [$lcgvowel $ucgvowel] ;"
    // NOTE(review): $lcgvowelC is not referenced by any rule in this rule set.
    "$lcgvowelC = [$lcgvowel $accent] ;"

    // Latin ("English") vowels, and the union of Latin and Greek vowels.
    "$evowel = [aeiouyAEIOUY];"
    "$vowel = [ $evowel $gvowel] ;"

    // Letters before which gamma is written n (Greek side / Latin side).
    "$gammaLike = [ΓΚΞΧγκξχϰ] ;"
    "$egammaLike = [GKXCgkxc] ;"

    // The next three values are literal combining marks, so they render
    // attached to the space that precedes them inside the string.
    "$smooth = ̓ ;"
    "$rough = ̔ ;"
    "$iotasub = ͅ ;"

    // Latin vowels other than i/I.
    "$evowel_i = [$evowel-[iI]] ;"
    // Mark used on the Latin side to flag the "exception" sigma form (see the
    // sigma rules below).
    "$underbar = \u0331;"

    // Context helpers: a letter with any trailing/leading marks or apostrophes.
    "$afterLetter = [:L:] [[:M:]\\\']* ;"
    "$beforeLetter = [[:M:]\\\']* [:L:] ;"
    "$beforeLower = $accent * $lower ;"
    "$notLetter = [^[:L:][:M:]] ;"

    // Fix punctuation
    "\\\; <> \\\? ;"
    "· <> \\\: ;"

    // CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
    "\u0342 <> \u0302 ;"

    // IOTA: convert iota subscript to iota
    // first make previous alpha long!
    "$accent_minus = [[$accent]-[$iotasub$macron]];"
    "Α } $accent_minus * $iotasub > | Α $macron ;"
    "α } $accent_minus * $iotasub > | α $macron ;"

    // now convert to uppercase if after uppercase, otherwise to lowercase
    "$upper $accent * { $iotasub > I ;"
    "$iotasub > i ;"
    // Reverse direction: an i/I following a macron-bearing Latin vowel becomes
    // an iota subscript again.
    "| $1 $iotasub < ($evowel $macron $accentMinus *) i ;"
    "| $1 $iotasub < ($evowel $macron $accentMinus *) I ;"

    // BREATHING
    // Convert rough breathing to h, and move before letters.
    // Make A ` x => H a x
    // (capital vowel + rough breathing, followed by a lowercase letter)
    "Α ($macron?) $rough } $beforeLower > H | α $1;"
    "Ε $rough } $beforeLower > H | ε;"
    "Η $rough } $beforeLower > H | η ;"
    "Ι ($ddot?) $rough } $beforeLower > H | ι $1;"
    "Ο $rough } $beforeLower > H | ο ;"
    "Υ $rough } $beforeLower > H | υ ;"
    "Ω ($ddot?) $rough } $beforeLower > H | ω $1;"

    // Make A x ` => H a x
    // (capital vowel, lowercase Greek letter, then the rough breathing)
    "Α ($glower $macron?) $rough > H | α $1 ;"
    "Ε ($glower) $rough > H | ε $1 ;"
    "Η ($glower) $rough > H | η $1 ;"
    "Ι ($glower $ddot?) $rough > H | ι $1 ;"
    "Ο ($glower) $rough > H | ο $1 ;"
    "Υ ($glower) $rough > H | υ $1 ;"
    "Ω ($glower $ddot?) $rough > H | ω $1 ;"

    //Otherwise, make x ` into h x and X ` into H X
    // The lowercase rule must stay first: $gvowel also contains the lowercase
    // vowels, so the second rule only fires when an uppercase vowel is involved.
    "($lcgvowel + $ddotmac? ) $rough > h | $1 ;"
    "($gvowel + $ddotmac? ) $rough > H | $1 ;"

    // Go backwards with H
    // Longest vowel patterns first, so that a two-vowel sequence is captured
    // whole before the single-vowel fallback is tried.
    "| $1 $rough < h ($evowel $macron $ddot? $evowel_i $macron?) ;"
    "| $1 $rough < h ($evowel $ddot? $evowel $macron?) ;"
    "| $1 $rough < h ($evowel $macron? $ddot?) ;"
    "| $1 $rough < H ([AEIOUY] $macron $ddot? $evowel_i $macron?) ;"
    "| $1 $rough < H ([AEIOUY] $ddot? $evowel $macron?) ;"
    "| $1 $rough < H ([AEIOUY] $macron? $ddot?) ;"

    // titlecase, have to fix individually
    // in the future, we should add &uppercase() to make this easier
    // (H + lowercase vowel: the vowel is re-emitted in uppercase, one rule per
    // vowel, in the same three pattern shapes as above)
    "| A $1 $rough < H a ($macron $ddot? $evowel_i $macron?) ;"
    "| E $1 $rough < H e ($macron $ddot? $evowel_i $macron?) ;"
    "| I $1 $rough < H i ($macron $ddot? $evowel_i $macron?) ;"
    "| O $1 $rough < H o ($macron $ddot? $evowel_i $macron?) ;"
    "| U $1 $rough < H u ($macron $ddot? $evowel_i $macron?) ;"
    "| Y $1 $rough < H y ($macron $ddot? $evowel_i $macron?) ;"
    "| A $1 $rough < H a ($ddot? $evowel $macron?) ;"
    "| E $1 $rough < H e ($ddot? $evowel $macron?) ;"
    "| I $1 $rough < H i ($ddot? $evowel $macron?) ;"
    "| O $1 $rough < H o ($ddot? $evowel $macron?) ;"
    "| U $1 $rough < H u ($ddot? $evowel $macron?) ;"
    "| Y $1 $rough < H y ($ddot? $evowel $macron?) ;"
    "| A $1 $rough < H a ($macron? $ddot? ) ;"
    "| E $1 $rough < H e ($macron? $ddot? ) ;"
    "| I $1 $rough < H i ($macron? $ddot? ) ;"
    "| O $1 $rough < H o ($macron? $ddot? ) ;"
    "| U $1 $rough < H u ($macron? $ddot? ) ;"
    "| Y $1 $rough < H y ($macron? $ddot? ) ;"

    // Now do smooth
    //delete smooth breathing for Latin
    "$smooth > ;"

    // insert in Greek
    // the assumption is that all Marks are on letters.
    // (word-initial r/R not followed by h, a word-initial vowel pair, or a
    // word-initial single vowel, provided no breathing mark is already present)
    "| $1 $smooth < $notLetter { ([rR]) } [^hH$smooth$rough] ;"
    "| $1 $smooth < $notLetter { ($evowel $macron? $evowel $macron?) } [^$smooth$rough] ;"
    "| $1 $smooth < $notLetter { ($evowel $macron?) } [^$evowel$smooth$rough] ;"

    // TODO: preserve smooth/rough breathing if not
    // on initial vowel sequence

    // need to have these up here so the rules don't mask
    // (multi-character Latin sequences must be tried before the single-letter
    // rules in the NORMAL section below)

    // remove now superfluous macron when returning
    "Α < A $macron ;"
    "α < a $macron ;"

    "η <> e $macron ;"
    "Η <> E $macron ;"
    "φ <> ph ;"
    "Ψ } $beforeLower <> Ps ;"
    "Ψ <> PS ;"
    "Φ } $beforeLower <> Ph ;"
    "Φ <> PH ;"
    "ψ <> ps ;"
    "ω <> o $macron ;"
    "Ω <> O $macron;"

    // NORMAL
    "α <> a ;"
    "Α <> A ;"
    "β <> b ;"
    "Β <> B ;"
    // gamma before a $gammaLike letter is written n (and back).
    "γ } $gammaLike <> n } $egammaLike ;"
    "γ <> g ;"
    "Γ } $gammaLike <> N } $egammaLike ;"
    "Γ <> G ;"
    "δ <> d ;"
    "Δ <> D ;"
    "ε <> e ;"
    "Ε <> E ;"
    "ζ <> z ;"
    "Ζ <> Z ;"
    "θ <> th ;"
    "Θ } $beforeLower <> Th ;"
    "Θ <> TH ;"
    "ι <> i ;"
    "Ι <> I ;"
    "κ <> k ;"
    "Κ <> K ;"
    "λ <> l ;"
    "Λ <> L ;"
    "μ <> m ;"
    "Μ <> M ;"
    // nu before a $gammaLike letter gets a trailing apostrophe so it is not
    // confused with the n produced from gamma above.
    // NOTE(review): the lowercase rule is forward-only (`>`) while the
    // uppercase rule is two-way (`<>`); the lowercase apostrophe is removed by
    // the "delete any trailing ' marks" rule near the end.  Confirm the
    // asymmetry is intentional before changing either rule.
    "ν } $gammaLike > n\\\' ;"
    "ν <> n ;"
    "Ν } $gammaLike <> N\\\' ;"
    "Ν <> N ;"
    "ξ <> x ;"
    "Ξ <> X ;"
    "ο <> o ;"
    "Ο <> O ;"
    "π <> p ;"
    "Π <> P ;"
    "ρ $rough <> rh;"
    "Ρ $rough } $beforeLower <> Rh ;"
    "Ρ $rough <> RH ;"
    "ρ <> r ;"
    "Ρ <> R ;"

    // insert separator
    // (an apostrophe between p/P and a following sigma, so the result is
    // distinguishable from the "ps" produced from psi)
    "[Pp] { } ς > \\\' ;"
    "[Pp] { } σ > \\\' ;"

    // underbar means exception
    // before a letter, initial
    "ς } $beforeLetter <> s $underbar } $beforeLetter;"
    "σ } $beforeLetter <> s } $beforeLetter;"

    // otherwise, after a letter = final
    "$afterLetter { σ <> $afterLetter { s $underbar;"
    "$afterLetter { ς <> $afterLetter { s ;"

    // otherwise (isolated) = initial
    "ς <> s $underbar;"
    "σ <> s ;"

    "[Pp] { Σ <> \\\'S ;"
    "Σ <> S ;"
    "τ <> t ;"
    "Τ <> T ;"
    // upsilon is u after a vowel, y elsewhere.
    "$vowel {υ } <> u ;"
    "υ <> y ;"
    "$vowel { Υ <> U ;"
    "Υ <> Y ;"
    "χ <> ch ;"
    "Χ } $beforeLower <> Ch ;"
    "Χ <> CH ;"

    // Completeness for ASCII
    // Latin letters with no Greek counterpart above are first rewritten to a
    // Latin letter (or sequence) that does have one, then reprocessed.
    "$ignore = [[:Mark:]''] * ;"
    "| k < c ;"
    "| ph < f ;"
    "| i < j ;"
    "| k < q ;"
    "| b < v } $vowel ;"
    "| b < w } $vowel;"
    "| u < v ;"
    "| u < w;"
    "| K < C ;"
    "| Ph < F ;"
    "| I < J ;"
    "| K < Q ;"
    "| B < V } $vowel ;"
    "| B < W } $vowel ;"
    "| U < V ;"
    "| U < W ;"
    // Any rough breathing not consumed by the BREATHING rules: H next to an
    // uppercase letter, h otherwise.
    "$rough } $ignore [:UppercaseLetter:] > H ;"
    "$ignore [:UppercaseLetter:] { $rough > H ;"
    "$rough < H ;"
    "$rough <> h ;"

    // Completeness for Greek
    // Variant Greek letter forms are rewritten to the basic letter and then
    // reprocessed by the rules above (forward direction only).
    "ϐ > | β ;"
    "ϑ > | θ ;"
    "ϒ > | Υ ;"
    "ϕ > | φ ;"
    "ϖ > | π ;"
    "ϰ > | κ ;"
    "ϱ > | ρ ;"
    "ϲ > | σ ;"
    "ϳ > j ;"
    "ϴ > | Θ ;"
    "ϵ > | ε ;"
    "µ > | μ ;"
    "ͺ > i;"

    // delete any trailing ' marks used for roundtripping
    "< [Ππ] { \\\' } [Ss] ;"
    "< [Νν] { \\\' } $egammaLike ;"

    // Recompose after the rules have run (see "NFC afterwards" at the top).
    "::NFC (NFD) ;"

    // ([\\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
    // ([\\u0000-\u007F \u00B7 [:Latin:] [:nonspacing mark:]]) ;

    // MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
    // (generated character filter for the reverse direction; the hand-written
    // alternatives are kept in the two comments above for reference)
    ":: ( [':?A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0300-\u0337\u0339-\u0345\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AA-\u03B0\u03CA-\u03CE\u03D3-\u03D4\u0400-\u0401\u0403\u0407\u040C-\u040E\u0419\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0476-\u0477\u04C1-\u04C2\u04D0-\u04D3\u04D6-\u04D7\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F5\u04F8-\u04F9\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FC1-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEE\u1FF2-\u1FF4\u1FF6-\u1FFC\u212A-\u212B] ) ;"
  }
}