9cc0f7b308
X-SVN-Rev: 15395
362 lines
10 KiB
Plaintext
362 lines
10 KiB
Plaintext
// -*- Coding: utf-8; -*-
|
||
//--------------------------------------------------------------------
|
||
// Copyright (c) 1999-2004, International Business Machines
|
||
// Corporation and others. All Rights Reserved.
|
||
//--------------------------------------------------------------------
|
||
// THIS IS A MACHINE-GENERATED FILE
|
||
// Tool: dumpICUrules.bat
|
||
// Source: ../../../impl/data/Transliterator_Greek_Latin.txt
|
||
// Date: Tue May 18 17:24:48 2004
|
||
//--------------------------------------------------------------------
|
||
|
||
// Greek_Latin
|
||
|
||
t_Grek_Latn {
|
||
Rule {
|
||
//--------------------------------------------------------------------
|
||
//--------------------------------------------------------------------
|
||
//--------------------------------------------------------------------
|
||
|
||
// Rules are predicated on running NFD first, and NFC afterwards
|
||
// :: [\\u0000-\u007F \u0370-\u03FF [:Greek:] [:nonspacing mark:]] ;
|
||
// MINIMAL FILTER GENERATED FOR: Greek-Latin
// The range after U+03EF-U+03F5 ends at U+03FB (GREEK SMALL LETTER SAN), the last
// Greek letter the rules below handle. It previously read U+03F7-U+07FB, which
// swallowed the explicit Cyrillic entries that follow it and pulled every script
// up to U+07FB into the filter.
|
||
":: [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u03F7-\u03FB\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u03F9] ;"
|
||
|
||
":: NFD (NFC) ;"
|
||
|
||
// TEST CASES
|
||
|
||
// Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
|
||
// ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
|
||
// ᾳ ῃ ῳ ὃ ὄ
|
||
// ὠς ὡς ὢς ὣς
|
||
// Ὠς Ὡς Ὢς Ὣς
|
||
// ὨΣ ὩΣ ὪΣ ὫΣ
|
||
// Ạ, ạ, Ẹ, ẹ, Ọ, ọ
|
||
|
||
// Useful variables
|
||
|
||
"$lower = [[:latin:][:greek:] & [:Ll:]];"
|
||
"$glower = [[:greek:] & [:Ll:]];"
|
||
"$upper = [[:latin:][:greek:] & [:Lu:]] ;"
|
||
"$accent = [:M:] ;"
|
||
|
||
// NOTE: restrict to just the Greek & Latin accents that we care about
|
||
// TODO: broaden out once interation is fixed
|
||
"$accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ;"
|
||
|
||
"$macron = \u0304 ;"
|
||
"$ddot = \u0308 ;"
|
||
"$ddotmac = [$ddot$macron];"
|
||
|
||
"$lcgvowel = [αεηιουω] ;"
|
||
"$ucgvowel = [ΑΕΗΙΟΥΩ] ;"
|
||
"$gvowel = [$lcgvowel $ucgvowel] ;"
|
||
"$lcgvowelC = [$lcgvowel $accent] ;"
|
||
|
||
"$evowel = [aeiouyAEIOUY];"
|
||
"$evowel2 = [iuyIUY];"
|
||
"$vowel = [ $evowel $gvowel] ;"
|
||
|
||
"$gammaLike = [ΓΚΞΧγκξχϰ] ;"
|
||
"$egammaLike = [GKXCgkxc] ;"
|
||
"$smooth = ̓ ;"
|
||
"$rough = ̔ ;"
|
||
"$iotasub = ͅ ;"
|
||
|
||
"$evowel_i = [$evowel-[iI]] ;"
|
||
"$evowel2_i = [uyUY];"
|
||
|
||
"$underbar = \u0331;"
|
||
|
||
"$afterLetter = [:L:] [[:M:]\\\']* ;"
|
||
"$beforeLetter = [[:M:]\\\']* [:L:] ;"
|
||
"$beforeLower = $accent * $lower ;"
|
||
|
||
"$notLetter = [^[:L:][:M:]] ;"
|
||
"$under = ̱;"
|
||
|
||
// Fix punctuation
|
||
// preserve original
|
||
"\\\: <> \\\: $under ;"
|
||
"\\\? <> \\\? $under ;"
|
||
|
||
"\\\; <> \\\? ;"
|
||
"· <> \\\: ;"
|
||
|
||
// CIRCUMFLEX: convert the Greek circumflex (perispomeni, U+0342) to the ordinary circumflex (U+0302). A tilde or inverted breve could be used instead.
|
||
|
||
"\u0342 <> \u0302 ;"
|
||
|
||
// IOTA: convert iota subscript to iota
|
||
// first make previous alpha long!
|
||
|
||
"$accent_minus = [[$accent]-[$iotasub$macron]];"
|
||
|
||
"Α } $accent_minus * $iotasub > | Α $macron ;"
|
||
"α } $accent_minus * $iotasub > | α $macron ;"
|
||
|
||
// now convert to uppercase I if after an uppercase letter, otherwise to lowercase i
|
||
|
||
"$upper $accent * { $iotasub > I ;"
|
||
"$iotasub > i ;"
|
||
|
||
"| $1 $iotasub < ($evowel $macron $accentMinus *) i ;"
|
||
"| $1 $iotasub < ($evowel $macron $accentMinus *) I ;"
|
||
|
||
// BREATHING
|
||
|
||
// Convert rough breathing to h, and move before letters.
|
||
|
||
// Make A ` x => H a x  (` stands for the rough breathing; x is a following lowercase letter)
|
||
|
||
"Α ($macron?) $rough } $beforeLower > H | α $1;"
|
||
"Ε $rough } $beforeLower > H | ε;"
|
||
"Η $rough } $beforeLower > H | η ;"
|
||
"Ι ($ddot?) $rough } $beforeLower > H | ι $1;"
|
||
"Ο $rough } $beforeLower > H | ο ;"
|
||
"Υ $rough } $beforeLower > H | υ ;"
|
||
"Ω ($ddot?) $rough } $beforeLower > H | ω $1;"
|
||
|
||
// Make A x ` => H a x  (` stands for the rough breathing, here placed after the following lowercase Greek letter)
|
||
|
||
"Α ($glower $macron?) $rough > H | α $1 ;"
|
||
"Ε ($glower) $rough > H | ε $1 ;"
|
||
"Η ($glower) $rough > H | η $1 ;"
|
||
"Ι ($glower $ddot?) $rough > H | ι $1 ;"
|
||
"Ο ($glower) $rough > H | ο $1 ;"
|
||
"Υ ($glower) $rough > H | υ $1 ;"
|
||
"Ω ($glower $ddot?) $rough > H | ω $1 ;"
|
||
|
||
// Otherwise, turn x ` into h x and X ` into H X
|
||
|
||
"($lcgvowel + $ddotmac? ) $rough > h | $1 ;"
|
||
"($gvowel + $ddotmac? ) $rough > H | $1 ;"
|
||
|
||
// Go backwards with H
|
||
|
||
"| $1 $rough < h ($evowel $macron $ddot? $evowel2_i $macron?) ;"
|
||
"| $1 $rough < h ($evowel $ddot? $evowel2 $macron?) ;"
|
||
"| $1 $rough < h ($evowel $macron? $ddot?) ;"
|
||
|
||
"| $1 $rough < H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;"
|
||
"| $1 $rough < H ([AEIOUY] $ddot? $evowel2 $macron?) ;"
|
||
"| $1 $rough < H ([AEIOUY] $macron? $ddot?) ;"
|
||
|
||
// titlecase, have to fix individually
|
||
// in the future, we should add &uppercase() to make this easier
|
||
|
||
"| A $1 $rough < H a ($macron $ddot? $evowel2_i $macron?) ;"
|
||
"| E $1 $rough < H e ($macron $ddot? $evowel2_i $macron?) ;"
|
||
"| I $1 $rough < H i ($macron $ddot? $evowel2_i $macron?) ;"
|
||
"| O $1 $rough < H o ($macron $ddot? $evowel2_i $macron?) ;"
|
||
"| U $1 $rough < H u ($macron $ddot? $evowel2_i $macron?) ;"
|
||
"| Y $1 $rough < H y ($macron $ddot? $evowel2_i $macron?) ;"
|
||
|
||
"| A $1 $rough < H a ($ddot? $evowel2 $macron?) ;"
|
||
"| E $1 $rough < H e ($ddot? $evowel2 $macron?) ;"
|
||
"| I $1 $rough < H i ($ddot? $evowel2 $macron?) ;"
|
||
"| O $1 $rough < H o ($ddot? $evowel2 $macron?) ;"
|
||
"| U $1 $rough < H u ($ddot? $evowel2 $macron?) ;"
|
||
"| Y $1 $rough < H y ($ddot? $evowel2 $macron?) ;"
|
||
|
||
"| A $1 $rough < H a ($macron? $ddot? ) ;"
|
||
"| E $1 $rough < H e ($macron? $ddot? ) ;"
|
||
"| I $1 $rough < H i ($macron? $ddot? ) ;"
|
||
"| O $1 $rough < H o ($macron? $ddot? ) ;"
|
||
"| U $1 $rough < H u ($macron? $ddot? ) ;"
|
||
"| Y $1 $rough < H y ($macron? $ddot? ) ;"
|
||
|
||
// Now do smooth
|
||
|
||
//delete smooth breathing for Latin
|
||
"$smooth > ;"
|
||
|
||
// insert in Greek
|
||
// the assumption is that all Marks are on letters.
|
||
|
||
"| $1 $smooth < $notLetter { ([rR]) } [^hH$smooth$rough] ;"
|
||
"| $1 $smooth < $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;"
|
||
"| $1 $smooth < $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;"
|
||
|
||
// TODO: preserve smooth/rough breathing if not
|
||
// on initial vowel sequence
|
||
|
||
// these multi-character rules must come before the single-letter rules in the NORMAL section below, which would otherwise mask them
|
||
|
||
// remove now superfluous macron when returning
|
||
|
||
"Α < A $macron ;"
|
||
"α < a $macron ;"
|
||
|
||
"η <> e $macron ;"
|
||
"Η <> E $macron ;"
|
||
|
||
"φ <> ph ;"
|
||
"Ψ } $beforeLower <> Ps ;"
|
||
"Ψ <> PS ;"
|
||
|
||
"Φ } $beforeLower <> Ph ;"
|
||
"Φ <> PH ;"
|
||
"ψ <> ps ;"
|
||
|
||
"ω <> o $macron ;"
|
||
"Ω <> O $macron;"
|
||
|
||
// NORMAL
|
||
|
||
"α <> a ;"
|
||
"Α <> A ;"
|
||
|
||
"β <> b ;"
|
||
"Β <> B ;"
|
||
|
||
"γ } $gammaLike <> n } $egammaLike ;"
|
||
"γ <> g ;"
|
||
"Γ } $gammaLike <> N } $egammaLike ;"
|
||
"Γ <> G ;"
|
||
|
||
"δ <> d ;"
|
||
"Δ <> D ;"
|
||
|
||
"ε <> e ;"
|
||
"Ε <> E ;"
|
||
|
||
"ζ <> z ;"
|
||
"Ζ <> Z ;"
|
||
|
||
"θ <> th ;"
|
||
"Θ } $beforeLower <> Th ;"
|
||
"Θ <> TH ;"
|
||
|
||
"ι <> i ;"
|
||
"Ι <> I ;"
|
||
|
||
"κ <> k ;"
|
||
"Κ <> K ;"
|
||
|
||
"λ <> l ;"
|
||
"Λ <> L ;"
|
||
|
||
"μ <> m ;"
|
||
"Μ <> M ;"
|
||
|
||
// Nu before a gamma-like letter gets a trailing apostrophe in Latin, because a
// plain n before a gamma-like Latin letter is what gamma maps to (see the
// "γ } $gammaLike <> n } $egammaLike" rule).
// NOTE(review): the lowercase rule is forward-only (>) while the uppercase
// rule is bidirectional (<>) - confirm whether this asymmetry is intended.
"ν } $gammaLike > n\\\' ;"
|
||
"ν <> n ;"
|
||
"Ν } $gammaLike <> N\\\' ;"
|
||
"Ν <> N ;"
|
||
|
||
"ξ <> x ;"
|
||
"Ξ <> X ;"
|
||
|
||
"ο <> o ;"
|
||
"Ο <> O ;"
|
||
|
||
"π <> p ;"
|
||
"Π <> P ;"
|
||
|
||
"ρ $rough <> rh;"
|
||
"Ρ $rough } $beforeLower <> Rh ;"
|
||
"Ρ $rough <> RH ;"
|
||
"ρ <> r ;"
|
||
"Ρ <> R ;"
|
||
|
||
// insert separator before things that turn into s
|
||
|
||
"[Pp] { } [ςσΣϷϸϺϻ] > \\\' ;"
|
||
|
||
// special S variants
|
||
|
||
"Ϸ <> Š ;" // Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
|
||
"ϸ <> š ;" //ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
|
||
"Ϻ <> Ŝ ;" // Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
|
||
"ϻ <> ŝ ;" // ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
|
||
|
||
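// underbar marks the exception: a sigma whose form does not match its position (ς before a letter or isolated, σ at the end of a word)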
|
||
|
||
// before a letter, initial
|
||
"ς } $beforeLetter <> s $underbar } $beforeLetter;"
|
||
"σ } $beforeLetter <> s } $beforeLetter;"
|
||
|
||
// otherwise, after a letter = final
|
||
"$afterLetter { σ <> $afterLetter { s $underbar;"
|
||
"$afterLetter { ς <> $afterLetter { s ;"
|
||
|
||
// otherwise (isolated) = initial
|
||
"ς <> s $underbar;"
|
||
"σ <> s ;"
|
||
|
||
// [Pp] { Σ <> \\\'S ;
|
||
"Σ <> S ;"
|
||
|
||
"τ <> t ;"
|
||
"Τ <> T ;"
|
||
|
||
"$vowel {υ } <> u ;"
|
||
"υ <> y ;"
|
||
"$vowel { Υ <> U ;"
|
||
"Υ <> Y ;"
|
||
|
||
"χ <> ch ;"
|
||
"Χ } $beforeLower <> Ch ;"
|
||
"Χ <> CH ;"
|
||
|
||
// Completeness for ASCII
|
||
|
||
// Optional run of marks and apostrophes, used as context by the
// rough-breathing rules at the end of this section.
"$ignore = [[:Mark:]''] * ;"
|
||
|
||
// Reverse direction only (<): Latin letters that no rule above produces are
// rewritten to a Latin letter that does have a Greek mapping. The leading |
// places the cursor before the replacement so that it is matched again by
// the other rules.
"| k < c ;"
|
||
"| ph < f ;"
|
||
"| i < j ;"
|
||
"| k < q ;"
|
||
// v and w become b before a vowel, u otherwise.
"| b < v } $vowel ;"
|
||
"| b < w } $vowel;"
|
||
"| u < v ;"
|
||
"| u < w;"
|
||
// Uppercase counterparts of the rules above.
"| K < C ;"
|
||
"| Ph < F ;"
|
||
"| I < J ;"
|
||
"| K < Q ;"
|
||
"| B < V } $vowel ;"
|
||
"| B < W } $vowel ;"
|
||
"| U < V ;"
|
||
"| U < W ;"
|
||
|
||
// Rough breathings not consumed by the BREATHING rules: forward, one that is
// followed by an uppercase letter (after optional $ignore) or that directly
// follows an uppercase letter becomes H; any other becomes h.
// In reverse, H and h both become a rough breathing.
"$rough } $ignore [:UppercaseLetter:] > H ;"
|
||
"$ignore [:UppercaseLetter:] { $rough > H ;"
|
||
"$rough < H ;"
|
||
"$rough <> h ;"
|
||
|
||
// Completeness for Greek
|
||
|
||
"ϐ > | β ;"
|
||
"ϑ > | θ ;"
|
||
"ϒ > | Υ ;"
|
||
"ϕ > | φ ;"
|
||
"ϖ > | π ;"
|
||
|
||
"ϰ > | κ ;"
|
||
"ϱ > | ρ ;"
|
||
"ϲ > | σ ;"
|
||
"Ϲ > | Σ;" //U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
|
||
"ϳ > j ;"
|
||
"ϴ > | Θ ;"
|
||
"ϵ > | ε ;"
|
||
|
||
"µ > | μ ;"
|
||
|
||
"ͺ > i;"
|
||
|
||
// delete any trailing ' marks used for roundtripping
|
||
|
||
"< [Ππ] { \\\' } [Ss] ;"
|
||
"< [Νν] { \\\' } $egammaLike ;"
|
||
|
||
"::NFC (NFD) ;"
|
||
// ([\\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
|
||
// ([\\u0000-\u007F \u00B7 [:Latin:] [:nonspacing mark:]]) ;
|
||
// MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
|
||
":: ( [':?A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0300-\u0337\u0339-\u0345\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AA-\u03B0\u03CA-\u03CE\u03D3-\u03D4\u0400-\u0401\u0403\u0407\u040C-\u040E\u0419\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0476-\u0477\u04C1-\u04C2\u04D0-\u04D3\u04D6-\u04D7\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F5\u04F8-\u04F9\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FC1-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEE\u1FF2-\u1FF4\u1FF6-\u1FFC\u212A-\u212B] ) ;"
|
||
}
|
||
}
|