2001-10-26 05:41:16 +00:00
|
|
|
|
// -*- Coding: utf-8; -*-
|
|
|
|
|
//--------------------------------------------------------------------
|
|
|
|
|
// Copyright (c) 1999-2001, International Business Machines
|
|
|
|
|
// Corporation and others. All Rights Reserved.
|
|
|
|
|
//--------------------------------------------------------------------
|
|
|
|
|
// THIS IS A MACHINE-GENERATED FILE
|
2001-11-07 18:50:25 +00:00
|
|
|
|
// Tool: dumpICUrules.bat
|
2001-10-26 05:41:16 +00:00
|
|
|
|
// Source: ../../text/resources/Transliterator_Greek_Latin.txt
|
2001-11-19 22:23:36 +00:00
|
|
|
|
// Date: Mon Nov 19 12:15:35 2001
|
2001-10-26 05:41:16 +00:00
|
|
|
|
//--------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
// Greek_Latin
|
|
|
|
|
|
|
|
|
|
translit_Greek_Latin {
|
|
|
|
|
Rule {
|
|
|
|
|
//--------------------------------------------------------------------
|
|
|
|
|
// Copyright (c) 1999-2001, International Business Machines
|
|
|
|
|
// Corporation and others. All Rights Reserved.
|
|
|
|
|
//--------------------------------------------------------------------
|
|
|
|
|
// $Source: /xsrl/Nsvn/icu/icu/source/data/translit/Attic/t_Grek_Latn.txt,v $
|
2001-11-19 22:23:36 +00:00
|
|
|
|
// $Date: 2001/11/19 22:23:33 $
|
|
|
|
|
// $Revision: 1.4 $
|
2001-10-26 05:41:16 +00:00
|
|
|
|
//--------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
// Rules are predicated on running NFD first, and NFC afterwards
// Forward filter: only ASCII, the middle dot U+00B7 (rewritten by the
// punctuation rule further down), Greek characters and combining marks are
// processed.  The middle dot must be spelled with a complete \u00B7 escape
// (the previous text had lost the 'u', leaving an invalid escape).
|
2001-11-19 22:23:36 +00:00
|
|
|
|
":: [\\u0000-\u007F \\\u00B7 [:Greek:] [:Mark:]] ;"
|
|
|
|
|
// Forward direction decomposes with NFD before the rules run; the
// parenthesised NFC is the step applied in the reverse direction.
":: NFD (NFC) ;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
|
|
|
|
|
// TEST CASES
|
|
|
|
|
|
|
|
|
|
// Ὀλίγοι ἔμφρονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
|
|
|
|
|
// ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
|
|
|
|
|
// ᾳ ῃ ῳ ὃ ὄ
|
|
|
|
|
// ὠς ὡς ὢς ὣς
|
|
|
|
|
// Ὠς Ὡς Ὢς Ὣς
|
|
|
|
|
// ὨΣ ὩΣ ὪΣ ὫΣ
|
|
|
|
|
// Ạ, ạ, Ẹ, ẹ, Ọ, ọ
|
|
|
|
|
|
|
|
|
|
// Useful variables
// Character classes and individual combining marks that the rules below
// refer to by name.
|
|
|
|
|
|
|
|
|
|
// Ll: lowercase letters (any script).
"$lower = [:Ll:] ;"
|
|
|
|
|
// Lu: uppercase letters (any script).
"$upper = [:Lu:] ;"
|
|
|
|
|
// M: every combining mark.
"$accent = [:M:] ;"
|
|
|
|
|
|
|
|
|
|
// U+0304 COMBINING MACRON
"$macron = \u0304 ;"
|
|
|
|
|
// U+0308 COMBINING DIAERESIS
"$ddot = \u0308 ;"
|
2001-11-05 20:39:12 +00:00
|
|
|
|
// A set, so it matches a single mark: either the diaeresis or the macron.
"$ddotmac = [$ddot$macron];"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
|
|
|
|
|
// Greek base vowels, lowercase and uppercase, and their union.
"$lcgvowel = [αεηιουω] ;"
|
|
|
|
|
"$ucgvowel = [ΑΕΗΙΟΥΩ] ;"
|
|
|
|
|
"$gvowel = [$lcgvowel $ucgvowel] ;"
|
|
|
|
|
// Lowercase Greek vowel or any combining mark.
// NOTE(review): not referenced by any other rule visible in this file.
"$lcgvowelC = [$lcgvowel $accent] ;"
|
|
|
|
|
|
|
2001-11-05 20:39:12 +00:00
|
|
|
|
// Latin ("English") vowels, counting y/Y as a vowel.
"$evowel = [aeiouyAEIOUY];"
|
|
|
|
|
// Any vowel, Latin or Greek.
"$vowel = [ $evowel $gvowel] ;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
|
|
|
|
|
|
// Zero or more marks followed by a lowercase letter.  Used as an
// after-context to choose the titlecase output (e.g. Ps rather than PS).
"$beforeLower = $accent * $lower ;"
|
|
|
|
|
|
2001-11-05 20:39:12 +00:00
|
|
|
|
// Greek letters before which gamma is written "n" (see the gamma rules),
// and the Latin letters that begin their transliterations.
"$gammaLike = [ΓΚΞΧγκξχϰ] ;"
|
|
|
|
|
"$egammaLike = [GKXCgkxc] ;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
// The next three values are literal combining characters and are hard to
// see in an editor: presumably U+0313 (smooth breathing), U+0314 (rough
// breathing) and U+0345 (iota subscript) -- TODO confirm the code points.
"$smooth = ̓ ;"
|
|
|
|
|
"$rough = ̔ ;"
|
|
|
|
|
"$iotasub = ͅ ;"
|
|
|
|
|
|
|
2001-11-05 20:39:12 +00:00
|
|
|
|
// Latin vowels other than i/I.
"$evowel_i = [$evowel-[iI]] ;"
|
|
|
|
|
|
|
|
|
|
|
// U+030C COMBINING CARON; the sigma rules attach it to "s" to mark a sigma
// whose form is exceptional for its position.
"$caron = \u030C;"
|
|
|
|
|
|
2001-11-07 18:50:25 +00:00
|
|
|
|
// Contexts used by the sigma rules to tell initial/medial from final position.
// $afterLetter  : a letter followed by any apostrophes/marks, i.e. the text
//                 that precedes a character standing after a letter.
// $beforeLetter : any apostrophes/marks followed by a letter, i.e. the text
//                 that follows a character standing before a letter.
// Both must test for a letter ([:L:]); the negated class [:^L:] made them
// match the opposite situation from what their names and the sigma rule
// comments ("before a letter", "after a letter") describe.
"$afterLetter = [:L:] [\\\'[:M:]]* ;"
|
|
|
|
|
"$beforeLetter = [\\\'[:M:]]* [:L:] ;"
|
2001-11-05 20:39:12 +00:00
|
|
|
|
|
2001-10-26 05:41:16 +00:00
|
|
|
|
// Fix punctuation
// Both rules are bidirectional (<>): ';' maps to '?' and the middle dot
// maps to ':'.  The backslashes escape characters that would otherwise be
// rule syntax.
|
|
|
|
|
|
2001-11-07 18:50:25 +00:00
|
|
|
|
"\\\; <> \\\? ;"
|
|
|
|
|
"· <> \\\: ;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
|
|
|
|
|
// CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
// U+0342 is COMBINING GREEK PERISPOMENI, U+0302 is COMBINING CIRCUMFLEX ACCENT.
|
|
|
|
|
|
|
|
|
|
|
"\u0342 <> \u0302 ;"
|
|
|
|
|
|
|
|
|
|
// IOTA: convert iota subscript to iota
|
|
|
|
|
// first make previous alpha long!
|
|
|
|
|
|
2001-11-05 20:39:12 +00:00
|
|
|
|
// Any mark except the iota subscript and the macron.  Leaving the macron
// out means the two alpha rules below cannot match again once they have
// inserted one.
"$accent_minus = [[$accent]-[$iotasub$macron]];"
|
|
|
|
|
|
|
|
|
|
|
// Alpha followed (after other marks) by an iota subscript gets a macron.
// The '|' puts the cursor before the alpha so the result is processed again.
"Α } $accent_minus * $iotasub > | Α $macron ;"
|
|
|
|
|
"α } $accent_minus * $iotasub > | α $macron ;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
|
|
|
|
|
|
// now convert to uppercase if after uppercase, otherwise to lowercase
|
|
|
|
|
|
|
|
|
|
|
"$upper $accent * { $iotasub > I ;"
|
|
|
|
|
"$iotasub > i ;"
|
|
|
|
|
|
|
|
|
|
|
// Reverse direction: a letter carrying a macron (plus any further marks)
// followed by i/I takes the iota subscript back; the cursor is placed before
// the captured letter so it is processed again.
"| $1 $iotasub < ([:L:] $macron [:M:]*) i ;"
|
2001-11-05 20:39:12 +00:00
|
|
|
|
"| $1 $iotasub < ([:L:] $macron [:M:]*) I ;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
|
|
|
|
|
// BREATHING
|
|
|
|
|
|
|
|
|
|
|
// Convert rough breathing to h, and move before letters.
|
|
|
|
|
|
|
|
|
|
|
// Make A ` x => H a x
// i.e. an uppercase vowel carrying the rough breathing, followed by a
// lowercase letter, becomes "H" plus the lowercased vowel (titlecase).  The
// '|' leaves the cursor before the vowel so later rules transliterate it; $1
// re-emits the optional mark captured between the vowel and the breathing.
|
|
|
|
|
|
2001-11-05 20:39:12 +00:00
|
|
|
|
"Α ($macron?) $rough } $beforeLower > H | α $1;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
"Ε $rough } $beforeLower > H | ε;"
|
|
|
|
|
"Η $rough } $beforeLower > H | η ;"
|
|
|
|
|
"Ι ($ddot?) $rough } $beforeLower > H | ι $1;"
|
|
|
|
|
"Ο $rough } $beforeLower > H | ο ;"
|
|
|
|
|
"Υ $rough } $beforeLower > H | υ ;"
|
|
|
|
|
// NOTE(review): the optional diaeresis is allowed on Ω (and Ι) but not on Υ;
// confirm that this is intended.
"Ω ($ddot?) $rough } $beforeLower > H | ω $1;"
|
|
|
|
|
|
|
|
|
|
// Make A x ` => H a x
// Same as the previous group, but the rough breathing sits on the lowercase
// letter that follows the uppercase vowel.  $1 captures that letter (and its
// optional mark) so it is re-emitted after the lowercased vowel.
|
|
|
|
|
|
2001-11-05 20:39:12 +00:00
|
|
|
|
"Α ($lower $macron?) $rough > H | α $1 ;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
"Ε ($lower) $rough > H | ε $1 ;"
|
|
|
|
|
"Η ($lower) $rough > H | η $1 ;"
|
|
|
|
|
"Ι ($lower $ddot?) $rough > H | ι $1 ;"
|
|
|
|
|
"Ο ($lower) $rough > H | ο $1 ;"
|
|
|
|
|
"Υ ($lower) $rough > H | υ $1 ;"
|
|
|
|
|
"Ω ($lower $ddot?) $rough > H | ω $1 ;"
|
|
|
|
|
|
|
|
|
|
// Otherwise, make x ` into h x and X ` into H X
// A run of one or more Greek vowels (optionally ending in a diaeresis or
// macron) followed by the rough breathing: the breathing becomes h/H placed
// in front of the run.  The second rule is only reached when the
// lowercase-only rule above it did not match.
|
|
|
|
|
|
2001-11-05 20:39:12 +00:00
|
|
|
|
"($lcgvowel + $ddotmac? ) $rough > h | $1 ;"
|
|
|
|
|
"($gvowel + $ddotmac? ) $rough > H | $1 ;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
|
|
|
|
|
// Go backwards with H
// Reverse-only rules (<): h/H followed by a Latin vowel sequence is removed
// and a rough breathing is placed after the captured sequence instead.  The
// longer patterns come first; the cursor is put before the captured vowels
// so they are transliterated afterwards.
|
|
|
|
|
|
2001-11-05 20:39:12 +00:00
|
|
|
|
"| $1 $rough < h ($evowel $macron $ddot? $evowel_i $macron?) ;"
|
|
|
|
|
"| $1 $rough < h ($evowel $ddot? $evowel $macron?) ;"
|
|
|
|
|
"| $1 $rough < h ($evowel $macron? $ddot?) ;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
|
2001-11-05 20:39:12 +00:00
|
|
|
|
// Uppercase H followed by an uppercase first vowel.
"| $1 $rough < H ([AEIOUY] $macron $ddot? $evowel_i $macron?) ;"
|
|
|
|
|
"| $1 $rough < H ([AEIOUY] $ddot? $evowel $macron?) ;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
"| $1 $rough < H ([AEIOUY] $macron? $ddot?) ;"
|
|
|
|
|
|
|
|
|
|
// titlecase, have to fix individually
|
2001-11-05 20:39:12 +00:00
|
|
|
|
// in the future, we should add &uppercase() to make this easier
// Reverse-only rules: "H" followed by a lowercase Latin vowel becomes the
// uppercase vowel, the captured remainder ($1) and a rough breathing.  The
// three groups below mirror the three pattern shapes of the h/H rules,
// spelled out once per vowel.
|
|
|
|
|
|
|
|
|
|
|
"| A $1 $rough < H a ($macron $ddot? $evowel_i $macron?) ;"
|
|
|
|
|
"| E $1 $rough < H e ($macron $ddot? $evowel_i $macron?) ;"
|
|
|
|
|
"| I $1 $rough < H i ($macron $ddot? $evowel_i $macron?) ;"
|
|
|
|
|
"| O $1 $rough < H o ($macron $ddot? $evowel_i $macron?) ;"
|
|
|
|
|
"| U $1 $rough < H u ($macron $ddot? $evowel_i $macron?) ;"
|
|
|
|
|
"| Y $1 $rough < H y ($macron $ddot? $evowel_i $macron?) ;"
|
|
|
|
|
|
|
|
|
|
|
"| A $1 $rough < H a ($ddot? $evowel $macron?) ;"
|
|
|
|
|
"| E $1 $rough < H e ($ddot? $evowel $macron?) ;"
|
|
|
|
|
"| I $1 $rough < H i ($ddot? $evowel $macron?) ;"
|
|
|
|
|
"| O $1 $rough < H o ($ddot? $evowel $macron?) ;"
|
|
|
|
|
"| U $1 $rough < H u ($ddot? $evowel $macron?) ;"
|
|
|
|
|
"| Y $1 $rough < H y ($ddot? $evowel $macron?) ;"
|
|
|
|
|
|
|
|
|
|
|
"| A $1 $rough < H a ($macron? $ddot? ) ;"
|
|
|
|
|
"| E $1 $rough < H e ($macron? $ddot? ) ;"
|
|
|
|
|
"| I $1 $rough < H i ($macron? $ddot? ) ;"
|
|
|
|
|
"| O $1 $rough < H o ($macron? $ddot? ) ;"
|
|
|
|
|
"| U $1 $rough < H u ($macron? $ddot? ) ;"
|
|
|
|
|
"| Y $1 $rough < H y ($macron? $ddot? ) ;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
|
|
|
|
|
// Now do smooth
|
|
|
|
|
|
|
|
|
|
|
//delete smooth breathing for Latin
|
|
|
|
|
// Forward only: the smooth breathing has no Latin counterpart, so drop it.
"$smooth > ;"
|
|
|
|
|
|
|
|
|
|
|
// insert in Greek
|
2001-11-05 20:39:12 +00:00
|
|
|
|
// the assumption is that all Marks are on letters.
// Reverse-only rules.  The before-context [^[:L:][:M:]] requires the match
// to follow a character that is neither a letter nor a mark (i.e. to start a
// word).  A smooth breathing is added after: an initial r/R not followed by
// h/H, an initial pair of Latin vowels, or a single initial Latin vowel not
// followed by another vowel.  Nothing is added if a breathing already follows.
|
|
|
|
|
|
|
|
|
|
|
"| $1 $smooth < [^[:L:][:M:]] { ([rR]) } [^hH$smooth$rough] ;"
|
|
|
|
|
"| $1 $smooth < [^[:L:][:M:]] { ($evowel $macron? $evowel $macron?) } [^$smooth$rough] ;"
|
|
|
|
|
"| $1 $smooth < [^[:L:][:M:]] { ($evowel $macron?) } [^$evowel$smooth$rough] ;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
|
|
|
|
|
// TODO: preserve smooth/rough breathing if not
|
|
|
|
|
// on initial vowel sequence
|
|
|
|
|
|
|
|
|
|
|
// The multi-character mappings that follow need to be up here, ahead of the
// single-letter rules, so that the shorter rules don't mask them.
|
|
|
|
|
|
2001-11-05 20:39:12 +00:00
|
|
|
|
// remove now superfluous macron when returning
|
|
|
|
|
|
|
|
|
|
|
// Reverse only: A/a with a macron goes back to a plain alpha.
"Α < A $macron ;"
|
|
|
|
|
"α < a $macron ;"
|
|
|
|
|
|
2001-10-26 05:41:16 +00:00
|
|
|
|
// Long vowels and digraphs, all bidirectional.  For uppercase Ψ and Φ the
// rule with the $beforeLower context (titlecase output) is tried before the
// all-caps fallback.
"η <> e $macron ;"
|
|
|
|
|
"Η <> E $macron ;"
|
|
|
|
|
|
|
|
|
|
|
"φ <> ph ;"
|
|
|
|
|
"Ψ } $beforeLower <> Ps ;"
|
|
|
|
|
"Ψ <> PS ;"
|
|
|
|
|
|
|
|
|
|
|
"Φ } $beforeLower <> Ph ;"
|
|
|
|
|
"Φ <> PH ;"
|
|
|
|
|
"ψ <> ps ;"
|
|
|
|
|
|
|
|
|
|
|
"ω <> o $macron ;"
|
|
|
|
|
"Ω <> O $macron;"
|
|
|
|
|
|
|
|
|
|
// NORMAL
// One-to-one letter mappings, bidirectional unless noted.
|
|
|
|
|
|
|
|
|
|
|
"α <> a ;"
|
|
|
|
|
"Α <> A ;"
|
|
|
|
|
|
|
|
|
|
|
"β <> b ;"
|
|
|
|
|
"Β <> B ;"
|
|
|
|
|
|
2001-11-05 20:39:12 +00:00
|
|
|
|
// Gamma before a $gammaLike letter is written n/N; in reverse, n/N before
// one of the $egammaLike Latin letters becomes gamma again.
"γ } $gammaLike <> n } $egammaLike ;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
"γ <> g ;"
|
2001-11-05 20:39:12 +00:00
|
|
|
|
"Γ } $gammaLike <> N } $egammaLike ;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
"Γ <> G ;"
|
|
|
|
|
|
|
|
|
|
|
"δ <> d ;"
|
|
|
|
|
"Δ <> D ;"
|
|
|
|
|
|
|
|
|
|
|
"ε <> e ;"
|
|
|
|
|
"Ε <> E ;"
|
|
|
|
|
|
|
|
|
|
|
"ζ <> z ;"
|
|
|
|
|
"Ζ <> Z ;"
|
|
|
|
|
|
|
|
|
|
// Theta: titlecase "Th" when a lowercase letter follows, otherwise "TH".
"θ <> th ;"
|
|
|
|
|
"Θ } $beforeLower <> Th ;"
|
|
|
|
|
"Θ <> TH ;"
|
|
|
|
|
|
|
|
|
|
|
"ι <> i ;"
|
|
|
|
|
"Ι <> I ;"
|
|
|
|
|
|
|
|
|
|
|
"κ <> k ;"
|
|
|
|
|
"Κ <> K ;"
|
|
|
|
|
|
|
|
|
|
|
"λ <> l ;"
|
|
|
|
|
"Λ <> L ;"
|
|
|
|
|
|
|
|
|
|
|
"μ <> m ;"
|
|
|
|
|
"Μ <> M ;"
|
|
|
|
|
|
2001-11-07 18:50:25 +00:00
|
|
|
|
// A real nu before a $gammaLike letter is written n' so that it stays
// distinguishable from the n produced from gamma in the same position.
// NOTE(review): the lowercase rule is forward-only (>) while the uppercase
// one is bidirectional (<>); confirm the asymmetry is intended.
"ν } $gammaLike > n\\\' ;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
"ν <> n ;"
|
2001-11-07 18:50:25 +00:00
|
|
|
|
"Ν } $gammaLike <> N\\\' ;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
"Ν <> N ;"
|
|
|
|
|
|
|
|
|
|
"ξ <> x ;"
|
|
|
|
|
"Ξ <> X ;"
|
|
|
|
|
|
|
|
|
|
|
"ο <> o ;"
|
|
|
|
|
"Ο <> O ;"
|
|
|
|
|
|
|
|
|
|
|
"π <> p ;"
|
|
|
|
|
"Π <> P ;"
|
|
|
|
|
|
|
|
|
|
|
// Rho with rough breathing maps to rh / Rh / RH; these rules come before
// the plain rho rules so the breathing is consumed together with the letter.
"ρ $rough <> rh;"
|
|
|
|
|
"Ρ $rough } $beforeLower <> Rh ;"
|
|
|
|
|
"Ρ $rough <> RH ;"
|
|
|
|
|
"ρ <> r ;"
|
|
|
|
|
"Ρ <> R ;"
|
|
|
|
|
|
2001-11-05 20:39:12 +00:00
|
|
|
|
// insert separator
// By now a preceding pi has already become P/p.  An apostrophe is inserted
// (empty match between the two contexts) before a following sigma so that
// pi + sigma does not read as the "ps" produced from psi.
|
|
|
|
|
|
2001-11-07 18:50:25 +00:00
|
|
|
|
"[Pp] { } ς > \\\' ;"
|
|
|
|
|
"[Pp] { } σ > \\\' ;"
|
2001-11-05 20:39:12 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Caron means exception
// i.e. "s" plus $caron marks a sigma whose form (σ vs ς) is not the one
// expected for its position, so that the reverse direction can restore it.
// NOTE(review): the position comments below rely on $beforeLetter and
// $afterLetter matching an adjacent letter; check their definitions.
|
|
|
|
|
|
|
|
|
|
|
// before a letter, initial
|
|
|
|
|
"ς } $beforeLetter <> s $caron } $beforeLetter;"
|
|
|
|
|
"σ } $beforeLetter <> s } $beforeLetter;"
|
|
|
|
|
|
|
|
|
|
|
// otherwise, after a letter = final
|
|
|
|
|
"$afterLetter { σ <> $afterLetter { s $caron;"
|
|
|
|
|
"$afterLetter { ς <> $afterLetter { s ;"
|
|
|
|
|
|
|
|
|
|
|
// otherwise (isolated) = initial
|
|
|
|
|
"ς <> s $caron;"
|
|
|
|
|
"σ <> s ;"
|
|
|
|
|
|
2001-11-07 18:50:25 +00:00
|
|
|
|
// Uppercase sigma after P/p gets the same apostrophe separator.
"[Pp] { Σ <> \\\'S ;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
"Σ <> S ;"
|
|
|
|
|
|
|
|
|
|
"τ <> t ;"
|
|
|
|
|
"Τ <> T ;"
|
|
|
|
|
|
|
|
|
|
|
// Upsilon directly after a vowel (Greek or Latin, see $vowel) is u/U;
// anywhere else it is y/Y.
"$vowel {υ } <> u ;"
|
|
|
|
|
"υ <> y ;"
|
|
|
|
|
"$vowel { Υ <> U ;"
|
|
|
|
|
"Υ <> Y ;"
|
|
|
|
|
|
|
|
|
|
|
// Chi: titlecase "Ch" when a lowercase letter follows, otherwise "CH".
"χ <> ch ;"
|
|
|
|
|
"Χ } $beforeLower <> Ch ;"
|
|
|
|
|
"Χ <> CH ;"
|
|
|
|
|
|
|
|
|
|
// Completeness for ASCII
// Latin letters that no forward rule produces still need a mapping when
// going from Latin to Greek.  Each reverse-only rule rewrites such a letter
// to a Latin spelling that the rules above do handle, with the cursor ('|')
// placed before it so that spelling is then converted.
|
|
|
|
|
|
|
|
|
|
|
// Zero or more marks or apostrophes ('' is a literal apostrophe).
"$ignore = [[:Mark:]''] * ;"
|
|
|
|
|
|
|
|
|
|
|
"| k < c ;"
|
|
|
|
|
"| ph < f ;"
|
|
|
|
|
"| i < j ;"
|
|
|
|
|
"| k < q ;"
|
|
|
|
|
"| u < v ;"
|
|
|
|
|
"| u < w ;"
|
|
|
|
|
"| K < C ;"
|
|
|
|
|
// NOTE(review): all three F rules produce "PH", so the two context rules
// are redundant with the fallback as written; confirm whether the fallback
// was meant to produce "Ph".
"| PH < F } $ignore [:UppercaseLetter:] ;"
|
|
|
|
|
"| PH < [:UppercaseLetter:] $ignore { F ;"
|
|
|
|
|
"| PH < F ;"
|
|
|
|
|
"| I < J ;"
|
|
|
|
|
"| K < Q ;"
|
|
|
|
|
"| U < V ;"
|
|
|
|
|
"| U < W ;"
|
|
|
|
|
|
|
|
|
|
|
// Any rough breathing still left: H when an uppercase letter is adjacent
// (ignoring marks/apostrophes), otherwise h.  In reverse, both H and h
// become a rough breathing.
"$rough } $ignore [:UppercaseLetter:] > H ;"
|
|
|
|
|
"$ignore [:UppercaseLetter:] { $rough > H ;"
|
|
|
|
|
"$rough < H ;"
|
|
|
|
|
"$rough <> h ;"
|
|
|
|
|
|
|
|
|
|
// Completeness for Greek
// Forward-only rules for Greek symbol/variant characters.  Most rewrite the
// variant to the ordinary letter with the cursor ('|') in front of it, so
// the ordinary letter's rule then applies; two map straight to a Latin letter.
|
|
|
|
|
|
|
|
|
|
|
"ϐ > | β ;"
|
|
|
|
|
"ϑ > | θ ;"
|
|
|
|
|
"ϒ > | Υ ;"
|
|
|
|
|
"ϕ > | φ ;"
|
|
|
|
|
"ϖ > | π ;"
|
|
|
|
|
|
|
|
|
|
|
"ϰ > | κ ;"
|
|
|
|
|
"ϱ > | ρ ;"
|
|
|
|
|
"ϲ > | σ ;"
|
|
|
|
|
"ϳ > j ;"
|
|
|
|
|
"ϴ > | Θ ;"
|
|
|
|
|
"ϵ > | ε ;"
|
|
|
|
|
|
|
2001-11-19 22:23:36 +00:00
|
|
|
|
// The left side is the micro sign (presumably U+00B5), not the Greek mu on
// the right; the two look identical in most fonts.
"µ > | μ ;"
|
|
|
|
|
|
|
2001-10-26 05:41:16 +00:00
|
|
|
|
// Spacing iota-subscript character (presumably U+037A GREEK YPOGEGRAMMENI).
"ͺ > i;"
|
|
|
|
|
|
2001-11-05 20:39:12 +00:00
|
|
|
|
// delete any trailing ' marks used for roundtripping
// Reverse-only rules with an empty result: the apostrophe separators that
// the forward rules insert after pi (before sigma) and after nu (before a
// gamma-like letter) are removed once the neighbouring letters are in place.
|
|
|
|
|
|
2001-11-07 18:50:25 +00:00
|
|
|
|
"< [Ππ] { \\\' } [Ss] ;"
|
|
|
|
|
"< [Νν] { \\\' } $egammaLike ;"
|
2001-11-05 20:39:12 +00:00
|
|
|
|
|
|
|
|
|
2001-11-19 22:23:36 +00:00
|
|
|
|
// Forward direction recomposes with NFC after the rules; the parenthesised
// NFD is the step applied in the reverse direction.
"::NFC (NFD) ;"
|
|
|
|
|
// Reverse filter (parenthesised): only ASCII, Latin characters and
// combining marks are processed when going from Latin to Greek.
":: ([\\u0000-\u007F [:Latin:] [:Mark:]]) ;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
}
|
|
|
|
|
}
|