66aa8c0fa4
X-SVN-Rev: 38278
189 lines
4.9 KiB
Plaintext
189 lines
4.9 KiB
Plaintext
# ***************************************************************************
|
||
# *
|
||
# * Copyright (C) 2004-2016, International Business Machines
|
||
# * Corporation; Unicode, Inc.; and others. All Rights Reserved.
|
||
# *
|
||
# ***************************************************************************
|
||
# File: Grek_Latn_UNGEGN.txt
|
||
# Generated from CLDR
|
||
#
|
||
|
||
# For modern Greek, based on UNGEGN rules.
|
||
# Rules are predicated on running NFD first, and NFC afterwards
|
||
# MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
|
||
# WARNING: need to add accents to both filters ###
|
||
# :: [\u0301\u0304\u0306\u0308;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ\u0300\u0302\u0313-\u0314\u0340\u0342-\u0343\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩϷ-\u07FBϹ] ;
|
||
# Global filter for the forward direction: only Greek-script characters,
# nonspacing/enclosing marks, and the punctuation in the second set are
# offered to the rules below.
:: [[[:Greek:][:Mn:][:Me:]] [\:-;?·;·]] ;
|
||
# Normalize before the rules run: NFD going forward; the parenthesized NFC is
# the counterpart used when the transform is run in the reverse direction.
::NFD (NFC) ;
|
||
# Useful variables
|
||
# Lowercase letters of the Latin and Greek scripts (used by $beforeLower).
$lower = [[:latin:][:greek:] & [:Ll:]] ;
|
||
# NOTE(review): $upper is not referenced by any rule visible in this file.
$upper = [[:latin:][:greek:] & [:Lu:]] ;
|
||
# Any nonspacing or enclosing combining mark.
$accent = [[:Mn:][:Me:]] ;
|
||
# NOTE(review): $macron and $ddot are not referenced by any visible rule
# (the diaeresis is written literally as \u0308 where it is needed).
$macron = \u0304 ;
|
||
$ddot = \u0308 ;
|
||
# Greek vowels, lowercase and uppercase.
$lcgvowel = [αεηιουω] ;
|
||
$ucgvowel = [ΑΕΗΙΟΥΩ] ;
|
||
$gvowel = [$lcgvowel $ucgvowel] ;
|
||
# NOTE(review): $lcgvowelC is not referenced by any visible rule.
$lcgvowelC = [$lcgvowel $accent] ;
|
||
# Latin vowels (including y), both cases.
$evowel = [aeiouyAEIOUY];
|
||
# Any vowel, Latin or Greek; used as trailing context in the ASCII
# completeness rules.
$vowel = [ $evowel $gvowel] ;
|
||
# Trailing context "followed by a lowercase letter", skipping any marks that
# sit in between. Used to choose title-case digraphs (Ps, Th, Ch, Μπ).
$beforeLower = $accent * $lower ;
|
||
# Greek letters before which γ is written as n, and the Latin letters used as
# the matching context in the reverse direction.
$gammaLike = [ΓΚΞΧγκξχϰ] ;
|
||
$egammaLike = [GKXCgkxc] ;
|
||
# Combining marks that are deleted by the "ancient characters" rules.
$smooth = \u0313 ;
|
||
$rough = \u0314 ;
|
||
$iotasub = \u0345 ;
|
||
# Characters before which υ in a diphthong is rendered v rather than f:
# the listed Greek consonants plus every Greek vowel.
$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;
|
||
# U+0331, appended to a Latin letter to mark an alternate Greek source so the
# mapping can be reversed (e.g. η vs ι, ω vs ο).
$under = \u0331;
|
||
# NOTE(review): $caron is not referenced by any visible rule; the sho rules
# spell \u030C literally.
$caron = \u030C;
|
||
# A letter followed by any apostrophes/marks, and any apostrophes/marks
# followed by a letter; used as context by the sigma rules.
$afterLetter = [:L:] [\'$accent]* ;
|
||
$beforeLetter = [\'$accent]* [:L:] ;
|
||
# Fix punctuation
|
||
# Preserve the original: a ':' or '?' already present in the Greek text gets
# $under appended so it can be told apart from the ':' and '?' produced by the
# two conversion rules that follow.
|
||
\: ↔ \: $under ;
|
||
\? ↔ \? $under ;
|
||
# Greek question mark (written ';') <-> Latin '?'.
\; ↔ \? ;
|
||
# Greek raised dot <-> Latin ':'.
· ↔ \: ;
|
||
# Fix any ancient characters that creep in
# All rules in this group are forward-only (→): the original polytonic marks
# are not restored in the reverse direction.
|
||
# Perispomeni (U+0342), circumflex (U+0302) and grave (U+0300) all collapse to
# the acute accent (U+0301).
\u0342 → \u0301 ;
|
||
\u0302 → \u0301 ;
|
||
\u0300 → \u0301 ;
|
||
# Breathing marks and the iota subscript are deleted.
$smooth → ;
|
||
$rough → ;
|
||
$iotasub → ;
|
||
# The spacing iota subscript (ͺ) is deleted as well.
ͺ → ;
|
||
# need to have these up here so the rules don't mask
# (these must be tried before the plain single-letter rules further down).
|
||
# η shares the base letter i with ι; $under marks the η source.
η ↔ i $under ;
|
||
Η ↔ I $under ;
|
||
# Capital psi: title-case "Ps" when a lowercase letter follows, else "PS".
Ψ } $beforeLower ↔ Ps ;
|
||
Ψ ↔ PS ;
|
||
ψ ↔ ps ;
|
||
# ω shares the base letter o with ο; $under marks the ω source.
ω ↔ o $under ;
|
||
Ω ↔ O $under;
|
||
# at beginning or end of word, convert mp to b
|
||
# Forward: μπ converts to b when the character before it, or the character
# after it, is neither a letter nor a mark.
[^[:L:]$accent] { μπ → b ;
|
||
μπ } [^[:L:]$accent] → b ;
|
||
# Same contexts for the remaining case combinations; lowercase μπ was already
# handled by the two rules above, so these produce B.
[^[:L:]$accent] { [Μμ][Ππ] → B ;
|
||
[Μμ][Ππ] } [^[:L:]$accent] → B ;
|
||
# Reverse: b and B always expand back to the digraph; capital B becomes
# title-case Μπ before a lowercase letter and ΜΠ otherwise.
μπ ← b ;
|
||
Μπ ← B } $beforeLower ;
|
||
ΜΠ ← B ;
|
||
# handle diphthongs ending with upsilon
|
||
# ου is a plain digraph in every case combination.
ου ↔ ou ;
|
||
ΟΥ ↔ OU ;
|
||
Ου ↔ Ou ;
|
||
οΥ ↔ oU ;
|
||
# Left context for the v/f rules: an already-converted Latin a, e or i
# (either case), optionally carrying $under (the i+$under form comes from η).
$fmaker = [aeiAEI] $under ? ;
|
||
$shiftForwardVowels = [[:Mn:]-[\u0308]]; # note: a diaeresis keeps the items separate
|
||
# After $fmaker, υ followed by a $softener becomes v+$under. Any marks on the
# υ (captured as $1) are moved in front of the new letter.
$fmaker { υ ( $shiftForwardVowels )* } $softener → $1 v $under ;
|
||
# Reverse of the rule above: the marks move back behind the restored υ.
υ $1 ← ( $shiftForwardVowels )* v $under ;
|
||
# After $fmaker in any other position, υ becomes f+$under.
$fmaker { υ ( $shiftForwardVowels )* } → $1 f $under;
|
||
υ $1 ← ( $shiftForwardVowels )* f $under ;
|
||
# Uppercase counterparts (no mark shifting).
$fmaker { Υ } $softener ↔ V $under ;
|
||
# NOTE(review): the lowercase rule above yields f+$under, while this one
# yields U+$under rather than F+$under — confirm this is intended.
$fmaker { Υ ↔ U $under ;
|
||
# Upsilon outside the diphthongs handled above.
υ ↔ y ;
|
||
Υ ↔ Y ;
|
||
# NORMAL
# One-to-one letter mappings, alpha through rho. Context-dependent rules are
# listed before the plain rule for the same letter so that they are tried first.
|
||
α ↔ a ;
|
||
Α ↔ A ;
|
||
β ↔ v ;
|
||
Β ↔ V ;
|
||
# γ before a $gammaLike letter is written n; in reverse, n before a Latin
# $egammaLike letter maps back to γ.
γ } $gammaLike ↔ n } $egammaLike ;
|
||
γ ↔ g ;
|
||
Γ } $gammaLike ↔ N } $egammaLike ;
|
||
Γ ↔ G ;
|
||
δ ↔ d ;
|
||
Δ ↔ D ;
|
||
ε ↔ e ;
|
||
Ε ↔ E ;
|
||
ζ ↔ z ;
|
||
Ζ ↔ Z ;
|
||
θ ↔ th ;
|
||
# Capital theta: title-case "Th" when a lowercase letter follows, else "TH".
Θ } $beforeLower ↔ Th ;
|
||
Θ ↔ TH ;
|
||
ι ↔ i ;
|
||
Ι ↔ I ;
|
||
κ ↔ k ;
|
||
Κ ↔ K ;
|
||
λ ↔ l ;
|
||
Λ ↔ L ;
|
||
μ ↔ m ;
|
||
Μ ↔ M ;
|
||
# A real ν before a $gammaLike letter gets an apostrophe after it, so that it
# is not read back as the γ handled above. The apostrophe is removed again by
# the reverse-only cleanup rules near the end of the file.
# NOTE(review): this rule is forward-only (→) while the uppercase rule below
# is bidirectional (↔) — confirm the difference is intentional.
ν } $gammaLike → n\' ;
|
||
ν ↔ n ;
|
||
Ν } $gammaLike ↔ N\' ;
|
||
Ν ↔ N ;
|
||
ξ ↔ x ;
|
||
Ξ ↔ X ;
|
||
ο ↔ o ;
|
||
Ο ↔ O ;
|
||
π ↔ p ;
|
||
Π ↔ P ;
|
||
ρ ↔ r ;
|
||
Ρ ↔ R ;
|
||
# insert separator before things that turn into s
# Forward-only: an apostrophe is inserted between a Latin P/p and a following
# sigma-like letter — presumably so that π+σ does not collide with the
# "ps"/"PS" output of ψ/Ψ; verify against the round-trip tests.
|
||
[Pp] { } [ςσΣϷϸϺϻ] → \' ;
|
||
# special S variants
# Sho maps to S/s + U+030C (caron); san maps to S/s + U+0302 (circumflex).
|
||
Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
|
||
ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
|
||
Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
|
||
ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
|
||
# Caron means exception
# NOTE(review): the rules below mark the exceptional sigma form with $under
# (U+0331), not with a caron — this heading looks stale; confirm.
|
||
# before a letter, initial
# A final-form ς that is nevertheless followed by a letter is the exception
# and gets $under; σ in that position is plain s.
|
||
ς } $beforeLetter ↔ s $under } $beforeLetter;
|
||
σ } $beforeLetter ↔ s } $beforeLetter;
|
||
# otherwise, after a letter = final
# Here the non-final form σ is the exception (s+$under) and ς is plain s.
|
||
$afterLetter { σ ↔ $afterLetter { s $under;
|
||
$afterLetter { ς ↔ $afterLetter { s ;
|
||
# otherwise (isolated) = initial
|
||
ς ↔ s $under;
|
||
σ ↔ s ;
|
||
# [Pp] { Σ ↔ \'S ;
|
||
Σ ↔ S ;
|
||
τ ↔ t ;
|
||
Τ ↔ T ;
|
||
φ ↔ f ;
|
||
Φ ↔ F ;
|
||
χ ↔ ch ;
|
||
# Capital chi: title-case "Ch" when a lowercase letter follows, else "CH".
Χ } $beforeLower ↔ Ch ;
|
||
Χ ↔ CH ;
|
||
# Completeness for ASCII
# Reverse-only (←) rules for Latin letters that no forward rule produces on
# their own. Each one rewrites the letter to another Latin string and places
# the cursor (|) in front of it, so the replacement is then run through the
# reverse rules above.
|
||
# $ignore = [[:Mark:]''] * ;
|
||
| ch ← h ;
|
||
| k ← c ;
|
||
| i ← j ;
|
||
| k ← q ;
|
||
# u and w become b before a vowel, otherwise y.
| b ← u } $vowel ;
|
||
| b ← w } $vowel ;
|
||
| y ← u ;
|
||
| y ← w ;
|
||
# Uppercase counterparts of the rules above.
| Ch ← H ;
|
||
| K ← C ;
|
||
| I ← J ;
|
||
| K ← Q ;
|
||
| B ← W } $vowel ;
|
||
| B ← U } $vowel ;
|
||
| Y ← W ;
|
||
| Y ← U ;
|
||
# Completeness for Greek
# Forward-only (→) rules that replace Greek symbol/variant letterforms with
# the corresponding basic letter. The cursor (|) is placed before the
# replacement so the basic letter is then converted by the rules above.
|
||
ϐ → | β ;
|
||
ϑ → | θ ;
|
||
ϒ → | Υ ;
|
||
ϕ → | φ ;
|
||
ϖ → | π ;
|
||
ϰ → | κ ;
|
||
ϱ → | ρ ;
|
||
ϲ → | σ ;
|
||
Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
|
||
# ϳ is mapped straight to Latin j (no cursor reset, nothing to re-process).
ϳ → j ;
|
||
ϴ → | Θ ;
|
||
ϵ → | ε ;
|
||
# The micro sign (µ) is treated as Greek mu.
µ → | μ ;
|
||
# delete any trailing ' marks used for roundtripping
# Reverse-only: remove the separator apostrophe that the forward rules insert
# after p (before s) and after n (before a $egammaLike letter). The left
# context is already Greek at this point in the reverse pass.
|
||
← [Ππ] { \' } [Ss] ;
|
||
← [Νν] { \' } $egammaLike ;
|
||
# Recompose after the rules: NFC going forward; the parenthesized NFD is the
# counterpart used when the transform is run in the reverse direction.
::NFC (NFD) ;
|
||
# MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
# The parentheses make this filter apply to the reverse direction only:
# Latin-script characters, combining marks, and the characters ' : ?
|
||
:: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ;
|
||
|