ICU-1292 Added Latin-el, fixed roundtrip test, additional Greek fixes
X-SVN-Rev: 6541
This commit is contained in:
parent
c0374fa37a
commit
a545c3da81
@ -1,6 +1,6 @@
|
||||
# Copyright (c) 2001, International Business Machines Corporation and
|
||||
# others. All Rights Reserved.
|
||||
#
|
||||
#
|
||||
# TRANSLITERATOR INDEX FILE. This file lists the non-algorithmic
|
||||
# system transliterators. It allows arbitrary mappings between
|
||||
# transliterator IDs and file names, and also allows the system to
|
||||
@ -9,29 +9,29 @@
|
||||
# "Latin-Jamo;Jamo-Hangul". Internal IDs may also be defined; these
|
||||
# are invisible to the user, but can be composed together by the
|
||||
# system to create visible transliterators.
|
||||
#
|
||||
#
|
||||
# Blank lines and lines beginning with '#' are ignored.
|
||||
#
|
||||
#
|
||||
# Lines in this file have one of the following forms (text not
|
||||
# enclosed by <> is literal):
|
||||
#
|
||||
#
|
||||
# <id>:file:<resource>:<encoding>:<direction>
|
||||
# <id>:internal:<resource>:<encoding>:<direction>
|
||||
# <id>:alias:<getInstanceArg>
|
||||
#
|
||||
#
|
||||
# <id> is the ID of the system transliterator being defined. These
|
||||
# are public IDs enumerated by Transliterator.getAvailableIDs(),
|
||||
# unless the second field is "internal".
|
||||
#
|
||||
#
|
||||
# <resource> is a ResourceReader resource name. Currently these refer
|
||||
# to file names under com/ibm/text/resources. This string is passed
|
||||
# directly to ResourceReader, together with <encoding>.
|
||||
#
|
||||
#
|
||||
# <encoding> is the character encoding to use when reading <resource>;
|
||||
# passed directly to ResourceReader. E.g., "UTF8".
|
||||
#
|
||||
#
|
||||
# <direction> is either "FORWARD" or "REVERSE".
|
||||
#
|
||||
#
|
||||
# <getInstanceArg> is a string to be passed directly to
|
||||
# Transliterator.getInstance(). The returned Transliterator object
|
||||
# then has its ID changed to <id> and is returned.
|
||||
@ -48,6 +48,9 @@ Cyrillic-Latin:file:Transliterator_Cyrillic_Latin.txt:UTF8:FORWARD
|
||||
Latin-Greek:file:Transliterator_Greek_Latin.txt:UTF8:REVERSE
|
||||
Greek-Latin:file:Transliterator_Greek_Latin.txt:UTF8:FORWARD
|
||||
|
||||
Latin-el:file:Transliterator_el_Latin.txt:UTF8:REVERSE
|
||||
el-Latin:file:Transliterator_el_Latin.txt:UTF8:FORWARD
|
||||
|
||||
LowerLatin-Jamo:internal:Transliterator_Latin_Jamo.utf8.txt:UTF8:FORWARD
|
||||
Latin-Jamo:alias:Any-Lower;LowerLatin-Jamo
|
||||
Jamo-Latin:file:Transliterator_Latin_Jamo.utf8.txt:UTF8:REVERSE
|
||||
|
202
icu4j/src/com/ibm/text/resources/Transliterator_el_Latin.txt
Executable file
202
icu4j/src/com/ibm/text/resources/Transliterator_el_Latin.txt
Executable file
@ -0,0 +1,202 @@
|
||||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/Transliterator_el_Latin.txt,v $
|
||||
# $Date: 2001/11/01 00:39:31 $
|
||||
# $Revision: 1.1 $
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Rules are predicated on running NFD first, and NFC afterwards
|
||||
::NFD (NFC) ;
|
||||
|
||||
# For modern Greek.
|
||||
|
||||
# Useful variables
|
||||
|
||||
$lower = [:Ll:] ;
|
||||
$upper = [:Lu:] ;
|
||||
$accent = [:M:] ;
|
||||
|
||||
$macron = \u0304 ;
|
||||
$ddot = \u0308 ;
|
||||
|
||||
$lcgvowel = [αεηιουω] ;
|
||||
$ucgvowel = [ΑΕΗΙΟΥΩ] ;
|
||||
$gvowel = [$lcgvowel $ucgvowel] ;
|
||||
$lcgvowelC = [$lcgvowel $accent] ;
|
||||
|
||||
$evowel = [aeiouyAEIOUY];
|
||||
$vowel = [ $evowel $gvowel] ;
|
||||
|
||||
$beforeLower = $accent * $lower ;
|
||||
|
||||
$gammaLike = [ΓΚΞΧγκξχϰ] ;
|
||||
$egammaLike = [GKXCgkxc] ;
|
||||
$smooth = ̓ ;
|
||||
$rough = ̔ ;
|
||||
$iotasub = ͅ ;
|
||||
|
||||
$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;
|
||||
|
||||
$under = \u0331;
|
||||
|
||||
# Fix punctuation
|
||||
|
||||
\; <> \? ;
|
||||
· <> \: ;
|
||||
|
||||
# Fix any ancient characters that creep in
|
||||
|
||||
\u0342 > \u0301 ;
|
||||
\u0302 > \u0301 ;
|
||||
\u0300 > \u0301 ;
|
||||
$smooth > ;
|
||||
$rough > ;
|
||||
$iotasub > ;
|
||||
\u037A > ;
|
||||
|
||||
# These need to come first so that the more general rules below don't mask them
|
||||
|
||||
η <> i $under ;
|
||||
Η <> I $under ;
|
||||
|
||||
Ψ } $beforeLower <> Ps ;
|
||||
Ψ <> PS ;
|
||||
ψ <> ps ;
|
||||
|
||||
ω <> o $under ;
|
||||
Ω <> O $under;
|
||||
|
||||
# At the beginning or end of a word, convert mp (μπ) to b.
# A word boundary is approximated by a neighbouring character that is
# neither a letter nor a mark. In rule syntax "ante { key } post", only
# the key is replaced, so the boundary character must sit in the
# context, not in the key.
|
||||
|
||||
[^[:L:][:M:]] { μπ > b ;
|
||||
μπ } [^[:L:][:M:]] > b ;
|
||||
[^[:L:][:M:]] { [Μμ][Ππ] > B ;
|
||||
[Μμ][Ππ] } [^[:L:][:M:]] > B ;
|
||||
|
||||
# Reverse direction: b/B always maps back to the digraph. A capital B
# followed by a lowercase letter yields title-case Μπ, otherwise ΜΠ.
μπ < b ;
|
||||
Μπ < B } $beforeLower ;
|
||||
ΜΠ < B ;
|
||||
|
||||
# handle diphthongs ending with upsilon
# After a vowel, upsilon becomes v/V + $under when a $softener letter
# follows; otherwise the vowel-preceded fallback rule applies. An
# upsilon with no preceding vowel maps to plain y/Y.
# NOTE(review): the lowercase fallback produces "f $under" but the
# uppercase fallback produces "U $under" (not "F $under"), and the
# lowercase rule closes the key with "}" while the uppercase one does
# not -- confirm whether this asymmetry is intentional.
|
||||
|
||||
$vowel { υ } $softener <> v $under ;
|
||||
$vowel { υ } <> f $under;
|
||||
υ <> y ;
|
||||
$vowel { Υ } $softener <> V $under ;
|
||||
$vowel { Υ <> U $under ;
|
||||
Υ <> Y ;
|
||||
|
||||
# NORMAL
|
||||
|
||||
α <> a ;
|
||||
Α <> A ;
|
||||
|
||||
β <> v ;
|
||||
Β <> V ;
|
||||
|
||||
γ } $gammaLike <> n } $egammaLike ;
|
||||
γ <> g ;
|
||||
Γ } $gammaLike <> N } $egammaLike ;
|
||||
Γ <> G ;
|
||||
|
||||
δ <> d ;
|
||||
Δ <> D ;
|
||||
|
||||
ε <> e ;
|
||||
Ε <> E ;
|
||||
|
||||
ζ <> z ;
|
||||
Ζ <> Z ;
|
||||
|
||||
θ <> th ;
|
||||
Θ } $beforeLower <> Th ;
|
||||
Θ <> TH ;
|
||||
|
||||
ι <> i ;
|
||||
Ι <> I ;
|
||||
|
||||
κ <> k ;
|
||||
Κ <> K ;
|
||||
|
||||
λ <> l ;
|
||||
Λ <> L ;
|
||||
|
||||
μ <> m ;
|
||||
Μ <> M ;
|
||||
|
||||
# Nu before a gamma-like letter is written with a following apostrophe,
# which keeps it distinct from the n/N that gamma produces in the same
# position (see the gamma rules: γ } $gammaLike <> n } $egammaLike).
# NOTE(review): the lowercase rule is forward-only (">") while the
# uppercase rule is bidirectional ("<>") -- confirm which is intended;
# the apostrophe is also removed by a dedicated reverse rule at the end
# of the file.
ν } $gammaLike > n\' ;
|
||||
ν <> n ;
|
||||
Ν } $gammaLike <> N\' ;
|
||||
Ν <> N ;
|
||||
|
||||
ξ <> x ;
|
||||
Ξ <> X ;
|
||||
|
||||
ο <> o ;
|
||||
Ο <> O ;
|
||||
|
||||
π <> p ;
|
||||
Π <> P ;
|
||||
|
||||
ρ <> r ;
|
||||
Ρ <> R ;
|
||||
|
||||
[Pp] {ς > \'s ;
|
||||
[Pp] {σ > \'s ;
|
||||
σ < [:^L:] [:M:]* { s } [:^L:] ;
|
||||
ς <> s } [:^L:] ;
|
||||
σ <> s ;
|
||||
[Pp] { Σ <> \'S ;
|
||||
Σ <> S ;
|
||||
|
||||
τ <> t ;
|
||||
Τ <> T ;
|
||||
|
||||
φ <> f ;
|
||||
Φ <> F ;
|
||||
|
||||
χ <> ch ;
|
||||
Χ } $beforeLower <> Ch ;
|
||||
Χ <> CH ;
|
||||
|
||||
# Completeness for ASCII
|
||||
|
||||
$ignore = [[:Mark:]''] * ;
|
||||
|
||||
| ch < h ;
|
||||
| k < c ;
|
||||
| i < j ;
|
||||
| k < q ;
|
||||
| y < u ;
|
||||
| y < w ;
|
||||
|
||||
| Ch < H ;
|
||||
| K < C ;
|
||||
| I < J ;
|
||||
| K < Q ;
|
||||
| Y < W ;
|
||||
| Y < U ;
|
||||
|
||||
# Completeness for Greek
|
||||
|
||||
ϐ > | β ;
|
||||
ϑ > | θ ;
|
||||
ϒ > | Υ ;
|
||||
ϕ > | φ ;
|
||||
ϖ > | π ;
|
||||
|
||||
ϰ > | κ ;
|
||||
ϱ > | ρ ;
|
||||
ϲ > | σ ;
|
||||
ϳ > j ;
|
||||
ϴ > | Θ ;
|
||||
ϵ > | ε ;
|
||||
|
||||
# delete any trailing ' marks used for roundtripping
|
||||
|
||||
< [Ππ] { \' } [Ss] ;
|
||||
< [Νν] { \' } $egammaLike ;
|
||||
|
||||
::NFC (NFD) ;
|
@ -1,6 +1,6 @@
|
||||
# Copyright (c) 2001, International Business Machines Corporation and
|
||||
# others. All Rights Reserved.
|
||||
#
|
||||
#
|
||||
# TRANSLITERATOR INDEX FILE. This file lists the non-algorithmic
|
||||
# system transliterators. It allows arbitrary mappings between
|
||||
# transliterator IDs and file names, and also allows the system to
|
||||
@ -9,29 +9,29 @@
|
||||
# "Latin-Jamo;Jamo-Hangul". Internal IDs may also be defined; these
|
||||
# are invisible to the user, but can be composed together by the
|
||||
# system to create visible transliterators.
|
||||
#
|
||||
#
|
||||
# Blank lines and lines beginning with '#' are ignored.
|
||||
#
|
||||
#
|
||||
# Lines in this file have one of the following forms (text not
|
||||
# enclosed by <> is literal):
|
||||
#
|
||||
#
|
||||
# <id>:file:<resource>:<encoding>:<direction>
|
||||
# <id>:internal:<resource>:<encoding>:<direction>
|
||||
# <id>:alias:<getInstanceArg>
|
||||
#
|
||||
#
|
||||
# <id> is the ID of the system transliterator being defined. These
|
||||
# are public IDs enumerated by Transliterator.getAvailableIDs(),
|
||||
# unless the second field is "internal".
|
||||
#
|
||||
#
|
||||
# <resource> is a ResourceReader resource name. Currently these refer
|
||||
# to file names under com/ibm/text/resources. This string is passed
|
||||
# directly to ResourceReader, together with <encoding>.
|
||||
#
|
||||
#
|
||||
# <encoding> is the character encoding to use when reading <resource>;
|
||||
# passed directly to ResourceReader. E.g., "UTF8".
|
||||
#
|
||||
#
|
||||
# <direction> is either "FORWARD" or "REVERSE".
|
||||
#
|
||||
#
|
||||
# <getInstanceArg> is a string to be passed directly to
|
||||
# Transliterator.getInstance(). The returned Transliterator object
|
||||
# then has its ID changed to <id> and is returned.
|
||||
@ -48,6 +48,9 @@ Cyrillic-Latin:file:Transliterator_Cyrillic_Latin.txt:UTF8:FORWARD
|
||||
Latin-Greek:file:Transliterator_Greek_Latin.txt:UTF8:REVERSE
|
||||
Greek-Latin:file:Transliterator_Greek_Latin.txt:UTF8:FORWARD
|
||||
|
||||
Latin-el:file:Transliterator_el_Latin.txt:UTF8:REVERSE
|
||||
el-Latin:file:Transliterator_el_Latin.txt:UTF8:FORWARD
|
||||
|
||||
LowerLatin-Jamo:internal:Transliterator_Latin_Jamo.utf8.txt:UTF8:FORWARD
|
||||
Latin-Jamo:alias:Any-Lower;LowerLatin-Jamo
|
||||
Jamo-Latin:file:Transliterator_Latin_Jamo.utf8.txt:UTF8:REVERSE
|
||||
|
Loading…
Reference in New Issue
Block a user