diff --git a/icu4j/src/com/ibm/icu/impl/data/Transliterator_Cyrillic_Latin.txt b/icu4j/src/com/ibm/icu/impl/data/Transliterator_Cyrillic_Latin.txt index e69fbc03e8..c50e9d6849 100755 --- a/icu4j/src/com/ibm/icu/impl/data/Transliterator_Cyrillic_Latin.txt +++ b/icu4j/src/com/ibm/icu/impl/data/Transliterator_Cyrillic_Latin.txt @@ -1,11 +1,7 @@ -???#-------------------------------------------------------------------- +#-------------------------------------------------------------------- # Copyright (c) 1999-2004, International Business Machines # Corporation and others. All Rights Reserved. #-------------------------------------------------------------------- -# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_Cyrillic_Latin.txt,v $ -# $Date: 2004/05/21 01:01:51 $ -# $Revision: 1.12 $ -#-------------------------------------------------------------------- # TODO: add remaining characters # Should add variants for Russian-English, Russian-German # Those can use this as a base, and then remap cases @@ -31,266 +27,266 @@ $under = \u0331 ; # move up so not masked -?? <> a $hat ; # CYRILLIC SMALL LETTER YA -?? <> A $hat ; # CYRILLIC CAPITAL LETTER YA +я <> a $hat ; # CYRILLIC SMALL LETTER YA +Я <> A $hat ; # CYRILLIC CAPITAL LETTER YA -?? <> c $caron ; # CYRILLIC SMALL LETTER CHE -?? <> C $caron; # CYRILLIC CAPITAL LETTER CHE -# ?? <> XXX ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER -# ?? <> XXX ; # CYRILLIC SMALL LETTER KHAKASSIAN CHE -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE -# ?? <> XXX ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE +ч <> c $caron ; # CYRILLIC SMALL LETTER CHE +Ч <> C $caron; # CYRILLIC CAPITAL LETTER CHE +# ҷ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER +# Ҷ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER +# ӌ <> XXX ; # CYRILLIC SMALL LETTER KHAKASSIAN CHE +# Ӌ <> XXX ; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE +# ҹ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE +# Ҹ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE -?? <> e $acute; # CYRILLIC SMALL LETTER E -?? <> E $acute; # CYRILLIC CAPITAL LETTER E -?? <> e $hat; # CYRILLIC SMALL LETTER UKRAINIAN IE -?? <> E $hat; # CYRILLIC CAPITAL LETTER UKRAINIAN IE +э <> e $acute; # CYRILLIC SMALL LETTER E +Э <> E $acute; # CYRILLIC CAPITAL LETTER E +є <> e $hat; # CYRILLIC SMALL LETTER UKRAINIAN IE +Є <> E $hat; # CYRILLIC CAPITAL LETTER UKRAINIAN IE -?? <> s $caron ; # CYRILLIC SMALL LETTER SHA -?? <> S $caron ; # CYRILLIC CAPITAL LETTER SHA -?? <> s $hat ; # CYRILLIC SMALL LETTER SHCHA -?? <> S $hat; # CYRILLIC CAPITAL LETTER SHCHA +ш <> s $caron ; # CYRILLIC SMALL LETTER SHA +Ш <> S $caron ; # CYRILLIC CAPITAL LETTER SHA +щ <> s $hat ; # CYRILLIC SMALL LETTER SHCHA +Щ <> S $hat; # CYRILLIC CAPITAL LETTER SHCHA -?? <> z $hat ; # CYRILLIC SMALL LETTER DZE -?? <> Z $hat; # CYRILLIC CAPITAL LETTER DZE -# ?? <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN DZE -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE +ѕ <> z $hat ; # CYRILLIC SMALL LETTER DZE +Ѕ <> Z $hat; # CYRILLIC CAPITAL LETTER DZE +# ӡ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN DZE +# Ӡ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE -?? <> u $hat ; # CYRILLIC SMALL LETTER YU -?? <> U $hat ; # CYRILLIC CAPITAL LETTER YU +ю <> u $hat ; # CYRILLIC SMALL LETTER YU +Ю <> U $hat ; # CYRILLIC CAPITAL LETTER YU -?? <> i $acute; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I -?? <> I $acute; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I -?? <> j $caron; # CYRILLIC SMALL LETTER JE -?? <> J $caron; # CYRILLIC CAPITAL LETTER JE +і <> i $acute; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +І <> I $acute; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +ј <> j $caron; # CYRILLIC SMALL LETTER JE +Ј <> J $caron; # CYRILLIC CAPITAL LETTER JE -?? <> l $hat ; # CYRILLIC SMALL LETTER LJE -?? <> L $hat ; # CYRILLIC CAPITAL LETTER LJE -?? <> n $hat ; # CYRILLIC SMALL LETTER NJE -?? <> N $hat ; # CYRILLIC CAPITAL LETTER NJE +љ <> l $hat ; # CYRILLIC SMALL LETTER LJE +Љ <> L $hat ; # CYRILLIC CAPITAL LETTER LJE +њ <> n $hat ; # CYRILLIC SMALL LETTER NJE +Њ <> N $hat ; # CYRILLIC CAPITAL LETTER NJE -?? <> c $acute ; # CYRILLIC SMALL LETTER TSHE -?? <> C $acute ; # CYRILLIC CAPITAL LETTER TSHE +ћ <> c $acute ; # CYRILLIC SMALL LETTER TSHE +Ћ <> C $acute ; # CYRILLIC CAPITAL LETTER TSHE -?? <> d $hat ; # CYRILLIC SMALL LETTER DZHE -?? <> D $hat ; # CYRILLIC CAPITAL LETTER DZHE +џ <> d $hat ; # CYRILLIC SMALL LETTER DZHE +Џ <> D $hat ; # CYRILLIC CAPITAL LETTER DZHE # Normal order -?? <> a ; # CYRILLIC SMALL LETTER A -?? <> A ; # CYRILLIC CAPITAL LETTER A -?? <> \u0259 ; # CYRILLIC SMALL LETTER SCHWA -?? <> \u018F ; # CYRILLIC CAPITAL LETTER SCHWA -?? <> \u00E6 ; # CYRILLIC SMALL LIGATURE A IE -?? <> \u00C6 ; # CYRILLIC CAPITAL LIGATURE A IE -?? <> b ; # CYRILLIC SMALL LETTER BE -?? <> B ; # CYRILLIC CAPITAL LETTER BE -?? <> v ; # CYRILLIC SMALL LETTER VE -?? <> V ; # CYRILLIC CAPITAL LETTER VE +а <> a ; # CYRILLIC SMALL LETTER A +А <> A ; # CYRILLIC CAPITAL LETTER A +ә <> \u0259 ; # CYRILLIC SMALL LETTER SCHWA +Ә <> \u018F ; # CYRILLIC CAPITAL LETTER SCHWA +ӕ <> \u00E6 ; # CYRILLIC SMALL LIGATURE A IE +Ӕ <> \u00C6 ; # CYRILLIC CAPITAL LIGATURE A IE +б <> b ; # CYRILLIC SMALL LETTER BE +Б <> B ; # CYRILLIC CAPITAL LETTER BE +в <> v ; # CYRILLIC SMALL LETTER VE +В <> V ; # CYRILLIC CAPITAL LETTER VE -?? <> g $grave ; # CYRILLIC SMALL LETTER GHE WITH UPTURN -?? <> G $grave ; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN -?? <> g $dot ; # CYRILLIC SMALL LETTER GHE WITH STROKE -?? <> G $dot; # CYRILLIC CAPITAL LETTER GHE WITH STROKE -?? <> g $breve; # CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK -?? <> G $breve; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK -?? <> g ; # CYRILLIC SMALL LETTER GHE -?? <> G ; # CYRILLIC CAPITAL LETTER GHE +ґ <> g $grave ; # CYRILLIC SMALL LETTER GHE WITH UPTURN +Ґ <> G $grave ; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN +ғ <> g $dot ; # CYRILLIC SMALL LETTER GHE WITH STROKE +Ғ <> G $dot; # CYRILLIC CAPITAL LETTER GHE WITH STROKE +ҕ <> g $breve; # CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK +Ҕ <> G $breve; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK +г <> g ; # CYRILLIC SMALL LETTER GHE +Г <> G ; # CYRILLIC CAPITAL LETTER GHE -?? <> d; # CYRILLIC SMALL LETTER DE -?? <> D; # CYRILLIC CAPITAL LETTER DE -?? <> ?? ; # CYRILLIC SMALL LETTER DJE -?? <> ?? ; # CYRILLIC CAPITAL LETTER DJE -?? <> z $comma ; # CYRILLIC SMALL LETTER ZE WITH DESCENDER -?? <> Z $comma ; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER -?? <> e ; # CYRILLIC SMALL LETTER IE -?? <> E; # CYRILLIC CAPITAL LETTER IE +д <> d; # CYRILLIC SMALL LETTER DE +Д <> D; # CYRILLIC CAPITAL LETTER DE +ђ <> đ ; # CYRILLIC SMALL LETTER DJE +Ђ <> Đ ; # CYRILLIC CAPITAL LETTER DJE +ҙ <> z $comma ; # CYRILLIC SMALL LETTER ZE WITH DESCENDER +Ҙ <> Z $comma ; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER +е <> e ; # CYRILLIC SMALL LETTER IE +Е <> E; # CYRILLIC CAPITAL LETTER IE -?? <> z $caron; # CYRILLIC SMALL LETTER ZHE -?? <> Z $caron; # CYRILLIC CAPITAL LETTER ZHE +ж <> z $caron; # CYRILLIC SMALL LETTER ZHE +Ж <> Z $caron; # CYRILLIC CAPITAL LETTER ZHE -# ?? <> XXX ; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER +# җ <> XXX ; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER +# Җ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER -?? <> z ; # CYRILLIC SMALL LETTER ZE -?? <> Z; # CYRILLIC CAPITAL LETTER ZE +з <> z ; # CYRILLIC SMALL LETTER ZE +З <> Z; # CYRILLIC CAPITAL LETTER ZE -???? <> j ; # CYRILLIC SMALL LETTER I -???? <> J ; # CYRILLIC CAPITAL LETTER I -?? <> i ; # CYRILLIC SMALL LETTER I -?? <> I ; # CYRILLIC CAPITAL LETTER I +й <> j ; # CYRILLIC SMALL LETTER I +Й <> J ; # CYRILLIC CAPITAL LETTER I +и <> i ; # CYRILLIC SMALL LETTER I +И <> I ; # CYRILLIC CAPITAL LETTER I -?? <> k ; # CYRILLIC SMALL LETTER KA -?? <> K; # CYRILLIC CAPITAL LETTER KA +к <> k ; # CYRILLIC SMALL LETTER KA +К <> K; # CYRILLIC CAPITAL LETTER KA -# ?? <> XXX ; # CYRILLIC SMALL LETTER KA WITH DESCENDER -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER -# ?? <> XXX ; # CYRILLIC SMALL LETTER KA WITH HOOK -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH HOOK -# ?? <> XXX ; # CYRILLIC SMALL LETTER BASHKIR KA -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER BASHKIR KA -# ?? <> XXX ; # CYRILLIC SMALL LETTER KA WITH STROKE -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH STROKE -# ?? <> XXX ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE -?? <> l ; # CYRILLIC SMALL LETTER EL -?? <> L; # CYRILLIC CAPITAL LETTER EL +# қ <> XXX ; # CYRILLIC SMALL LETTER KA WITH DESCENDER +# Қ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER +# ӄ <> XXX ; # CYRILLIC SMALL LETTER KA WITH HOOK +# Ӄ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH HOOK +# ҡ <> XXX ; # CYRILLIC SMALL LETTER BASHKIR KA +# Ҡ <> XXX ; # CYRILLIC CAPITAL LETTER BASHKIR KA +# ҟ <> XXX ; # CYRILLIC SMALL LETTER KA WITH STROKE +# Ҟ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH STROKE +# ҝ <> XXX ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE +# Ҝ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE +л <> l ; # CYRILLIC SMALL LETTER EL +Л <> L; # CYRILLIC CAPITAL LETTER EL -?? <> m ; # CYRILLIC SMALL LETTER EM -?? <> M ; # CYRILLIC CAPITAL LETTER EM -?? <> n ; # CYRILLIC SMALL LETTER EN -?? <> N; # CYRILLIC CAPITAL LETTER EN -# ?? <> XXX ; # CYRILLIC SMALL LETTER EN WITH DESCENDER -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER -# ?? <> XXX ; # CYRILLIC SMALL LETTER EN WITH HOOK -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH HOOK -# ?? <> XXX ; # CYRILLIC SMALL LIGATURE EN GHE -# ?? <> XXX ; # CYRILLIC CAPITAL LIGATURE EN GHE +м <> m ; # CYRILLIC SMALL LETTER EM +М <> M ; # CYRILLIC CAPITAL LETTER EM +н <> n ; # CYRILLIC SMALL LETTER EN +Н <> N; # CYRILLIC CAPITAL LETTER EN +# ң <> XXX ; # CYRILLIC SMALL LETTER EN WITH DESCENDER +# Ң <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +# ӈ <> XXX ; # CYRILLIC SMALL LETTER EN WITH HOOK +# Ӈ <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH HOOK +# ҥ <> XXX ; # CYRILLIC SMALL LIGATURE EN GHE +# Ҥ <> XXX ; # CYRILLIC CAPITAL LIGATURE EN GHE -?? <> o ; # CYRILLIC SMALL LETTER O -?? <> O ; # CYRILLIC CAPITAL LETTER O -# ?? <> XXX ; # CYRILLIC SMALL LETTER BARRED O -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O -?? <> p ; # CYRILLIC SMALL LETTER PE -?? <> P ; # CYRILLIC CAPITAL LETTER PE -# ?? <> XXX ; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK -# ?? <> XXX ; # CYRILLIC SMALL LETTER KOPPA -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER KOPPA -?? <> r ; # CYRILLIC SMALL LETTER ER -?? <> R ; # CYRILLIC CAPITAL LETTER ER -# ?? <> XXX ; # CYRILLIC SMALL LETTER ER WITH TICK -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER ER WITH TICK -?? <> s ; # CYRILLIC SMALL LETTER ES -?? <> S ; # CYRILLIC CAPITAL LETTER ES -# ?? <> XXX ; # CYRILLIC SMALL LETTER ES WITH DESCENDER -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER -?? <> t ; # CYRILLIC SMALL LETTER TE -?? <> T ; # CYRILLIC CAPITAL LETTER TE -# ?? <> XXX ; # CYRILLIC SMALL LETTER TE WITH DESCENDER -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER +о <> o ; # CYRILLIC SMALL LETTER O +О <> O ; # CYRILLIC CAPITAL LETTER O +# ө <> XXX ; # CYRILLIC SMALL LETTER BARRED O +# Ө <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O +п <> p ; # CYRILLIC SMALL LETTER PE +П <> P ; # CYRILLIC CAPITAL LETTER PE +# ҧ <> XXX ; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK +# Ҧ <> XXX ; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK +# ҁ <> XXX ; # CYRILLIC SMALL LETTER KOPPA +# Ҁ <> XXX ; # CYRILLIC CAPITAL LETTER KOPPA +р <> r ; # CYRILLIC SMALL LETTER ER +Р <> R ; # CYRILLIC CAPITAL LETTER ER +# ҏ <> XXX ; # CYRILLIC SMALL LETTER ER WITH TICK +# Ҏ <> XXX ; # CYRILLIC CAPITAL LETTER ER WITH TICK +с <> s ; # CYRILLIC SMALL LETTER ES +С <> S ; # CYRILLIC CAPITAL LETTER ES +# ҫ <> XXX ; # CYRILLIC SMALL LETTER ES WITH DESCENDER +# Ҫ <> XXX ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER +т <> t ; # CYRILLIC SMALL LETTER TE +Т <> T ; # CYRILLIC CAPITAL LETTER TE +# ҭ <> XXX ; # CYRILLIC SMALL LETTER TE WITH DESCENDER +# Ҭ <> XXX ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER -?? <> u ; # CYRILLIC SMALL LETTER U -?? <> U ; # CYRILLIC CAPITAL LETTER U -# ?? <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U -# ?? <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE -# ?? <> XXX ; # CYRILLIC SMALL LETTER UK -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER UK -?? <> f ; # CYRILLIC SMALL LETTER EF -?? <> F ; # CYRILLIC CAPITAL LETTER EF -?? <> h ; # CYRILLIC SMALL LETTER HA -?? <> H; # CYRILLIC CAPITAL LETTER HA -# ?? <> XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER -# ?? <> XXX ; # CYRILLIC SMALL LETTER SHHA -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER SHHA -# ?? <> XXX ; # CYRILLIC SMALL LETTER OMEGA -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA -# ?? <> XXX ; # CYRILLIC SMALL LETTER OT -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER OT -# ?? <> XXX ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO -# ?? <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA -?? <> c ; # CYRILLIC SMALL LETTER TSE -?? <> C; # CYRILLIC CAPITAL LETTER TSE -# ?? <> XXX ; # CYRILLIC SMALL LIGATURE TE TSE -# ?? <> XXX ; # CYRILLIC CAPITAL LIGATURE TE TSE +у <> u ; # CYRILLIC SMALL LETTER U +У <> U ; # CYRILLIC CAPITAL LETTER U +# ү <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U +# Ү <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U +# ұ <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE +# Ұ <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +# ѹ <> XXX ; # CYRILLIC SMALL LETTER UK +# Ѹ <> XXX ; # CYRILLIC CAPITAL LETTER UK +ф <> f ; # CYRILLIC SMALL LETTER EF +Ф <> F ; # CYRILLIC CAPITAL LETTER EF +х <> h ; # CYRILLIC SMALL LETTER HA +Х <> H; # CYRILLIC CAPITAL LETTER HA +# ҳ <> XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER +# Ҳ <> XXX ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER +# һ <> XXX ; # CYRILLIC SMALL LETTER SHHA +# Һ <> XXX ; # CYRILLIC CAPITAL LETTER SHHA +# ѡ <> XXX ; # CYRILLIC SMALL LETTER OMEGA +# Ѡ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA +# ѿ <> XXX ; # CYRILLIC SMALL LETTER OT +# Ѿ <> XXX ; # CYRILLIC CAPITAL LETTER OT +# ѽ <> XXX ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO +# Ѽ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO +# ѻ <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA +# Ѻ <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA +ц <> c ; # CYRILLIC SMALL LETTER TSE +Ц <> C; # CYRILLIC CAPITAL LETTER TSE +# ҵ <> XXX ; # CYRILLIC SMALL LIGATURE TE TSE +# Ҵ <> XXX ; # CYRILLIC CAPITAL LIGATURE TE TSE -# ?? <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE -# ?? <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER +# ҽ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE +# Ҽ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE +# ҿ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER +# Ҿ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER -?? <> $modprime2 $under ; # CYRILLIC CAPITAL LETTER HARD SIGN -?? <> $modprime2 ; # CYRILLIC SMALL LETTER HARD SIGN -?? <> $modprime $under ; # CYRILLIC CAPITAL LETTER SOFT SIGN -?? <> $modprime ; # CYRILLIC SMALL LETTER SOFT SIGN +Ъ <> $modprime2 $under ; # CYRILLIC CAPITAL LETTER HARD SIGN +ъ <> $modprime2 ; # CYRILLIC SMALL LETTER HARD SIGN +Ь <> $modprime $under ; # CYRILLIC CAPITAL LETTER SOFT SIGN +ь <> $modprime ; # CYRILLIC SMALL LETTER SOFT SIGN -?? <> y ; # CYRILLIC SMALL LETTER YERU -?? <> Y ; # CYRILLIC CAPITAL LETTER YERU +ы <> y ; # CYRILLIC SMALL LETTER YERU +Ы <> Y ; # CYRILLIC CAPITAL LETTER YERU -# ?? <> XXX ; # CYRILLIC SMALL LETTER SEMISOFT SIGN -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN -# ?? <> XXX ; # CYRILLIC SMALL LETTER YAT -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER YAT +# ҍ <> XXX ; # CYRILLIC SMALL LETTER SEMISOFT SIGN +# Ҍ <> XXX ; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN +# ѣ <> XXX ; # CYRILLIC SMALL LETTER YAT +# Ѣ <> XXX ; # CYRILLIC CAPITAL LETTER YAT -# ?? <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED E -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED E -# ?? <> XXX ; # CYRILLIC SMALL LETTER LITTLE YUS -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER LITTLE YUS -# ?? <> XXX ; # CYRILLIC SMALL LETTER BIG YUS -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER BIG YUS -# ?? <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS -# ?? <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED BIG YUS -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS -# ?? <> XXX ; # CYRILLIC SMALL LETTER KSI -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER KSI -# ?? <> XXX ; # CYRILLIC SMALL LETTER PSI -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER PSI -# ?? <> XXX ; # CYRILLIC SMALL LETTER FITA -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER FITA -# ?? <> XXX ; # CYRILLIC SMALL LETTER IZHITSA -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA -# ?? <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN HA -# ?? <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN HA -# ?? <> XXX ; # CYRILLIC LETTER PALOCHKA -### ???? <> XXX ; # CYRILLIC SMALL LETTER A -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER A -### ???? <> XXX ; # CYRILLIC SMALL LETTER A -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER A -### ???? <> XXX ; # CYRILLIC SMALL LETTER SCHWA -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER SCHWA -### ???? <> XXX ; # CYRILLIC SMALL LETTER GHE -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER GHE -### ???? <> XXX ; # CYRILLIC SMALL LETTER IE -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER IE -### ???? <> XXX ; # CYRILLIC SMALL LETTER IE -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER IE -### ???? <> XXX ; # CYRILLIC SMALL LETTER IE -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER IE -### ???? <> XXX ; # CYRILLIC SMALL LETTER ZHE -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER ZHE -### ???? <> XXX ; # CYRILLIC SMALL LETTER ZHE -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER ZHE -### ???? <> XXX ; # CYRILLIC SMALL LETTER ZE -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER ZE -### ???? <> XXX ; # CYRILLIC SMALL LETTER I -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER I -### ???? <> XXX ; # CYRILLIC SMALL LETTER I -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER I -### ???? <> XXX ; # CYRILLIC SMALL LETTER I -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER I -### ???? <> XXX ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I -### ???? <> XXX ; # CYRILLIC SMALL LETTER O -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER O -### ???? <> XXX ; # CYRILLIC SMALL LETTER BARRED O -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O -### ???? <> XXX ; # CYRILLIC SMALL LETTER KA -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER KA -### ???? <> XXX ; # CYRILLIC SMALL LETTER U -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER U -### ???? <> XXX ; # CYRILLIC SMALL LETTER U -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER U -### ???? <> XXX ; # CYRILLIC SMALL LETTER U -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER U -### ???? <> XXX ; # CYRILLIC SMALL LETTER U -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER U -### ???? <> XXX ; # CYRILLIC SMALL LETTER CHE -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER CHE -### ???? <> XXX ; # CYRILLIC SMALL LETTER YERU -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER YERU -### ???? <> XXX ; # CYRILLIC SMALL LETTER E -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER E -### ???? <> XXX ; # CYRILLIC SMALL LETTER IZHITSA -### ???? <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA +# ѥ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED E +# Ѥ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED E +# ѧ <> XXX ; # CYRILLIC SMALL LETTER LITTLE YUS +# Ѧ <> XXX ; # CYRILLIC CAPITAL LETTER LITTLE YUS +# ѫ <> XXX ; # CYRILLIC SMALL LETTER BIG YUS +# Ѫ <> XXX ; # CYRILLIC CAPITAL LETTER BIG YUS +# ѩ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS +# Ѩ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS +# ѭ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED BIG YUS +# Ѭ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS +# ѯ <> XXX ; # CYRILLIC SMALL LETTER KSI +# Ѯ <> XXX ; # CYRILLIC CAPITAL LETTER KSI +# ѱ <> XXX ; # CYRILLIC SMALL LETTER PSI +# Ѱ <> XXX ; # CYRILLIC CAPITAL LETTER PSI +# ѳ <> XXX ; # CYRILLIC SMALL LETTER FITA +# Ѳ <> XXX ; # CYRILLIC CAPITAL LETTER FITA +# ѵ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA +# Ѵ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA +# ҩ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN HA +# Ҩ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN HA +# Ӏ <> XXX ; # CYRILLIC LETTER PALOCHKA +### ӑ <> XXX ; # CYRILLIC SMALL LETTER A +### Ӑ <> XXX ; # CYRILLIC CAPITAL LETTER A +### ӓ <> XXX ; # CYRILLIC SMALL LETTER A +### Ӓ <> XXX ; # CYRILLIC CAPITAL LETTER A +### ӛ <> XXX ; # CYRILLIC SMALL LETTER SCHWA +### Ӛ <> XXX ; # CYRILLIC CAPITAL LETTER SCHWA +### ѓ <> XXX ; # CYRILLIC SMALL LETTER GHE +### Ѓ <> XXX ; # CYRILLIC CAPITAL LETTER GHE +### ѐ <> XXX ; # CYRILLIC SMALL LETTER IE +### Ѐ <> XXX ; # CYRILLIC CAPITAL LETTER IE +### ё <> XXX ; # CYRILLIC SMALL LETTER IE +### Ё <> XXX ; # CYRILLIC CAPITAL LETTER IE +### ӗ <> XXX ; # CYRILLIC SMALL LETTER IE +### Ӗ <> XXX ; # CYRILLIC CAPITAL LETTER IE +### ӂ <> XXX ; # CYRILLIC SMALL LETTER ZHE +### Ӂ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE +### ӝ <> XXX ; # CYRILLIC SMALL LETTER ZHE +### Ӝ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE +### ӟ <> XXX ; # CYRILLIC SMALL LETTER ZE +### Ӟ <> XXX ; # CYRILLIC CAPITAL LETTER ZE +### ѝ <> XXX ; # CYRILLIC SMALL LETTER I +### Ѝ <> XXX ; # CYRILLIC CAPITAL LETTER I +### ӣ <> XXX ; # CYRILLIC SMALL LETTER I +### Ӣ <> XXX ; # CYRILLIC CAPITAL LETTER I +### ӥ <> XXX ; # CYRILLIC SMALL LETTER I +### Ӥ <> XXX ; # CYRILLIC CAPITAL LETTER I +### ї <> XXX ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +### Ї <> XXX ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +### ӧ <> XXX ; # CYRILLIC SMALL LETTER O +### Ӧ <> XXX ; # CYRILLIC CAPITAL LETTER O +### ӫ <> XXX ; # CYRILLIC SMALL LETTER BARRED O +### Ӫ <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O +### ќ <> XXX ; # CYRILLIC SMALL LETTER KA +### Ќ <> XXX ; # CYRILLIC CAPITAL LETTER KA +### ӯ <> XXX ; # CYRILLIC SMALL LETTER U +### Ӯ <> XXX ; # CYRILLIC CAPITAL LETTER U +### ў <> XXX ; # CYRILLIC SMALL LETTER U +### Ў <> XXX ; # CYRILLIC CAPITAL LETTER U +### ӱ <> XXX ; # CYRILLIC SMALL LETTER U +### Ӱ <> XXX ; # CYRILLIC CAPITAL LETTER U +### ӳ <> XXX ; # CYRILLIC SMALL LETTER U +### Ӳ <> XXX ; # CYRILLIC CAPITAL LETTER U +### ӵ <> XXX ; # CYRILLIC SMALL LETTER CHE +### Ӵ <> XXX ; # CYRILLIC CAPITAL LETTER CHE +### ӹ <> XXX ; # CYRILLIC SMALL LETTER YERU +### Ӹ <> XXX ; # CYRILLIC CAPITAL LETTER YERU +### ӭ <> XXX ; # CYRILLIC SMALL LETTER E +### Ӭ <> XXX ; # CYRILLIC CAPITAL LETTER E +### ѷ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA +### Ѷ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA # Completeness $ignore = [[:Mark:]''] * ; diff --git a/icu4j/src/com/ibm/icu/impl/data/Transliterator_Greek_Latin.txt b/icu4j/src/com/ibm/icu/impl/data/Transliterator_Greek_Latin.txt index a7a9629965..221e5d084e 100755 --- a/icu4j/src/com/ibm/icu/impl/data/Transliterator_Greek_Latin.txt +++ b/icu4j/src/com/ibm/icu/impl/data/Transliterator_Greek_Latin.txt @@ -1,11 +1,7 @@ -???#-------------------------------------------------------------------- +#-------------------------------------------------------------------- # Copyright (c) 1999-2004, International Business Machines # Corporation and others. All Rights Reserved. #-------------------------------------------------------------------- -# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_Greek_Latin.txt,v $ -# $Date: 2004/05/21 01:01:51 $ -# $Revision: 1.23 $ -#-------------------------------------------------------------------- # Rules are predicated on running NFD first, and NFC afterwards # :: [\u0000-\u007F \u0370-\u03FF [:Greek:] [:nonspacing mark:]] ; @@ -16,13 +12,13 @@ # TEST CASES -# ????????????? ??????????????? ????????????? ??????????????? ?????????????????????? ??? ???????????????? -# ??? ??? ??? ??? ??? ??? -# ??? ??? ??? ??? ??? -# ????? ????? ????? ????? -# ????? ????? ????? ????? -# ????? ????? ????? ????? -# ???, ???, ???, ???, ???, ??? +# Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος +# ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ +# ᾳ ῃ ῳ ὃ ὄ +# ὠς ὡς ὢς ὣς +# Ὠς Ὡς Ὢς Ὣς +# ὨΣ ὩΣ ὪΣ ὫΣ +# Ạ, ạ, Ẹ, ẹ, Ọ, ọ # Useful variables @@ -39,8 +35,8 @@ $macron = \u0304 ; $ddot = \u0308 ; $ddotmac = [$ddot$macron]; -$lcgvowel = [??????????????] ; -$ucgvowel = [??????????????] ; +$lcgvowel = [αεηιουω] ; +$ucgvowel = [ΑΕΗΙΟΥΩ] ; $gvowel = [$lcgvowel $ucgvowel] ; $lcgvowelC = [$lcgvowel $accent] ; @@ -48,11 +44,11 @@ $evowel = [aeiouyAEIOUY]; $evowel2 = [iuyIUY]; $vowel = [ $evowel $gvowel] ; -$gammaLike = [??????????????????] ; +$gammaLike = [ΓΚΞΧγκξχϰ] ; $egammaLike = [GKXCgkxc] ; -$smooth = ?? ; -$rough = ?? ; -$iotasub = ?? ; +$smooth = ̓ ; +$rough = ̔ ; +$iotasub = ͅ ; $evowel_i = [$evowel-[iI]] ; $evowel2_i = [uyUY]; @@ -64,7 +60,7 @@ $beforeLetter = [[:M:]\']* [:L:] ; $beforeLower = $accent * $lower ; $notLetter = [^[:L:][:M:]] ; -$under = ??; +$under = ̱; # Fix punctuation # preserve original @@ -72,7 +68,7 @@ $under = ??; \? <> \? $under ; \; <> \? ; -?? <> \: ; +· <> \: ; # CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve @@ -83,8 +79,8 @@ $under = ??; $accent_minus = [[$accent]-[$iotasub$macron]]; -?? } $accent_minus * $iotasub > | ?? $macron ; -?? } $accent_minus * $iotasub > | ?? $macron ; +Α } $accent_minus * $iotasub > | Α $macron ; +α } $accent_minus * $iotasub > | α $macron ; # now convert to uppercase if after uppercase, ow to lowercase @@ -100,23 +96,23 @@ $iotasub > i ; # Make A ` x = > H a x - ?? ($macron?) $rough } $beforeLower > H | ?? $1; - ?? $rough } $beforeLower > H | ??; - ?? $rough } $beforeLower > H | ?? ; - ?? ($ddot?) $rough } $beforeLower > H | ?? $1; - ?? $rough } $beforeLower > H | ?? ; - ?? $rough } $beforeLower > H | ?? ; - ?? ($ddot?) $rough } $beforeLower > H | ?? $1; + Α ($macron?) $rough } $beforeLower > H | α $1; + Ε $rough } $beforeLower > H | ε; + Η $rough } $beforeLower > H | η ; + Ι ($ddot?) $rough } $beforeLower > H | ι $1; + Ο $rough } $beforeLower > H | ο ; + Υ $rough } $beforeLower > H | υ ; + Ω ($ddot?) $rough } $beforeLower > H | ω $1; # Make A x ` = > H a x -?? ($glower $macron?) $rough > H | ?? $1 ; -?? ($glower) $rough > H | ?? $1 ; -?? ($glower) $rough > H | ?? $1 ; -?? ($glower $ddot?) $rough > H | ?? $1 ; -?? ($glower) $rough > H | ?? $1 ; -?? ($glower) $rough > H | ?? $1 ; -?? ($glower $ddot?) $rough > H | ?? $1 ; +Α ($glower $macron?) $rough > H | α $1 ; +Ε ($glower) $rough > H | ε $1 ; +Η ($glower) $rough > H | η $1 ; +Ι ($glower $ddot?) $rough > H | ι $1 ; +Ο ($glower) $rough > H | ο $1 ; +Υ ($glower) $rough > H | υ $1 ; +Ω ($glower $ddot?) $rough > H | ω $1 ; #Otherwise, make x ` into h x and X ` into H X @@ -176,120 +172,120 @@ $smooth > ; # remove now superfluous macron when returning -?? < A $macron ; -?? < a $macron ; +Α < A $macron ; +α < a $macron ; -?? <> e $macron ; -?? <> E $macron ; +η <> e $macron ; +Η <> E $macron ; -?? <> ph ; -?? } $beforeLower <> Ps ; -?? <> PS ; +φ <> ph ; +Ψ } $beforeLower <> Ps ; +Ψ <> PS ; -?? } $beforeLower <> Ph ; -?? <> PH ; -?? <> ps ; +Φ } $beforeLower <> Ph ; +Φ <> PH ; +ψ <> ps ; -?? <> o $macron ; -?? <> O $macron; +ω <> o $macron ; +Ω <> O $macron; # NORMAL -?? <> a ; -?? <> A ; +α <> a ; +Α <> A ; -?? <> b ; -?? <> B ; +β <> b ; +Β <> B ; -?? } $gammaLike <> n } $egammaLike ; -?? <> g ; -?? } $gammaLike <> N } $egammaLike ; -?? <> G ; +γ } $gammaLike <> n } $egammaLike ; +γ <> g ; +Γ } $gammaLike <> N } $egammaLike ; +Γ <> G ; -?? <> d ; -?? <> D ; +δ <> d ; +Δ <> D ; -?? <> e ; -?? <> E ; +ε <> e ; +Ε <> E ; -?? <> z ; -?? <> Z ; +ζ <> z ; +Ζ <> Z ; -?? <> th ; -?? } $beforeLower <> Th ; -?? <> TH ; +θ <> th ; +Θ } $beforeLower <> Th ; +Θ <> TH ; -?? <> i ; -?? <> I ; +ι <> i ; +Ι <> I ; -?? <> k ; -?? <> K ; +κ <> k ; +Κ <> K ; -?? <> l ; -?? <> L ; +λ <> l ; +Λ <> L ; -?? <> m ; -?? <> M ; +μ <> m ; +Μ <> M ; -?? } $gammaLike > n\' ; -?? <> n ; -?? } $gammaLike <> N\' ; -?? <> N ; +ν } $gammaLike > n\' ; +ν <> n ; +Ν } $gammaLike <> N\' ; +Ν <> N ; -?? <> x ; -?? <> X ; +ξ <> x ; +Ξ <> X ; -?? <> o ; -?? <> O ; +ο <> o ; +Ο <> O ; -?? <> p ; -?? <> P ; +π <> p ; +Π <> P ; -?? $rough <> rh; -?? $rough } $beforeLower <> Rh ; -?? $rough <> RH ; -?? <> r ; -?? <> R ; +ρ $rough <> rh; +Ρ $rough } $beforeLower <> Rh ; +Ρ $rough <> RH ; +ρ <> r ; +Ρ <> R ; # insert separator before things that turn into s -[Pp] { } [??????????????] > \' ; +[Pp] { } [ςσΣϷϸϺϻ] > \' ; # special S variants -?? <> S?? ; # ?? GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L -?? <> s?? ; #?? GREEK SMALL LETTER SHO Lowercase_Letter Grek - L -?? <> S?? ; # ?? GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L -?? <> s?? ; # ?? GREEK SMALL LETTER SAN Lowercase_Letter Grek - L +Ϸ <> Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L +ϸ <> š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L +Ϻ <> Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L +ϻ <> ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L # underbar means exception # before a letter, initial -?? } $beforeLetter <> s $underbar } $beforeLetter; -?? } $beforeLetter <> s } $beforeLetter; +ς } $beforeLetter <> s $underbar } $beforeLetter; +σ } $beforeLetter <> s } $beforeLetter; # otherwise, after a letter = final -$afterLetter { ?? <> $afterLetter { s $underbar; -$afterLetter { ?? <> $afterLetter { s ; +$afterLetter { σ <> $afterLetter { s $underbar; +$afterLetter { ς <> $afterLetter { s ; # otherwise (isolated) = initial -?? <> s $underbar; -?? <> s ; +ς <> s $underbar; +σ <> s ; -# [Pp] { ?? <> \'S ; -?? <> S ; +# [Pp] { Σ <> \'S ; +Σ <> S ; -?? <> t ; -?? <> T ; +τ <> t ; +Τ <> T ; -$vowel {?? } <> u ; -?? <> y ; -$vowel { ?? <> U ; -?? <> Y ; +$vowel {υ } <> u ; +υ <> y ; +$vowel { Υ <> U ; +Υ <> Y ; -?? <> ch ; -?? } $beforeLower <> Ch ; -?? <> CH ; +χ <> ch ; +Χ } $beforeLower <> Ch ; +Χ <> CH ; # Completeness for ASCII @@ -319,28 +315,28 @@ $rough <> h ; # Completeness for Greek -?? > | ?? ; -?? > | ?? ; -?? > | ?? ; -?? > | ?? ; -?? > | ?? ; +ϐ > | β ; +ϑ > | θ ; +ϒ > | Υ ; +ϕ > | φ ; +ϖ > | π ; -?? > | ?? ; -?? > | ?? ; -?? > | ?? ; -?? > | ??; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL -?? > j ; -?? > | ?? ; -?? > | ?? ; +ϰ > | κ ; +ϱ > | ρ ; +ϲ > | σ ; +Ϲ > | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL +ϳ > j ; +ϴ > | Θ ; +ϵ > | ε ; -?? > | ?? ; +µ > | μ ; - ?? > i; + ͺ > i; # delete any trailing ' marks used for roundtripping - < [????] { \' } [Ss] ; - < [????] { \' } $egammaLike ; + < [Ππ] { \' } [Ss] ; + < [Νν] { \' } $egammaLike ; ::NFC (NFD) ; # ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ; diff --git a/icu4j/src/com/ibm/icu/impl/data/Transliterator_Greek_Latin_UNGEGN.txt b/icu4j/src/com/ibm/icu/impl/data/Transliterator_Greek_Latin_UNGEGN.txt index d286167490..f14bc236ce 100644 --- a/icu4j/src/com/ibm/icu/impl/data/Transliterator_Greek_Latin_UNGEGN.txt +++ b/icu4j/src/com/ibm/icu/impl/data/Transliterator_Greek_Latin_UNGEGN.txt @@ -1,17 +1,13 @@ -???#-------------------------------------------------------------------- +#-------------------------------------------------------------------- # Copyright (c) 1999-2004, International Business Machines # Corporation and others. All Rights Reserved. #-------------------------------------------------------------------- -# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_Greek_Latin_UNGEGN.txt,v $ -# $Date: 2004/05/21 01:01:50 $ -# $Revision: 1.4 $ -#-------------------------------------------------------------------- # For modern Greek, based on UNGEGN rules. # Rules are predicated on running NFD first, and NFC afterwards # MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN # WARNING: need to add accents to both filters ### -# :: [????????;??????????????????????????????????????????????-????-????-????-????-????-????-????-????????-??????-??????????-??????-????-????-????-?????????????-??????-??????-??????-??????-??????-??????-??????-??????-??????-??????-??????-??????-??????-??????-??????-???????????????-??????-??????-?????????-??????-??????-??????-?????????-??????-??????-??????\u03F7-\u07FB\u03F9] ; +# :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ\u03F7-\u07FB\u03F9] ; :: [[[:Greek:][:Mn:][:Me:]] [\:-;?\u00B7\u037E\u0387]] ; ::NFD (NFC) ; @@ -22,11 +18,11 @@ $lower = [[:latin:][:greek:] & [:Ll:]] ; $upper = [[:latin:][:greek:] & [:Lu:]] ; $accent = [[:Mn:][:Me:]] ; -$macron = ?? ; -$ddot = ?? ; +$macron = ̄ ; +$ddot = ̈ ; -$lcgvowel = [??????????????] ; -$ucgvowel = [??????????????] ; +$lcgvowel = [αεηιουω] ; +$ucgvowel = [ΑΕΗΙΟΥΩ] ; $gvowel = [$lcgvowel $ucgvowel] ; $lcgvowelC = [$lcgvowel $accent] ; @@ -35,17 +31,17 @@ $vowel = [ $evowel $gvowel] ; $beforeLower = $accent * $lower ; -$gammaLike = [??????????????????] ; +$gammaLike = [ΓΚΞΧγκξχϰ] ; $egammaLike = [GKXCgkxc] ; -$smooth = ?? ; -$rough = ?? ; -$iotasub = ?? ; +$smooth = ̓ ; +$rough = ̔ ; +$iotasub = ͅ ; -$softener = [????????????????????????????????$gvowel] ; +$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ; -$under = ??; +$under = ̱; -$caron = ??; +$caron = ̌; $afterLetter = [:L:] [\'$accent]* ; $beforeLetter = [\'$accent]* [:L:] ; @@ -57,154 +53,154 @@ $beforeLetter = [\'$accent]* [:L:] ; \? <> \? $under ; \; <> \? ; -?? <> \: ; +· <> \: ; # Fix any ancient characters that creep in -?? > ?? ; -?? > ?? ; -?? > ?? ; +͂ > ́ ; +̂ > ́ ; +̀ > ́ ; $smooth > ; $rough > ; $iotasub > ; -?? > ; +ͺ > ; # need to have these up here so the rules don't mask -?? <> i $under ; -?? <> I $under ; +η <> i $under ; +Η <> I $under ; -?? } $beforeLower <> Ps ; -?? <> PS ; -?? <> ps ; +Ψ } $beforeLower <> Ps ; +Ψ <> PS ; +ψ <> ps ; -?? <> o $under ; -?? <> O $under; +ω <> o $under ; +Ω <> O $under; # at begining or end of word, convert mp to b -[^[:L:]$accent] { ???? > b ; -???? } [^[:L:]$accent] > b ; -[^[:L:]$accent] { [????][????] > B ; -[????][????] } [^[:L:]$accent] > B ; +[^[:L:]$accent] { μπ > b ; +μπ } [^[:L:]$accent] > b ; +[^[:L:]$accent] { [Μμ][Ππ] > B ; +[Μμ][Ππ] } [^[:L:]$accent] > B ; -???? < b ; -???? < B } $beforeLower ; -???? < B ; +μπ < b ; +Μπ < B } $beforeLower ; +ΜΠ < B ; # handle diphthongs ending with upsilon -???? <> ou ; -???? <> OU ; -???? <> Ou ; -???? <> oU ; +ου <> ou ; +ΟΥ <> OU ; +Ου <> Ou ; +οΥ <> oU ; $fmaker = [aeiAEI] $under ? ; $shiftForwardVowels = [[:Mn:]-[\u0308]]; # note: a diaeresis keeps the items separate -$fmaker { ?? ( $shiftForwardVowels )* } $softener > $1 v $under ; -?? $1 < ( $shiftForwardVowels )* v $under ; +$fmaker { υ ( $shiftForwardVowels )* } $softener > $1 v $under ; +υ $1 < ( $shiftForwardVowels )* v $under ; -$fmaker { ?? ( $shiftForwardVowels )* } > $1 f $under; -?? $1 < ( $shiftForwardVowels )* f $under ; +$fmaker { υ ( $shiftForwardVowels )* } > $1 f $under; +υ $1 < ( $shiftForwardVowels )* f $under ; -$fmaker { ?? } $softener <> V $under ; -$fmaker { ?? <> U $under ; +$fmaker { Υ } $softener <> V $under ; +$fmaker { Υ <> U $under ; -?? <> y ; -?? <> Y ; +υ <> y ; +Υ <> Y ; # NORMAL -?? <> a ; -?? <> A ; +α <> a ; +Α <> A ; -?? <> v ; -?? <> V ; +β <> v ; +Β <> V ; -?? } $gammaLike <> n } $egammaLike ; -?? <> g ; -?? } $gammaLike <> N } $egammaLike ; -?? <> G ; +γ } $gammaLike <> n } $egammaLike ; +γ <> g ; +Γ } $gammaLike <> N } $egammaLike ; +Γ <> G ; -?? <> d ; -?? <> D ; +δ <> d ; +Δ <> D ; -?? <> e ; -?? <> E ; +ε <> e ; +Ε <> E ; -?? <> z ; -?? <> Z ; +ζ <> z ; +Ζ <> Z ; -?? <> th ; -?? } $beforeLower <> Th ; -?? <> TH ; +θ <> th ; +Θ } $beforeLower <> Th ; +Θ <> TH ; -?? <> i ; -?? <> I ; +ι <> i ; +Ι <> I ; -?? <> k ; -?? <> K ; +κ <> k ; +Κ <> K ; -?? <> l ; -?? <> L ; +λ <> l ; +Λ <> L ; -?? <> m ; -?? <> M ; +μ <> m ; +Μ <> M ; -?? } $gammaLike > n\' ; -?? <> n ; -?? } $gammaLike <> N\' ; -?? <> N ; +ν } $gammaLike > n\' ; +ν <> n ; +Ν } $gammaLike <> N\' ; +Ν <> N ; -?? <> x ; -?? <> X ; +ξ <> x ; +Ξ <> X ; -?? <> o ; -?? <> O ; +ο <> o ; +Ο <> O ; -?? <> p ; -?? <> P ; +π <> p ; +Π <> P ; -?? <> r ; -?? <> R ; +ρ <> r ; +Ρ <> R ; # insert separator before things that turn into s -[Pp] { } [??????????????] > \' ; +[Pp] { } [ςσΣϷϸϺϻ] > \' ; # special S variants -?? <> S?? ; # ?? GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L -?? <> s?? ; #?? GREEK SMALL LETTER SHO Lowercase_Letter Grek - L -?? <> S?? ; # ?? GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L -?? <> s?? ; # ?? GREEK SMALL LETTER SAN Lowercase_Letter Grek - L +Ϸ <> Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L +ϸ <> š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L +Ϻ <> Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L +ϻ <> ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L # Caron means exception # before a letter, initial -?? } $beforeLetter <> s $under } $beforeLetter; -?? } $beforeLetter <> s } $beforeLetter; +ς } $beforeLetter <> s $under } $beforeLetter; +σ } $beforeLetter <> s } $beforeLetter; # otherwise, after a letter = final -$afterLetter { ?? <> $afterLetter { s $under; -$afterLetter { ?? <> $afterLetter { s ; +$afterLetter { σ <> $afterLetter { s $under; +$afterLetter { ς <> $afterLetter { s ; # otherwise (isolated) = initial -?? <> s $under; -?? <> s ; +ς <> s $under; +σ <> s ; -# [Pp] { ?? <> \'S ; -?? <> S ; +# [Pp] { Σ <> \'S ; +Σ <> S ; -?? <> t ; -?? <> T ; +τ <> t ; +Τ <> T ; -?? <> f ; -?? <> F ; +φ <> f ; +Φ <> F ; -?? <> ch ; -?? } $beforeLower <> Ch ; -?? <> CH ; +χ <> ch ; +Χ } $beforeLower <> Ch ; +Χ <> CH ; # Completeness for ASCII @@ -230,25 +226,25 @@ $afterLetter { ?? <> $afterLetter { s ; # Completeness for Greek -?? > | ?? ; -?? > | ?? ; -?? > | ?? ; -?? > | ?? ; -?? > | ?? ; +ϐ > | β ; +ϑ > | θ ; +ϒ > | Υ ; +ϕ > | φ ; +ϖ > | π ; -?? > | ?? ; -?? > | ?? ; -?? > | ?? ; -?? > | ??; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL -?? > j ; -?? > | ?? ; -?? > | ?? ; -?? > | ?? ; +ϰ > | κ ; +ϱ > | ρ ; +ϲ > | σ ; +Ϲ > | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL +ϳ > j ; +ϴ > | Θ ; +ϵ > | ε ; +µ > | μ ; # delete any trailing ' marks used for roundtripping - < [????] { \' } [Ss] ; - < [????] { \' } $egammaLike ; + < [Ππ] { \' } [Ss] ; + < [Νν] { \' } $egammaLike ; ::NFC (NFD) ; diff --git a/icu4j/src/com/ibm/icu/impl/data/Transliterator_Hebrew_Latin.txt b/icu4j/src/com/ibm/icu/impl/data/Transliterator_Hebrew_Latin.txt index 3f9b70715b..edfb5a08b1 100644 --- a/icu4j/src/com/ibm/icu/impl/data/Transliterator_Hebrew_Latin.txt +++ b/icu4j/src/com/ibm/icu/impl/data/Transliterator_Hebrew_Latin.txt @@ -1,11 +1,7 @@ -???#-------------------------------------------------------------------- +#-------------------------------------------------------------------- # Copyright (c) 1999-2004, International Business Machines # Corporation and others. All Rights Reserved. #-------------------------------------------------------------------- -# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_Hebrew_Latin.txt,v $ -# $Date: 2004/05/21 01:01:51 $ -# $Revision: 1.6 $ -#-------------------------------------------------------------------- # Transliteration table for Hebrew # Based on the UNGEGN table at: @@ -27,74 +23,74 @@ # For more information, see" # http://oss.software.ibm.com/icu/userguide/Transliteration.html -:: [[:Hebrew:] [\u05B0-\u05B9\u05BB-\u05BC\u05C1-\u05C2\u2135-\u2138??\u05BF] - [\u05BD]] ; +:: [[:Hebrew:] [\u05B0-\u05B9\u05BB-\u05BC\u05C1-\u05C2\u2135-\u2138̄\u05BF] - [\u05BD]] ; :: nfkd (nfc) ; $letterAfter = [:M:]* [:L:] ; # move longer items here to avoid masking -?? <> h?? ; -?? <> z?? } $letterAfter; -?? <> z?? ; -?? <> s?? ; -?? <> t?? ; +ח <> ẖ ; +צ <> ẕ } $letterAfter; +ץ <> ẕ ; +ש <> ş ; +ת <> ţ ; -?? <> ?? ; -?? <> b ; -?? <> g ; -?? <> d ; -?? <> h ; -?? <> w ; -?? <> z ; -?? <> t ; -?? <> y ; -?? <> k } $letterAfter; -?? <> k ; -?? <> l ; -?? <> m } $letterAfter; -?? <> m ; -?? <> n } $letterAfter; -?? <> n ; -?? <> s ; -?? <> ?? ; -?? <> p } $letterAfter; -?? <> p ; -?? <> q ; -?? <> r ; +א <> ʼ ; +ב <> b ; +ג <> g ; +ד <> d ; +ה <> h ; +ו <> w ; +ז <> z ; +ט <> t ; +י <> y ; +כ <> k } $letterAfter; +ך <> k ; +ל <> l ; +מ <> m } $letterAfter; +ם <> m ; +נ <> n } $letterAfter; +ן <> n ; +ס <> s ; +ע <> ʻ ; +פ <> p } $letterAfter; +ף <> p ; +ק <> q ; +ר <> r ; - ?? > | ????; # HEBREW LIGATURE YIDDISH DOUBLE VAV - ?? > | ????; # HEBREW LIGATURE YIDDISH VAV YOD - ?? > | ???? ; # HEBREW LIGATURE YIDDISH DOUBLE YOD + װ > | וו; # HEBREW LIGATURE YIDDISH DOUBLE VAV + ױ > | וי; # HEBREW LIGATURE YIDDISH VAV YOD + ײ > | יי ; # HEBREW LIGATURE YIDDISH DOUBLE YOD -?? <> ?? ; # dagesh just goes to overdot for now -?? <> ?? ; # shin dot -> sh -?? <> ?? ; # sin dot -> s +ּ <> ̇ ; # dagesh just goes to overdot for now +ׁ <> ̌ ; # shin dot -> sh +ׂ <> ̂ ; # sin dot -> s # points -?? <> a?? ; -?? <> a?? ; -?? <> e?? ; -?? <> e?? ; -?? <> e?? ; -?? <> i ; -?? <> o?? ; -?? <> u ; +ֲ <> à ; +ָ <> á ; +ֱ <> è ; +ֵ <> é ; +ְ <> ê ; +ִ <> i ; +ֹ <> ò ; +ֻ <> u ; -?? <> a ; -?? <> e ; -?? <> o ; +ַ <> a ; +ֶ <> e ; +ֳ <> o ; -\u05BF <> ?? ; +\u05BF <> ̄ ; # fallbacks -?? < c ; -?? < f } $letterAfter; -?? < f ; -?? < j ; -?? < v ; -???? < x ; +ק < c ; +פ < f } $letterAfter; +ף < f ; +ז < j ; +ו < v ; +כס < x ; :: (lower); :: nfc (nfd) ; -:: ([[:Latin:] [\u02BB-\u02BC\u0300-\u0302\u0307\u030C\u0327\u0331\u0340-\u0341 ?? ]]); \ No newline at end of file +:: ([[:Latin:] [\u02BB-\u02BC\u0300-\u0302\u0307\u030C\u0327\u0331\u0340-\u0341 ̄ ]]); \ No newline at end of file