// -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2002, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpicurules.bat
// Source: ../../../impl/data/Transliterator_Cyrillic_Latin.txt
// Date: Sat Jul 27 10:31:01 2002
//--------------------------------------------------------------------

// Cyrillic_Latin
//
// Layout note: every rule string sits on its own line. A "//" comment runs
// to the end of the line, so a rule string must never share a line with a
// preceding comment or it is silently dropped from the rule set.

t_Cyrl_Latn {
  Rule {
    //--------------------------------------------------------------------
    //--------------------------------------------------------------------
    //--------------------------------------------------------------------

    // TODO: add remaining characters
    // Should add variants for Russian-English, Russian-German
    // Those can use this as a base, and then remap cases
    // like a $hat to ya or ja.

    // :: [\\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]] ;

    //## WARNING, \u0308 must be added to the generated filters, in both directions ###

    // MINIMAL FILTER
    ":: [\u0308\u0102-\u0103\u0114-\u0115\u011E-\u011F\u012C-\u012D\u014E-\u014F\u016C-\u016D\u0306\u0400-\u045F\u0490-\u0495\u0498-\u0499\u04C1-\u04C2\u04D0-\u04DF\u04E2-\u04E7\u04EC-\u04F5\u04F8-\u04F9\u1E1C-\u1E1D\u1EAE-\u1EB7\u1FB0\u1FB8\u1FD0\u1FD8\u1FE0\u1FE8] ;"

    // Decompose first (NFD going forward, NFC going in reverse) so the rules
    // below see base letters followed by separate combining marks.
    ":: NFD (NFC) ;"

    // Modifier letters and combining marks used on the Latin side of the rules.
    "$modprime = \u02B9;"
    "$modprime2 = \u02BA;"
    "$grave = \u0300;"
    "$acute = \u0301;"
    "$hat = \u0302;"
    "$breve = \u0306 ;"
    "$dot = \u0307 ;"
    "$caron = \u030C ;"
    "$comma = \u0326 ;"
    "$under = \u0331 ;"

    // move up so not masked
    // (rules whose Latin side is "letter + mark" come before the plain
    // single-letter rules in the "Normal order" section)
    "я <> a $hat ;" // CYRILLIC SMALL LETTER YA
    "Я <> A $hat ;" // CYRILLIC CAPITAL LETTER YA
    "ч <> c $caron ;" // CYRILLIC SMALL LETTER CHE
    "Ч <> C $caron;" // CYRILLIC CAPITAL LETTER CHE
    // ҷ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER
    // Ҷ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
    // ӌ <> XXX ; # CYRILLIC SMALL LETTER KHAKASSIAN CHE
    // Ӌ <> XXX ; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
    // ҹ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
    // Ҹ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
    "э <> e $acute;" // CYRILLIC SMALL LETTER E
    "Э <> E $acute;" // CYRILLIC CAPITAL LETTER E
    "є <> e $hat;" // CYRILLIC SMALL LETTER UKRAINIAN IE
    "Є <> E $hat;" // CYRILLIC CAPITAL LETTER UKRAINIAN IE
    "ш <> s $caron ;" // CYRILLIC SMALL LETTER SHA
    "Ш <> S $caron ;" // CYRILLIC CAPITAL LETTER SHA
    "щ <> s $hat ;" // CYRILLIC SMALL LETTER SHCHA
    "Щ <> S $hat;" // CYRILLIC CAPITAL LETTER SHCHA
    "ѕ <> z $hat ;" // CYRILLIC SMALL LETTER DZE
    "Ѕ <> Z $hat;" // CYRILLIC CAPITAL LETTER DZE
    // ӡ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN DZE
    // Ӡ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
    "ю <> u $hat ;" // CYRILLIC SMALL LETTER YU
    "Ю <> U $hat ;" // CYRILLIC CAPITAL LETTER YU
    "і <> i $acute;" // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
    "І <> I $acute;" // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
    "ј <> j $caron;" // CYRILLIC SMALL LETTER JE
    "Ј <> J $caron;" // CYRILLIC CAPITAL LETTER JE
    "љ <> l $hat ;" // CYRILLIC SMALL LETTER LJE
    "Љ <> L $hat ;" // CYRILLIC CAPITAL LETTER LJE
    "њ <> n $hat ;" // CYRILLIC SMALL LETTER NJE
    "Њ <> N $hat ;" // CYRILLIC CAPITAL LETTER NJE
    "ћ <> c $acute ;" // CYRILLIC SMALL LETTER TSHE
    "Ћ <> C $acute ;" // CYRILLIC CAPITAL LETTER TSHE
    "џ <> d $hat ;" // CYRILLIC SMALL LETTER DZHE
    "Џ <> D $hat ;" // CYRILLIC CAPITAL LETTER DZHE

    // Normal order
    "а <> a ;" // CYRILLIC SMALL LETTER A
    "А <> A ;" // CYRILLIC CAPITAL LETTER A
    "ә <> \u0259 ;" // CYRILLIC SMALL LETTER SCHWA
    "Ә <> \u018F ;" // CYRILLIC CAPITAL LETTER SCHWA
    "ӕ <> \u00E6 ;" // CYRILLIC SMALL LIGATURE A IE
    "Ӕ <> \u00C6 ;" // CYRILLIC CAPITAL LIGATURE A IE
    "б <> b ;" // CYRILLIC SMALL LETTER BE
    "Б <> B ;" // CYRILLIC CAPITAL LETTER BE
    "в <> v ;" // CYRILLIC SMALL LETTER VE
    "В <> V ;" // CYRILLIC CAPITAL LETTER VE
    "ґ <> g $grave ;" // CYRILLIC SMALL LETTER GHE WITH UPTURN
    "Ґ <> G $grave ;" // CYRILLIC CAPITAL LETTER GHE WITH UPTURN
    "ғ <> g $dot ;" // CYRILLIC SMALL LETTER GHE WITH STROKE
    "Ғ <> G $dot;" // CYRILLIC CAPITAL LETTER GHE WITH STROKE
    "ҕ <> g $breve;" // CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK
    "Ҕ <> G $breve;" // CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
    "г <> g ;" // CYRILLIC SMALL LETTER GHE
    "Г <> G ;" // CYRILLIC CAPITAL LETTER GHE
    "д <> d;" // CYRILLIC SMALL LETTER DE
    "Д <> D;" // CYRILLIC CAPITAL LETTER DE
    "ђ <> đ ;" // CYRILLIC SMALL LETTER DJE
    "Ђ <> Đ ;" // CYRILLIC CAPITAL LETTER DJE
    "ҙ <> z $comma ;" // CYRILLIC SMALL LETTER ZE WITH DESCENDER
    "Ҙ <> Z $comma ;" // CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
    "е <> e ;" // CYRILLIC SMALL LETTER IE
    "Е <> E;" // CYRILLIC CAPITAL LETTER IE
    "ж <> z $caron;" // CYRILLIC SMALL LETTER ZHE
    "Ж <> Z $caron;" // CYRILLIC CAPITAL LETTER ZHE
    // җ <> XXX ; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER
    // Җ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
    "з <> z ;" // CYRILLIC SMALL LETTER ZE
    "З <> Z;" // CYRILLIC CAPITAL LETTER ZE
    "й <> j ;" // CYRILLIC SMALL LETTER SHORT I
    "Й <> J ;" // CYRILLIC CAPITAL LETTER SHORT I
    "и <> i ;" // CYRILLIC SMALL LETTER I
    "И <> I ;" // CYRILLIC CAPITAL LETTER I
    "к <> k ;" // CYRILLIC SMALL LETTER KA
    "К <> K;" // CYRILLIC CAPITAL LETTER KA
    // қ <> XXX ; # CYRILLIC SMALL LETTER KA WITH DESCENDER
    // Қ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
    // ӄ <> XXX ; # CYRILLIC SMALL LETTER KA WITH HOOK
    // Ӄ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH HOOK
    // ҡ <> XXX ; # CYRILLIC SMALL LETTER BASHKIR KA
    // Ҡ <> XXX ; # CYRILLIC CAPITAL LETTER BASHKIR KA
    // ҟ <> XXX ; # CYRILLIC SMALL LETTER KA WITH STROKE
    // Ҟ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH STROKE
    // ҝ <> XXX ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
    // Ҝ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
    "л <> l ;" // CYRILLIC SMALL LETTER EL
    "Л <> L;" // CYRILLIC CAPITAL LETTER EL
    "м <> m ;" // CYRILLIC SMALL LETTER EM
    "М <> M ;" // CYRILLIC CAPITAL LETTER EM
    "н <> n ;" // CYRILLIC SMALL LETTER EN
    "Н <> N;" // CYRILLIC CAPITAL LETTER EN
    // ң <> XXX ; # CYRILLIC SMALL LETTER EN WITH DESCENDER
    // Ң <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
    // ӈ <> XXX ; # CYRILLIC SMALL LETTER EN WITH HOOK
    // Ӈ <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH HOOK
    // ҥ <> XXX ; # CYRILLIC SMALL LIGATURE EN GHE
    // Ҥ <> XXX ; # CYRILLIC CAPITAL LIGATURE EN GHE
    "о <> o ;" // CYRILLIC SMALL LETTER O
    "О <> O ;" // CYRILLIC CAPITAL LETTER O
    // ө <> XXX ; # CYRILLIC SMALL LETTER BARRED O
    // Ө <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
    "п <> p ;" // CYRILLIC SMALL LETTER PE
    "П <> P ;" // CYRILLIC CAPITAL LETTER PE
    // ҧ <> XXX ; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK
    // Ҧ <> XXX ; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
    // ҁ <> XXX ; # CYRILLIC SMALL LETTER KOPPA
    // Ҁ <> XXX ; # CYRILLIC CAPITAL LETTER KOPPA
    "р <> r ;" // CYRILLIC SMALL LETTER ER
    "Р <> R ;" // CYRILLIC CAPITAL LETTER ER
    // ҏ <> XXX ; # CYRILLIC SMALL LETTER ER WITH TICK
    // Ҏ <> XXX ; # CYRILLIC CAPITAL LETTER ER WITH TICK
    "с <> s ;" // CYRILLIC SMALL LETTER ES
    "С <> S ;" // CYRILLIC CAPITAL LETTER ES
    // ҫ <> XXX ; # CYRILLIC SMALL LETTER ES WITH DESCENDER
    // Ҫ <> XXX ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
    "т <> t ;" // CYRILLIC SMALL LETTER TE
    "Т <> T ;" // CYRILLIC CAPITAL LETTER TE
    // ҭ <> XXX ; # CYRILLIC SMALL LETTER TE WITH DESCENDER
    // Ҭ <> XXX ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
    "у <> u ;" // CYRILLIC SMALL LETTER U
    "У <> U ;" // CYRILLIC CAPITAL LETTER U
    // ү <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U
    // Ү <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U
    // ұ <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
    // Ұ <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
    // ѹ <> XXX ; # CYRILLIC SMALL LETTER UK
    // Ѹ <> XXX ; # CYRILLIC CAPITAL LETTER UK
    "ф <> f ;" // CYRILLIC SMALL LETTER EF
    "Ф <> F ;" // CYRILLIC CAPITAL LETTER EF
    "х <> h ;" // CYRILLIC SMALL LETTER HA
    "Х <> H;" // CYRILLIC CAPITAL LETTER HA
    // ҳ <> XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER
    // Ҳ <> XXX ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
    // һ <> XXX ; # CYRILLIC SMALL LETTER SHHA
    // Һ <> XXX ; # CYRILLIC CAPITAL LETTER SHHA
    // ѡ <> XXX ; # CYRILLIC SMALL LETTER OMEGA
    // Ѡ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA
    // ѿ <> XXX ; # CYRILLIC SMALL LETTER OT
    // Ѿ <> XXX ; # CYRILLIC CAPITAL LETTER OT
    // ѽ <> XXX ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO
    // Ѽ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
    // ѻ <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA
    // Ѻ <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA
    "ц <> c ;" // CYRILLIC SMALL LETTER TSE
    "Ц <> C;" // CYRILLIC CAPITAL LETTER TSE
    // ҵ <> XXX ; # CYRILLIC SMALL LIGATURE TE TSE
    // Ҵ <> XXX ; # CYRILLIC CAPITAL LIGATURE TE TSE
    // ҽ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE
    // Ҽ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
    // ҿ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER
    // Ҿ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
    "Ъ <> $modprime2 $under ;" // CYRILLIC CAPITAL LETTER HARD SIGN
    "ъ <> $modprime2 ;" // CYRILLIC SMALL LETTER HARD SIGN
    "Ь <> $modprime $under ;" // CYRILLIC CAPITAL LETTER SOFT SIGN
    "ь <> $modprime ;" // CYRILLIC SMALL LETTER SOFT SIGN
    "ы <> y ;" // CYRILLIC SMALL LETTER YERU
    "Ы <> Y ;" // CYRILLIC CAPITAL LETTER YERU
    // ҍ <> XXX ; # CYRILLIC SMALL LETTER SEMISOFT SIGN
    // Ҍ <> XXX ; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
    // ѣ <> XXX ; # CYRILLIC SMALL LETTER YAT
    // Ѣ <> XXX ; # CYRILLIC CAPITAL LETTER YAT
    // ѥ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED E
    // Ѥ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED E
    // ѧ <> XXX ; # CYRILLIC SMALL LETTER LITTLE YUS
    // Ѧ <> XXX ; # CYRILLIC CAPITAL LETTER LITTLE YUS
    // ѫ <> XXX ; # CYRILLIC SMALL LETTER BIG YUS
    // Ѫ <> XXX ; # CYRILLIC CAPITAL LETTER BIG YUS
    // ѩ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS
    // Ѩ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
    // ѭ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED BIG YUS
    // Ѭ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
    // ѯ <> XXX ; # CYRILLIC SMALL LETTER KSI
    // Ѯ <> XXX ; # CYRILLIC CAPITAL LETTER KSI
    // ѱ <> XXX ; # CYRILLIC SMALL LETTER PSI
    // Ѱ <> XXX ; # CYRILLIC CAPITAL LETTER PSI
    // ѳ <> XXX ; # CYRILLIC SMALL LETTER FITA
    // Ѳ <> XXX ; # CYRILLIC CAPITAL LETTER FITA
    // ѵ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
    // Ѵ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
    // ҩ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN HA
    // Ҩ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
    // Ӏ <> XXX ; # CYRILLIC LETTER PALOCHKA

    // NOTE(review): the "//##" entries below are all precomposed letters;
    // presumably they need no rule of their own because ":: NFD" splits them
    // into a base letter plus a combining mark before the rules run. The
    // generated names omit the "WITH <mark>" suffix. Confirm against the
    // generator before relying on either point.
    //## ӑ <> XXX ; # CYRILLIC SMALL LETTER A
    //## Ӑ <> XXX ; # CYRILLIC CAPITAL LETTER A
    //## ӓ <> XXX ; # CYRILLIC SMALL LETTER A
    //## Ӓ <> XXX ; # CYRILLIC CAPITAL LETTER A
    //## ӛ <> XXX ; # CYRILLIC SMALL LETTER SCHWA
    //## Ӛ <> XXX ; # CYRILLIC CAPITAL LETTER SCHWA
    //## ѓ <> XXX ; # CYRILLIC SMALL LETTER GHE
    //## Ѓ <> XXX ; # CYRILLIC CAPITAL LETTER GHE
    //## ѐ <> XXX ; # CYRILLIC SMALL LETTER IE
    //## Ѐ <> XXX ; # CYRILLIC CAPITAL LETTER IE
    //## ё <> XXX ; # CYRILLIC SMALL LETTER IE
    //## Ё <> XXX ; # CYRILLIC CAPITAL LETTER IE
    //## ӗ <> XXX ; # CYRILLIC SMALL LETTER IE
    //## Ӗ <> XXX ; # CYRILLIC CAPITAL LETTER IE
    //## ӂ <> XXX ; # CYRILLIC SMALL LETTER ZHE
    //## Ӂ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
    //## ӝ <> XXX ; # CYRILLIC SMALL LETTER ZHE
    //## Ӝ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
    //## ӟ <> XXX ; # CYRILLIC SMALL LETTER ZE
    //## Ӟ <> XXX ; # CYRILLIC CAPITAL LETTER ZE
    //## ѝ <> XXX ; # CYRILLIC SMALL LETTER I
    //## Ѝ <> XXX ; # CYRILLIC CAPITAL LETTER I
    //## ӣ <> XXX ; # CYRILLIC SMALL LETTER I
    //## Ӣ <> XXX ; # CYRILLIC CAPITAL LETTER I
    //## ӥ <> XXX ; # CYRILLIC SMALL LETTER I
    //## Ӥ <> XXX ; # CYRILLIC CAPITAL LETTER I
    //## ї <> XXX ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
    //## Ї <> XXX ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
    //## ӧ <> XXX ; # CYRILLIC SMALL LETTER O
    //## Ӧ <> XXX ; # CYRILLIC CAPITAL LETTER O
    //## ӫ <> XXX ; # CYRILLIC SMALL LETTER BARRED O
    //## Ӫ <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
    //## ќ <> XXX ; # CYRILLIC SMALL LETTER KA
    //## Ќ <> XXX ; # CYRILLIC CAPITAL LETTER KA
    //## ӯ <> XXX ; # CYRILLIC SMALL LETTER U
    //## Ӯ <> XXX ; # CYRILLIC CAPITAL LETTER U
    //## ў <> XXX ; # CYRILLIC SMALL LETTER U
    //## Ў <> XXX ; # CYRILLIC CAPITAL LETTER U
    //## ӱ <> XXX ; # CYRILLIC SMALL LETTER U
    //## Ӱ <> XXX ; # CYRILLIC CAPITAL LETTER U
    //## ӳ <> XXX ; # CYRILLIC SMALL LETTER U
    //## Ӳ <> XXX ; # CYRILLIC CAPITAL LETTER U
    //## ӵ <> XXX ; # CYRILLIC SMALL LETTER CHE
    //## Ӵ <> XXX ; # CYRILLIC CAPITAL LETTER CHE
    //## ӹ <> XXX ; # CYRILLIC SMALL LETTER YERU
    //## Ӹ <> XXX ; # CYRILLIC CAPITAL LETTER YERU
    //## ӭ <> XXX ; # CYRILLIC SMALL LETTER E
    //## Ӭ <> XXX ; # CYRILLIC CAPITAL LETTER E
    //## ѷ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
    //## Ѷ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA

    // Completeness
    // The rules below use "<", so they apply only in the reverse
    // (Latin-to-Cyrillic) direction. They rewrite q, w and x, which have no
    // Cyrillic counterpart in the rules above, into k, u and ks. The leading
    // "|" places the cursor before the replacement text so that it is then
    // converted by the ordinary letter rules.
    // The two "KS" rules pick the all-caps form when X has an uppercase
    // letter next to it, skipping any marks or apostrophes in between.
    "$ignore = [[:Mark:]''] * ;"
    "| k < q ;"
    "| K < Q ;"
    "| u < w ;"
    "| U < W ;"
    "| KS < X } $ignore [:UppercaseLetter:] ;"
    "| KS < [:UppercaseLetter:] $ignore { X ;"
    "| Ks < X ;"
    "| ks < x ;"

    // Recompose last (NFC going forward, NFD going in reverse).
    ":: NFC (NFD) ;"

    // note: a global filter is more efficient, but MUST include all source chars!!
    // :: ([\\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]]);

    // MINIMAL FILTER: Latin-Cyrillic
    ":: ( [\u0308A-Za-z\u00C0-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u018F\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0259\u02B9-\u02BA\u0300-\u0302\u0306-\u0307\u030C\u0326\u0331\u0340-\u0341\u0344\u0374\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0400\u0403\u040C-\u040E\u0419\u0439\u0450\u0453\u045C-\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F02-\u1F05\u1F0A-\u1F0D\u1F12-\u1F15\u1F1A-\u1F1D\u1F22-\u1F25\u1F2A-\u1F2D\u1F32-\u1F35\u1F3A-\u1F3D\u1F42-\u1F45\u1F4A-\u1F4D\u1F52-\u1F55\u1F5B\u1F5D\u1F62-\u1F65\u1F6A-\u1F6D\u1F70-\u1F7D\u1F82-\u1F85\u1F8A-\u1F8D\u1F92-\u1F95\u1F9A-\u1F9D\u1FA2-\u1FA5\u1FAA-\u1FAD\u1FB0\u1FB2\u1FB4\u1FB8\u1FBA-\u1FBB\u1FC2\u1FC4\u1FC8-\u1FCB\u1FCD-\u1FCE\u1FD0\u1FD2-\u1FD3\u1FD8\u1FDA-\u1FDB\u1FDD-\u1FDE\u1FE0\u1FE2-\u1FE3\u1FE8\u1FEA-\u1FEB\u1FED-\u1FEE\u1FF2\u1FF4\u1FF8-\u1FFB\u212A-\u212B] ) ;"
  }
}