307 lines
13 KiB
Plaintext
307 lines
13 KiB
Plaintext
|
#--------------------------------------------------------------------
|
|||
|
# Copyright (c) 1999-2004, International Business Machines
|
|||
|
# Corporation and others. All Rights Reserved.
|
|||
|
#--------------------------------------------------------------------
|
|||
|
# TODO: add remaining characters
|
|||
|
# Should add variants for Russian-English, Russian-German
|
|||
|
# Those can use this as a base, and then remap cases
|
|||
|
# like a $hat to ya or ja.
|
|||
|
|
|||
|
# :: [\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]] ;
|
|||
|
### WARNING, \u0308 must be added to the generated filters, in both directions ###
|
|||
|
# MINIMAL FILTER: Cyrillic-Latin (forward direction)
# Global filter for the forward direction: only characters in this set are
# offered to the rules below. The set begins with \u0308, as the WARNING
# above requires for generated filters.
|
|||
|
:: [\u0308\u0102-\u0103\u0114-\u0115\u011E-\u011F\u012C-\u012D\u014E-\u014F\u016C-\u016D\u0306\u0400-\u045F\u0490-\u0495\u0498-\u0499\u04C1-\u04C2\u04D0-\u04DF\u04E2-\u04E7\u04EC-\u04F5\u04F8-\u04F9\u1E1C-\u1E1D\u1EAE-\u1EB7\u1FB0\u1FB8\u1FD0\u1FD8\u1FE0\u1FE8] ;
|
|||
|
# Forward direction: decompose the input (NFD) before the mapping rules run.
# The parenthesized NFC is the normalization used at this position when the
# file is applied in the reverse (Latin-Cyrillic) direction.
:: NFD (NFC) ;
|
|||
|
|
|||
|
# Variables naming the characters used on the Latin side of the rules.
#   $modprime  U+02B9 MODIFIER LETTER PRIME         (soft sign)
#   $modprime2 U+02BA MODIFIER LETTER DOUBLE PRIME  (hard sign)
#   $grave     U+0300 COMBINING GRAVE ACCENT
#   $acute     U+0301 COMBINING ACUTE ACCENT
#   $hat       U+0302 COMBINING CIRCUMFLEX ACCENT
#   $breve     U+0306 COMBINING BREVE
#   $dot       U+0307 COMBINING DOT ABOVE
#   $caron     U+030C COMBINING CARON
#   $comma     U+0326 COMBINING COMMA BELOW
#   $under     U+0331 COMBINING MACRON BELOW
# The marks are written as separate combining characters because the text
# has already been decomposed by ":: NFD" when the rules run.
$modprime = \u02B9;
|
|||
|
$modprime2 = \u02BA;
|
|||
|
|
|||
|
$grave = \u0300;
|
|||
|
$acute = \u0301;
|
|||
|
$hat = \u0302;
|
|||
|
$breve = \u0306 ;
|
|||
|
$dot = \u0307 ;
|
|||
|
$caron = \u030C ;
|
|||
|
$comma = \u0326 ;
|
|||
|
$under = \u0331 ;
|
|||
|
|
|||
|
# move up so not masked
# Every active rule in this group maps a Cyrillic letter to a Latin base
# letter followed by a combining mark (e.g. a + $hat). Rules are tried in
# file order, so these are listed ahead of the plain base-letter rules in
# the "Normal order" section; otherwise, in the Latin-Cyrillic direction,
# the bare-letter rule (e.g. "a") would match first and mask the longer
# base+mark sequence.
# Lines of the form "# X <> XXX ;" are placeholders for letters that have
# no mapping yet (see the TODO at the top of the file).
|
|||
|
|
|||
|
я <> a $hat ; # CYRILLIC SMALL LETTER YA
|
|||
|
Я <> A $hat ; # CYRILLIC CAPITAL LETTER YA
|
|||
|
|
|||
|
ч <> c $caron ; # CYRILLIC SMALL LETTER CHE
|
|||
|
Ч <> C $caron; # CYRILLIC CAPITAL LETTER CHE
|
|||
|
# ҷ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER
|
|||
|
# Ҷ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
|
|||
|
# ӌ <> XXX ; # CYRILLIC SMALL LETTER KHAKASSIAN CHE
|
|||
|
# Ӌ <> XXX ; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
|
|||
|
# ҹ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
|
|||
|
# Ҹ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
|
|||
|
|
|||
|
э <> e $acute; # CYRILLIC SMALL LETTER E
|
|||
|
Э <> E $acute; # CYRILLIC CAPITAL LETTER E
|
|||
|
є <> e $hat; # CYRILLIC SMALL LETTER UKRAINIAN IE
|
|||
|
Є <> E $hat; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
|||
|
|
|||
|
ш <> s $caron ; # CYRILLIC SMALL LETTER SHA
|
|||
|
Ш <> S $caron ; # CYRILLIC CAPITAL LETTER SHA
|
|||
|
щ <> s $hat ; # CYRILLIC SMALL LETTER SHCHA
|
|||
|
Щ <> S $hat; # CYRILLIC CAPITAL LETTER SHCHA
|
|||
|
|
|||
|
ѕ <> z $hat ; # CYRILLIC SMALL LETTER DZE
|
|||
|
Ѕ <> Z $hat; # CYRILLIC CAPITAL LETTER DZE
|
|||
|
# ӡ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN DZE
|
|||
|
# Ӡ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
|
|||
|
|
|||
|
ю <> u $hat ; # CYRILLIC SMALL LETTER YU
|
|||
|
Ю <> U $hat ; # CYRILLIC CAPITAL LETTER YU
|
|||
|
|
|||
|
і <> i $acute; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
|||
|
І <> I $acute; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
|||
|
ј <> j $caron; # CYRILLIC SMALL LETTER JE
|
|||
|
Ј <> J $caron; # CYRILLIC CAPITAL LETTER JE
|
|||
|
|
|||
|
љ <> l $hat ; # CYRILLIC SMALL LETTER LJE
|
|||
|
Љ <> L $hat ; # CYRILLIC CAPITAL LETTER LJE
|
|||
|
њ <> n $hat ; # CYRILLIC SMALL LETTER NJE
|
|||
|
Њ <> N $hat ; # CYRILLIC CAPITAL LETTER NJE
|
|||
|
|
|||
|
ћ <> c $acute ; # CYRILLIC SMALL LETTER TSHE
|
|||
|
Ћ <> C $acute ; # CYRILLIC CAPITAL LETTER TSHE
|
|||
|
|
|||
|
џ <> d $hat ; # CYRILLIC SMALL LETTER DZHE
|
|||
|
Џ <> D $hat ; # CYRILLIC CAPITAL LETTER DZHE
|
|||
|
|
|||
|
# Normal order
# Letters A through KA, listed roughly alphabetically. Where a group holds
# both base+mark mappings and the bare base letter, the base+mark rules
# still come first (the GHE variants precede plain GHE; ZE WITH DESCENDER
# and ZHE precede plain ZE) so the bare-letter rule does not mask them.
# Schwa and the A-IE ligature map to single Latin letters written as
# \u escapes (U+0259, U+018F, U+00E6, U+00C6).
|
|||
|
|
|||
|
а <> a ; # CYRILLIC SMALL LETTER A
|
|||
|
А <> A ; # CYRILLIC CAPITAL LETTER A
|
|||
|
ә <> \u0259 ; # CYRILLIC SMALL LETTER SCHWA
|
|||
|
Ә <> \u018F ; # CYRILLIC CAPITAL LETTER SCHWA
|
|||
|
ӕ <> \u00E6 ; # CYRILLIC SMALL LIGATURE A IE
|
|||
|
Ӕ <> \u00C6 ; # CYRILLIC CAPITAL LIGATURE A IE
|
|||
|
б <> b ; # CYRILLIC SMALL LETTER BE
|
|||
|
Б <> B ; # CYRILLIC CAPITAL LETTER BE
|
|||
|
в <> v ; # CYRILLIC SMALL LETTER VE
|
|||
|
В <> V ; # CYRILLIC CAPITAL LETTER VE
|
|||
|
|
|||
|
ґ <> g $grave ; # CYRILLIC SMALL LETTER GHE WITH UPTURN
|
|||
|
Ґ <> G $grave ; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
|
|||
|
ғ <> g $dot ; # CYRILLIC SMALL LETTER GHE WITH STROKE
|
|||
|
Ғ <> G $dot; # CYRILLIC CAPITAL LETTER GHE WITH STROKE
|
|||
|
ҕ <> g $breve; # CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK
|
|||
|
Ҕ <> G $breve; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
|
|||
|
г <> g ; # CYRILLIC SMALL LETTER GHE
|
|||
|
Г <> G ; # CYRILLIC CAPITAL LETTER GHE
|
|||
|
|
|||
|
д <> d; # CYRILLIC SMALL LETTER DE
|
|||
|
Д <> D; # CYRILLIC CAPITAL LETTER DE
|
|||
|
ђ <> đ ; # CYRILLIC SMALL LETTER DJE
|
|||
|
Ђ <> Đ ; # CYRILLIC CAPITAL LETTER DJE
|
|||
|
ҙ <> z $comma ; # CYRILLIC SMALL LETTER ZE WITH DESCENDER
|
|||
|
Ҙ <> Z $comma ; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
|
|||
|
е <> e ; # CYRILLIC SMALL LETTER IE
|
|||
|
Е <> E; # CYRILLIC CAPITAL LETTER IE
|
|||
|
|
|||
|
ж <> z $caron; # CYRILLIC SMALL LETTER ZHE
|
|||
|
Ж <> Z $caron; # CYRILLIC CAPITAL LETTER ZHE
|
|||
|
|
|||
|
# җ <> XXX ; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER
|
|||
|
# Җ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
|
|||
|
|
|||
|
з <> z ; # CYRILLIC SMALL LETTER ZE
|
|||
|
З <> Z; # CYRILLIC CAPITAL LETTER ZE
|
|||
|
|
|||
|
й <> j ; # CYRILLIC SMALL LETTER SHORT I
|
|||
|
Й <> J ; # CYRILLIC CAPITAL LETTER SHORT I
|
|||
|
и <> i ; # CYRILLIC SMALL LETTER I
|
|||
|
И <> I ; # CYRILLIC CAPITAL LETTER I
|
|||
|
|
|||
|
к <> k ; # CYRILLIC SMALL LETTER KA
|
|||
|
К <> K; # CYRILLIC CAPITAL LETTER KA
|
|||
|
|
|||
|
# қ <> XXX ; # CYRILLIC SMALL LETTER KA WITH DESCENDER
|
|||
|
# Қ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
|
|||
|
# ӄ <> XXX ; # CYRILLIC SMALL LETTER KA WITH HOOK
|
|||
|
# Ӄ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH HOOK
|
|||
|
# ҡ <> XXX ; # CYRILLIC SMALL LETTER BASHKIR KA
|
|||
|
# Ҡ <> XXX ; # CYRILLIC CAPITAL LETTER BASHKIR KA
|
|||
|
# ҟ <> XXX ; # CYRILLIC SMALL LETTER KA WITH STROKE
|
|||
|
# Ҟ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH STROKE
|
|||
|
# ҝ <> XXX ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
|
|||
|
# Ҝ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
|
|||
|
# Letters EL through HA. Every active rule here is a two-way mapping
# between one Cyrillic letter and one unaccented Latin letter. The
# "# X <> XXX ;" lines are placeholders for variants with no mapping yet.
л <> l ; # CYRILLIC SMALL LETTER EL
|
|||
|
Л <> L; # CYRILLIC CAPITAL LETTER EL
|
|||
|
|
|||
|
м <> m ; # CYRILLIC SMALL LETTER EM
|
|||
|
М <> M ; # CYRILLIC CAPITAL LETTER EM
|
|||
|
н <> n ; # CYRILLIC SMALL LETTER EN
|
|||
|
Н <> N; # CYRILLIC CAPITAL LETTER EN
|
|||
|
# ң <> XXX ; # CYRILLIC SMALL LETTER EN WITH DESCENDER
|
|||
|
# Ң <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
|
|||
|
# ӈ <> XXX ; # CYRILLIC SMALL LETTER EN WITH HOOK
|
|||
|
# Ӈ <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH HOOK
|
|||
|
# ҥ <> XXX ; # CYRILLIC SMALL LIGATURE EN GHE
|
|||
|
# Ҥ <> XXX ; # CYRILLIC CAPITAL LIGATURE EN GHE
|
|||
|
|
|||
|
о <> o ; # CYRILLIC SMALL LETTER O
|
|||
|
О <> O ; # CYRILLIC CAPITAL LETTER O
|
|||
|
# ө <> XXX ; # CYRILLIC SMALL LETTER BARRED O
|
|||
|
# Ө <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
|
|||
|
п <> p ; # CYRILLIC SMALL LETTER PE
|
|||
|
П <> P ; # CYRILLIC CAPITAL LETTER PE
|
|||
|
# ҧ <> XXX ; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK
|
|||
|
# Ҧ <> XXX ; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
|
|||
|
# ҁ <> XXX ; # CYRILLIC SMALL LETTER KOPPA
|
|||
|
# Ҁ <> XXX ; # CYRILLIC CAPITAL LETTER KOPPA
|
|||
|
р <> r ; # CYRILLIC SMALL LETTER ER
|
|||
|
Р <> R ; # CYRILLIC CAPITAL LETTER ER
|
|||
|
# ҏ <> XXX ; # CYRILLIC SMALL LETTER ER WITH TICK
|
|||
|
# Ҏ <> XXX ; # CYRILLIC CAPITAL LETTER ER WITH TICK
|
|||
|
с <> s ; # CYRILLIC SMALL LETTER ES
|
|||
|
С <> S ; # CYRILLIC CAPITAL LETTER ES
|
|||
|
# ҫ <> XXX ; # CYRILLIC SMALL LETTER ES WITH DESCENDER
|
|||
|
# Ҫ <> XXX ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
|
|||
|
т <> t ; # CYRILLIC SMALL LETTER TE
|
|||
|
Т <> T ; # CYRILLIC CAPITAL LETTER TE
|
|||
|
# ҭ <> XXX ; # CYRILLIC SMALL LETTER TE WITH DESCENDER
|
|||
|
# Ҭ <> XXX ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
|
|||
|
|
|||
|
у <> u ; # CYRILLIC SMALL LETTER U
|
|||
|
У <> U ; # CYRILLIC CAPITAL LETTER U
|
|||
|
# ү <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U
|
|||
|
# Ү <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U
|
|||
|
# ұ <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
|
|||
|
# Ұ <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
|
|||
|
# ѹ <> XXX ; # CYRILLIC SMALL LETTER UK
|
|||
|
# Ѹ <> XXX ; # CYRILLIC CAPITAL LETTER UK
|
|||
|
ф <> f ; # CYRILLIC SMALL LETTER EF
|
|||
|
Ф <> F ; # CYRILLIC CAPITAL LETTER EF
|
|||
|
х <> h ; # CYRILLIC SMALL LETTER HA
|
|||
|
Х <> H; # CYRILLIC CAPITAL LETTER HA
|
|||
|
# ҳ <> XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER
|
|||
|
# Ҳ <> XXX ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
|
|||
|
# һ <> XXX ; # CYRILLIC SMALL LETTER SHHA
|
|||
|
# Һ <> XXX ; # CYRILLIC CAPITAL LETTER SHHA
|
|||
|
# ѡ <> XXX ; # CYRILLIC SMALL LETTER OMEGA
|
|||
|
# Ѡ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA
|
|||
|
# ѿ <> XXX ; # CYRILLIC SMALL LETTER OT
|
|||
|
# Ѿ <> XXX ; # CYRILLIC CAPITAL LETTER OT
|
|||
|
# ѽ <> XXX ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO
|
|||
|
# Ѽ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
|
|||
|
# ѻ <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA
|
|||
|
# Ѻ <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA
|
|||
|
# TSE, the hard and soft signs, and YERU.
# TSE maps to bare c/C; the c+mark mappings (CHE, TSHE) appear earlier in
# the file so this rule does not mask them.
ц <> c ; # CYRILLIC SMALL LETTER TSE
|
|||
|
Ц <> C; # CYRILLIC CAPITAL LETTER TSE
|
|||
|
# ҵ <> XXX ; # CYRILLIC SMALL LIGATURE TE TSE
|
|||
|
# Ҵ <> XXX ; # CYRILLIC CAPITAL LIGATURE TE TSE
|
|||
|
|
|||
|
# ҽ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE
|
|||
|
# Ҽ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
|
|||
|
# ҿ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER
|
|||
|
# Ҿ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
|
|||
|
|
|||
|
|
|||
|
# The prime characters have no case, so the capital signs are marked by an
# extra $under (U+0331) after the prime. Each capital rule precedes its
# lowercase rule so the longer "prime + $under" sequence is tried before
# the bare prime.
Ъ <> $modprime2 $under ; # CYRILLIC CAPITAL LETTER HARD SIGN
|
|||
|
ъ <> $modprime2 ; # CYRILLIC SMALL LETTER HARD SIGN
|
|||
|
Ь <> $modprime $under ; # CYRILLIC CAPITAL LETTER SOFT SIGN
|
|||
|
ь <> $modprime ; # CYRILLIC SMALL LETTER SOFT SIGN
|
|||
|
|
|||
|
ы <> y ; # CYRILLIC SMALL LETTER YERU
|
|||
|
Ы <> Y ; # CYRILLIC CAPITAL LETTER YERU
|
|||
|
|
|||
|
# ҍ <> XXX ; # CYRILLIC SMALL LETTER SEMISOFT SIGN
|
|||
|
# Ҍ <> XXX ; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
|
|||
|
# ѣ <> XXX ; # CYRILLIC SMALL LETTER YAT
|
|||
|
# Ѣ <> XXX ; # CYRILLIC CAPITAL LETTER YAT
|
|||
|
|
|||
|
# ѥ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED E
|
|||
|
# Ѥ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED E
|
|||
|
# ѧ <> XXX ; # CYRILLIC SMALL LETTER LITTLE YUS
|
|||
|
# Ѧ <> XXX ; # CYRILLIC CAPITAL LETTER LITTLE YUS
|
|||
|
# ѫ <> XXX ; # CYRILLIC SMALL LETTER BIG YUS
|
|||
|
# Ѫ <> XXX ; # CYRILLIC CAPITAL LETTER BIG YUS
|
|||
|
# ѩ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS
|
|||
|
# Ѩ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
|
|||
|
# ѭ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED BIG YUS
|
|||
|
# Ѭ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
|
|||
|
# ѯ <> XXX ; # CYRILLIC SMALL LETTER KSI
|
|||
|
# Ѯ <> XXX ; # CYRILLIC CAPITAL LETTER KSI
|
|||
|
# ѱ <> XXX ; # CYRILLIC SMALL LETTER PSI
|
|||
|
# Ѱ <> XXX ; # CYRILLIC CAPITAL LETTER PSI
|
|||
|
# ѳ <> XXX ; # CYRILLIC SMALL LETTER FITA
|
|||
|
# Ѳ <> XXX ; # CYRILLIC CAPITAL LETTER FITA
|
|||
|
# ѵ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
|
|||
|
# Ѵ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
|
|||
|
# ҩ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN HA
|
|||
|
# Ҩ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
|
|||
|
# Ӏ <> XXX ; # CYRILLIC LETTER PALOCHKA
|
|||
|
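# NOTE(review): the "###" entries below are precomposed letters that have canonical decompositions; after ":: NFD" they presumably reach the rules as base letter + combining mark, and each trailing comment names only that base letter -- confirm before adding explicit mappings.
### ӑ <> XXX ; # CYRILLIC SMALL LETTER A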
|
|||
|
### Ӑ <> XXX ; # CYRILLIC CAPITAL LETTER A
|
|||
|
### ӓ <> XXX ; # CYRILLIC SMALL LETTER A
|
|||
|
### Ӓ <> XXX ; # CYRILLIC CAPITAL LETTER A
|
|||
|
### ӛ <> XXX ; # CYRILLIC SMALL LETTER SCHWA
|
|||
|
### Ӛ <> XXX ; # CYRILLIC CAPITAL LETTER SCHWA
|
|||
|
### ѓ <> XXX ; # CYRILLIC SMALL LETTER GHE
|
|||
|
### Ѓ <> XXX ; # CYRILLIC CAPITAL LETTER GHE
|
|||
|
### ѐ <> XXX ; # CYRILLIC SMALL LETTER IE
|
|||
|
### Ѐ <> XXX ; # CYRILLIC CAPITAL LETTER IE
|
|||
|
### ё <> XXX ; # CYRILLIC SMALL LETTER IE
|
|||
|
### Ё <> XXX ; # CYRILLIC CAPITAL LETTER IE
|
|||
|
### ӗ <> XXX ; # CYRILLIC SMALL LETTER IE
|
|||
|
### Ӗ <> XXX ; # CYRILLIC CAPITAL LETTER IE
|
|||
|
### ӂ <> XXX ; # CYRILLIC SMALL LETTER ZHE
|
|||
|
### Ӂ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
|
|||
|
### ӝ <> XXX ; # CYRILLIC SMALL LETTER ZHE
|
|||
|
### Ӝ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
|
|||
|
### ӟ <> XXX ; # CYRILLIC SMALL LETTER ZE
|
|||
|
### Ӟ <> XXX ; # CYRILLIC CAPITAL LETTER ZE
|
|||
|
### ѝ <> XXX ; # CYRILLIC SMALL LETTER I
|
|||
|
### Ѝ <> XXX ; # CYRILLIC CAPITAL LETTER I
|
|||
|
### ӣ <> XXX ; # CYRILLIC SMALL LETTER I
|
|||
|
### Ӣ <> XXX ; # CYRILLIC CAPITAL LETTER I
|
|||
|
### ӥ <> XXX ; # CYRILLIC SMALL LETTER I
|
|||
|
### Ӥ <> XXX ; # CYRILLIC CAPITAL LETTER I
|
|||
|
### ї <> XXX ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
|||
|
### Ї <> XXX ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
|||
|
### ӧ <> XXX ; # CYRILLIC SMALL LETTER O
|
|||
|
### Ӧ <> XXX ; # CYRILLIC CAPITAL LETTER O
|
|||
|
### ӫ <> XXX ; # CYRILLIC SMALL LETTER BARRED O
|
|||
|
### Ӫ <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
|
|||
|
### ќ <> XXX ; # CYRILLIC SMALL LETTER KA
|
|||
|
### Ќ <> XXX ; # CYRILLIC CAPITAL LETTER KA
|
|||
|
### ӯ <> XXX ; # CYRILLIC SMALL LETTER U
|
|||
|
### Ӯ <> XXX ; # CYRILLIC CAPITAL LETTER U
|
|||
|
### ў <> XXX ; # CYRILLIC SMALL LETTER U
|
|||
|
### Ў <> XXX ; # CYRILLIC CAPITAL LETTER U
|
|||
|
### ӱ <> XXX ; # CYRILLIC SMALL LETTER U
|
|||
|
### Ӱ <> XXX ; # CYRILLIC CAPITAL LETTER U
|
|||
|
### ӳ <> XXX ; # CYRILLIC SMALL LETTER U
|
|||
|
### Ӳ <> XXX ; # CYRILLIC CAPITAL LETTER U
|
|||
|
### ӵ <> XXX ; # CYRILLIC SMALL LETTER CHE
|
|||
|
### Ӵ <> XXX ; # CYRILLIC CAPITAL LETTER CHE
|
|||
|
### ӹ <> XXX ; # CYRILLIC SMALL LETTER YERU
|
|||
|
### Ӹ <> XXX ; # CYRILLIC CAPITAL LETTER YERU
|
|||
|
### ӭ <> XXX ; # CYRILLIC SMALL LETTER E
|
|||
|
### Ӭ <> XXX ; # CYRILLIC CAPITAL LETTER E
|
|||
|
### ѷ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
|
|||
|
### Ѷ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
|
|||
|
|
|||
|
# Completeness
# One-way ("<") rules, used only in the Latin-Cyrillic direction, for the
# Latin letters q, w and x, which no rule above produces or consumes.
# Each is rewritten to other Latin letters; the leading "|" places the
# cursor before the replacement text so that text is then converted by the
# ordinary letter rules.
# $ignore matches any run of marks or apostrophes. The first two X rules
# use it to look past such characters: X becomes KS when the following or
# preceding letter is uppercase, and otherwise falls through to Ks.
|
|||
|
$ignore = [[:Mark:]''] * ;
|
|||
|
| k < q ;
|
|||
|
| K < Q ;
|
|||
|
| u < w ;
|
|||
|
| U < W ;
|
|||
|
| KS < X } $ignore [:UppercaseLetter:] ;
|
|||
|
| KS < [:UppercaseLetter:] $ignore { X ;
|
|||
|
| Ks < X ;
|
|||
|
| ks < x ;
|
|||
|
|
|||
|
# Forward direction: recompose the output (NFC) after the mapping rules.
# The parenthesized NFD is the normalization used at this position when the
# file is applied in the reverse (Latin-Cyrillic) direction.
:: NFC (NFD) ;
|
|||
|
# note: a global filter is more efficient, but MUST include all source chars!!
|
|||
|
# :: ([\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]]);
|
|||
|
# MINIMAL FILTER: Latin-Cyrillic
# The whole set is parenthesized, so it is the global filter for the reverse
# direction only. It begins with \u0308, as the WARNING near the top of the
# file requires.
|
|||
|
:: ( [\u0308A-Za-z\u00C0-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u018F\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0259\u02B9-\u02BA\u0300-\u0302\u0306-\u0307\u030C\u0326\u0331\u0340-\u0341\u0344\u0374\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0400\u0403\u040C-\u040E\u0419\u0439\u0450\u0453\u045C-\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F02-\u1F05\u1F0A-\u1F0D\u1F12-\u1F15\u1F1A-\u1F1D\u1F22-\u1F25\u1F2A-\u1F2D\u1F32-\u1F35\u1F3A-\u1F3D\u1F42-\u1F45\u1F4A-\u1F4D\u1F52-\u1F55\u1F5B\u1F5D\u1F62-\u1F65\u1F6A-\u1F6D\u1F70-\u1F7D\u1F82-\u1F85\u1F8A-\u1F8D\u1F92-\u1F95\u1F9A-\u1F9D\u1FA2-\u1FA5\u1FAA-\u1FAD\u1FB0\u1FB2\u1FB4\u1FB8\u1FBA-\u1FBB\u1FC2\u1FC4\u1FC8-\u1FCB\u1FCD-\u1FCE\u1FD0\u1FD2-\u1FD3\u1FD8\u1FDA-\u1FDB\u1FDD-\u1FDE\u1FE0\u1FE2-\u1FE3\u1FE8\u1FEA-\u1FEB\u1FED-\u1FEE\u1FF2\u1FF4\u1FF8-\u1FFB\u212A-\u212B] ) ;
|