scuffed-code/icu4c/source/data/translit/Latin_InterIndic.txt
2004-08-02 20:06:55 +00:00

384 lines
8.9 KiB
Plaintext

#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Latin-InterIndic
#:: NFD;
#\ue000 reserved
#consonants
# Variable definitions: each name below stands for a single InterIndic
# code point in the private-use range that starts at \ue000. The rules
# further down refer to these names instead of raw code points.
# Sign characters:
$chandrabindu=\ue001;
$anusvara=\ue002;
$visarga=\ue003;
#\ue004 reserved
# w<vowel> represents the stand-alone form
$wa=\ue005;
$waa=\ue006;
$wi=\ue007;
$wii=\ue008;
$wu=\ue009;
$wuu=\ue00a;
$wr=\ue00b;
$wl=\ue00c;
$wce=\ue00d; # LETTER CANDRA E
$wse=\ue00e; # LETTER SHORT E
$we=\ue00f; # \u090f LETTER E
$wai=\ue010;
$wco=\ue011; # LETTER CANDRA O
$wso=\ue012; # LETTER SHORT O
$wo=\ue013; # \u0913 LETTER O
$wau=\ue014;
# Consonant letters, assigned consecutively from \ue015 ($ka) to \ue039 ($ha),
# except $vva (see note below).
$ka=\ue015;
$kha=\ue016;
$ga=\ue017;
$gha=\ue018;
$nga=\ue019;
$ca=\ue01a;
$cha=\ue01b;
$ja=\ue01c;
$jha=\ue01d;
$nya=\ue01e;
$tta=\ue01f;
$ttha=\ue020;
$dda=\ue021;
$ddha=\ue022;
$nna=\ue023;
$ta=\ue024;
$tha=\ue025;
$da=\ue026;
$dha=\ue027;
$na=\ue028;
$ena=\ue029; #compatibility
$pa=\ue02a;
$pha=\ue02b;
$ba=\ue02c;
$bha=\ue02d;
$ma=\ue02e;
$ya=\ue02f;
$ra=\ue030;
$rra=\ue031;
$la=\ue032;
$lla=\ue033;
$ela=\ue034; #compatibility
$va=\ue035;
# NOTE: $vva is the only letter here that is out of sequence (\ue081); it
# lies outside the \ue015-\ue039 run, so a range written as $ka-$ha does
# not contain it.
$vva=\ue081;
$sha=\ue036;
$ssa=\ue037;
$sa=\ue038;
$ha=\ue039;
#\u093a Reserved
#\u093b Reserved
$nukta=\ue03c;
$avagraha=\ue03d; # SIGN AVAGRAHA
# <vowel> represents the dependent form
# Dependent vowel signs occupy \ue03e-\ue04c; $virama follows at \ue04d.
# There is no dependent sign for short "a": the rules below simply drop
# the virama (or the \u0314 marker) when a plain "a" follows.
$aa=\ue03e;
$i=\ue03f;
$ii=\ue040;
$u=\ue041;
$uu=\ue042;
$rh=\ue043;
$lh=\ue044;
$ce=\ue045; #VOWEL SIGN CANDRA E
$se=\ue046; #VOWEL SIGN SHORT E
$e=\ue047;
$ai=\ue048;
$co=\ue049; # VOWEL SIGN CANDRA O
$so=\ue04a; # VOWEL SIGN SHORT O
$o=\ue04b; # \u094b
$au=\ue04c;
$virama=\ue04d;
# \u094e Reserved
# \u094f Reserved
$om = \ue050; # OM
# The four entries below are commented out: these signs have no mapping here.
# \u0951>; # UNMAPPED STRESS SIGN UDATTA
# \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
# \u0953>; # UNMAPPED GRAVE ACCENT
# \u0954>; # UNMAPPED ACUTE ACCENT
# Length marks. No rule in this file produces $lm, $ailm or $aulm.
$lm = \ue055;# Telugu Length Mark
$ailm=\ue056;# AI Length Mark
$aulm=\ue057;# AU Length Mark
#Urdu compatibility forms
# Additional consonant forms (\ue058-\ue05f). They are produced by the
# q, k\u0331h\u0331, g\u0307, z, r\u0323h, r\u0323, f and y\u0307 rules
# further down.
$uka=\ue058;
$ukha=\ue059;
$ugha=\ue05a;
$ujha=\ue05b;
$uddha=\ue05c;
$udha=\ue05d;
$ufa=\ue05e;
$uya=\ue05f;
# Long vocalic r / l: w-prefixed names are the stand-alone forms,
# the h-suffixed names are the dependent forms (same convention as above).
$wrr=\ue060;
$wll=\ue061;
$rrh=\ue062;
$llh=\ue063;
# Punctuation.
$danda=\ue064;
$doubleDanda=\ue065;
# Digits 0-9 map to \ue066-\ue06f.
$zero=\ue066; # DIGIT ZERO
$one=\ue067; # DIGIT ONE
$two=\ue068; # DIGIT TWO
$three=\ue069; # DIGIT THREE
$four=\ue06a; # DIGIT FOUR
$five=\ue06b; # DIGIT FIVE
$six=\ue06c; # DIGIT SIX
$seven=\ue06d; # DIGIT SEVEN
$eight=\ue06e; # DIGIT EIGHT
$nine=\ue06f; # DIGIT NINE
# For all other scripts
# Sixteen extra code points \ue070-\ue07f. No rule in this file refers to
# any of the $ecp* variables.
$ecp0=\ue070;
$ecp1=\ue071;
$ecp2=\ue072;
$ecp3=\ue073;
$ecp4=\ue074;
$ecp5=\ue075;
$ecp6=\ue076;
$ecp7=\ue077;
$ecp8=\ue078;
$ecp9=\ue079;
$ecpA=\ue07a;
$ecpB=\ue07b;
$ecpC=\ue07c;
$ecpD=\ue07d;
$ecpE=\ue07e;
$ecpF=\ue07f;
# \u0970>; # UNMAPPED ABBREVIATION SIGN
# Character sets used as rule contexts.
# $depVowelAbove / $depVowelBelow split the dependent vowel signs into two
# groups; they decide whether a following '~' becomes anusvara or
# chandrabindu (see the '~' rules near the danda rules).
$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];
$depVowelBelow=[\ue041-\ue044];
# Sentence-ending punctuation; a virama directly before one of these is deleted.
$endThing=[$danda$doubleDanda];
# $x was originally called '&'; $z was '%'
# $x: virama plus the dependent vowel signs. Not referenced by any rule
# visible in this file.
$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];
# $z: the lowercase Latin consonant letters.
$z=[bcdfghjklmnpqrstvwxyz];
# $consonants: InterIndic consonants $ka..$ha, the Latin consonants in $z,
# and the xx15-xx39 / xx95-xxb9 ranges of the blocks \u0900 through \u0d00.
# NOTE(review): $vva (\ue081) is outside [$ka-$ha] and therefore not a
# member of this set - confirm whether that is intended.
$consonants=[[$ka-$ha]$z[\u0915-\u0939][\u0995-\u09b9][\u0a15-\u0a39][\u0a95-\u0ab9][\u0b15-\u0b39][\u0b95-\u0bb9][\u0c15-\u0c39][\u0c95-\u0cb9][\u0d15-\u0d39]];
# Conversion rules start here. Rules are tried in file order and the first
# one that matches at the cursor wins, so the relative order of rules that
# share a prefix matters throughout the rest of this file.
# U+0315 (combining comma above right) -> avagraha.
\u0315 > $avagraha;
# U+0303 (combining tilde) emits two code points: chandrabindu then anusvara.
# NOTE(review): confirm that emitting both signs is intended.
\u0303>$chandrabindu$anusvara;
# m + U+0310 (combining candrabindu) -> chandrabindu.
m\u0310>$chandrabindu;
# h + U+0323 (combining dot below) -> visarga.
h\u0323>$visarga;
# Latin x expands to ka + virama + sa.
x>$ka$virama$sa;
# convert to independent forms at start of word or syllable:
# dependent forms for roundtrip
# NOTE(review): despite the first header line above, every rule in this group
# outputs a *dependent* vowel sign. Each key begins with U+0314 (combining
# reversed comma above), which acts as an explicit marker in the Latin text
# and is consumed by the rule.
# Longer keys come first (e.g. a + macron before plain a, "ii" before "i")
# so the shorter rule cannot pre-empt the longer one.
\u0314a\u0304>$aa;
\u0314ai>$ai;
\u0314au>$au;
\u0314ii>$ii;
\u0314i\u0304>$ii;
\u0314i>$i;
\u0314u\u0304>$uu;
\u0314u>$u;
\u0314r\u0325\u0304>$rrh;
\u0314r\u0325>$rh;
\u0314l\u0325\u0304>$llh;
\u0314lh>$lh;
\u0314l\u0325>$lh;
\u0314e\u0304>$e;
\u0314o\u0304>$o;
# Marker + plain "a" produces no output at all.
\u0314a>;
\u0314e\u0306>$ce;
\u0314o\u0306>$co;
\u0314e>$se;
\u0314o>$so;
# preceded by consonants
# "ctx { key > out" matches key only when ctx comes immediately before it;
# the context itself is left unchanged. Here a Latin vowel that directly
# follows any member of $consonants becomes the dependent vowel sign.
# Spaces outside quotes are not significant.
$consonants{ a\u0304>$aa;
$consonants{ ai>$ai;
$consonants{ au>$au;
$consonants{ ii>$ii;
$consonants{ i\u0304>$ii;
$consonants{ i>$i;
$consonants{ u\u0304>$uu;
$consonants{ u>$u;
$consonants{ r\u0325\u0304>$rrh;
# r + ring below followed by "a": the "a" is consumed as part of the key.
$consonants{ r\u0325a>$rh;
$consonants{ r\u0325>$rh;
$consonants{ l\u0325\u0304>$llh;
$consonants{ lh>$lh;
$consonants{ l\u0325>$lh;
$consonants{ e\u0304>$e;
$consonants{ o\u0304>$o;
$consonants{ e\u0306>$ce;
$consonants{ o\u0306>$co;
# Unmarked e / o map to the SHORT E / SHORT O signs; the macron forms above
# map to $e / $o.
$consonants{ e>$se;
$consonants{ o>$so;
# e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
# Context-free vowel rules: a Latin vowel that was not claimed by one of the
# earlier, context-restricted rules becomes the stand-alone (w-prefixed)
# form. Keys with a combining mark or a second letter precede the bare
# letter so the longest spelling wins.
a\u0304>$waa;
ai>$wai;
au>$wau;
i\u0304>$wii;
i>$wi;
u\u0304>$wuu;
u>$wu;
r\u0325\u0304>$wrr;
r\u0325>$wr;
l\u0325\u0304>$wll;
lh>$wl;
l\u0325>$wl;
e\u0304>$we;
o\u0304>$wo;
a>$wa;
e\u0306>$wce;
o\u0306>$wco;
e>$wse;
# '' outside a quoted string is a literal apostrophe: apostrophe + "om"
# maps to the OM sign.
''om>$om;
o>$wso;
# rules for anusvara
# "key } ctx > out" matches key only when ctx follows; ctx is not consumed.
# These rules must stay ahead of the plain n / m consonant rules below,
# otherwise those would match first.
# Exceptions: n before vocalic r / l (r or l + ring below) or before "na"
# stays an ordinary na + virama. '|' places the cursor before the virama so
# that the virama-initial rules later in the file can act on it.
n}r\u0325 > $na|$virama;
n}l\u0325 > $na|$virama;
n}na > $na|$virama;
# n + dot above, before k / g or before another n + dot above.
n\u0307}[kg] > $anusvara;
n\u0307}n\u0307 > $anusvara;
# n + macron, before c / j or before n + tilde.
n\u0304}[cj] > $anusvara;
n\u0304}n\u0303 > $anusvara;
# n + dot below, before t / d / n carrying a dot below.
n\u0323}[tdn]\u0323 > $anusvara;
# Plain n before t / d / n, and plain m before p / b / m.
n}[tdn] > $anusvara;
m}[pbm] > $anusvara;
# Plain n before y, l, v, s, h or r.
n}[ylvshr] > $anusvara;
# m + dot above is always anusvara, regardless of what follows.
m\u0307 > $anusvara;
#urdu compatibility
# Every consonant rule emits "<consonant> | $virama": the consonant plus a
# virama, with the cursor ('|') left *before* the virama. The virama-initial
# rules later in the file then either merge a following vowel into a
# dependent sign, drop the virama before "a", or step over it.
q>$uka|$virama;
k\u0331h\u0331>$ukha |$virama;
g\u0307> $ugha | $virama;
z > $ujha |$virama;
f > $ufa|$virama;
# dev
# Letters carrying a combining mark, and digraphs / trigraphs, are listed
# before the corresponding bare letter (e.g. t\u0323h, t\u0323, tth, th, t)
# because the first matching rule wins.
y\u0307>$uya|$virama;
l\u0331>$ela|$virama;
n\u0331>$ena|$virama;
n\u0307>$nga|$virama;
n\u0303>$nya|$virama;
n\u0323>$nna|$virama;
t\u0323h>$ttha|$virama;
t\u0323>$tta|$virama;
r\u0323h>$udha|$virama;
r\u0323>$uddha|$virama;
d\u0323h>$ddha|$virama;
d\u0323>$dda|$virama;
kh>$kha|$virama;
k>$ka|$virama;
gh>$gha|$virama;
g>$ga|$virama;
ch>$cha|$virama;
c>$ca|$virama;
jh>$jha|$virama;
j>$ja|$virama;
# ASCII spellings: "ny", "tth" and "ddh" give the same letters as
# n\u0303, t\u0323h and d\u0323h above.
ny>$nya|$virama;
tth>$ttha|$virama;
ddh>$ddha|$virama;
th>$tha|$virama;
t>$ta|$virama;
dh>$dha|$virama;
d>$da|$virama;
n>$na|$virama;
ph>$pha|$virama;
p>$pa|$virama;
bh>$bha|$virama;
b>$ba|$virama;
m>$ma|$virama;
y>$ya|$virama;
r\u0331>$rra|$virama;
r>$ra|$virama;
l\u0323>$lla|$virama;
l>$la|$virama;
v>$va|$virama;
# w + dot above is the only source of $vva; plain w is treated like v.
w\u0307>$vva|$virama;
w>$va|$virama;
# "sh" and s + acute both give $sha; "ss" and s + dot below both give $ssa.
sh>$sha|$virama;
ss>$ssa|$virama;
s\u0323>$ssa|$virama;
s\u0301>$sha|$virama;
s>$sa|$virama;
h>$ha|$virama;
# A full stop becomes a danda.
'.'>$danda;
# An InterIndic danda followed by a full stop becomes a double danda.
# NOTE(review): the key starts with $danda, so the cursor must be *on* a
# danda for this to match. The rule above leaves the cursor after the danda
# it produced, so for ".." input the second '.' appears to be handled by the
# rule above instead - confirm whether this rule ever fires.
$danda'.'>$doubleDanda;
# '~' after a sign in $depVowelAbove becomes anusvara; after a sign in
# $depVowelBelow it becomes chandrabindu. '~' in any other position is not
# handled by this file.
$depVowelAbove{'~'>$anusvara;
$depVowelBelow{'~'>$chandrabindu;
# convert to dependent forms after consonant with no vowel:
# e.g. kai -> {ka}{virama}ai -> {ka}{ai}
# The consonant rules leave the cursor on the virama they emit, so these
# keys (virama + Latin vowel) are what actually attach a vowel to the
# preceding consonant: virama and vowel together are replaced by the
# dependent sign. Lines that are commented out are disabled alternative
# ASCII spellings and are kept as-is.
#$virama aa>$aa;
$virama a\u0304>$aa;
$virama ai>$ai;
$virama au>$au;
$virama ii>$ii;
$virama i\u0304>$ii;
$virama i>$i;
#$virama uu>$uu;
$virama u\u0304>$uu;
$virama u>$u;
#$virama rrh>$rrh;
$virama r\u0325\u0304>$rrh;
#$virama rh>$rh;
$virama r\u0325a>$rh;
$virama r\u0325>$rh;
$virama l\u0325\u0304>$llh;
$virama lh>$lh;
$virama l\u0325>$lh;
$virama e\u0304>$e;
$virama o\u0304>$o;
# Virama + plain "a": both are removed, leaving the bare consonant.
# Must come after the a\u0304 / ai / au rules above.
$virama a>;
$virama e\u0306>$ce;
$virama o\u0306>$co;
$virama e>$se;
$virama o>$so;
# otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
# '' is a literal apostrophe. The key is virama + apostrophe + vowel; the
# whole key is replaced by the stand-alone vowel alone.
# NOTE(review): since $virama is part of the key and not of the output, the
# virama is removed, which matches the "k'ai -> {ka}{wai}" example given
# earlier in this file rather than the example on the line above - confirm
# which is intended.
#$virama''aa>$waa;
$virama''a\u0304>$waa;
$virama''ai>$wai;
$virama''au>$wau;
#$virama''ii>$wii;
$virama''i\u0304>$wii;
$virama''i>$wi;
#$virama''uu>$wuu;
$virama''u\u0304>$wuu;
$virama''u>$wu;
#$virama''rrh>$wrr;
$virama''r\u0325\u0304>$wrr;
#$virama''rh>$wr;
$virama''r\u0325>$wr;
$virama''l\u0325\u0304>$wll;
#$virama''lh>$wl;
$virama''l\u0325>$wl;
$virama''e\u0304>$we;
$virama''o\u0304>$wo;
$virama''a>$wa;
$virama''e\u0306>$wce;
$virama''o\u0306>$wco;
$virama''e>$wse;
$virama''o>$wso;
# no virama
# Apostrophe + vowel with no virama at the cursor: the apostrophe is consumed
# and the stand-alone vowel is emitted. Marked / longer spellings precede the
# bare letter, as elsewhere in this file.
''a\u0304>$waa;
''ai>$wai;
''au>$wau;
''i\u0304>$wii;
''i>$wi;
''u\u0304>$wuu;
''u>$wu;
''r\u0325\u0304>$wrr;
''r\u0325>$wr;
''l\u0325\u0304>$wll;
''l\u0325>$wl;
''e\u0304>$we;
''o\u0304>$wo;
''a>$wa;
''e\u0306>$wce;
''o\u0306>$wco;
''e>$wse;
''o>$wso;
# Virama clean-up. The consonant rules leave the cursor on the virama; when
# no vowel rule claimed it, one of these rules applies.
# Before a Latin consonant or a space the virama is kept: it is replaced by
# itself, which moves the cursor past it.
$virama } [$z] > $virama;
$virama } ' ' > $virama ;
# Before a danda or double danda the virama is deleted.
$virama}$endThing>;
# ASCII digits map to the InterIndic digits.
0>$zero;
1>$one;
2>$two;
3>$three;
4>$four;
5>$five;
6>$six;
7>$seven;
8>$eight;
9>$nine;
# Any apostrophe not consumed by an earlier rule is removed. This must stay
# last among the apostrophe rules because it matches a bare apostrophe.
''>;
#:: NFC (NFD) ;