scuffed-code/icu4c/source/data/translit/InterIndic_Latin.txt
2004-08-02 20:06:55 +00:00

529 lines
14 KiB
Plaintext

#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Latin
# \ue000 is reserved
# signs
$chandrabindu = \ue001;
$anusvara     = \ue002;
$visarga      = \ue003;
# \ue004: no variable is defined (a rule near the end of this file maps it to nothing)
# independent (stand-alone) vowels are named w<vowel>
$wa  = \ue005;
$waa = \ue006;
$wi  = \ue007;
$wii = \ue008;
$wu  = \ue009;
$wuu = \ue00a;
$wr  = \ue00b;
$wl  = \ue00c;
$wce = \ue00d; # LETTER CANDRA E
$wse = \ue00e; # LETTER SHORT E
$we  = \ue00f; # \u090f LETTER E
$wai = \ue010;
$wco = \ue011; # LETTER CANDRA O
$wso = \ue012; # LETTER SHORT O
$wo  = \ue013; # \u0913 LETTER O
$wau = \ue014;
# consonants
$ka   = \ue015;
$kha  = \ue016;
$ga   = \ue017;
$gha  = \ue018;
$nga  = \ue019;
$ca   = \ue01a;
$cha  = \ue01b;
$ja   = \ue01c;
$jha  = \ue01d;
$nya  = \ue01e;
$tta  = \ue01f;
$ttha = \ue020;
$dda  = \ue021;
$ddha = \ue022;
$nna  = \ue023;
$ta   = \ue024;
$tha  = \ue025;
$da   = \ue026;
$dha  = \ue027;
$na   = \ue028;
$ena  = \ue029; # compatibility
$pa   = \ue02a;
$pha  = \ue02b;
$ba   = \ue02c;
$bha  = \ue02d;
$ma   = \ue02e;
$ya   = \ue02f;
$ra   = \ue030;
# $vva lives outside the contiguous range used by the other consonants
$vva  = \ue081;
$rra  = \ue031;
$la   = \ue032;
$lla  = \ue033;
$ela  = \ue034; # compatibility
$va   = \ue035;
$sha  = \ue036;
$ssa  = \ue037;
$sa   = \ue038;
$ha   = \ue039;
# \u093a Reserved
# \u093b Reserved
$nukta    = \ue03c;
$avagraha = \ue03d; # SIGN AVAGRAHA
# dependent vowel signs are named by the bare <vowel>
$aa = \ue03e;
$i  = \ue03f;
$ii = \ue040;
$u  = \ue041;
$uu = \ue042;
$rh = \ue043;
$lh = \ue044;
$ce = \ue045; # VOWEL SIGN CANDRA E
$se = \ue046; # VOWEL SIGN SHORT E
$e  = \ue047;
$ai = \ue048;
$co = \ue049; # VOWEL SIGN CANDRA O
$so = \ue04a; # VOWEL SIGN SHORT O
$o  = \ue04b; # \u094b
$au = \ue04c;
$virama = \ue04d;
# \u094e Reserved
# \u094f Reserved
$om = \ue050; # OM
# the four signs below have no Latin equivalent and are removed
\ue051 > ; # UNMAPPED STRESS SIGN UDATTA
\ue052 > ; # UNMAPPED STRESS SIGN ANUDATTA
\ue053 > ; # UNMAPPED GRAVE ACCENT
\ue054 > ; # UNMAPPED ACUTE ACCENT
$lm   = \ue055; # Telugu Length Mark
$ailm = \ue056; # AI Length Mark
$aulm = \ue057; # AU Length Mark
# Urdu compatibility forms
$uka   = \ue058;
$ukha  = \ue059;
$ugha  = \ue05a;
$ujha  = \ue05b;
$uddha = \ue05c;
$udha  = \ue05d;
$ufa   = \ue05e;
$uya   = \ue05f;
# additional independent vowels and vowel signs
$wrr = \ue060;
$wll = \ue061;
$rrh = \ue062;
$llh = \ue063;
# punctuation
$danda       = \ue064;
$doubleDanda = \ue065;
# digits
$zero  = \ue066; # DIGIT ZERO
$one   = \ue067; # DIGIT ONE
$two   = \ue068; # DIGIT TWO
$three = \ue069; # DIGIT THREE
$four  = \ue06a; # DIGIT FOUR
$five  = \ue06b; # DIGIT FIVE
$six   = \ue06c; # DIGIT SIX
$seven = \ue06d; # DIGIT SEVEN
$eight = \ue06e; # DIGIT EIGHT
$nine  = \ue06f; # DIGIT NINE
# \u0970>; # UNMAPPED ABBREVIATION SIGN
# Two groupings of the dependent vowel signs by code point range.
# Neither set is referenced by any rule visible in this file.
$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];
$depVowelBelow=[\ue041-\ue044];
# $x was originally called '&'; $z was '%'
# $x lists every dependent vowel sign. The consonant rules use it as a
# following context: a consonant directly before a member of $x is written
# without the trailing a.
$x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co];
# $z lists the Latin consonant letters; no rule visible in this file uses it.
$z=[bcdfghjklmnpqrstvwxyz];
# Latin letters and combining marks used as the preceding context of the
# independent-vowel rules that emit an apostrophe separator.
$vowels=[aeiour\u0304\u0325\u0306];
# Everything that is neither a letter nor a combining mark in \u0300-\u034c.
# Used as the preceding context for vowel signs, nukta and virama that do not
# follow a consonant.
$forceIndependentMatra = [^[[:L:][\u0300-\u034c]]];
######################################################################
# convert from InterIndic (private-use) letters to Latin letters
######################################################################
# Anusvara: the Latin letter is selected by the consonant that follows it
$anusvara } [$ka $kha $ga $gha $nga]                 > n\u0307 ; # velar follows
$anusvara } [$ca $cha $ja $jha $nya]                 > n\u0304 ; # palatal follows
$anusvara } [$tta $ttha $dda $ddha $nna]             > n\u0323 ; # retroflex follows
$anusvara } [$ta $tha $da $dha $na]                  > n ;       # dental follows
$anusvara } [$pa $pha $ba $bha $ma]                  > m ;       # labial follows
$anusvara } [$ya $ra $lla $la $va $ssa $sha $sa $ha] > n ;
# none of the consonants listed above follows
$anusvara                                            > m\u0307 ;
# Urdu compatibility letters.
# Each letter has three rules, tried in this order:
#   1. a dependent vowel ($x) follows  -> consonant only
#   2. a virama follows (and is eaten) -> consonant only
#   3. otherwise                       -> consonant plus inherent a
$ya $nukta } $x     > y\u0307 ;
$ya $nukta $virama  > y\u0307 ;
$ya $nukta          > y\u0307a ;

$la $nukta } $x     > l\u0331 ;
$la $nukta $virama  > l\u0331 ;
$la $nukta          > l\u0331a ;

$na $nukta } $x     > n\u0331 ;
$na $nukta $virama  > n\u0331 ;
$na $nukta          > n\u0331a ;

$ena } $x           > n\u0331 ;
$ena $virama        > n\u0331 ;
$ena                > n\u0331a ;
# q, k\u0331h\u0331, g\u0307 and z: each can arrive either as a single
# compatibility letter ($uka, $ukha, $ugha, $ujha) or as base consonant + nukta.
# Both spellings get the same three rules so that they romanize identically:
#   before a dependent vowel ($x) -> no inherent a
#   before a virama (consumed)    -> no inherent a
#   otherwise                     -> inherent a appended
# Without the first two rules a compatibility letter followed by a vowel
# sign or virama would be written with a spurious a.
$uka }$x > q ;
$uka$virama > q ;
$uka > qa ;
$ka$nukta }$x > q ;
$ka$nukta$virama > q ;
$ka$nukta > qa ;
$kha$nukta }$x > k\u0331h\u0331 ;
$kha$nukta$virama > k\u0331h\u0331 ;
$kha$nukta > k\u0331h\u0331a ;
$ukha }$x > k\u0331h\u0331 ;
$ukha$virama > k\u0331h\u0331;
$ukha > k\u0331h\u0331a;
$ugha }$x > g\u0307 ;
$ugha$virama > g\u0307 ;
$ugha > g\u0307a ;
$ga$nukta }$x > g\u0307 ;
$ga$nukta$virama > g\u0307 ;
$ga$nukta > g\u0307a ;
$ujha }$x > z ;
$ujha$virama > z ;
$ujha > za ;
$ja$nukta }$x > z ;
$ja$nukta$virama > z ;
$ja$nukta > za ;
# dda / ddha with nukta, and the matching single compatibility letters.
# Three rules per spelling: before a dependent vowel ($x), before a virama
# (consumed), and the default with inherent a.
$ddha$nukta}$x > r\u0323h ;
$ddha$nukta$virama > r\u0323h ;
$ddha$nukta > r\u0323ha;
$uddha}$x > r\u0323 ;
$uddha$virama > r\u0323 ;
$uddha > r\u0323a;
# NOTE(review): \ue05d sits at the same offset as U+095D RHA, which is
# ddha + nukta, so it is romanized like $ddha$nukta (with h) rather than like
# $dda$nukta - confirm against the script-to-InterIndic tables.
$udha}$x > r\u0323h ;
$udha$virama > r\u0323h ;
$udha > r\u0323ha ;
$dda$nukta}$x > r\u0323 ;
$dda$nukta$virama > r\u0323 ;
$dda$nukta > r\u0323a ;
# f, r\u0331, l\u0331 and y\u0307: base consonant + nukta, or a single
# compatibility letter. Rule order per letter: before a dependent vowel,
# before a virama (consumed), default with inherent a.
$pha $nukta } $x    > f ;
$pha $nukta $virama > f ;
$pha $nukta         > fa ;
$ufa } $x           > f ;
$ufa $virama        > f ;
$ufa                > fa ;

$ra $nukta } $x     > r\u0331 ;
$ra $nukta $virama  > r\u0331 ;
$ra $nukta          > r\u0331a ;

$lla $nukta } $x    > l\u0331 ;
$lla $nukta $virama > l\u0331 ;
$lla $nukta         > l\u0331a ;
$ela } $x           > l\u0331 ;
$ela $virama        > l\u0331 ;
$ela                > l\u0331a ;

$uya } $x           > y\u0307 ;
$uya $virama        > y\u0307 ;
$uya                > y\u0307a ;
# normal consonants
# Per consonant, in order: before a dependent vowel ($x) the bare consonant;
# with a virama (consumed) the bare consonant; otherwise consonant + a.
# An unaspirated stop + virama directly before $ha gets an apostrophe, so the
# output differs from the romanization of the aspirated letter.
$ka $virama } $ha > k'' ;
$ka } $x          > k ;
$ka $virama       > k ;
$ka               > ka ;

$kha } $x         > kh ;
$kha $virama      > kh ;
$kha              > kha ;

$ga $virama } $ha > g'' ;
$ga } $x          > g ;
$ga $virama       > g ;
$ga               > ga ;

$gha } $x         > gh ;
$gha $virama      > gh ;
$gha              > gha ;

$nga } $x         > n\u0307 ;
$nga $virama      > n\u0307 ;
$nga              > n\u0307a ;
# palatals: ca cha ja jha nya
# (apostrophe after c / j when a virama is followed by $ha)
$ca $virama } $ha > c'' ;
$ca } $x          > c ;
$ca $virama       > c ;
$ca               > ca ;

$cha } $x         > ch ;
$cha $virama      > ch ;
$cha              > cha ;

$ja $virama } $ha > j'' ;
$ja } $x          > j ;
$ja $virama       > j ;
$ja               > ja ;

$jha } $x         > jh ;
$jha $virama      > jh ;
$jha              > jha ;

$nya } $x         > n\u0303 ;
$nya $virama      > n\u0303 ;
$nya              > n\u0303a ;
# retroflexes: tta ttha dda ddha nna
# Per consonant, in order: before a dependent vowel ($x) the bare consonant;
# with a virama (consumed) the bare consonant; otherwise consonant + a.
# tta / dda + virama directly before $ha get an apostrophe so the result
# differs from the romanization of ttha / ddha.
$tta$virama}$ha>t\u0323'';
$tta}$x>t\u0323;
$tta$virama>t\u0323;
$tta>t\u0323a;
$ttha}$x>t\u0323h;
$ttha$virama>t\u0323h;
$ttha>t\u0323ha;
# The context must be virama-then-ha, exactly as for $tta. Matching
# "$dda } $x $ha" instead would put an apostrophe between dda and a vowel sign
# and would never fire for dda + virama + ha.
$dda$virama}$ha>d\u0323'';
$dda}$x>d\u0323;
$dda$virama>d\u0323;
$dda>d\u0323a;
$ddha}$x>d\u0323h;
$ddha$virama>d\u0323h;
$ddha>d\u0323ha;
$nna}$x>n\u0323 ;
$nna$virama>n\u0323;
$nna>n\u0323a ;
# dentals: ta tha da dha na
# ta / da + virama get an apostrophe before $ha and before the listed
# retroflex or aspirated letters; na + virama gets one before $ga and $ya.
$ta $virama } $ha   > t'' ;
$ta $virama } $ttha > t'' ;
$ta $virama } $tta  > t'' ;
$ta $virama } $tha  > t'' ;
$ta } $x            > t ;
$ta $virama         > t ;
$ta                 > ta ;

$tha } $x           > th ;
$tha $virama        > th ;
$tha                > tha ;

$da $virama } $ha   > d'' ;
$da $virama } $ddha > d'' ;
$da $virama } $dda  > d'' ;
$da $virama } $dha  > d'' ;
$da } $x            > d ;
$da $virama         > d ;
$da                 > da ;

$dha } $x           > dh ;
$dha $virama        > dh ;
$dha                > dha ;

$na $virama } $ga   > n'' ;
$na $virama } $ya   > n'' ;
$na } $x            > n ;
$na $virama         > n ;
$na                 > na ;
# labials: pa pha ba bha ma
# pa / ba + virama get an apostrophe before $ha; ma + virama gets one
# before another $ma.
$pa $virama } $ha > p'' ;
$pa } $x          > p ;
$pa $virama       > p ;
$pa               > pa ;

$pha } $x         > ph ;
$pha $virama      > ph ;
$pha              > pha ;

$ba $virama } $ha > b'' ;
$ba } $x          > b ;
$ba $virama       > b ;
$ba               > ba ;

$bha } $x         > bh ;
$bha $virama      > bh ;
$bha              > bha ;

$ma $virama } $ma > m'' ;
$ma } $x          > m ;
$ma $virama       > m ;
$ma               > ma ;
# ya ra vva rra la lla va
# Same three-rule pattern as the stops; ra, vva, rra, la and lla also
# get an apostrophe when their virama is directly followed by $ha.
$ya } $x           > y ;
$ya $virama        > y ;
$ya                > ya ;

$ra $virama } $ha  > r'' ;
$ra } $x           > r ;
$ra $virama        > r ;
$ra                > ra ;

$vva $virama } $ha > w\u0307'' ;
$vva } $x          > w\u0307 ;
$vva $virama       > w\u0307 ;
$vva               > w\u0307a ;

$rra $virama } $ha > r\u0331'' ;
$rra } $x          > r\u0331 ;
$rra $virama       > r\u0331 ;
$rra               > r\u0331a ;

$la $virama } $ha  > l'' ;
$la } $x           > l ;
$la $virama        > l ;
$la                > la ;

$lla $virama } $ha > l\u0323'' ;
$lla } $x          > l\u0323 ;
$lla $virama       > l\u0323 ;
$lla               > l\u0323a ;

$va } $x           > v ;
$va $virama        > v ;
$va                > va ;
# sibilants and ha
# sa + virama gets an apostrophe before $ha and before any sibilant.
$sa $virama } $ha   > s'' ;
$sa $virama } $sha  > s'' ;
$sa $virama } $ssa  > s'' ;
$sa $virama } $sa   > s'' ;
$sa } $x            > s ;
$sa $virama         > s ;
# for Gurmukhi: sa + nukta is written like sha.
# These rules must stay ahead of the plain "$sa > sa" fallback.
$sa $nukta } $x     > s\u0301 ;
$sa $nukta $virama  > s\u0301 ;
$sa $nukta          > s\u0301a ;
$sa                 > sa ;

$sha } $x           > s\u0301 ;
$sha $virama        > s\u0301 ;
$sha                > s\u0301a ;

$ssa } $x           > s\u0323 ;
$ssa $virama        > s\u0323 ;
$ssa                > s\u0323a ;

$ha } $x            > h ;
$ha $virama         > h ;
$ha                 > ha ;
# Dependent vowel signs with no consonant in front of them (should never
# occur in well-formed text). The output is prefixed with \u0314 to mark them.
$forceIndependentMatra { $aa  > \u0314a\u0304 ;
$forceIndependentMatra { $ai  > \u0314ai ;
$forceIndependentMatra { $au  > \u0314au ;
$forceIndependentMatra { $ii  > \u0314i\u0304 ;
$forceIndependentMatra { $i   > \u0314i ;
$forceIndependentMatra { $uu  > \u0314u\u0304 ;
$forceIndependentMatra { $u   > \u0314u ;
$forceIndependentMatra { $rrh > \u0314r\u0325\u0304 ;
$forceIndependentMatra { $rh  > \u0314r\u0325 ;
$forceIndependentMatra { $llh > \u0314l\u0325\u0304 ;
$forceIndependentMatra { $lh  > \u0314l\u0325 ;
$forceIndependentMatra { $e   > \u0314e\u0304 ;
$forceIndependentMatra { $o   > \u0314o\u0304 ;
# extra vowels (candra and short e / o)
$forceIndependentMatra { $ce  > \u0314e\u0306 ;
$forceIndependentMatra { $co  > \u0314o\u0306 ;
$forceIndependentMatra { $se  > \u0314e ;
$forceIndependentMatra { $so  > \u0314o ;
# nukta and virama are dropped in this position
$forceIndependentMatra { $nukta  > ; # Nukta cannot appear independently or as first character
$forceIndependentMatra { $virama > ; # Virama cannot appear independently or as first character
# dependent vowel signs in every other position
$aa  > a\u0304 ;
$ai  > ai ;
$au  > au ;
$ii  > i\u0304 ;
$i   > i ;
$uu  > u\u0304 ;
$u   > u ;
$rrh > r\u0325\u0304 ;
$rh  > r\u0325 ;
$llh > l\u0325\u0304 ;
$lh  > l\u0325 ;
$e   > e\u0304 ;
$o   > o\u0304 ;
# extra vowels (candra and short e / o)
$ce  > e\u0306 ;
$co  > o\u0306 ;
$se  > e ;
$so  > o ;
# Independent vowel (or om) directly followed by a dependent vowel sign.
# Generally illegal; handled only so that the text round-trips. The output
# carries a trailing \u0314.
$waa } $x > a\u0304\u0314 ;
$wai } $x > ai\u0314 ;
$wau } $x > au\u0314 ;
$wii } $x > i\u0304\u0314 ;
$wi  } $x > i\u0314 ;
$wuu } $x > u\u0304\u0314 ;
$wu  } $x > u\u0314 ;
$wrr } $x > r\u0325\u0304\u0314 ;
$wr  } $x > r\u0325\u0314 ;
$wll } $x > l\u0325\u0304\u0314 ;
$wl  } $x > l\u0325\u0314 ;
$we  } $x > e\u0304\u0314 ;
$wo  } $x > o\u0304\u0314 ;
$wa  } $x > a\u0314 ;
# extra vowels (candra and short e / o)
$wce } $x > e\u0306\u0314 ;
$wco } $x > o\u0306\u0314 ;
$wse } $x > e\u0314 ;
$wso } $x > o\u0314 ;
$om  } $x > ''om\u0314 ;
# Independent vowels when preceded by a member of $vowels in the already
# converted text: an apostrophe is written in front of the vowel.
$vowels { $waa > ''a\u0304 ;
$vowels { $wai > ''ai ;
$vowels { $wau > ''au ;
$vowels { $wii > ''i\u0304 ;
$vowels { $wi  > ''i ;
$vowels { $wuu > ''u\u0304 ;
$vowels { $wu  > ''u ;
$vowels { $wrr > ''r\u0325\u0304 ;
$vowels { $wr  > ''r\u0325 ;
$vowels { $wll > ''l\u0325\u0304 ;
$vowels { $wl  > ''l\u0325 ;
$vowels { $we  > ''e\u0304 ;
$vowels { $wo  > ''o\u0304 ;
$vowels { $wa  > ''a ;
# extra vowels (candra and short e / o)
$vowels { $wce > ''e\u0306 ;
$vowels { $wco > ''o\u0306 ;
$vowels { $wse > ''e ;
$vowels { $wso > ''o ;
# Independent vowels in every other position
$waa > a\u0304 ;
$wai > ai ;
$wau > au ;
$wii > i\u0304 ;
$wi  > i ;
$wuu > u\u0304 ;
$wu  > u ;
$wrr > r\u0325\u0304 ;
$wr  > r\u0325 ;
$wll > l\u0325\u0304 ;
$wl  > l\u0325 ;
$we  > e\u0304 ;
$wo  > o\u0304 ;
$wa  > a ;
# extra vowels (candra and short e / o)
$wce > e\u0306 ;
$wco > o\u0306 ;
$wse > e ;
$wso > o ;
# om is always written with a leading apostrophe
$om  > ''om ;
# other signs
$avagraha               > \u0315 ;
# chandrabindu + anusvara must be tried before chandrabindu alone
$chandrabindu $anusvara > \u0303 ;
$chandrabindu           > m\u0310 ;
$visarga                > h\u0323 ;
# digits
$zero  > 0 ;
$one   > 1 ;
$two   > 2 ;
$three > 3 ;
$four  > 4 ;
$five  > 5 ;
$six   > 6 ;
$seven > 7 ;
$eight > 8 ;
$nine  > 9 ;
# length marks are removed
$lm   > ;
$ailm > ;
$aulm > ;
# danda and double danda both become a full stop
$danda       > '.' ;
$doubleDanda > '.' ;
\ue070 > ; # ABBREVIATION SIGN
# Both RA variants follow the same three-rule pattern as every other
# consonant in this file: before a dependent vowel ($x) or with a virama
# (consumed) only r is written; otherwise r plus the inherent a.
# Writing ra before a vowel sign would produce a spurious a ahead of the vowel.
# LETTER RA WITH MIDDLE DIAGONAL
\ue071}$x>r;
\ue071$virama>r;
\ue071>ra;
# LETTER RA WITH LOWER DIAGONAL
\ue072}$x>r;
\ue072$virama>r;
\ue072>ra;
# Characters with no Latin equivalent: each one is mapped to nothing
\ue073 > ; # RUPEE MARK
\ue074 > ; # RUPEE SIGN
\ue075 > ; # CURRENCY NUMERATOR ONE
\ue076 > ; # CURRENCY NUMERATOR TWO
\ue077 > ; # CURRENCY NUMERATOR THREE
\ue078 > ; # CURRENCY NUMERATOR FOUR
\ue079 > ; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A > ; # CURRENCY DENOMINATOR SIXTEEN
\ue07B > ; # ISSHAR
\uE07C > ; # TIPPI
\uE07D > ; # ADDAK
\uE07E > ; # IRI
\uE07F > ; # URA
\uE080 > ; # EK ONKAR
\uE004 > ; # DEVANAGARI LETTER SHORT A (\u0904)