scuffed-code/icu4c/source/data/translit/InterIndic_Latin.txt
2016-09-19 05:09:40 +00:00

496 lines
11 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
#
# File: InterIndic_Latin.txt
# Generated from CLDR
#
# InterIndic-Latin
# Variables naming the InterIndic private-use code points \uE001-\uE050.
# \uE000 reserved
# Signs
$chandrabindu = \uE001;
$anusvara = \uE002;
$visarga = \uE003;
# \uE004 reserved
# Independent vowels: a name of the form w<vowel> is the stand-alone form
$wa = \uE005;
$waa = \uE006;
$wi = \uE007;
$wii = \uE008;
$wu = \uE009;
$wuu = \uE00A;
$wr = \uE00B;
$wl = \uE00C;
$wce = \uE00D;  # LETTER CANDRA E
$wse = \uE00E;  # LETTER SHORT E
$we = \uE00F;   # ए LETTER E
$wai = \uE010;
$wco = \uE011;  # LETTER CANDRA O
$wso = \uE012;  # LETTER SHORT O
$wo = \uE013;   # ओ LETTER O
$wau = \uE014;
# Consonants
$ka = \uE015;
$kha = \uE016;
$ga = \uE017;
$gha = \uE018;
$nga = \uE019;
$ca = \uE01A;
$cha = \uE01B;
$ja = \uE01C;
$jha = \uE01D;
$nya = \uE01E;
$tta = \uE01F;
$ttha = \uE020;
$dda = \uE021;
$ddha = \uE022;
$nna = \uE023;
$ta = \uE024;
$tha = \uE025;
$da = \uE026;
$dha = \uE027;
$na = \uE028;
$ena = \uE029;  # compatibility
$pa = \uE02A;
$pha = \uE02B;
$ba = \uE02C;
$bha = \uE02D;
$ma = \uE02E;
$ya = \uE02F;
$ra = \uE030;
$vva = \uE081;  # listed here although its code point is out of sequence
$rra = \uE031;
$la = \uE032;
$lla = \uE033;
$ela = \uE034;  # compatibility
$va = \uE035;
$sha = \uE036;
$ssa = \uE037;
$sa = \uE038;
$ha = \uE039;
# \uE03A reserved (counterpart of \u093A)
# \uE03B reserved (counterpart of \u093B)
$nukta = \uE03C;
$avagraha = \uE03D;  # SIGN AVAGRAHA
# Dependent vowels: a bare <vowel> name is the dependent (vowel sign) form
$aa = \uE03E;
$i = \uE03F;
$ii = \uE040;
$u = \uE041;
$uu = \uE042;
$rh = \uE043;
$rrh = \uE044;
$ce = \uE045;  # VOWEL SIGN CANDRA E
$se = \uE046;  # VOWEL SIGN SHORT E
$e = \uE047;
$ai = \uE048;
$co = \uE049;  # VOWEL SIGN CANDRA O
$so = \uE04A;  # VOWEL SIGN SHORT O
$o = \uE04B;   # ो
$au = \uE04C;
$virama = \uE04D;
# \uE04E reserved (counterpart of \u094E)
# \uE04F reserved (counterpart of \u094F)
$om = \uE050;  # OM
# Unmapped accent signs: each is removed from the output.
#   \uE051 STRESS SIGN UDATTA
#   \uE052 STRESS SIGN ANUDATTA
#   \uE053 GRAVE ACCENT
#   \uE054 ACUTE ACCENT
[\uE051-\uE054] → ;
# Length marks
$lm = \uE055;    # Telugu Length Mark
$ailm = \uE056;  # AI Length Mark
$aulm = \uE057;  # AU Length Mark
# Urdu compatibility forms
$uka = \uE058;
$ukha = \uE059;
$ugha = \uE05A;
$ujha = \uE05B;
$uddha = \uE05C;
$udha = \uE05D;
$ufa = \uE05E;
$uya = \uE05F;
# Additional independent vowels and vowel signs
$wrr = \uE060;
$wll = \uE061;
$lh = \uE062;
$llh = \uE063;
# Punctuation
$danda = \uE064;
$doubleDanda = \uE065;
# Digits
$zero = \uE066;   # DIGIT ZERO
$one = \uE067;    # DIGIT ONE
$two = \uE068;    # DIGIT TWO
$three = \uE069;  # DIGIT THREE
$four = \uE06A;   # DIGIT FOUR
$five = \uE06B;   # DIGIT FIVE
$six = \uE06C;    # DIGIT SIX
$seven = \uE06D;  # DIGIT SEVEN
$eight = \uE06E;  # DIGIT EIGHT
$nine = \uE06F;   # DIGIT NINE
# Glottal stop
$dgs = \uE082;
# Khanda-ta
$kta = \uE083;
# Dependent vowel code point ranges.
# NOTE(review): neither set is referenced by the rules visible in this file.
$depVowelAbove = [\uE03E-\uE040 \uE045-\uE04C];
$depVowelBelow = [\uE041-\uE044];
# $x was originally called '§'; $z was '%'
# $x: all dependent vowel signs; used as the "followed by a vowel sign" context.
$x = [$aa $ai $au $ii $i $uu $u $rrh $rh $lh $llh $e $o $se $ce $so $co];
# $z: lower-case ASCII letters other than a, e, i, o, u.
$z = [bcdfghjklmnpqrstvwxyz];
# $vowels: preceding-context set for the independent vowel rules.
$vowels = [aeiour\u0304\u0325\u0306];
# $forceIndependentMatra: anything that is neither a letter nor in
# \u0300-\u034C; preceding-context set for the dependent vowel rules.
$forceIndependentMatra = [^[[:L:][\u0300-\u034C]]];
######################################################################
# convert from Native letters to Latin letters
######################################################################
# Anusvara: the Latin nasal is selected by the consonant that follows.
# The following consonant is context only and is not consumed.
$anusvara } [$ka $kha $ga $gha $nga] → n\u0307;
$anusvara } [$ca $cha $ja $jha $nya] → n\u0304;
$anusvara } [$tta $ttha $dda $ddha $nna] → n\u0323;
$anusvara } [$ta $tha $da $dha $na] → n;
$anusvara } [$pa $pha $ba $bha $ma] → m;
$anusvara } [$ya $ra $lla $la $va $ssa $sha $sa $ha] → n;
# Fallback when none of the consonants above follows
$anusvara → m\u0307;
# Urdu compatibility: consonant + nukta sequences and the dedicated
# compatibility code points. Most letters have three rules, tried in order:
#   before a vowel sign ($x, context only) → bare consonant
#   before a virama (consumed)             → bare consonant
#   otherwise                              → consonant + inherent a
# These rules must stay ahead of the plain consonant rules so that the
# longer consonant + nukta sequences are matched first.

# ya + nukta
$ya $nukta } $x → y\u0307;
$ya $nukta $virama → y\u0307;
$ya $nukta → y\u0307a;

# la + nukta
$la $nukta } $x → l\u0331;
$la $nukta $virama → l\u0331;
$la $nukta → l\u0331a;

# na + nukta, and its compatibility form
$na $nukta } $x → n\u0331;
$na $nukta $virama → n\u0331;
$na $nukta → n\u0331a;
$ena } $x → n\u0331;
$ena $virama → n\u0331;
$ena → n\u0331a;

# ka + nukta
# NOTE(review): $uka has no vowel-sign or virama variant, unlike $ufa/$uya — confirm intended.
$uka → qa;
$ka $nukta } $x → q;
$ka $nukta $virama → q;
$ka $nukta → qa;

# kha + nukta
# NOTE(review): $ukha has no vowel-sign variant — confirm intended.
$kha $nukta } $x → k\u0331h\u0331;
$kha $nukta $virama → k\u0331h\u0331;
$kha $nukta → k\u0331h\u0331a;
$ukha $virama → k\u0331h\u0331;
$ukha → k\u0331h\u0331a;

# ga + nukta
# NOTE(review): $ugha has no vowel-sign or virama variant — confirm intended.
$ugha → g\u0307a;
$ga $nukta } $x → g\u0307;
$ga $nukta $virama → g\u0307;
$ga $nukta → g\u0307a;

# ja + nukta
# NOTE(review): $ujha has no vowel-sign or virama variant — confirm intended.
$ujha → za;
$ja $nukta } $x → z;
$ja $nukta $virama → z;
$ja $nukta → za;

# ddha + nukta, dda + nukta and their compatibility forms
# NOTE(review): $udha produces the same Latin as $uddha (r\u0323a) and has no
# vowel-sign or virama variant, while $ddha $nukta produces r\u0323h — confirm intended.
$ddha $nukta } $x → r\u0323h;
$ddha $nukta $virama → r\u0323h;
$ddha $nukta → r\u0323ha;
$uddha } $x → r\u0323;
$uddha $virama → r\u0323;
$uddha → r\u0323a;
$udha → r\u0323a;
$dda $nukta } $x → r\u0323;
$dda $nukta $virama → r\u0323;
$dda $nukta → r\u0323a;

# pha + nukta, and its compatibility form
$pha $nukta } $x → f;
$pha $nukta $virama → f;
$pha $nukta → fa;
$ufa } $x → f;
$ufa $virama → f;
$ufa → fa;

# ra + nukta
$ra $nukta } $x → r\u0331;
$ra $nukta $virama → r\u0331;
$ra $nukta → r\u0331a;

# lla + nukta, and its compatibility form
$lla $nukta } $x → l\u0331;
$lla $nukta $virama → l\u0331;
$lla $nukta → l\u0331a;
$ela } $x → l\u0331;
$ela $virama → l\u0331;
$ela → l\u0331a;

# ya compatibility form
$uya } $x → y\u0307;
$uya $virama → y\u0307;
$uya → y\u0307a;
# Normal consonants. For a consonant C the rules are tried in this order:
#   C virama, followed by ha → c + apostrophe (only for some consonants;
#                              '' in a rule is a quoted apostrophe)
#   C followed by a vowel sign ($x, context only) → c
#   C virama → c
#   C        → c + inherent a
$ka $virama } $ha → k'';
$ka } $x → k;
$ka $virama → k;
$ka → ka;

$kha } $x → kh;
$kha $virama → kh;
$kha → kha;

$ga $virama } $ha → g'';
$ga } $x → g;
$ga $virama → g;
$ga → ga;

$gha } $x → gh;
$gha $virama → gh;
$gha → gha;

$nga } $x → n\u0307;
$nga $virama → n\u0307;
$nga → n\u0307a;

$ca $virama } $ha → c'';
$ca } $x → c;
$ca $virama → c;
$ca → ca;

$cha } $x → ch;
$cha $virama → ch;
$cha → cha;

$ja $virama } $ha → j'';
$ja } $x → j;
$ja $virama → j;
$ja → ja;

$jha } $x → jh;
$jha $virama → jh;
$jha → jha;

$nya } $x → n\u0303;
$nya $virama → n\u0303;
$nya → n\u0303a;

$tta $virama } $ha → t\u0323'';
$tta } $x → t\u0323;
$tta $virama → t\u0323;
$tta → t\u0323a;

$ttha } $x → t\u0323h;
$ttha $virama → t\u0323h;
$ttha → t\u0323ha;
# dda. When dda + virama is followed by ha, an apostrophe is emitted after
# the consonant so that the result (d\u0323 + ' + h...) cannot be confused
# with d\u0323h, which is the output for $ddha. This mirrors the
# "$tta$virama}$ha" rule; the previous form "$dda}$x$ha" never matched the
# virama case and inserted a stray apostrophe for dda + vowel sign + ha.
$dda$virama}$ha→d\u0323'';
# dda before a vowel sign: bare consonant, the sign is handled by a later rule
$dda}$x→d\u0323;
# dda + virama: bare consonant
$dda$virama→d\u0323;
# dda alone: consonant plus inherent a
$dda→d\u0323a;
# Consonants ddha through ma. Per consonant, in order: optional
# "virama before specific consonants → apostrophe" rule ('' is a quoted
# apostrophe), then vowel-sign context, then virama, then inherent a.
$ddha } $x → d\u0323h;
$ddha $virama → d\u0323h;
$ddha → d\u0323ha;

$nna } $x → n\u0323;
$nna $virama → n\u0323;
$nna → n\u0323a;

# ta + virama before ha, ttha, tta or tha gets an apostrophe
$ta $virama } [$ha $ttha $tta $tha] → t'';
$ta } $x → t;
$ta $virama → t;
$ta → ta;

$tha } $x → th;
$tha $virama → th;
$tha → tha;

# da + virama before ha, ddha, dda or dha gets an apostrophe
$da $virama } [$ha $ddha $dda $dha] → d'';
$da } $x → d;
$da $virama → d;
$da → da;

$dha } $x → dh;
$dha $virama → dh;
$dha → dha;

# na + virama before ga or ya gets an apostrophe
$na $virama } [$ga $ya] → n'';
$na } $x → n;
$na $virama → n;
$na → na;

$pa $virama } $ha → p'';
$pa } $x → p;
$pa $virama → p;
$pa → pa;

$pha } $x → ph;
$pha $virama → ph;
$pha → pha;

$ba $virama } $ha → b'';
$ba } $x → b;
$ba $virama → b;
$ba → ba;

$bha } $x → bh;
$bha $virama → bh;
$bha → bha;

# ma + virama before another ma gets an apostrophe
$ma $virama } $ma → m'';
$ma } $x → m;
$ma $virama → m;
$ma → ma;
# Consonants ya through ha. Per consonant, in order: optional
# "virama before specific consonants → apostrophe" rule ('' is a quoted
# apostrophe), then vowel-sign context, then virama, then inherent a.
$ya } $x → y;
$ya $virama → y;
$ya → ya;

$ra $virama } $ha → r'';
$ra } $x → r;
$ra $virama → r;
$ra → ra;

$vva $virama } $ha → w\u0307'';
$vva } $x → w\u0307;
$vva $virama → w\u0307;
$vva → w\u0307a;

$rra $virama } $ha → r\u0331'';
$rra } $x → r\u0331;
$rra $virama → r\u0331;
$rra → r\u0331a;

$la $virama } $ha → l'';
$la } $x → l;
$la $virama → l;
$la → la;

$lla $virama } $ha → l\u0323'';
$lla } $x → l\u0323;
$lla $virama → l\u0323;
$lla → l\u0323a;

$va } $x → v;
$va $virama → v;
$va → va;

# sa + virama before ha, sha, ssa or sa gets an apostrophe
$sa $virama } [$ha $sha $ssa $sa] → s'';
$sa } $x → s;
$sa $virama → s;
# for Gurmukhi: sa + nukta
$sa $nukta } $x → s\u0301;
$sa $nukta $virama → s\u0301;
$sa $nukta → s\u0301a;
$sa → sa;

$sha } $x → s\u0301;
$sha $virama → s\u0301;
$sha → s\u0301a;

$ssa } $x → s\u0323;
$ssa $virama → s\u0323;
$ssa → s\u0323a;

$ha } $x → h;
$ha $virama → h;
$ha → ha;
# Dependent vowels (should never occur except following consonants).
# When the preceding character is in $forceIndependentMatra, the Latin
# output is prefixed with \u0314.
$forceIndependentMatra { $aa  → \u0314a\u0304;
$forceIndependentMatra { $ai  → \u0314ai;
$forceIndependentMatra { $au  → \u0314au;
$forceIndependentMatra { $ii  → \u0314i\u0304;
$forceIndependentMatra { $i   → \u0314i;
$forceIndependentMatra { $uu  → \u0314u\u0304;
$forceIndependentMatra { $u   → \u0314u;
$forceIndependentMatra { $rrh → \u0314r\u0325\u0304;
$forceIndependentMatra { $rh  → \u0314r\u0325;
$forceIndependentMatra { $llh → \u0314l\u0325\u0304;
$forceIndependentMatra { $lh  → \u0314l\u0325;
$forceIndependentMatra { $e   → \u0314e\u0304;
$forceIndependentMatra { $o   → \u0314o\u0304;
# extra vowels
$forceIndependentMatra { $ce  → \u0314e\u0306;
$forceIndependentMatra { $co  → \u0314o\u0306;
$forceIndependentMatra { $se  → \u0314e;
$forceIndependentMatra { $so  → \u0314o;
# Nukta and virama cannot appear independently or as first character:
# in that position they are dropped.
$forceIndependentMatra { [$nukta $virama] → ;

# Dependent vowels in every other position
$aa  → a\u0304;
$ai  → ai;
$au  → au;
$ii  → i\u0304;
$i   → i;
$uu  → u\u0304;
$u   → u;
$rrh → r\u0325\u0304;
$rh  → r\u0325;
$llh → l\u0325\u0304;
$lh  → l\u0325;
$e   → e\u0304;
$o   → o\u0304;
# extra vowels
$ce  → e\u0306;
$co  → o\u0306;
$se  → e;
$so  → o;
# Independent vowel immediately followed by a dependent vowel sign.
# Such input is generally illegal and is handled only for round-tripping:
# the independent vowel is written with a trailing \u0314. The vowel sign
# itself is context only and is not consumed here.
$waa } $x → a\u0304\u0314;
$wai } $x → ai\u0314;
$wau } $x → au\u0314;
$wii } $x → i\u0304\u0314;
$wi  } $x → i\u0314;
$wuu } $x → u\u0304\u0314;
$wu  } $x → u\u0314;
$wrr } $x → r\u0325\u0304\u0314;
$wr  } $x → r\u0325\u0314;
$wll } $x → l\u0325\u0304\u0314;
$wl  } $x → l\u0325\u0314;
$we  } $x → e\u0304\u0314;
$wo  } $x → o\u0304\u0314;
$wa  } $x → a\u0314;
# extra vowels
$wce } $x → e\u0306\u0314;
$wco } $x → o\u0306\u0314;
$wse } $x → e\u0314;
$wso } $x → o\u0314;
# OM before a vowel sign ('' is a quoted apostrophe)
$om  } $x → ''om\u0314;
# Independent vowels when preceded by a character in $vowels: an
# apostrophe ('' is a quoted apostrophe) is written before the vowel.
$vowels { $waa → ''a\u0304;
$vowels { $wai → ''ai;
$vowels { $wau → ''au;
$vowels { $wii → ''i\u0304;
$vowels { $wi  → ''i;
$vowels { $wuu → ''u\u0304;
$vowels { $wu  → ''u;
$vowels { $wrr → ''r\u0325\u0304;
$vowels { $wr  → ''r\u0325;
$vowels { $wll → ''l\u0325\u0304;
$vowels { $wl  → ''l\u0325;
$vowels { $we  → ''e\u0304;
$vowels { $wo  → ''o\u0304;
$vowels { $wa  → ''a;
# extra vowels
$vowels { $wce → ''e\u0306;
$vowels { $wco → ''o\u0306;
$vowels { $wse → ''e;
$vowels { $wso → ''o;
# Independent vowels in all remaining positions
$waa  → a\u0304;
$wai  → ai;
$wau  → au;
$wii  → i\u0304;
$wi   → i;
$wuu  → u\u0304;
$wu   → u;
$wrr  → r\u0325\u0304;
$wr   → r\u0325;
$wll  → l\u0325\u0304;
$wl   → l\u0325;
$we   → e\u0304;
$wo   → o\u0304;
$wa   → a;
# extra vowels
$wce  → e\u0306;
$wco  → o\u0306;
$wse  → e;
$wso  → o;
# OM is written with a leading apostrophe ('' is a quoted apostrophe)
$om   → ''om;
# Signs: avagraha, chandrabindu (with and without anusvara), visarga
$avagraha → \u0315;
$chandrabindu $anusvara → \u0303;
$chandrabindu → m\u0310;
$visarga → h\u0323;
# Digits
$zero  → 0;
$one   → 1;
$two   → 2;
$three → 3;
$four  → 4;
$five  → 5;
$six   → 6;
$seven → 7;
$eight → 8;
$nine  → 9;
# Length marks produce no output
[$lm $ailm $aulm] → ;
# Glottal stop and khanda-ta
$dgs → ʔ;
$kta → t\u0331;
# Danda and double danda both become a full stop
[$danda $doubleDanda] → '.';
# ABBREVIATION SIGN produces no output
\uE070 → ;
# LETTER RA WITH MIDDLE DIAGONAL
# Before a vowel sign ($x, context only) only the bare consonant is
# written; the vowel sign supplies the vowel. Emitting "ra" here would put
# an inherent a in front of the sign's own output (e.g. ra + sign i → "rai").
\uE071}$x→r;
\uE071$virama→r;
\uE071→ra;
# LETTER RA WITH LOWER DIAGONAL
# Same three cases as above: vowel sign follows, virama follows, otherwise.
\uE072}$x→r;
\uE072$virama→r;
\uE072→ra;
# Symbols with no Latin output: each is removed.
#   \uE073 RUPEE MARK
#   \uE074 RUPEE SIGN
#   \uE075 CURRENCY NUMERATOR ONE
#   \uE076 CURRENCY NUMERATOR TWO
#   \uE077 CURRENCY NUMERATOR THREE
#   \uE078 CURRENCY NUMERATOR FOUR
#   \uE079 CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
#   \uE07A CURRENCY DENOMINATOR SIXTEEN
#   \uE07B ISSHAR
#   \uE07C TIPPI
#   \uE07D ADDAK
#   \uE07E IRI
#   \uE07F URA
#   \uE080 EK ONKAR
[\uE073-\uE080] → ;
# DEVANAGARI VOWEL SIGN SHORT A
\uE004 → ;