scuffed-code/icu4c/source/data/translit/t_InterIndic_Latn.txt
2001-11-13 09:09:14 +00:00

390 lines
10 KiB
Plaintext

// -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Latin.txt
// Date: Tue Nov 13 00:51:08 2001
//--------------------------------------------------------------------
// InterIndic_Latin
translit_InterIndic_Latin {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 2001-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// InterIndic-Latin
// :: NFD (NFC) ;
// Variable definitions: each $name is bound to a single code point in the
// \ue0xx range used by this file for the InterIndic intermediate encoding.
// The rules further down refer to the code points only through these names.
//\ue000 reserved
// signs
"$chandrabindu=\ue001;"
"$anusvara=\ue002;"
"$visarga=\ue003;"
//\ue004 reserved
// w<vowel> represents the stand-alone form
"$wa=\ue005;"
"$waa=\ue006;"
"$wi=\ue007;"
"$wii=\ue008;"
"$wu=\ue009;"
"$wuu=\ue00a;"
"$wr=\ue00b;"
"$wl=\ue00c;"
"$wce=\ue00d;" // LETTER CANDRA E
"$wse=\ue00e;" // LETTER SHORT E
"$we=\ue00f;" // \u090f LETTER E
"$wai=\ue010;"
"$wco=\ue011;" // LETTER CANDRA O
"$wso=\ue012;" // LETTER SHORT O
"$wo=\ue013;" // \u0913 LETTER O
"$wau=\ue014;"
// consonants ($ka through $ha; this contiguous range is what the
// $consonants set later in this file relies on via [$ka-$ha ...])
"$ka=\ue015;"
"$kha=\ue016;"
"$ga=\ue017;"
"$gha=\ue018;"
"$nga=\ue019;"
"$ca=\ue01a;"
"$cha=\ue01b;"
"$ja=\ue01c;"
"$jha=\ue01d;"
"$nya=\ue01e;"
"$tta=\ue01f;"
"$ttha=\ue020;"
"$dda=\ue021;"
"$ddha=\ue022;"
"$nna=\ue023;"
"$ta=\ue024;"
"$tha=\ue025;"
"$da=\ue026;"
"$dha=\ue027;"
"$na=\ue028;"
"$ena=\ue029;" //compatibility
"$pa=\ue02a;"
"$pha=\ue02b;"
"$ba=\ue02c;"
"$bha=\ue02d;"
"$ma=\ue02e;"
"$ya=\ue02f;"
"$ra=\ue030;"
"$rra=\ue031;"
"$la=\ue032;"
"$lla=\ue033;"
"$ela=\ue034;" //compatibility
"$va=\ue035;"
"$sha=\ue036;"
"$ssa=\ue037;"
"$sa=\ue038;"
"$ha=\ue039;"
// Variables for nukta, avagraha, the dependent vowel signs, virama, om and
// the length marks. The "Reserved"/"UNMAPPED" comments name code points
// that have no variable here.
//\u093a Reserved
//\u093b Reserved
"$nukta=\ue03c;"
"$avagraha=\ue03d;" // SIGN AVAGRAHA
// <vowel> represents the dependent form
"$aa=\ue03e;"
"$i=\ue03f;"
"$ii=\ue040;"
"$u=\ue041;"
"$uu=\ue042;"
"$rh=\ue043;"
"$lh=\ue044;"
"$ce=\ue045;" //VOWEL SIGN CANDRA E
"$se=\ue046;" //VOWEL SIGN SHORT E
"$e=\ue047;"
"$ai=\ue048;"
"$co=\ue049;" // VOWEL SIGN CANDRA O
"$so=\ue04a;" // VOWEL SIGN SHORT O
"$o=\ue04b;" // \u094b
"$au=\ue04c;"
"$virama=\ue04d;"
// \u094e Reserved
// \u094f Reserved
"$om=\ue050;" // OM
// \u0951>; # UNMAPPED STRESS SIGN UDATTA
// \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
// \u0953>; # UNMAPPED GRAVE ACCENT
// \u0954>; # UNMAPPED ACUTE ACCENT
// NOTE(review): $nukta, $lm, $ailm and $aulm are defined but no conversion
// rule in this file uses them as a key.
"$lm = \ue055;"// Telugu Length Mark
"$ailm=\ue056;"// AI Length Mark
"$aulm=\ue057;"// AU Length Mark
// Urdu compatibility forms
"$uka=\ue058;"
"$ukha=\ue059;"
"$ugha=\ue05a;"
"$ujha=\ue05b;"
"$uddha=\ue05c;"
"$udha=\ue05d;"
"$ufa=\ue05e;"
"$uya=\ue05f;"
// long vocalic R/L: $wrr/$wll are the stand-alone forms, $rrh/$llh the
// dependent forms (same w-prefix convention as the other vowels)
"$wrr=\ue060;"
"$wll=\ue061;"
"$rrh=\ue062;"
"$llh=\ue063;"
// punctuation
// NOTE(review): no rule in this file converts $danda or $doubleDanda;
// they are referenced only inside the $endThing set.
"$danda=\ue064;"
"$doubleDanda=\ue065;"
// digits; mapped to ASCII 0-9 by the "numbers" rules near the end of the file
"$zero=\ue066;" // DIGIT ZERO
"$one=\ue067;" // DIGIT ONE
"$two=\ue068;" // DIGIT TWO
"$three=\ue069;" // DIGIT THREE
"$four=\ue06a;" // DIGIT FOUR
"$five=\ue06b;" // DIGIT FIVE
"$six=\ue06c;" // DIGIT SIX
"$seven=\ue06d;" // DIGIT SEVEN
"$eight=\ue06e;" // DIGIT EIGHT
"$nine=\ue06f;" // DIGIT NINE
// For all other scripts
// Sixteen placeholder variables covering \ue070-\ue07f. No rule in this
// file references any $ecp* variable, so these code points pass through
// the conversion rules unchanged.
"$ecp0=\ue070;"
"$ecp1=\ue071;"
"$ecp2=\ue072;"
"$ecp3=\ue073;"
"$ecp4=\ue074;"
"$ecp5=\ue075;"
"$ecp6=\ue076;"
"$ecp7=\ue077;"
"$ecp8=\ue078;"
"$ecp9=\ue079;"
"$ecpA=\ue07a;"
"$ecpB=\ue07b;"
"$ecpC=\ue07c;"
"$ecpD=\ue07d;"
"$ecpE=\ue07e;"
"$ecpF=\ue07f;"
// \u0970>; # UNMAPPED ABBREVIATION SIGN
// Character-set variables used as match contexts by the rules below.
// NOTE(review): $depVowelAbove, $depVowelBelow and $endThing are defined
// here but not referenced by any rule in this file.
"$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
"$depVowelBelow=[\ue041-\ue044];"
// NOTE(review): the "\u005c\u005cu0000" sequence below looks like a
// double-escaped \u0000 produced by the dump tool - confirm against the
// original rule source before relying on this set.
"$endThing=[$danda$doubleDanda \u005c\u005cu0000-\udfff\ue080-\ufffd];"
// $x was originally called '&'; $z was '%'
// $x: virama plus the dependent vowel signs. A consonant followed by a
// member of $x is written without its inherent "a".
// NOTE(review): $llh is not listed in $x although $rrh, $rh and $lh are -
// confirm whether that omission is intended.
"$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];"
// $z: the lowercase Latin consonant letters (used as a "preceded by Latin
// consonant" context in the independent-vowel rules).
"$z=[bcdfghjklmnpqrstvwxyz];"
// $consonants: every code point from $ka through $ha, plus virama.
"$consonants=[$ka-$ha $virama];"
//#####################################################################
// convert from Native letters to Latin letters
//#####################################################################
// Rule syntax reminder: "key } after > out" rewrites key only when it is
// followed by `after` (which is not consumed); rules are tried in the
// order written and the first match wins.
//transliterations for anusvara
// The anusvara is rendered as the nasal belonging to the class of the
// consonant that follows it; the last rule is the fallback when none of
// the listed consonants follows.
"$anusvara} [$ka$kha$ga$gha$nga] > n\u0307;"
// NOTE(review): this emits n + U+0304, whereas the $nya consonant rule in
// this file emits n + U+0303 - confirm the two are meant to differ.
"$anusvara} [$ca$cha$ja$jha$nya] > n\u0304;"
"$anusvara} [$tta$ttha$dda$ddha$nna] > n\u0323;"
"$anusvara} [$ta$tha$da$dha$na] > n ;"
"$anusvara} [$pa$pha$ba$bha$ma] > m ;"
"$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] > n ;"
"$anusvara>'-'m\u0307;"
// normal consonants
// Each consonant C has up to three kinds of rule, tried in file order:
//   C $virama } H > c''  C with virama, directly before consonant H: emit
//                        the bare letter plus an apostrophe ('' is an
//                        escaped single quote). Presumably this keeps
//                        e.g. c + h distinct from the digraph "ch".
//   C } $x > c           C followed by virama or a dependent vowel: emit
//                        the bare letter (the follower is handled later).
//   C > ca               otherwise: emit the letter with inherent "a".
// The order of rules sharing the same key matters; do not reorder them.
"$cha}$x>ch;"
"$cha>cha;"
"$ca$virama}$ha>c'';"
"$ca}$x>c;"
"$ca>ca;"
"$jha}$x>jh;"
"$jha>jha;"
"$ja$virama}$ha>j'';"
"$ja}$x>j;"
"$ja>ja;"
//$nya}$x>ny;
//$nya>nya;
"$nya }$x>n\u0303 ;"
"$nya > n\u0303a ;"
"$ttha}$x>t\u0323h;"
"$tta$virama}$ha>t\u0323'';"
"$tta}$x>t\u0323;"
"$ddha}$x>d\u0323h;"
// Fixed: this rule used to read "$dda}$x$ha", which also fired when a
// dependent vowel (not just virama) stood between $dda and $ha and so
// inserted a stray apostrophe before the vowel. It now matches the
// virama form only, like the other "C $virama } $ha" rules.
"$dda$virama}$ha>d\u0323'';"
"$dda}$x>d\u0323;"
"$dha}$x>dh;"
"$da$virama}$ha>d'';"
"$da$virama}$ddha>d'';"
"$da$virama}$dda>d'';"
"$da$virama}$dha>d'';"
//$da$virama}$da>dda;
"$da}$x>d;"
"$tha}$x>th;"
"$ta$virama}$ha>t'';"
"$ta$virama}$ttha>t'';"
"$ta$virama}$tta>t'';"
"$ta$virama}$tha>t'';"
"$tta>t\u0323a;"
"$ttha>t\u0323ha;"
//$ta$virama}$ta>tta;
"$ta}$x>t;"
"$tha>tha;"
"$ta>ta;"
"$dda>d\u0323a;"
"$dha>dha;"
"$ddha>d\u0323ha;"
"$da>da;"
"$nna}$x>n\u0323 ;"
"$nna>n\u0323a ;"
"$na$virama}$ga>n'';"
"$na$virama}$ya>n'';"
"$na}$x>n;"
"$na>na;"
"$kha}$x>kh;"
"$kha>kha;"
"$ka$virama}$ha>k'';"
"$ka}$x>k;"
"$ka>ka;"
"$gha}$x>gh;"
"$gha>gha;"
"$ga$virama}$ha>g'';"
"$ga}$x>g;"
"$ga>ga;"
//ng<$nga}$x;
//nga<$nga;
"$nga}$x>n\u0307;"
"$nga>n\u0307a ;"
"$pha}$x>ph;"
"$pha>pha;"
"$pa$virama}$ha>p'';"
"$pa}$x>p;"
"$pa>pa;"
"$bha}$x>bh;"
"$bha>bha;"
"$ba$virama}$ha>b'';"
"$ba}$x>b;"
"$ba>ba;"
"$ma$virama}$ma>m'';"
//$ma$virama}$anusvara>m'';
"$ma}$x>m;"
"$ma>ma;"
"$ya}$x>y;"
"$ya>ya;"
"$ra$virama}$ha>r'';"
"$ra}$x>r;"
"$ra>ra;"
"$rra$virama}$ha>r\u0331'';"
"$rra}$x>r\u0331;"
"$rra>r\u0331a;"
"$la$virama}$ha>l'';"
"$la}$x>l;"
"$la>la;"
"$lla$virama}$ha>l\u0323'';"
"$lla}$x>l\u0323;"
"$lla>l\u0323a;"
"$va}$x>v;"
"$va>va;"
"$sha}$x>s\u0301;"
"$ssa}$x>s\u0323;"
"$sa$virama}$ha>s'';"
"$sa$virama}$sha>s'';"
"$sa$virama}$ssa>s'';"
"$sa$virama}$sa>s'';"
"$sa}$x>s;"
"$sha>s\u0301a;"
"$ssa>s\u0323a;"
"$sa>sa;"
"$ha}$x>h;"
"$ha>ha;"
// Urdu compatibility
// Same two-rule pattern as the normal consonants: "C } $x" emits the bare
// letter when virama or a dependent vowel follows, and the plain "C" rule
// emits the letter with inherent "a". $ena and $ela (marked
// "compatibility" where they are defined) are handled here too.
"$uya}$x > y\u0307 ;"
"$uya > y\u0307a ;"
"$ela}$x > l\u0331 ;"
"$ela > l\u0331a ;"
"$ena}$x > n\u0331 ;"
"$ena > n\u0331a ;"
"$uka}$x > q ;"
"$uka > qa ;"
"$ukha}$x > k\u0331h\u0331 ;"
"$ukha > k\u0331h\u0331a ;"
"$ugha}$x > g\u0307 ;"
"$ugha > g\u0307a ;"
"$ujha}$x > z ;"
"$ujha > za ;"
"$udha}$x > r\u0323h ;"
"$udha > r\u0323ha;"
"$uddha}$x> r\u0323 ;"
"$uddha > r\u0323a ;"
"$ufa}$x > f ;"
"$ufa > fa ;"
// dependent vowels (should never occur except following consonants)
// The preceding consonant rule has already emitted the bare consonant
// letter; these rules emit the vowel itself. Long vowels carry U+0304.
"$aa > a\u0304 ;"
"$ai > ai ;"
"$au > au ;"
"$ii > i\u0304 ;"
"$i > i ;"
"$uu > u\u0304 ;"
"$u > u ;"
"$rrh > r\u0325\u0304 ;"
// $rh is the only dependent vowel with a context-sensitive rule: before a
// member of $consonants it yields r + U+0325, otherwise the same followed
// by "a".
// NOTE(review): the trailing "a" in the fallback differs from $lh, which
// has a single rule with no "a" - confirm this is intended.
"$rh}$consonants>r\u0325;"
"$rh > r\u0325a ;"
"$llh > l\u0325\u0304 ;"
"$lh > l\u0325 ;"
"$e > e\u0304 ;"
"$o > o\u0304 ;"
//extra vowels
// candra forms take U+0306; short forms are plain e / o
"$ce > e\u0306 ;"
"$co > o\u0306 ;"
"$se > e ;"
"$so > o ;"
// independent vowels (when following consonants)
// Rule syntax: "before { key > out" rewrites key only when it is preceded
// by `before`. Here the preceding context is text already converted to
// Latin: either a literal "a" or a Latin consonant letter from $z. In that
// case the vowel is emitted with a leading apostrophe ('' is an escaped
// single quote).
// Longer vowel names are listed before their prefixes' shorter forms
// (e.g. $wii before $wi); keep the order as is.
// Only some vowels have the "a{" variant; $wrr, $wr, $wll, $wl, $we, $wo
// and the extra vowels have just the "$z{" variant.
"a{$waa > ''a\u0304 ;"
"$z{$waa > ''a\u0304 ;"
"a{$wai > ''ai ;"
"$z{$wai > ''ai ;"
"a{$wau > ''au ;"
"$z{$wau > ''au ;"
"a{$wii > ''i\u0304 ;"
"$z{$wii > ''i\u0304 ;"
"a{$wi > ''i ;"
"$z{$wi > ''i ;"
"a{$wuu > ''u\u0304 ;"
"$z{$wuu > ''u\u0304 ;"
"a{$wu > ''u ;"
"$z{$wu > ''u ;"
"$z{$wrr > ''r\u0325\u0304 ;"
"$z{$wr > ''r\u0325 ;"
"$z{$wll > ''l\u0325\u0304 ;"
"$z{$wl > ''l\u0325 ;"
"$z{$we > ''e\u0304 ;"
"$z{$wo > ''o\u0304 ;"
"a{$wa > ''a ;"
"$z{$wa > ''a ;"
//extra vowels
"$z{$wce > ''e\u0306 ;"
"$z{$wco > ''o\u0306 ;"
"$z{$wse > ''e ;"
"$z{$wso > ''o ;"
// independent vowels (otherwise)
// Fallbacks for stand-alone vowels that were not caught by a
// context-sensitive rule earlier in the file: same Latin output, without
// the leading apostrophe.
"$waa > a\u0304 ;"
"$wai > ai ;"
"$wau > au ;"
"$wii > i\u0304 ;"
"$wi > i ;"
"$wuu > u\u0304 ;"
"$wu > u ;"
"$wrr > r\u0325\u0304 ;"
"$wr > r\u0325 ;"
"$wll > l\u0325\u0304 ;"
"$wl > l\u0325 ;"
"$we > e\u0304 ;"
"$wo > o\u0304 ;"
"$wa > a ;"
//extra vowels
"$wce > e\u0306 ;"
"$wco > o\u0306 ;"
"$wse > e ;"
"$wso > o ;"
// OM is always written with a leading apostrophe ('' is an escaped ').
"$om > ''om ;"
//stress marks and other signs
"$avagraha > \u0315;"
// The two-character sequence must be listed before the single
// $chandrabindu rule, otherwise the shorter rule would match first.
"$chandrabindu$anusvara>'-'\u0303;"
"$chandrabindu > '-'m\u0310;"
"$visarga>'-'h\u0323;"
//numbers
// InterIndic digits map one-to-one onto ASCII digits.
"$zero > 0;"
"$one > 1;"
"$two > 2;"
"$three > 3;"
"$four > 4;"
"$five > 5;"
"$six > 6;"
"$seven > 7;"
"$eight > 8;"
"$nine > 9;"
// blow away any remaining viramas
// (the consonant rules leave a following virama in place as context;
// this rule deletes it by mapping it to empty output)
"$virama>;"
// :: NFC;
}
}