scuffed-code/icu4c/data/translit_Latin_InterIndic.txt
Alan Liu b720561095 ICU-1560 sync C rules with Java
X-SVN-Rev: 7224
2001-11-30 21:24:16 +00:00

398 lines
10 KiB
Plaintext

// -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpicurules.bat
// Source: ../../text/resources/Transliterator_Latin_InterIndic.txt
// Date: Fri Nov 30 13:01:42 2001
//--------------------------------------------------------------------
// Latin_InterIndic
// Resource table for the Latin -> InterIndic transliterator. The Rule entry
// is written as a run of quoted fragments; each fragment carries one variable
// definition or one conversion rule, terminated by ';'.
translit_Latin_InterIndic {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 2001-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Latin-InterIndic
// The NFD pre-pass below is commented out in this copy.
//:: NFD;
// Variable definitions: every $name below stands for a single code point in
// the private-use block \ue000-\ue07f.
//\ue000 reserved
// signs (the consonant letters start at $ka)
"$chandrabindu=\ue001;"
"$anusvara=\ue002;"
"$visarga=\ue003;"
//\ue004 reserved
// w<vowel> represents the stand-alone form
// (the independent vowel letters occupy \ue005-\ue014)
"$wa=\ue005;"
"$waa=\ue006;"
"$wi=\ue007;"
"$wii=\ue008;"
"$wu=\ue009;"
"$wuu=\ue00a;"
"$wr=\ue00b;"
"$wl=\ue00c;"
"$wce=\ue00d;" // LETTER CANDRA E
"$wse=\ue00e;" // LETTER SHORT E
"$we=\ue00f;" // \u090f LETTER E
"$wai=\ue010;"
"$wco=\ue011;" // LETTER CANDRA O
"$wso=\ue012;" // LETTER SHORT O
"$wo=\ue013;" // \u0913 LETTER O
"$wau=\ue014;"
// Consonant letters. $ka through $ha fill the contiguous range
// \ue015-\ue039; the $consonants set defined later in this table depends on
// that contiguity because it is written as [$ka-$ha].
"$ka=\ue015;"
"$kha=\ue016;"
"$ga=\ue017;"
"$gha=\ue018;"
"$nga=\ue019;"
"$ca=\ue01a;"
"$cha=\ue01b;"
"$ja=\ue01c;"
"$jha=\ue01d;"
"$nya=\ue01e;"
"$tta=\ue01f;"
"$ttha=\ue020;"
"$dda=\ue021;"
"$ddha=\ue022;"
"$nna=\ue023;"
"$ta=\ue024;"
"$tha=\ue025;"
"$da=\ue026;"
"$dha=\ue027;"
"$na=\ue028;"
"$ena=\ue029;" //compatibility
"$pa=\ue02a;"
"$pha=\ue02b;"
"$ba=\ue02c;"
"$bha=\ue02d;"
"$ma=\ue02e;"
"$ya=\ue02f;"
"$ra=\ue030;"
"$rra=\ue031;"
"$la=\ue032;"
"$lla=\ue033;"
"$ela=\ue034;" //compatibility
"$va=\ue035;"
"$sha=\ue036;"
"$ssa=\ue037;"
"$sa=\ue038;"
"$ha=\ue039;"
// NOTE(review): the \u09xx numbers quoted in the comments of this table appear
// to name the Devanagari code point that shares its low byte with the
// InterIndic slot (compare $o=\ue04b annotated with \u094b) - confirm.
//\u093a Reserved
//\u093b Reserved
"$nukta=\ue03c;"
"$avagraha=\ue03d;" // SIGN AVAGRAHA
// <vowel> represents the dependent form
// (the dependent vowel signs occupy \ue03e-\ue04c; the $depVowelAbove and
// $depVowelBelow sets defined later in this table are raw sub-ranges of it)
"$aa=\ue03e;"
"$i=\ue03f;"
"$ii=\ue040;"
"$u=\ue041;"
"$uu=\ue042;"
"$rh=\ue043;"
"$lh=\ue044;"
"$ce=\ue045;" //VOWEL SIGN CANDRA E
"$se=\ue046;" //VOWEL SIGN SHORT E
"$e=\ue047;"
"$ai=\ue048;"
"$co=\ue049;" // VOWEL SIGN CANDRA O
"$so=\ue04a;" // VOWEL SIGN SHORT O
"$o=\ue04b;" // \u094b
"$au=\ue04c;"
// $virama is emitted after every consonant by the consonant rules and is then
// consumed or kept by the $virama rules near the end of this table.
"$virama=\ue04d;"
// \u094e Reserved
// \u094f Reserved
"$om = \ue050;" // OM
// \u0951>; # UNMAPPED STRESS SIGN UDATTA
// \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
// \u0953>; # UNMAPPED GRAVE ACCENT
// \u0954>; # UNMAPPED ACUTE ACCENT
// NOTE(review): $nukta, $lm, $ailm and $aulm are defined here but no rule in
// this Rule entry refers to them.
"$lm = \ue055;"// Telugu Length Mark
"$ailm=\ue056;"// AI Length Mark
"$aulm=\ue057;"// AU Length Mark
//urdu compatibility forms
// (targets of the q, k\u0331h\u0331, g\u0307, z, r\u0323h, r\u0323, f and
// y\u0307 rules further down)
"$uka=\ue058;"
"$ukha=\ue059;"
"$ugha=\ue05a;"
"$ujha=\ue05b;"
"$uddha=\ue05c;"
"$udha=\ue05d;"
"$ufa=\ue05e;"
"$uya=\ue05f;"
// long vocalic r / l: $wrr and $wll are the stand-alone forms, $rrh and $llh
// the dependent forms (see the r\u0325\u0304 and l\u0325\u0304 rules)
"$wrr=\ue060;"
"$wll=\ue061;"
"$rrh=\ue062;"
"$llh=\ue063;"
// punctuation
"$danda=\ue064;"
"$doubleDanda=\ue065;"
// digits; the ASCII digits 0-9 are mapped onto these at the end of the table
"$zero=\ue066;" // DIGIT ZERO
"$one=\ue067;" // DIGIT ONE
"$two=\ue068;" // DIGIT TWO
"$three=\ue069;" // DIGIT THREE
"$four=\ue06a;" // DIGIT FOUR
"$five=\ue06b;" // DIGIT FIVE
"$six=\ue06c;" // DIGIT SIX
"$seven=\ue06d;" // DIGIT SEVEN
"$eight=\ue06e;" // DIGIT EIGHT
"$nine=\ue06f;" // DIGIT NINE
// For all other scripts
// NOTE(review): none of the $ecp0-$ecpF slots is referenced by a rule in this
// Rule entry.
"$ecp0=\ue070;"
"$ecp1=\ue071;"
"$ecp2=\ue072;"
"$ecp3=\ue073;"
"$ecp4=\ue074;"
"$ecp5=\ue075;"
"$ecp6=\ue076;"
"$ecp7=\ue077;"
"$ecp8=\ue078;"
"$ecp9=\ue079;"
"$ecpA=\ue07a;"
"$ecpB=\ue07b;"
"$ecpC=\ue07c;"
"$ecpD=\ue07d;"
"$ecpE=\ue07e;"
"$ecpF=\ue07f;"
// \u0970>; # UNMAPPED ABBREVIATION SIGN
// Character sets used as rule context.
// $depVowelAbove = $aa,$i,$ii (\ue03e-\ue040) plus $ce through $au (\ue045-\ue04c)
"$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
// $depVowelBelow = $u,$uu,$rh,$lh (\ue041-\ue044)
"$depVowelBelow=[\ue041-\ue044];"
"$endThing=[$danda$doubleDanda];"
// $x was originally called '&'; $z was '%'
// NOTE(review): $x is defined but not referenced by any rule in this Rule entry.
"$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];"
// $z = the lowercase Latin letters other than a, e, i, o, u
"$z=[bcdfghjklmnpqrstvwxyz];"
// $consonants = InterIndic consonants, the Latin letters in $z, and the
// xx15-xx39 / xx95-xxb9 letter ranges of the Devanagari, Bengali, Gurmukhi,
// Gujarati, Oriya, Tamil, Telugu, Kannada and Malayalam blocks.
"$consonants=[[$ka-$ha]$z[\u0915-\u0939][\u0995-\u09b9][\u0a15-\u0a39][\u0a95-\u0ab9][\u0b15-\u0b39][\u0b95-\u0bb9][\u0c15-\u0c39][\u0c95-\u0cb9][\u0d15-\u0d39]];"
// Conversion rules start here. Rules are tried in the order written, so longer
// keys are listed before their shorter prefixes throughout the table.
// Context-free sign rules: U+0315 -> avagraha, bare U+0303 (combining tilde)
// -> chandrabindu + anusvara, m + U+0310 -> chandrabindu, h + U+0323 -> visarga.
"\u0315 > $avagraha;"
"\u0303>$chandrabindu$anusvara;"
"m\u0310>$chandrabindu;"
"h\u0323>$visarga;"
// x expands to the cluster ka + virama + sa
"x>$ka$virama$sa;"
// dependent forms for roundtrip:
// a vowel prefixed with \u0314 (COMBINING REVERSED COMMA ABOVE) maps straight
// to the dependent vowel sign, regardless of what precedes it. Longer keys
// come first so that e.g. \u0314a\u0304 is tried before \u0314a.
"\u0314a\u0304>$aa;"
"\u0314ai>$ai;"
"\u0314au>$au;"
"\u0314ii>$ii;"
"\u0314i\u0304>$ii;"
"\u0314i>$i;"
"\u0314u\u0304>$uu;"
"\u0314u>$u;"
"\u0314r\u0325\u0304>$rrh;"
"\u0314r\u0325>$rh;"
"\u0314l\u0325\u0304>$llh;"
"\u0314lh>$lh;"
"\u0314l\u0325>$lh;"
"\u0314e\u0304>$e;"
"\u0314o\u0304>$o;"
// \u0314 followed by plain a is deleted (empty replacement)
"\u0314a>;"
"\u0314e\u0306>$ce;"
"\u0314o\u0306>$co;"
"\u0314e>$se;"
"\u0314o>$so;"
// preceded by consonants:
// the text before '{' is context that must precede the key but is not
// replaced; a vowel directly after any member of $consonants becomes the
// dependent sign.
"$consonants{ a\u0304>$aa;"
"$consonants{ ai>$ai;"
"$consonants{ au>$au;"
"$consonants{ ii>$ii;"
"$consonants{ i\u0304>$ii;"
"$consonants{ i>$i;"
"$consonants{ u\u0304>$uu;"
"$consonants{ u>$u;"
"$consonants{ r\u0325\u0304>$rrh;"
// r\u0325 followed by a: the trailing a is consumed together with the vowel
"$consonants{ r\u0325a>$rh;"
"$consonants{ r\u0325>$rh;"
"$consonants{ l\u0325\u0304>$llh;"
"$consonants{ lh>$lh;"
"$consonants{ l\u0325>$lh;"
"$consonants{ e\u0304>$e;"
"$consonants{ o\u0304>$o;"
"$consonants{ e\u0306>$ce;"
"$consonants{ o\u0306>$co;"
"$consonants{ e>$se;"
"$consonants{ o>$so;"
// Vowels that were not claimed by an earlier, more specific rule become the
// stand-alone (w-prefixed) forms.
// e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
"a\u0304>$waa;"
"ai>$wai;"
"au>$wau;"
"i\u0304>$wii;"
"i>$wi;"
"u\u0304>$wuu;"
"u>$wu;"
"r\u0325\u0304>$wrr;"
"r\u0325>$wr;"
"l\u0325\u0304>$wll;"
"lh>$wl;"
"l\u0325>$wl;"
"e\u0304>$we;"
"o\u0304>$wo;"
"a>$wa;"
"e\u0306>$wce;"
"o\u0306>$wco;"
"e>$wse;"
// '' inside a rule is a literal apostrophe: the sequence 'om maps to $om
"''om>$om;"
"o>$wso;"
// rules for anusvara
// In these rules the text after '}' is following context: it must be present
// but is left in place for later rules. A '|' in the replacement marks where
// the cursor is left, so $na|$virama leaves the cursor in front of $virama and
// lets the $virama rules further down inspect what follows.
// First, the cases where n stays a full consonant: before r\u0325, before
// l\u0325, and before the letters na.
"n}r\u0325 > $na|$virama;"
"n}l\u0325 > $na|$virama;"
"n}na > $na|$virama;"
// Then the nasals that turn into anusvara because of the letter that follows.
"n\u0307}[kg] > $anusvara;"
"n\u0307}n\u0307 > $anusvara;"
"n\u0304}[cj] > $anusvara;"
"n\u0304}n\u0303 > $anusvara;"
"n\u0323}[tdn]\u0323 > $anusvara;"
"n}[tdn] > $anusvara;"
"m}[pbm] > $anusvara;"
"n}[ylvshr] > $anusvara;"
// m + U+0307 (dot above) is anusvara in any position
"m\u0307 > $anusvara;"
// Consonant rules. Every rule emits the consonant followed by $virama and
// leaves the cursor (the '|') in front of the $virama, so that the $virama
// rules later in the table can merge it with a following vowel or keep it.
// Spaces inside a rule are not significant.
//urdu compatibility
"q>$uka|$virama;"
"k\u0331h\u0331>$ukha |$virama;"
"g\u0307> $ugha | $virama;"
"z > $ujha |$virama;"
"f > $ufa|$virama;"
// dev (presumably Devanagari - confirm)
// Letters with a diacritic, and aspirated forms, come before the plain letter.
"y\u0307>$uya|$virama;"
"l\u0331>$ela|$virama;"
"n\u0331>$ena|$virama;"
"n\u0307>$nga|$virama;"
"n\u0303>$nya|$virama;"
"n\u0323>$nna|$virama;"
"t\u0323h>$ttha|$virama;"
"t\u0323>$tta|$virama;"
"r\u0323h>$udha|$virama;"
"r\u0323>$uddha|$virama;"
"d\u0323h>$ddha|$virama;"
"d\u0323>$dda|$virama;"
"kh>$kha|$virama;"
"k>$ka|$virama;"
"gh>$gha|$virama;"
"g>$ga|$virama;"
"ch>$cha|$virama;"
"c>$ca|$virama;"
"jh>$jha|$virama;"
"j>$ja|$virama;"
// NOTE(review): the earlier anusvara rule n}[ylvshr] already matches n followed
// by y, so this rule looks unreachable - confirm against the Java source.
"ny>$nya|$virama;"
"tth>$ttha|$virama;"
"ddh>$ddha|$virama;"
"th>$tha|$virama;"
"t>$ta|$virama;"
"dh>$dha|$virama;"
"d>$da|$virama;"
"n>$na|$virama;"
"ph>$pha|$virama;"
"p>$pa|$virama;"
"bh>$bha|$virama;"
"b>$ba|$virama;"
"m>$ma|$virama;"
"y>$ya|$virama;"
"r\u0331>$rra|$virama;"
"r>$ra|$virama;"
"l\u0323>$lla|$virama;"
"l>$la|$virama;"
// v and w both map to $va
"v>$va|$virama;"
"w>$va|$virama;"
// sh and s + U+0301 both map to $sha; ss and s + U+0323 both map to $ssa
"sh>$sha|$virama;"
"ss>$ssa|$virama;"
"s\u0323>$ssa|$virama;"
"s\u0301>$sha|$virama;"
"s>$sa|$virama;"
"h>$ha|$virama;"
// punctuation: a full stop becomes a danda
"'.'>$danda;"
// NOTE(review): there is no '{' here, so $danda is part of the matched key and
// must sit at the cursor. After the rule above fires the cursor is already
// past the $danda it produced, so a second full stop seems to hit the rule
// above again rather than this one - confirm whether $danda{ was intended.
"$danda'.'>$doubleDanda;"
// '~' directly after a dependent vowel sign: anusvara after the signs in
// $depVowelAbove, chandrabindu after those in $depVowelBelow
"$depVowelAbove{'~'>$anusvara;"
"$depVowelBelow{'~'>$chandrabindu;"
// convert to dependent forms after consonant with no vowel:
// e.g. kai -> {ka}{virama}ai -> {ka}{ai}
// The key of each rule is $virama plus the Latin vowel (the space between them
// is not significant); both are replaced by the dependent vowel sign. The
// lines starting with //$virama are disabled digraph spellings kept from the
// original source.
//$virama aa>$aa;
"$virama a\u0304>$aa;"
"$virama ai>$ai;"
"$virama au>$au;"
"$virama ii>$ii;"
"$virama i\u0304>$ii;"
"$virama i>$i;"
//$virama uu>$uu;
"$virama u\u0304>$uu;"
"$virama u>$u;"
//$virama rrh>$rrh;
"$virama r\u0325\u0304>$rrh;"
//$virama rh>$rh;
// r\u0325 followed by a: the trailing a is consumed together with the vowel
"$virama r\u0325a>$rh;"
"$virama r\u0325>$rh;"
"$virama l\u0325\u0304>$llh;"
"$virama lh>$lh;"
"$virama l\u0325>$lh;"
"$virama e\u0304>$e;"
"$virama o\u0304>$o;"
// plain a after a consonant: $virama and a are both removed, leaving the bare
// consonant. This rule must stay after the a\u0304, ai and au rules above.
"$virama a>;"
"$virama e\u0306>$ce;"
"$virama o\u0306>$co;"
"$virama e>$se;"
"$virama o>$so;"
// otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
// ('' inside a rule is a literal apostrophe. Each key is $virama, the
// apostrophe and the vowel; all three are replaced by the stand-alone vowel,
// so the $virama written by the consonant rule does not survive this rule.)
// NOTE(review): the example above still shows {virama} in the result while the
// replacements below do not emit it - confirm which one is intended.
//$virama''aa>$waa;
"$virama''a\u0304>$waa;"
"$virama''ai>$wai;"
"$virama''au>$wau;"
//$virama''ii>$wii;
"$virama''i\u0304>$wii;"
"$virama''i>$wi;"
//$virama''uu>$wuu;
"$virama''u\u0304>$wuu;"
"$virama''u>$wu;"
//$virama''rrh>$wrr;
"$virama''r\u0325\u0304>$wrr;"
//$virama''rh>$wr;
"$virama''r\u0325>$wr;"
"$virama''l\u0325\u0304>$wll;"
//$virama''lh>$wl;
"$virama''l\u0325>$wl;"
"$virama''e\u0304>$we;"
"$virama''o\u0304>$wo;"
"$virama''a>$wa;"
"$virama''e\u0306>$wce;"
"$virama''o\u0306>$wco;"
"$virama''e>$wse;"
"$virama''o>$wso;"
// no virama
// (apostrophe + vowel with no $virama at the cursor: the apostrophe is dropped
// and the vowel becomes the stand-alone form)
"''a\u0304>$waa;"
"''ai>$wai;"
"''au>$wau;"
"''i\u0304>$wii;"
"''i>$wi;"
"''u\u0304>$wuu;"
"''u>$wu;"
"''r\u0325\u0304>$wrr;"
"''r\u0325>$wr;"
"''l\u0325\u0304>$wll;"
"''l\u0325>$wl;"
"''e\u0304>$we;"
"''o\u0304>$wo;"
"''a>$wa;"
"''e\u0306>$wce;"
"''o\u0306>$wco;"
"''e>$wse;"
"''o>$wso;"
// What to do with a $virama that no vowel rule consumed:
// - before a Latin consonant letter ($z) or a space it is kept; re-emitting it
//   moves the cursor past it so processing continues with the next character
// - before a danda or double danda ($endThing) it is deleted
"$virama } [$z] > $virama;"
"$virama } ' ' > $virama ;"
"$virama}$endThing>;"
// ASCII digits map to the InterIndic digit slots
"0>$zero;"
"1>$one;"
"2>$two;"
"3>$three;"
"4>$four;"
"5>$five;"
"6>$six;"
"7>$seven;"
"8>$eight;"
"9>$nine;"
// any apostrophe not consumed by an earlier rule is deleted
"''>;"
// The NFC post-pass below is commented out in this copy.
//:: NFC (NFD) ;
}
}