b720561095
X-SVN-Rev: 7224
398 lines
10 KiB
Plaintext
398 lines
10 KiB
Plaintext
// -*- Coding: utf-8; -*-
|
|
//--------------------------------------------------------------------
|
|
// Copyright (c) 1999-2001, International Business Machines
|
|
// Corporation and others. All Rights Reserved.
|
|
//--------------------------------------------------------------------
|
|
// THIS IS A MACHINE-GENERATED FILE
|
|
// Tool: dumpicurules.bat
|
|
// Source: ../../text/resources/Transliterator_Latin_InterIndic.txt
|
|
// Date: Fri Nov 30 13:01:42 2001
|
|
//--------------------------------------------------------------------
|
|
|
|
// Latin_InterIndic
|
|
|
|
translit_Latin_InterIndic {
|
|
Rule {
|
|
//--------------------------------------------------------------------
|
|
// Copyright (c) 2001-2004, International Business Machines
|
|
// Corporation and others. All Rights Reserved.
|
|
//--------------------------------------------------------------------
|
|
// Latin-InterIndic
|
|
//:: NFD;
|
|
// Each "$name=\uXXXX;" string defines a rule variable bound to one InterIndic
// code point. The values used in this file lie in \ue000-\ue07f, which is
// inside the Unicode Private Use Area.
//\ue000 reserved
|
|
// signs (the consonant variables start further down, at $ka)
|
|
"$chandrabindu=\ue001;"
|
|
"$anusvara=\ue002;"
|
|
"$visarga=\ue003;"
|
|
//\ue004 reserved
|
|
// w<vowel> represents the stand-alone (independent) form of a vowel.
// These sixteen variables occupy \ue005-\ue014 without gaps.
|
|
"$wa=\ue005;"
|
|
"$waa=\ue006;"
|
|
"$wi=\ue007;"
|
|
"$wii=\ue008;"
|
|
"$wu=\ue009;"
|
|
"$wuu=\ue00a;"
|
|
"$wr=\ue00b;"
|
|
"$wl=\ue00c;"
|
|
"$wce=\ue00d;" // LETTER CANDRA E
|
|
"$wse=\ue00e;" // LETTER SHORT E
|
|
"$we=\ue00f;" // \u090f LETTER E
|
|
"$wai=\ue010;"
|
|
"$wco=\ue011;" // LETTER CANDRA O
|
|
"$wso=\ue012;" // LETTER SHORT O
|
|
"$wo=\ue013;" // \u0913 LETTER O
|
|
"$wau=\ue014;"
|
|
// consonants: $ka through $ha occupy \ue015-\ue039 without gaps; the
// $consonants set defined later relies on this via the range [$ka-$ha].
"$ka=\ue015;"
|
|
"$kha=\ue016;"
|
|
"$ga=\ue017;"
|
|
"$gha=\ue018;"
|
|
"$nga=\ue019;"
|
|
"$ca=\ue01a;"
|
|
"$cha=\ue01b;"
|
|
"$ja=\ue01c;"
|
|
"$jha=\ue01d;"
|
|
"$nya=\ue01e;"
|
|
"$tta=\ue01f;"
|
|
"$ttha=\ue020;"
|
|
"$dda=\ue021;"
|
|
"$ddha=\ue022;"
|
|
"$nna=\ue023;"
|
|
"$ta=\ue024;"
|
|
"$tha=\ue025;"
|
|
"$da=\ue026;"
|
|
"$dha=\ue027;"
|
|
"$na=\ue028;"
|
|
"$ena=\ue029;" //compatibility
|
|
"$pa=\ue02a;"
|
|
"$pha=\ue02b;"
|
|
"$ba=\ue02c;"
|
|
"$bha=\ue02d;"
|
|
"$ma=\ue02e;"
|
|
"$ya=\ue02f;"
|
|
"$ra=\ue030;"
|
|
"$rra=\ue031;"
|
|
"$la=\ue032;"
|
|
"$lla=\ue033;"
|
|
"$ela=\ue034;" //compatibility
|
|
"$va=\ue035;"
|
|
"$sha=\ue036;"
|
|
"$ssa=\ue037;"
|
|
"$sa=\ue038;"
|
|
"$ha=\ue039;"
|
|
//\u093a Reserved (the matching InterIndic slot \ue03a is left unassigned)
|
|
//\u093b Reserved (the matching InterIndic slot \ue03b is left unassigned)
|
|
"$nukta=\ue03c;"
|
|
"$avagraha=\ue03d;" // SIGN AVAGRAHA
|
|
// <vowel> (no "w" prefix) represents the dependent form (vowel sign).
// $aa through $virama occupy \ue03e-\ue04d without gaps.
|
|
"$aa=\ue03e;"
|
|
"$i=\ue03f;"
|
|
"$ii=\ue040;"
|
|
"$u=\ue041;"
|
|
"$uu=\ue042;"
|
|
"$rh=\ue043;"
|
|
"$lh=\ue044;"
|
|
"$ce=\ue045;" //VOWEL SIGN CANDRA E
|
|
"$se=\ue046;" //VOWEL SIGN SHORT E
|
|
"$e=\ue047;"
|
|
"$ai=\ue048;"
|
|
"$co=\ue049;" // VOWEL SIGN CANDRA O
|
|
"$so=\ue04a;" // VOWEL SIGN SHORT O
|
|
"$o=\ue04b;" // \u094b VOWEL SIGN O
|
|
"$au=\ue04c;"
|
|
"$virama=\ue04d;"
|
|
// \u094e Reserved (InterIndic slot \ue04e is left unassigned)
|
|
// \u094f Reserved (InterIndic slot \ue04f is left unassigned)
|
|
"$om = \ue050;" // OM
|
|
// The four entries below are commented out, so \ue051-\ue054 have no variable.
// \u0951>; # UNMAPPED STRESS SIGN UDATTA
|
|
// \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
|
|
// \u0953>; # UNMAPPED GRAVE ACCENT
|
|
// \u0954>; # UNMAPPED ACUTE ACCENT
|
|
"$lm = \ue055;"// Telugu Length Mark
|
|
"$ailm=\ue056;"// AI Length Mark
|
|
"$aulm=\ue057;"// AU Length Mark
|
|
// Urdu compatibility forms (\ue058-\ue05f)
|
|
"$uka=\ue058;"
|
|
"$ukha=\ue059;"
|
|
"$ugha=\ue05a;"
|
|
"$ujha=\ue05b;"
|
|
"$uddha=\ue05c;"
|
|
"$udha=\ue05d;"
|
|
"$ufa=\ue05e;"
|
|
"$uya=\ue05f;"
|
|
// $wrr/$wll are produced from r/l + \u0325 + \u0304 as stand-alone forms and
// $rrh/$llh from the same input as dependent forms (see the rules further down).
"$wrr=\ue060;"
|
|
"$wll=\ue061;"
|
|
"$rrh=\ue062;"
|
|
"$llh=\ue063;"
|
|
// punctuation
"$danda=\ue064;"
|
|
"$doubleDanda=\ue065;"
|
|
// digits 0-9 occupy \ue066-\ue06f without gaps
"$zero=\ue066;" // DIGIT ZERO
|
|
"$one=\ue067;" // DIGIT ONE
|
|
"$two=\ue068;" // DIGIT TWO
|
|
"$three=\ue069;" // DIGIT THREE
|
|
"$four=\ue06a;" // DIGIT FOUR
|
|
"$five=\ue06b;" // DIGIT FIVE
|
|
"$six=\ue06c;" // DIGIT SIX
|
|
"$seven=\ue06d;" // DIGIT SEVEN
|
|
"$eight=\ue06e;" // DIGIT EIGHT
|
|
"$nine=\ue06f;" // DIGIT NINE
|
|
// For all other scripts: sixteen extra slots, \ue070-\ue07f.
// NOTE(review): none of the rules visible in this file reference $ecp0-$ecpF;
// presumably they are only used by other InterIndic rule sets - confirm.
|
|
"$ecp0=\ue070;"
|
|
"$ecp1=\ue071;"
|
|
"$ecp2=\ue072;"
|
|
"$ecp3=\ue073;"
|
|
"$ecp4=\ue074;"
|
|
"$ecp5=\ue075;"
|
|
"$ecp6=\ue076;"
|
|
"$ecp7=\ue077;"
|
|
"$ecp8=\ue078;"
|
|
"$ecp9=\ue079;"
|
|
"$ecpA=\ue07a;"
|
|
"$ecpB=\ue07b;"
|
|
"$ecpC=\ue07c;"
|
|
"$ecpD=\ue07d;"
|
|
"$ecpE=\ue07e;"
|
|
"$ecpF=\ue07f;"
|
|
// \u0970>; # UNMAPPED ABBREVIATION SIGN
|
|
// Character sets used as rule context.
// $depVowelAbove = $aa,$i,$ii (\ue03e-\ue040) plus $ce..$au (\ue045-\ue04c).
"$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
|
|
// $depVowelBelow = $u,$uu,$rh,$lh (\ue041-\ue044).
"$depVowelBelow=[\ue041-\ue044];"
|
|
"$endThing=[$danda$doubleDanda];"
|
|
// $x was originally called '&'; $z was '%'
// $x = virama plus dependent vowels. It is defined here, but no rule visible
// in this file refers to it.
|
|
"$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];"
|
|
// $z = the 21 lower-case Latin letters other than a, e, i, o, u.
"$z=[bcdfghjklmnpqrstvwxyz];"
|
|
// $consonants = InterIndic consonants, the Latin letters in $z, and the
// xx15-xx39 range of nine Indic blocks (Devanagari 09xx, Bengali 09xx,
// Gurmukhi 0Axx, Gujarati 0Axx, Oriya 0Bxx, Tamil 0Bxx, Telugu 0Cxx,
// Kannada 0Cxx, Malayalam 0Dxx).
"$consonants=[[$ka-$ha]$z[\u0915-\u0939][\u0995-\u09b9][\u0a15-\u0a39][\u0a95-\u0ab9][\u0b15-\u0b39][\u0b95-\u0bb9][\u0c15-\u0c39][\u0c95-\u0cb9][\u0d15-\u0d39]];"
|
|
// Conversion rules start here. "A > B;" replaces matched text A with B.
// \u0315 (COMBINING COMMA ABOVE RIGHT) -> avagraha
"\u0315 > $avagraha;"
|
|
// \u0303 (COMBINING TILDE) on its own -> chandrabindu followed by anusvara
"\u0303>$chandrabindu$anusvara;"
|
|
// m + \u0310 (COMBINING CANDRABINDU) -> chandrabindu
"m\u0310>$chandrabindu;"
|
|
// h + \u0323 (COMBINING DOT BELOW) -> visarga
"h\u0323>$visarga;"
|
|
// x is expanded to the cluster ka + virama + sa
"x>$ka$virama$sa;"
|
|
// Vowel handling starts here. The first group covers a vowel explicitly
// marked with \u0314 (COMBINING REVERSED COMMA ABOVE): the marker and vowel
// are replaced by the dependent form.
|
|
// dependent forms for roundtrip
// Longer inputs are listed before their prefixes (e.g. "i\u0304" before "i")
// so that the longer match is tried first. "\u0314a" produces no output.
|
|
"\u0314a\u0304>$aa;"
|
|
"\u0314ai>$ai;"
|
|
"\u0314au>$au;"
|
|
"\u0314ii>$ii;"
|
|
"\u0314i\u0304>$ii;"
|
|
"\u0314i>$i;"
|
|
"\u0314u\u0304>$uu;"
|
|
"\u0314u>$u;"
|
|
"\u0314r\u0325\u0304>$rrh;"
|
|
"\u0314r\u0325>$rh;"
|
|
"\u0314l\u0325\u0304>$llh;"
|
|
"\u0314lh>$lh;"
|
|
"\u0314l\u0325>$lh;"
|
|
"\u0314e\u0304>$e;"
|
|
"\u0314o\u0304>$o;"
|
|
"\u0314a>;"
|
|
"\u0314e\u0306>$ce;"
|
|
"\u0314o\u0306>$co;"
|
|
"\u0314e>$se;"
|
|
"\u0314o>$so;"
|
|
|
|
// preceded by consonants
// "$consonants{ V > X;" - the text before "{" is context only: it must
// precede V but is not replaced. A vowel directly after any member of
// $consonants becomes its dependent form. There is no rule for a plain "a"
// in this group. \u0304 = COMBINING MACRON, \u0306 = COMBINING BREVE,
// \u0325 = COMBINING RING BELOW.
|
|
"$consonants{ a\u0304>$aa;"
|
|
"$consonants{ ai>$ai;"
|
|
"$consonants{ au>$au;"
|
|
"$consonants{ ii>$ii;"
|
|
"$consonants{ i\u0304>$ii;"
|
|
"$consonants{ i>$i;"
|
|
"$consonants{ u\u0304>$uu;"
|
|
"$consonants{ u>$u;"
|
|
"$consonants{ r\u0325\u0304>$rrh;"
|
|
"$consonants{ r\u0325a>$rh;"
|
|
"$consonants{ r\u0325>$rh;"
|
|
"$consonants{ l\u0325\u0304>$llh;"
|
|
"$consonants{ lh>$lh;"
|
|
"$consonants{ l\u0325>$lh;"
|
|
"$consonants{ e\u0304>$e;"
|
|
"$consonants{ o\u0304>$o;"
|
|
"$consonants{ e\u0306>$ce;"
|
|
"$consonants{ o\u0306>$co;"
|
|
"$consonants{ e>$se;"
|
|
"$consonants{ o>$so;"
|
|
|
|
// Vowels with no left context become stand-alone (independent) forms.
// e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
// Multi-character inputs precede their single-character prefixes
// ("ai"/"au" before "a", "i\u0304" before "i", ...).
|
|
"a\u0304>$waa;"
|
|
"ai>$wai;"
|
|
"au>$wau;"
|
|
"i\u0304>$wii;"
|
|
"i>$wi;"
|
|
"u\u0304>$wuu;"
|
|
"u>$wu;"
|
|
"r\u0325\u0304>$wrr;"
|
|
"r\u0325>$wr;"
|
|
"l\u0325\u0304>$wll;"
|
|
"lh>$wl;"
|
|
"l\u0325>$wl;"
|
|
"e\u0304>$we;"
|
|
"o\u0304>$wo;"
|
|
"a>$wa;"
|
|
"e\u0306>$wce;"
|
|
"o\u0306>$wco;"
|
|
"e>$wse;"
|
|
// '' is an escaped literal apostrophe: the text 'om (apostrophe, o, m) -> OM sign
"''om>$om;"
|
|
"o>$wso;"
|
|
|
|
// rules for anusvara
// "A } B > X;" - B is trailing context: it must follow A but is not replaced.
// "|" in the output marks where the cursor is left, so "$na|$virama" lets the
// following rules see the emitted $virama.
// The first three rules keep "n" as a real consonant before r\u0325, l\u0325
// and "na"; they are listed before the more general n}[...] rules below, which
// would otherwise turn that "n" into anusvara.
|
|
"n}r\u0325 > $na|$virama;"
|
|
"n}l\u0325 > $na|$virama;"
|
|
"n}na > $na|$virama;"
|
|
// A nasal followed by one of the listed letters becomes anusvara.
// \u0307 = COMBINING DOT ABOVE, \u0304 = COMBINING MACRON,
// \u0303 = COMBINING TILDE, \u0323 = COMBINING DOT BELOW.
"n\u0307}[kg] > $anusvara;"
|
|
"n\u0307}n\u0307 > $anusvara;"
|
|
"n\u0304}[cj] > $anusvara;"
|
|
"n\u0304}n\u0303 > $anusvara;"
|
|
"n\u0323}[tdn]\u0323 > $anusvara;"
|
|
"n}[tdn] > $anusvara;"
|
|
"m}[pbm] > $anusvara;"
|
|
"n}[ylvshr] > $anusvara;"
|
|
// m + dot above is anusvara regardless of what follows
"m\u0307 > $anusvara;"
|
|
|
|
// Urdu compatibility consonants.
// Each rule emits the consonant followed by $virama and leaves the cursor
// ("|") between the two, so the next match starts at the $virama.
// \u0331 = COMBINING MACRON BELOW, \u0307 = COMBINING DOT ABOVE.
|
|
"q>$uka|$virama;"
|
|
"k\u0331h\u0331>$ukha |$virama;"
|
|
"g\u0307> $ugha | $virama;"
|
|
"z > $ujha |$virama;"
|
|
"f > $ufa|$virama;"
|
|
|
|
// dev
// Consonants: every rule emits <consonant>|$virama, i.e. the consonant plus a
// virama with the cursor left in front of the virama. Letters carrying a
// diacritic and aspirated digraphs/trigraphs ("t\u0323h", "kh", "tth", ...)
// are listed before the plain letter they start with.
// \u0307 = DOT ABOVE, \u0331 = MACRON BELOW, \u0303 = TILDE,
// \u0323 = DOT BELOW, \u0301 = ACUTE ACCENT (all combining marks).
|
|
"y\u0307>$uya|$virama;"
|
|
"l\u0331>$ela|$virama;"
|
|
"n\u0331>$ena|$virama;"
|
|
"n\u0307>$nga|$virama;"
|
|
"n\u0303>$nya|$virama;"
|
|
"n\u0323>$nna|$virama;"
|
|
"t\u0323h>$ttha|$virama;"
|
|
"t\u0323>$tta|$virama;"
|
|
"r\u0323h>$udha|$virama;"
|
|
"r\u0323>$uddha|$virama;"
|
|
"d\u0323h>$ddha|$virama;"
|
|
"d\u0323>$dda|$virama;"
|
|
"kh>$kha|$virama;"
|
|
"k>$ka|$virama;"
|
|
"gh>$gha|$virama;"
|
|
"g>$ga|$virama;"
|
|
"ch>$cha|$virama;"
|
|
"c>$ca|$virama;"
|
|
"jh>$jha|$virama;"
|
|
"j>$ja|$virama;"
|
|
"ny>$nya|$virama;"
|
|
"tth>$ttha|$virama;"
|
|
"ddh>$ddha|$virama;"
|
|
"th>$tha|$virama;"
|
|
"t>$ta|$virama;"
|
|
"dh>$dha|$virama;"
|
|
"d>$da|$virama;"
|
|
"n>$na|$virama;"
|
|
"ph>$pha|$virama;"
|
|
"p>$pa|$virama;"
|
|
"bh>$bha|$virama;"
|
|
"b>$ba|$virama;"
|
|
"m>$ma|$virama;"
|
|
"y>$ya|$virama;"
|
|
"r\u0331>$rra|$virama;"
|
|
"r>$ra|$virama;"
|
|
"l\u0323>$lla|$virama;"
|
|
"l>$la|$virama;"
|
|
"v>$va|$virama;"
|
|
// w maps to the same consonant as v
"w>$va|$virama;"
|
|
"sh>$sha|$virama;"
|
|
"ss>$ssa|$virama;"
|
|
"s\u0323>$ssa|$virama;"
|
|
"s\u0301>$sha|$virama;"
|
|
"s>$sa|$virama;"
|
|
"h>$ha|$virama;"
|
|
// punctuation: a full stop becomes danda
"'.'>$danda;"
|
|
// NOTE(review): this rule has no "{", so $danda is part of the matched text
// and must sit at the match position. After the rule above the cursor is
// already past the emitted $danda - confirm that ".." really yields
// $doubleDanda rather than two $danda.
"$danda'.'>$doubleDanda;"
|
|
// "~" after a dependent vowel: anusvara after members of $depVowelAbove,
// chandrabindu after members of $depVowelBelow.
"$depVowelAbove{'~'>$anusvara;"
|
|
"$depVowelBelow{'~'>$chandrabindu;"
|
|
// convert to dependent forms after consonant with no vowel:
// The consonant rules leave the cursor in front of the $virama they emit, so
// a following Latin vowel is seen here as "$virama <vowel>". The whole match
// (virama included) is replaced by the dependent vowel.
|
|
// e.g. kai -> {ka}{virama}ai -> {ka}{ai}
|
|
//$virama aa>$aa;
|
|
"$virama a\u0304>$aa;"
|
|
"$virama ai>$ai;"
|
|
"$virama au>$au;"
|
|
"$virama ii>$ii;"
|
|
"$virama i\u0304>$ii;"
|
|
"$virama i>$i;"
|
|
//$virama uu>$uu;
|
|
"$virama u\u0304>$uu;"
|
|
"$virama u>$u;"
|
|
//$virama rrh>$rrh;
|
|
"$virama r\u0325\u0304>$rrh;"
|
|
//$virama rh>$rh;
|
|
"$virama r\u0325a>$rh;"
|
|
"$virama r\u0325>$rh;"
|
|
"$virama l\u0325\u0304>$llh;"
|
|
"$virama lh>$lh;"
|
|
"$virama l\u0325>$lh;"
|
|
"$virama e\u0304>$e;"
|
|
"$virama o\u0304>$o;"
|
|
// plain "a": virama and "a" are both removed and nothing is emitted
"$virama a>;"
|
|
"$virama e\u0306>$ce;"
|
|
"$virama o\u0306>$co;"
|
|
"$virama e>$se;"
|
|
"$virama o>$so;"
|
|
|
|
|
|
// otherwise convert to independent forms when separated by ':
// k'ai -> {ka}{virama}'ai -> {ka}{wai}
// ('' is an escaped literal apostrophe. The $virama and the apostrophe are
// part of the matched text, so both are replaced along with the vowel.)
|
|
//$virama''aa>$waa;
|
|
"$virama''a\u0304>$waa;"
|
|
"$virama''ai>$wai;"
|
|
"$virama''au>$wau;"
|
|
//$virama''ii>$wii;
|
|
"$virama''i\u0304>$wii;"
|
|
"$virama''i>$wi;"
|
|
//$virama''uu>$wuu;
|
|
"$virama''u\u0304>$wuu;"
|
|
"$virama''u>$wu;"
|
|
//$virama''rrh>$wrr;
|
|
"$virama''r\u0325\u0304>$wrr;"
|
|
//$virama''rh>$wr;
|
|
"$virama''r\u0325>$wr;"
|
|
"$virama''l\u0325\u0304>$wll;"
|
|
//$virama''lh>$wl;
|
|
"$virama''l\u0325>$wl;"
|
|
"$virama''e\u0304>$we;"
|
|
"$virama''o\u0304>$wo;"
|
|
"$virama''a>$wa;"
|
|
"$virama''e\u0306>$wce;"
|
|
"$virama''o\u0306>$wco;"
|
|
"$virama''e>$wse;"
|
|
"$virama''o>$wso;"
|
|
// no virama: apostrophe + vowel with no $virama in front of it.
// The apostrophe is consumed and the vowel becomes its stand-alone form.
|
|
"''a\u0304>$waa;"
|
|
"''ai>$wai;"
|
|
"''au>$wau;"
|
|
"''i\u0304>$wii;"
|
|
"''i>$wi;"
|
|
"''u\u0304>$wuu;"
|
|
"''u>$wu;"
|
|
"''r\u0325\u0304>$wrr;"
|
|
"''r\u0325>$wr;"
|
|
"''l\u0325\u0304>$wll;"
|
|
"''l\u0325>$wl;"
|
|
"''e\u0304>$we;"
|
|
"''o\u0304>$wo;"
|
|
"''a>$wa;"
|
|
"''e\u0306>$wce;"
|
|
"''o\u0306>$wco;"
|
|
"''e>$wse;"
|
|
"''o>$wso;"
|
|
|
|
// Leftover $virama: re-emitted unchanged when the next character is a Latin
// consonant letter ($z) or a space; removed when the next character is a
// danda or double danda ($endThing). The text after "}" is context only.
"$virama } [$z] > $virama;"
|
|
"$virama } ' ' > $virama ;"
|
|
"$virama}$endThing>;"
|
|
// ASCII digits -> InterIndic digits
"0>$zero;"
|
|
"1>$one;"
|
|
"2>$two;"
|
|
"3>$three;"
|
|
"4>$four;"
|
|
"5>$five;"
|
|
"6>$six;"
|
|
"7>$seven;"
|
|
"8>$eight;"
|
|
"9>$nine;"
|
|
// any apostrophe not consumed by an earlier rule is deleted
"''>;"
|
|
//:: NFC (NFD) ;
|
|
}
|
|
}
|