8daf32a9b5
X-SVN-Rev: 6071
414 lines
9.8 KiB
Plaintext
414 lines
9.8 KiB
Plaintext
//--------------------------------------------------------------------
|
|
// Copyright (c) 2001-2004, International Business Machines
|
|
// Corporation and others. All Rights Reserved.
|
|
//--------------------------------------------------------------------
|
|
|
|
// InterIndic-Latin
|
|
|
|
InterIndic_Latin{
|
|
Rule{
|
|
// ":: NFD (NFC) ;"
|
|
//\ue000 reserved
|
|
//variable definitions: signs, independent vowels, consonants
|
|
"$chandrabindu=\ue001;"
|
|
"$anusvara=\ue002;"
|
|
"$visarga=\ue003;"
|
|
//\ue004 reserved
|
|
// w<vowel> represents the stand-alone form
|
|
"$wa=\ue005;"
|
|
"$waa=\ue006;"
|
|
"$wi=\ue007;"
|
|
"$wii=\ue008;"
|
|
"$wu=\ue009;"
|
|
"$wuu=\ue00a;"
|
|
"$wr=\ue00b;"
|
|
"$wl=\ue00c;"
|
|
|
|
"$wce=\ue00d;" // LETTER CANDRA E
|
|
"$wse=\ue00e;" // LETTER SHORT E
|
|
|
|
"$we=\ue00f;" // \u090f LETTER E
|
|
"$wai=\ue010;"
|
|
|
|
"$wco=\ue011;" // LETTER CANDRA O
|
|
"$wso=\ue012;" // LETTER SHORT O
|
|
|
|
"$wo=\ue013;" // \u0913 LETTER O
|
|
"$wau=\ue014;"
|
|
|
|
"$ka=\ue015;"
|
|
"$kha=\ue016;"
|
|
"$ga=\ue017;"
|
|
"$gha=\ue018;"
|
|
"$nga=\ue019;"
|
|
|
|
"$ca=\ue01a;"
|
|
"$cha=\ue01b;"
|
|
"$ja=\ue01c;"
|
|
"$jha=\ue01d;"
|
|
"$nya=\ue01e;"
|
|
|
|
"$tta=\ue01f;"
|
|
"$ttha=\ue020;"
|
|
"$dda=\ue021;"
|
|
"$ddha=\ue022;"
|
|
"$nna=\ue023;"
|
|
|
|
"$ta=\ue024;"
|
|
"$tha=\ue025;"
|
|
"$da=\ue026;"
|
|
"$dha=\ue027;"
|
|
"$na=\ue028;"
|
|
"$ena=\ue029;" //compatibility
|
|
|
|
"$pa=\ue02a;"
|
|
"$pha=\ue02b;"
|
|
"$ba=\ue02c;"
|
|
"$bha=\ue02d;"
|
|
"$ma=\ue02e;"
|
|
|
|
"$ya=\ue02f;"
|
|
"$ra=\ue030;"
|
|
"$rra=\ue031;"
|
|
"$la=\ue032;"
|
|
"$lla=\ue033;"
|
|
"$ela=\ue034;" //compatibility
|
|
"$va=\ue035;"
|
|
|
|
"$sha=\ue036;"
|
|
"$ssa=\ue037;"
|
|
"$sa=\ue038;"
|
|
"$ha=\ue039;"
|
|
//\u093a Reserved
|
|
//\u093b Reserved
|
|
"$nukta=\ue03c;"
|
|
"$avagraha=\ue03d;" // SIGN AVAGRAHA
|
|
|
|
// <vowel> represents the dependent form
|
|
"$aa=\ue03e;"
|
|
"$i=\ue03f;"
|
|
"$ii=\ue040;"
|
|
"$u=\ue041;"
|
|
"$uu=\ue042;"
|
|
"$rh=\ue043;"
|
|
"$lh=\ue044;"
|
|
"$ce=\ue045;" //VOWEL SIGN CANDRA E
|
|
"$se=\ue046;" //VOWEL SIGN SHORT E
|
|
"$e=\ue047;"
|
|
"$ai=\ue048;"
|
|
"$co=\ue049;" // VOWEL SIGN CANDRA O
|
|
"$so=\ue04a;" // VOWEL SIGN SHORT O
|
|
"$o=\ue04b;" // \u094b
|
|
"$au=\ue04c;"
|
|
"$virama=\ue04d;"
|
|
// \u094e Reserved
|
|
// \u094f Reserved
|
|
//"\u0950>\ue050;" // OM
|
|
// \u0951>; // UNMAPPED STRESS SIGN UDATTA
|
|
// \u0952>; // UNMAPPED STRESS SIGN ANUDATTA
|
|
// \u0953>; // UNMAPPED GRAVE ACCENT
|
|
// \u0954>; // UNMAPPED ACUTE ACCENT
|
|
|
|
"$lm = \ue055;"// Telugu Length Mark
|
|
"$ailm=\ue056;"// AI Length Mark
|
|
"$aulm=\ue057;"// AU Length Mark
|
|
|
|
//urdu compatibility forms
|
|
"$uka=\ue058;"
|
|
"$ukha=\ue059;"
|
|
"$ugha=\ue05a;"
|
|
"$ujha=\ue05b;"
|
|
"$uddha=\ue05c;"
|
|
"$udha=\ue05d;"
|
|
"$ufa=\ue05e;"
|
|
"$uya=\ue05f;"
|
|
|
|
"$wrr=\ue060;"
|
|
"$wll=\ue061;"
|
|
"$rrh=\ue062;"
|
|
"$llh=\ue063;"
|
|
|
|
"$danda=\ue064;"
|
|
"$doubleDanda=\ue065;"
|
|
|
|
"$zero=\ue066;" // DIGIT ZERO
|
|
"$one=\ue067;" // DIGIT ONE
|
|
"$two=\ue068;" // DIGIT TWO
|
|
"$three=\ue069;" // DIGIT THREE
|
|
"$four=\ue06a;" // DIGIT FOUR
|
|
"$five=\ue06b;" // DIGIT FIVE
|
|
"$six=\ue06c;" // DIGIT SIX
|
|
"$seven=\ue06d;" // DIGIT SEVEN
|
|
"$eight=\ue06e;" // DIGIT EIGHT
|
|
"$nine=\ue06f;" // DIGIT NINE
|
|
|
|
// For all other scripts
|
|
"$ecp0=\ue070;"
|
|
"$ecp1=\ue071;"
|
|
"$ecp2=\ue072;"
|
|
"$ecp3=\ue073;"
|
|
"$ecp4=\ue074;"
|
|
"$ecp5=\ue075;"
|
|
"$ecp6=\ue076;"
|
|
"$ecp7=\ue077;"
|
|
"$ecp8=\ue078;"
|
|
"$ecp9=\ue079;"
|
|
"$ecpA=\ue07a;"
|
|
"$ecpB=\ue07b;"
|
|
"$ecpC=\ue07c;"
|
|
"$ecpD=\ue07d;"
|
|
"$ecpE=\ue07e;"
|
|
"$ecpF=\ue07f;"
|
|
|
|
|
|
// \u0970>; // UNMAPPED ABBREVIATION SIGN
|
|
|
|
// Helper sets. None of the three is referenced by any rule visible in this file.
// Dependent vowel signs $aa, $i, $ii and $ce through $au.
"$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
|
|
// Dependent vowel signs $u, $uu, $rh and $lh.
"$depVowelBelow=[\ue041-\ue044];"
|
|
// NOTE(review): the doubled backslash escape in front of "u0000" looks like an
// escaping artifact, and the first range ends on a lone surrogate - confirm intent.
"$endThing=[$danda$doubleDanda \u005c\u005cu0000-\udfff\ue080-\ufffd];"
|
|
|
|
// $x was originally called '&'; $z was '%'
|
|
// $x: the virama or any dependent vowel sign. A consonant followed by a member
// of $x is written without its inherent 'a'. $llh (vocalic LL sign) is listed
// alongside $rrh, $rh and $lh so that it suppresses the inherent vowel as well.
"$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$llh$lh$e$o$se$ce$so$co];"
|
|
"$z=[bcdfghjklmnpqrstvwxyz];"
|
|
|
|
|
|
//#####################################################################
|
|
// convert from Native letters to Latin letters
|
|
//#####################################################################
|
|
|
|
//transliterations for anusvara
// Each rule keys on $anusvara alone; the bracketed set after '}' is
// after-context, so the following consonant is left for its own rule.
|
|
// before ka, kha, ga, gha, nga: n + U+0307 (combining dot above)
"$anusvara} [$ka$kha$ga$gha$nga] > n\u0307;"
|
|
// before ca, cha, ja, jha, nya: n + U+0304 (combining macron)
// NOTE(review): $nya itself is written n + U+0303 in this file, whereas this
// class uses U+0304 - confirm the difference is intended.
"$anusvara} [$ca$cha$ja$jha$nya] > n\u0304;"
|
|
// before tta, ttha, dda, ddha, nna: n + U+0323 (combining dot below)
"$anusvara} [$tta$ttha$dda$ddha$nna] > n\u0323;"
|
|
// before ta, tha, da, dha, na: plain n
"$anusvara} [$ta$tha$da$dha$na] > n ;"
|
|
// before pa, pha, ba, bha, ma: plain m
"$anusvara} [$pa$pha$ba$bha$ma] > m ;"
|
|
// before ya, ra, lla, la, va, ssa, sha, sa, ha: plain n
"$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] > n ;"
|
|
// anywhere else: a literal hyphen followed by m + U+0307
"$anusvara>'-'m\u0307;"
|
|
|
|
// normal consonants
|
|
|
|
"$cha}$x>ch;"
|
|
"$cha>cha;"
|
|
"$ca$virama}$ha>c'';"
|
|
"$ca}$x>c;"
|
|
"$ca>ca;"
|
|
"$jha}$x>jh;"
|
|
"$jha>jha;"
|
|
"$ja$virama}$ha>j'';"
|
|
"$ja}$x>j;"
|
|
"$ja>ja;"
|
|
//"$nya}$x>ny;"
|
|
//"$nya>nya;"
|
|
"$nya }$x>n\u0303 ;"
|
|
"$nya > n\u0303a ;"
|
|
|
|
"$ttha}$x>t\u0323h;"
|
|
"$tta$virama}$ha>t\u0323'';"
|
|
"$tta}$x>t\u0323;"
|
|
"$ddha}$x>d\u0323h;"
|
|
// $dda + $virama before $ha: the apostrophe keeps the output from being read as
// the aspirate (d + U+0323 + h). The virama is part of the key, as in the
// corresponding $tta, $da and $ta rules. Keying on "$dda}$x$ha" instead would
// also fire when a vowel sign stands between $dda and $ha, inserting an
// apostrophe where no ambiguity exists.
"$dda$virama}$ha>d\u0323'';"
|
|
"$dda}$x>d\u0323;"
|
|
"$dha}$x>dh;"
|
|
"$da$virama}$ha>d'';"
|
|
"$da$virama}$ddha>d'';"
|
|
"$da$virama}$dda>d'';"
|
|
"$da$virama}$dha>d'';"
|
|
//"$da$virama}$da>dda;"
|
|
"$da}$x>d;"
|
|
"$tha}$x>th;"
|
|
"$ta$virama}$ha>t'';"
|
|
"$ta$virama}$ttha>t'';"
|
|
"$ta$virama}$tta>t'';"
|
|
"$ta$virama}$tha>t'';"
|
|
"$tta>t\u0323a;"
|
|
"$ttha>t\u0323ha;"
|
|
//"$ta$virama}$ta>tta;"
|
|
"$ta}$x>t;"
|
|
"$tha>tha;"
|
|
"$ta>ta;"
|
|
"$dda>d\u0323a;"
|
|
"$dha>dha;"
|
|
"$ddha>d\u0323ha;"
|
|
"$da>da;"
|
|
"$nna}$x>n\u0323 ;"
|
|
"$nna>n\u0323a ;"
|
|
"$na$virama}$ga>n'';"
|
|
"$na$virama}$ya>n'';"
|
|
"$na}$x>n;"
|
|
"$na>na;"
|
|
|
|
"$kha}$x>kh;"
|
|
"$kha>kha;"
|
|
"$ka$virama}$ha>k'';"
|
|
"$ka}$x>k;"
|
|
"$ka>ka;"
|
|
"$gha}$x>gh;"
|
|
"$gha>gha;"
|
|
"$ga$virama}$ha>g'';"
|
|
"$ga}$x>g;"
|
|
"$ga>ga;"
|
|
//"ng<$nga}$x;"
|
|
//"nga<$nga;"
|
|
"$nga}$x>n\u0307;"
|
|
"$nga>n\u0307a ;"
|
|
|
|
"$pha}$x>ph;"
|
|
"$pha>pha;"
|
|
"$pa$virama}$ha>p'';"
|
|
"$pa}$x>p;"
|
|
"$pa>pa;"
|
|
"$bha}$x>bh;"
|
|
"$bha>bha;"
|
|
"$ba$virama}$ha>b'';"
|
|
"$ba}$x>b;"
|
|
"$ba>ba;"
|
|
"$ma$virama}$ma>m'';"
|
|
//"$ma$virama}$anusvara>m'';"
|
|
"$ma}$x>m;"
|
|
"$ma>ma;"
|
|
|
|
"$ya}$x>y;"
|
|
"$ya>ya;"
|
|
"$ra$virama}$ha>r'';"
|
|
"$ra}$x>r;"
|
|
"$ra>ra;"
|
|
"$la$virama}$ha>l'';"
|
|
"$la}$x>l;"
|
|
"$la>la;"
|
|
"$lla$virama}$ha>l\u0323'';"
|
|
"$lla}$x>l\u0323;"
|
|
"$lla>l\u0323a;"
|
|
"$va}$x>v;"
|
|
"$va>va;"
|
|
"$sha}$x>s\u0301;"
|
|
"$ssa}$x>s\u0323;"
|
|
"$sa$virama}$ha>s'';"
|
|
"$sa$virama}$sha>s'';"
|
|
"$sa$virama}$ssa>s'';"
|
|
"$sa$virama}$sa>s'';"
|
|
"$sa}$x>s;"
|
|
"$sha>s\u0301a;"
|
|
"$ssa>s\u0323a;"
|
|
"$sa>sa;"
|
|
"$ha}$x>h;"
|
|
"$ha>ha;"
|
|
|
|
// Urdu compatibility
|
|
"$uya}$x > y\u0307 ;"
|
|
"$uya > y\u0307a ;"
|
|
"$ela}$x > l\u0331 ;"
|
|
"$ela > l\u0331a ;"
|
|
"$ena}$x > n\u0331 ;"
|
|
"$ena > n\u0331a ;"
|
|
|
|
"$uka}$x > q ;"
|
|
"$uka > qa ;"
|
|
"$ukha}$x > k\u0323 ;"
|
|
"$ukha > k\u0323a ;"
|
|
"$ugha}$x > g\u0307 ;"
|
|
"$ugha > g\u0307a ;"
|
|
"$ujha}$x > z ;"
|
|
"$ujha > za ;"
|
|
"$udha}$x > r\u0323h ;"
|
|
"$udha > r\u0323ha;"
|
|
"$uddha}$x> r\u0323 ;"
|
|
"$uddha > r\u0323a ;"
|
|
"$ufa}$x > f\u0323 ;"
|
|
"$ufa > f\u0323a ;"
|
|
|
|
// dependent vowels (should never occur except following consonants)
|
|
|
|
"$aa > a\u0304 ;"
|
|
"$ai > ai ;"
|
|
"$au > au ;"
|
|
"$ii > i\u0304 ;"
|
|
"$i > i ;"
|
|
"$uu > u\u0304 ;"
|
|
"$u > u ;"
|
|
"$rrh > r\u0325\u0304 ;"
|
|
"$rh > r\u0325 ;"
|
|
"$llh > l\u0325\u0304 ;"
|
|
"$lh > l\u0325 ;"
|
|
"$e > e\u0304 ;"
|
|
"$o > o\u0304 ;"
|
|
//extra vowels
|
|
"$ce > e\u0306 ;"
|
|
"$co > o\u0306 ;"
|
|
"$se > e ;"
|
|
"$so > o ;"
|
|
|
|
// independent vowels (when following consonants)
// The text before '{' is before-context: it must match the Latin letter that
// is already to the left of the cursor and is not replaced. Only the
// independent vowel is the key. An apostrophe is emitted in front of the vowel
// so that, for example, "a" followed by independent "i" is not read as the
// diphthong "ai" produced by $ai, and a bare consonant followed by an
// independent vowel is not read as consonant + dependent vowel sign.
|
|
|
|
"a{$waa > ''a\u0304 ;"
|
|
"$z{$waa > ''a\u0304 ;"
|
|
"a{$wai > ''ai ;"
|
|
"$z{$wai > ''ai ;"
|
|
"a{$wau > ''au ;"
|
|
"$z{$wau > ''au ;"
|
|
"a{$wii > ''i\u0304 ;"
|
|
"$z{$wii > ''i\u0304 ;"
|
|
"a{$wi > ''i ;"
|
|
"$z{$wi > ''i ;"
|
|
"a{$wuu > ''u\u0304 ;"
|
|
"$z{$wuu > ''u\u0304 ;"
|
|
"a{$wu > ''u ;"
|
|
"$z{$wu > ''u ;"
|
|
"$z{$wrr > ''r\u0325\u0304 ;"
|
|
"$z{$wr > ''r\u0325 ;"
|
|
"$z{$wll > ''l\u0325\u0304 ;"
|
|
"$z{$wl > ''l\u0325 ;"
|
|
"$z{$we > ''e\u0304 ;"
|
|
"$z{$wo > ''o\u0304 ;"
|
|
"a{$wa > ''a ;"
|
|
"$z{$wa > ''a ;"
|
|
//extra vowels
|
|
"$z{$wce > ''e\u0306 ;"
|
|
"$z{$wco > ''o\u0306 ;"
|
|
"$z{$wse > ''e ;"
|
|
"$z{$wso > ''o ;"
|
|
|
|
// independent vowels (otherwise)
|
|
"$waa > a\u0304 ;"
|
|
"$wai > ai ;"
|
|
"$wau > au ;"
|
|
"$wii > i\u0304 ;"
|
|
"$wi > i ;"
|
|
"$wuu > u\u0304 ;"
|
|
"$wu > u ;"
|
|
"$wrr > r\u0325\u0304 ;"
|
|
"$wr > r\u0325 ;"
|
|
"$wll > l\u0325\u0304 ;"
|
|
"$wl > l\u0325 ;"
|
|
"$we > e\u0304 ;"
|
|
"$wo > o\u0304 ;"
|
|
"$wa > a ;"
|
|
//extra vowels
|
|
"$wce > e\u0306 ;"
|
|
"$wco > o\u0306 ;"
|
|
"$wse > e ;"
|
|
"$wso > o ;"
|
|
|
|
//signs: avagraha, chandrabindu, visarga
|
|
"$avagraha > \u0315;"
|
|
"$chandrabindu$anusvara>'-'\u0303;"
|
|
"$chandrabindu > '-'m\u0310;"
|
|
"$visarga>'-'h\u0323;"
|
|
|
|
|
|
//numbers
|
|
"$zero > 0;"
|
|
"$one > 1;"
|
|
"$two > 2;"
|
|
"$three > 3;"
|
|
"$four > 4;"
|
|
"$five > 5;"
|
|
"$six > 6;"
|
|
"$seven > 7;"
|
|
"$eight > 8;"
|
|
"$nine > 9;"
|
|
|
|
// blow away any remaining viramas
|
|
"$virama>;"
|
|
// ":: NFC;"
|
|
}
|
|
} |