scuffed-code/icu4c/source/data/translit/t_InterIndic_Latn.txt
Alan Liu 2443c39da1 ICU-1575 full rule update
X-SVN-Rev: 7281
2001-12-03 20:51:19 +00:00

540 lines
15 KiB
Plaintext

// -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpicurules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Latin.txt
// Date: Mon Dec 3 11:44:30 2001
//--------------------------------------------------------------------
// InterIndic_Latin
translit_InterIndic_Latin {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 2001-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// InterIndic-Latin
//\ue000 reserved
//signs (the consonants start at $ka)
"$chandrabindu=\ue001;"
"$anusvara=\ue002;"
"$visarga=\ue003;"
//\ue004 reserved
// w<vowel> represents the stand-alone form
"$wa=\ue005;"
"$waa=\ue006;"
"$wi=\ue007;"
"$wii=\ue008;"
"$wu=\ue009;"
"$wuu=\ue00a;"
"$wr=\ue00b;"
"$wl=\ue00c;"
"$wce=\ue00d;" // LETTER CANDRA E
"$wse=\ue00e;" // LETTER SHORT E
"$we=\ue00f;" // \u090f LETTER E
"$wai=\ue010;"
"$wco=\ue011;" // LETTER CANDRA O
"$wso=\ue012;" // LETTER SHORT O
"$wo=\ue013;" // \u0913 LETTER O
"$wau=\ue014;"
"$ka=\ue015;"
"$kha=\ue016;"
"$ga=\ue017;"
"$gha=\ue018;"
"$nga=\ue019;"
"$ca=\ue01a;"
"$cha=\ue01b;"
"$ja=\ue01c;"
"$jha=\ue01d;"
"$nya=\ue01e;"
"$tta=\ue01f;"
"$ttha=\ue020;"
"$dda=\ue021;"
"$ddha=\ue022;"
"$nna=\ue023;"
"$ta=\ue024;"
"$tha=\ue025;"
"$da=\ue026;"
"$dha=\ue027;"
"$na=\ue028;"
"$ena=\ue029;" //compatibility
"$pa=\ue02a;"
"$pha=\ue02b;"
"$ba=\ue02c;"
"$bha=\ue02d;"
"$ma=\ue02e;"
"$ya=\ue02f;"
"$ra=\ue030;"
"$rra=\ue031;"
"$la=\ue032;"
"$lla=\ue033;"
"$ela=\ue034;" //compatibility
"$va=\ue035;"
"$sha=\ue036;"
"$ssa=\ue037;"
"$sa=\ue038;"
"$ha=\ue039;"
//\u093a Reserved
//\u093b Reserved
"$nukta=\ue03c;"
"$avagraha=\ue03d;" // SIGN AVAGRAHA
// <vowel> represents the dependent form
"$aa=\ue03e;"
"$i=\ue03f;"
"$ii=\ue040;"
"$u=\ue041;"
"$uu=\ue042;"
"$rh=\ue043;"
"$lh=\ue044;"
"$ce=\ue045;" //VOWEL SIGN CANDRA E
"$se=\ue046;" //VOWEL SIGN SHORT E
"$e=\ue047;"
"$ai=\ue048;"
"$co=\ue049;" // VOWEL SIGN CANDRA O
"$so=\ue04a;" // VOWEL SIGN SHORT O
"$o=\ue04b;" // \u094b
"$au=\ue04c;"
"$virama=\ue04d;"
// \u094e Reserved
// \u094f Reserved
"$om=\ue050;" // OM
"\ue051>;" // UNMAPPED STRESS SIGN UDATTA
"\ue052>;" // UNMAPPED STRESS SIGN ANUDATTA
"\ue053>;" // UNMAPPED GRAVE ACCENT
"\ue054>;" // UNMAPPED ACUTE ACCENT
"$lm = \ue055;"// Telugu Length Mark
"$ailm=\ue056;"// AI Length Mark
"$aulm=\ue057;"// AU Length Mark
//Urdu compatibility forms
"$uka=\ue058;"
"$ukha=\ue059;"
"$ugha=\ue05a;"
"$ujha=\ue05b;"
"$uddha=\ue05c;"
"$udha=\ue05d;"
"$ufa=\ue05e;"
"$uya=\ue05f;"
"$wrr=\ue060;"
"$wll=\ue061;"
"$rrh=\ue062;"
"$llh=\ue063;"
"$danda=\ue064;"
"$doubleDanda=\ue065;"
"$zero=\ue066;" // DIGIT ZERO
"$one=\ue067;" // DIGIT ONE
"$two=\ue068;" // DIGIT TWO
"$three=\ue069;" // DIGIT THREE
"$four=\ue06a;" // DIGIT FOUR
"$five=\ue06b;" // DIGIT FIVE
"$six=\ue06c;" // DIGIT SIX
"$seven=\ue06d;" // DIGIT SEVEN
"$eight=\ue06e;" // DIGIT EIGHT
"$nine=\ue06f;" // DIGIT NINE
// For all other scripts
"$ecp0=\ue070;"
"$ecp1=\ue071;"
"$ecp2=\ue072;"
"$ecp3=\ue073;"
"$ecp4=\ue074;"
"$ecp5=\ue075;"
"$ecp6=\ue076;"
"$ecp7=\ue077;"
"$ecp8=\ue078;"
"$ecp9=\ue079;"
"$ecpA=\ue07a;"
"$ecpB=\ue07b;"
"$ecpC=\ue07c;"
"$ecpD=\ue07d;"
"$ecpE=\ue07e;"
"$ecpF=\ue07f;"
// \u0970>; # UNMAPPED ABBREVIATION SIGN
// Character sets used as context by the rules that follow.
// $depVowelAbove covers $aa, $i, $ii and $ce through $au; $depVowelBelow covers
// $u, $uu, $rh and $lh. Neither set is referenced by any rule in this file.
"$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
"$depVowelBelow=[\ue041-\ue044];"
// $x was originally called '&'; $z was '%'
// $x is the set of dependent vowel signs. Consonant rules use it as right
// context so that a consonant followed by a vowel sign is written without "a".
"$x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co];"
// $z lists the Latin consonant letters; no rule in this file references it.
"$z=[bcdfghjklmnpqrstvwxyz];"
// $vowels: Latin a, e, i, o, u, r plus the combining macron (U+0304), ring
// below (U+0325) and breve (U+0306). Used as left context for independent vowels.
"$vowels=[aeiour\u0304\u0325\u0306];"
// $forceIndependentMatra: any character that is neither a letter nor in the
// range U+0300-U+034C. Used as left context for stray dependent vowel signs.
"$forceIndependentMatra = [^[[:L:][\u0300-\u034c]]];"
//#####################################################################
// convert from Native letters to Latin letters
//#####################################################################
//transliterations for anusvara
// An anusvara followed by a consonant is written as a nasal chosen by that
// consonant's group. The consonant after '}' is context only: it is not consumed
// here and is left for the consonant rules. An anusvara in any other position
// falls through to the last rule and becomes m + U+0307 (dot above).
// NOTE(review): before the $ca group the output is n + U+0304 (macron), whereas
// the $nya rules emit n + U+0303 (tilde) - confirm the macron is intended.
"$anusvara} [$ka$kha$ga$gha$nga] > n\u0307;"
"$anusvara} [$ca$cha$ja$jha$nya] > n\u0304;"
"$anusvara} [$tta$ttha$dda$ddha$nna] > n\u0323;"
"$anusvara} [$ta$tha$da$dha$na] > n ;"
"$anusvara} [$pa$pha$ba$bha$ma] > m ;"
"$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] > n ;"
"$anusvara> m\u0307;"
// Urdu compatibility
"$ya$nukta}$x > y\u0307 ;"
"$ya$nukta$virama > y\u0307 ;"
"$ya$nukta > y\u0307a ;"
"$la$nukta }$x > l\u0331 ;"
"$la$nukta$virama > l\u0331 ;"
"$la$nukta > l\u0331a ;"
"$na$nukta }$x > n\u0331 ;"
"$na$nukta$virama > n\u0331 ;"
"$na$nukta > n\u0331a ;"
"$ena }$x > n\u0331 ;"
"$ena$virama > n\u0331 ;"
"$ena > n\u0331a ;"
"$ka$nukta }$x > q ;"
"$ka$nukta$virama > q ;"
"$ka$nukta > qa ;"
"$kha$nukta }$x > k\u0331h\u0331 ;"
"$kha$nukta$virama > k\u0331h\u0331 ;"
"$kha$nukta > k\u0331h\u0331a ;"
"$ukha$virama > k\u0331h\u0331;"
"$ukha > k\u0331h\u0331a;"
"$ga$nukta }$x > g\u0307 ;"
"$ga$nukta$virama > g\u0307 ;"
"$ga$nukta > g\u0307a ;"
"$ja$nukta }$x > z ;"
"$ja$nukta$virama > z ;"
"$ja$nukta > za ;"
"$ddha$nukta}$x > r\u0323h ;"
"$ddha$nukta$virama > r\u0323h ;"
"$ddha$nukta > r\u0323ha;"
"$uddha}$x > r\u0323 ;"
"$uddha$virama > r\u0323 ;"
"$uddha > r\u0323a;"
"$dda$nukta}$x > r\u0323 ;"
"$dda$nukta$virama > r\u0323 ;"
"$dda$nukta > r\u0323a ;"
"$pha$nukta }$x > f ;"
"$pha$nukta$virama > f ;"
"$pha$nukta > fa ;"
"$ufa }$x > f ;"
"$ufa$virama > f ;"
"$ufa > fa ;"
"$ra$nukta}$x > r\u0331;"
"$ra$nukta$virama > r\u0331;"
"$ra$nukta > r\u0331a;"
"$lla$nukta}$x > l\u0331;"
"$lla$nukta$virama > l\u0331;"
"$lla$nukta > l\u0331a;"
"$ela}$x > l\u0331;"
"$ela$virama > l\u0331;"
"$ela > l\u0331a;"
"$uya}$x > y\u0307;"
"$uya$virama > y\u0307;"
"$uya > y\u0307a;"
// normal consonants
// Each consonant has up to four rules, tried in this order:
//   C + virama before $ha -> consonant + apostrophe, so that e.g. k + virama + ha
//                            ("k'ha") stays distinct from the aspirate $kha ("kha")
//   C before a vowel sign  -> bare consonant (the vowel sign rule supplies the vowel)
//   C + virama             -> bare consonant
//   C                      -> consonant + inherent "a"
// Aspirates and nasals have no apostrophe rule.
"$ka$virama}$ha>k'';"
"$ka}$x>k;"
"$ka$virama>k;"
"$ka>ka;"
"$kha}$x>kh;"
"$kha$virama>kh;"
"$kha>kha;"
"$ga$virama}$ha>g'';"
"$ga}$x>g;"
"$ga$virama>g;"
"$ga>ga;"
"$gha}$x>gh;"
"$gha$virama>gh;"
"$gha>gha;"
"$nga}$x>n\u0307;"
"$nga$virama>n\u0307;"
"$nga>n\u0307a ;"
"$ca$virama}$ha>c'';"
"$ca}$x>c;"
"$ca$virama>c;"
"$ca>ca;"
"$cha}$x>ch;"
"$cha$virama>ch;"
"$cha>cha;"
"$ja$virama}$ha>j'';"
"$ja}$x>j;"
"$ja$virama>j;"
"$ja>ja;"
"$jha}$x>jh;"
"$jha$virama>jh;"
"$jha>jha;"
"$nya }$x>n\u0303 ;"
"$nya$virama>n\u0303;"
"$nya > n\u0303a ;"
"$tta$virama}$ha>t\u0323'';"
"$tta}$x>t\u0323;"
"$tta$virama>t\u0323;"
"$tta>t\u0323a;"
"$ttha}$x>t\u0323h;"
"$ttha$virama>t\u0323h;"
"$ttha>t\u0323ha;"
// Was "$dda}$x$ha", which put an apostrophe between $dda and a following vowel
// sign and left $dda + virama + $ha indistinguishable from $ddha. Now follows
// the same "C + virama before $ha" shape as the other consonants.
// NOTE(review): confirm the Latin-to-InterIndic rules accept d + U+0323 + apostrophe.
"$dda$virama}$ha>d\u0323'';"
"$dda}$x>d\u0323;"
"$dda$virama>d\u0323;"
"$dda>d\u0323a;"
"$ddha}$x>d\u0323h;"
"$ddha$virama>d\u0323h;"
"$ddha>d\u0323ha;"
"$nna}$x>n\u0323 ;"
"$nna$virama>n\u0323;"
"$nna>n\u0323a ;"
"$ta$virama}$ha>t'';"
"$ta$virama}$ttha>t'';"
"$ta$virama}$tta>t'';"
"$ta$virama}$tha>t'';"
"$ta}$x>t;"
"$ta$virama>t;"
"$ta>ta;"
"$tha}$x>th;"
"$tha$virama>th;"
"$tha>tha;"
"$da$virama}$ha>d'';"
"$da$virama}$ddha>d'';"
"$da$virama}$dda>d'';"
"$da$virama}$dha>d'';"
"$da}$x>d;"
"$da$virama>d;"
"$da>da;"
"$dha}$x>dh;"
"$dha$virama>dh;"
"$dha>dha;"
"$na$virama}$ga>n'';"
"$na$virama}$ya>n'';"
"$na}$x>n;"
"$na$virama>n;"
"$na>na;"
"$pa$virama}$ha>p'';"
"$pa}$x>p;"
"$pa$virama>p;"
"$pa>pa;"
"$pha}$x>ph;"
"$pha$virama>ph;"
"$pha>pha;"
"$ba$virama}$ha>b'';"
"$ba}$x>b;"
"$ba$virama>b;"
"$ba>ba;"
"$bha}$x>bh;"
"$bha$virama>bh;"
"$bha>bha;"
"$ma$virama}$ma>m'';"
"$ma}$x>m;"
"$ma$virama>m;"
"$ma>ma;"
"$ya}$x>y;"
"$ya$virama>y;"
"$ya>ya;"
"$ra$virama}$ha>r'';"
"$ra}$x>r;"
"$ra$virama>r;"
"$ra>ra;"
"$rra$virama}$ha>r\u0331'';"
"$rra}$x>r\u0331;"
"$rra$virama>r\u0331;"
"$rra>r\u0331a;"
"$la$virama}$ha>l'';"
"$la}$x>l;"
"$la$virama>l;"
"$la>la;"
"$lla$virama}$ha>l\u0323'';"
"$lla}$x>l\u0323;"
"$lla$virama>l\u0323;"
"$lla>l\u0323a;"
"$va}$x>v;"
"$va$virama>v;"
"$va>va;"
"$sa$virama}$ha>s'';"
"$sa$virama}$sha>s'';"
"$sa$virama}$ssa>s'';"
"$sa$virama}$sa>s'';"
"$sa}$x>s;"
"$sa$virama>s;"
//for gurmukhi
// $sa + nukta is written s + U+0301, the same Latin output as the $sha rules.
// These rules sit after the plain "$sa}$x" and "$sa$virama" rules, which cannot
// match here because the character after $sa is $nukta (not in $x, not $virama).
// They have to stay ahead of the "$sa>sa" fallback, which would otherwise
// consume the $sa first.
"$sa$nukta}$x>s\u0301;"
"$sa$nukta$virama>s\u0301;"
"$sa$nukta>s\u0301a;"
"$sa>sa;"
"$sha}$x>s\u0301;"
"$sha$virama>s\u0301;"
"$sha>s\u0301a;"
"$ssa}$x>s\u0323;"
"$ssa$virama>s\u0323;"
"$ssa>s\u0323a;"
"$ha}$x>h;"
"$ha$virama>h;"
"$ha>ha;"
// dependent vowels (should never occur except following consonants)
// A vowel sign whose preceding character is in $forceIndependentMatra (not a
// letter and not in U+0300-U+034C) is written with a leading U+0314 before the
// vowel. The text before '{' is context only and is not replaced.
// NOTE(review): U+0314 presumably marks the vowel for the reverse transform - confirm.
"$forceIndependentMatra{$aa > \u0314a\u0304 ;"
"$forceIndependentMatra{$ai > \u0314ai ;"
"$forceIndependentMatra{$au > \u0314au ;"
"$forceIndependentMatra{$ii > \u0314i\u0304 ;"
"$forceIndependentMatra{$i > \u0314i ;"
"$forceIndependentMatra{$uu > \u0314u\u0304 ;"
"$forceIndependentMatra{$u > \u0314u ;"
"$forceIndependentMatra{$rrh > \u0314r\u0325\u0304 ;"
"$forceIndependentMatra{$rh > \u0314r\u0325 ;"
"$forceIndependentMatra{$llh > \u0314l\u0325\u0304 ;"
"$forceIndependentMatra{$lh > \u0314l\u0325 ;"
"$forceIndependentMatra{$e > \u0314e\u0304 ;"
"$forceIndependentMatra{$o > \u0314o\u0304 ;"
//extra vowels
"$forceIndependentMatra{$ce > \u0314e\u0306 ;"
"$forceIndependentMatra{$co > \u0314o\u0306 ;"
"$forceIndependentMatra{$se > \u0314e ;"
"$forceIndependentMatra{$so > \u0314o ;"
// In the same context a nukta or virama has nothing to attach to and is deleted.
"$forceIndependentMatra{$nukta >;" // Nukta cannot appear independently or as first character
"$forceIndependentMatra{$virama >;" // Virama cannot appear independently or as first character
"$aa > a\u0304 ;"
"$ai > ai ;"
"$au > au ;"
"$ii > i\u0304 ;"
"$i > i ;"
"$uu > u\u0304 ;"
"$u > u ;"
"$rrh > r\u0325\u0304 ;"
"$rh > r\u0325 ;"
"$llh > l\u0325\u0304 ;"
"$lh > l\u0325 ;"
"$e > e\u0304 ;"
"$o > o\u0304 ;"
//extra vowels
"$ce > e\u0306 ;"
"$co > o\u0306 ;"
"$se > e ;"
"$so > o ;"
//dependent vowels when following independent vowels. Generally illegal; handled only for round-tripping
"$waa} $x > a\u0304\u0314 ;"
"$wai} $x > ai\u0314 ;"
"$wau} $x > au\u0314 ;"
"$wii} $x > i\u0304\u0314 ;"
"$wi } $x > i\u0314 ;"
"$wuu} $x > u\u0304\u0314 ;"
"$wu } $x > u\u0314 ;"
"$wrr} $x > r\u0325\u0304\u0314 ;"
"$wr } $x > r\u0325\u0314 ;"
"$wll} $x > l\u0325\u0304\u0314 ;"
"$wl } $x > l\u0325\u0314 ;"
"$we } $x > e\u0304\u0314 ;"
"$wo } $x > o\u0304\u0314 ;"
"$wa } $x > a\u0314 ;"
//extra vowels
"$wce} $x > e\u0306\u0314 ;"
"$wco} $x > o\u0306\u0314 ;"
"$wse} $x > e\u0314 ;"
"$wso} $x > o\u0314 ;"
"$om} $x > ''om\u0314 ;"
// independent vowels when preceded by vowels
"$vowels{$waa > ''a\u0304 ;"
"$vowels{$wai > ''ai ;"
"$vowels{$wau > ''au ;"
"$vowels{$wii > ''i\u0304 ;"
"$vowels{$wi > ''i ;"
"$vowels{$wuu > ''u\u0304 ;"
"$vowels{$wu > ''u ;"
"$vowels{$wrr > ''r\u0325\u0304 ;"
"$vowels{$wr > ''r\u0325 ;"
"$vowels{$wll > ''l\u0325\u0304 ;"
"$vowels{$wl > ''l\u0325 ;"
"$vowels{$we > ''e\u0304 ;"
"$vowels{$wo > ''o\u0304 ;"
"$vowels{$wa > ''a ;"
//extra vowels
"$vowels{$wce > ''e\u0306 ;"
"$vowels{$wco > ''o\u0306 ;"
"$vowels{$wse > ''e ;"
"$vowels{$wso > ''o ;"
// independent vowels (otherwise)
"$waa > a\u0304 ;"
"$wai > ai ;"
"$wau > au ;"
"$wii > i\u0304 ;"
"$wi > i ;"
"$wuu > u\u0304 ;"
"$wu > u ;"
"$wrr > r\u0325\u0304 ;"
"$wr > r\u0325 ;"
"$wll > l\u0325\u0304 ;"
"$wl > l\u0325 ;"
"$we > e\u0304 ;"
"$wo > o\u0304 ;"
"$wa > a ;"
//extra vowels
"$wce > e\u0306 ;"
"$wco > o\u0306 ;"
"$wse > e ;"
"$wso > o ;"
"$om > ''om ;"
//signs (avagraha, chandrabindu, visarga)
"$avagraha > \u0315;"
"$chandrabindu$anusvara>\u0303;"
"$chandrabindu > m\u0310;"
"$visarga>h\u0323;"
//numbers
// Each InterIndic digit maps to the matching ASCII digit.
"$zero > 0;"
"$one > 1;"
"$two > 2;"
"$three > 3;"
"$four > 4;"
"$five > 5;"
"$six > 6;"
"$seven > 7;"
"$eight > 8;"
"$nine > 9;"
// The length marks and the $ecp0-$ecpF codes have an empty right-hand side:
// they are removed from the output.
"$lm >;"
"$ailm >;"
"$aulm >;"
"$ecp0 >;"
"$ecp1 >;"
"$ecp2 >;"
"$ecp3 >;"
"$ecp4 >;"
"$ecp5 >;"
"$ecp6 >;"
"$ecp7 >;"
"$ecp8 >;"
"$ecp9 >;"
"$ecpA >;"
"$ecpB >;"
"$ecpC >;"
"$ecpD >;"
"$ecpE >;"
"$ecpF >;"
// Danda and double danda both become a single full stop, so the two cannot be
// told apart in the Latin output.
"$danda>'.';"
"$doubleDanda>'.';"
}
}