scuffed-code/icu4c/data/InterIndic_Latin.txt
Ram Viswanadha 8daf32a9b5 ICU-1255 Fix NFC and NFD for rules.
X-SVN-Rev: 6071
2001-10-05 02:53:45 +00:00

414 lines
9.8 KiB
Plaintext

//--------------------------------------------------------------------
// Copyright (c) 2001-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// InterIndic-Latin
InterIndic_Latin{
Rule{
// ":: NFD (NFC) ;"
// Variable definitions. Each $name below is bound to a single code point in
// the range \ue001-\ue07f; the conversion rules later in this file refer to
// these characters only through the variable names.
// \ue000 reserved
// signs
"$chandrabindu=\ue001;"
"$anusvara=\ue002;"
"$visarga=\ue003;"
// \ue004 reserved
// w<vowel> represents the stand-alone form
"$wa=\ue005;"
"$waa=\ue006;"
"$wi=\ue007;"
"$wii=\ue008;"
"$wu=\ue009;"
"$wuu=\ue00a;"
"$wr=\ue00b;"
"$wl=\ue00c;"
"$wce=\ue00d;" // LETTER CANDRA E
"$wse=\ue00e;" // LETTER SHORT E
"$we=\ue00f;" // \u090f LETTER E
"$wai=\ue010;"
"$wco=\ue011;" // LETTER CANDRA O
"$wso=\ue012;" // LETTER SHORT O
"$wo=\ue013;" // \u0913 LETTER O
"$wau=\ue014;"
// consonants
"$ka=\ue015;"
"$kha=\ue016;"
"$ga=\ue017;"
"$gha=\ue018;"
"$nga=\ue019;"
"$ca=\ue01a;"
"$cha=\ue01b;"
"$ja=\ue01c;"
"$jha=\ue01d;"
"$nya=\ue01e;"
"$tta=\ue01f;"
"$ttha=\ue020;"
"$dda=\ue021;"
"$ddha=\ue022;"
"$nna=\ue023;"
"$ta=\ue024;"
"$tha=\ue025;"
"$da=\ue026;"
"$dha=\ue027;"
"$na=\ue028;"
"$ena=\ue029;" //compatibility
"$pa=\ue02a;"
"$pha=\ue02b;"
"$ba=\ue02c;"
"$bha=\ue02d;"
"$ma=\ue02e;"
"$ya=\ue02f;"
"$ra=\ue030;"
"$rra=\ue031;"
"$la=\ue032;"
"$lla=\ue033;"
"$ela=\ue034;" //compatibility
"$va=\ue035;"
"$sha=\ue036;"
"$ssa=\ue037;"
"$sa=\ue038;"
"$ha=\ue039;"
// \ue03a, \ue03b: no variable assigned (original comments: \u093a Reserved, \u093b Reserved)
"$nukta=\ue03c;"
"$avagraha=\ue03d;" // SIGN AVAGRAHA
// <vowel> represents the dependent form
"$aa=\ue03e;"
"$i=\ue03f;"
"$ii=\ue040;"
"$u=\ue041;"
"$uu=\ue042;"
"$rh=\ue043;"
"$lh=\ue044;"
"$ce=\ue045;" //VOWEL SIGN CANDRA E
"$se=\ue046;" //VOWEL SIGN SHORT E
"$e=\ue047;"
"$ai=\ue048;"
"$co=\ue049;" // VOWEL SIGN CANDRA O
"$so=\ue04a;" // VOWEL SIGN SHORT O
"$o=\ue04b;" // \u094b
"$au=\ue04c;"
"$virama=\ue04d;"
// \ue04e, \ue04f: no variable assigned (original comments: \u094e Reserved, \u094f Reserved)
// The commented-out entries below are written with \u09xx code points;
// no variables are defined for \ue050-\ue054.
//"\u0950>\ue050;" // OM
// \u0951>; // UNMAPPED STRESS SIGN UDATTA
// \u0952>; // UNMAPPED STRESS SIGN ANUDATTA
// \u0953>; // UNMAPPED GRAVE ACCENT
// \u0954>; // UNMAPPED ACUTE ACCENT
"$lm = \ue055;"// Telugu Length Mark
"$ailm=\ue056;"// AI Length Mark
"$aulm=\ue057;"// AU Length Mark
// Urdu compatibility forms
"$uka=\ue058;"
"$ukha=\ue059;"
"$ugha=\ue05a;"
"$ujha=\ue05b;"
"$uddha=\ue05c;"
"$udha=\ue05d;"
"$ufa=\ue05e;"
"$uya=\ue05f;"
// additional stand-alone ($w...) and dependent vowel forms
"$wrr=\ue060;"
"$wll=\ue061;"
"$rrh=\ue062;"
"$llh=\ue063;"
// punctuation
"$danda=\ue064;"
"$doubleDanda=\ue065;"
// digits
"$zero=\ue066;" // DIGIT ZERO
"$one=\ue067;" // DIGIT ONE
"$two=\ue068;" // DIGIT TWO
"$three=\ue069;" // DIGIT THREE
"$four=\ue06a;" // DIGIT FOUR
"$five=\ue06b;" // DIGIT FIVE
"$six=\ue06c;" // DIGIT SIX
"$seven=\ue06d;" // DIGIT SEVEN
"$eight=\ue06e;" // DIGIT EIGHT
"$nine=\ue06f;" // DIGIT NINE
// For all other scripts
// (none of the $ecp* variables is referenced by a rule visible in this file)
"$ecp0=\ue070;"
"$ecp1=\ue071;"
"$ecp2=\ue072;"
"$ecp3=\ue073;"
"$ecp4=\ue074;"
"$ecp5=\ue075;"
"$ecp6=\ue076;"
"$ecp7=\ue077;"
"$ecp8=\ue078;"
"$ecp9=\ue079;"
"$ecpA=\ue07a;"
"$ecpB=\ue07b;"
"$ecpC=\ue07c;"
"$ecpD=\ue07d;"
"$ecpE=\ue07e;"
"$ecpF=\ue07f;"
// \u0970>; // UNMAPPED ABBREVIATION SIGN
// Character sets used as rule context.
// $depVowelAbove, $depVowelBelow and $endThing are defined here but are not
// referenced by any rule visible in this file.
"$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
"$depVowelBelow=[\ue041-\ue044];"
// NOTE(review): \u005c is the backslash, so the range start below is spelled
// through an escaped backslash — confirm the set parses as intended.
"$endThing=[$danda$doubleDanda \u005c\u005cu0000-\udfff\ue080-\ufffd];"
// $x was originally called '&'; $z was '%'
// $x: virama plus every dependent vowel sign. A consonant followed by a member
// of $x is emitted without its inherent "a". $llh is included because it is a
// dependent vowel sign with its own conversion rule, like $rh, $rrh and $lh.
"$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$llh$lh$e$o$se$ce$so$co];"
// $z: Latin consonant letters (matched against text already converted).
"$z=[bcdfghjklmnpqrstvwxyz];"
//#####################################################################
// convert from Native letters to Latin letters
//#####################################################################
// Transliterations for anusvara.
// The output depends on the letter that follows the anusvara (the set after
// '}' is trailing context and is not consumed):
//   before $ka..$nga   -> n + \u0307 (dot above)
//   before $ca..$nya   -> n + \u0304 (macron)
//   before $tta..$nna  -> n + \u0323 (dot below)
//   before $ta..$na    -> n
//   before $pa..$ma    -> m
//   before $ya,$ra,$lla,$la,$va,$ssa,$sha,$sa,$ha -> n
// Any other anusvara falls through to the last rule: '-' followed by
// m + \u0307.
"$anusvara} [$ka$kha$ga$gha$nga] > n\u0307;"
"$anusvara} [$ca$cha$ja$jha$nya] > n\u0304;"
"$anusvara} [$tta$ttha$dda$ddha$nna] > n\u0323;"
"$anusvara} [$ta$tha$da$dha$na] > n ;"
"$anusvara} [$pa$pha$ba$bha$ma] > m ;"
"$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] > n ;"
"$anusvara>'-'m\u0307;"
// Normal consonants.
// Three rule shapes are used, listed from most to least specific because an
// earlier rule takes precedence over a later one:
//   C$virama}$ha > c''   consonant + virama before $ha (or before another
//                        listed consonant): emit the Latin consonant followed
//                        by an apostrophe (written '' in rule syntax) so the
//                        result is not read as the aspirated digraph.
//   C}$x > c             consonant before a virama or dependent vowel sign:
//                        emit the bare Latin consonant.
//   C > ca               otherwise: emit the consonant with inherent "a".
"$cha}$x>ch;"
"$cha>cha;"
"$ca$virama}$ha>c'';"
"$ca}$x>c;"
"$ca>ca;"
"$jha}$x>jh;"
"$jha>jha;"
"$ja$virama}$ha>j'';"
"$ja}$x>j;"
"$ja>ja;"
//"$nya}$x>ny;"
//"$nya>nya;"
"$nya }$x>n\u0303 ;"
"$nya > n\u0303a ;"
"$ttha}$x>t\u0323h;"
"$tta$virama}$ha>t\u0323'';"
"$tta}$x>t\u0323;"
"$ddha}$x>d\u0323h;"
// Was "$dda}$x$ha": that form also matched $dda + vowel sign + $ha and
// inserted an apostrophe there. Only the virama case needs the separator,
// matching the $tta/$da/$ta rules.
"$dda$virama}$ha>d\u0323'';"
"$dda}$x>d\u0323;"
"$dha}$x>dh;"
"$da$virama}$ha>d'';"
"$da$virama}$ddha>d'';"
"$da$virama}$dda>d'';"
"$da$virama}$dha>d'';"
//"$da$virama}$da>dda;"
"$da}$x>d;"
"$tha}$x>th;"
"$ta$virama}$ha>t'';"
"$ta$virama}$ttha>t'';"
"$ta$virama}$tta>t'';"
"$ta$virama}$tha>t'';"
"$tta>t\u0323a;"
"$ttha>t\u0323ha;"
//"$ta$virama}$ta>tta;"
"$ta}$x>t;"
"$tha>tha;"
"$ta>ta;"
"$dda>d\u0323a;"
"$dha>dha;"
"$ddha>d\u0323ha;"
"$da>da;"
"$nna}$x>n\u0323 ;"
"$nna>n\u0323a ;"
"$na$virama}$ga>n'';"
"$na$virama}$ya>n'';"
"$na}$x>n;"
"$na>na;"
"$kha}$x>kh;"
"$kha>kha;"
"$ka$virama}$ha>k'';"
"$ka}$x>k;"
"$ka>ka;"
"$gha}$x>gh;"
"$gha>gha;"
"$ga$virama}$ha>g'';"
"$ga}$x>g;"
"$ga>ga;"
//"ng<$nga}$x;"
//"nga<$nga;"
"$nga}$x>n\u0307;"
"$nga>n\u0307a ;"
"$pha}$x>ph;"
"$pha>pha;"
"$pa$virama}$ha>p'';"
"$pa}$x>p;"
"$pa>pa;"
"$bha}$x>bh;"
"$bha>bha;"
"$ba$virama}$ha>b'';"
"$ba}$x>b;"
"$ba>ba;"
"$ma$virama}$ma>m'';"
//"$ma$virama}$anusvara>m'';"
"$ma}$x>m;"
"$ma>ma;"
"$ya}$x>y;"
"$ya>ya;"
"$ra$virama}$ha>r'';"
"$ra}$x>r;"
"$ra>ra;"
"$la$virama}$ha>l'';"
"$la}$x>l;"
"$la>la;"
"$lla$virama}$ha>l\u0323'';"
"$lla}$x>l\u0323;"
"$lla>l\u0323a;"
"$va}$x>v;"
"$va>va;"
"$sha}$x>s\u0301;"
"$ssa}$x>s\u0323;"
"$sa$virama}$ha>s'';"
"$sa$virama}$sha>s'';"
"$sa$virama}$ssa>s'';"
"$sa$virama}$sa>s'';"
"$sa}$x>s;"
"$sha>s\u0301a;"
"$ssa>s\u0323a;"
"$sa>sa;"
"$ha}$x>h;"
"$ha>ha;"
// Urdu compatibility letters (this group also handles $ela and $ena, which
// are marked "compatibility" where they are defined).
// Each letter has a pair of rules: before a member of $x (virama or dependent
// vowel sign) the bare Latin form is emitted; otherwise the same form is
// emitted followed by the inherent "a".
"$uya}$x > y\u0307 ;"
"$uya > y\u0307a ;"
"$ela}$x > l\u0331 ;"
"$ela > l\u0331a ;"
"$ena}$x > n\u0331 ;"
"$ena > n\u0331a ;"
"$uka}$x > q ;"
"$uka > qa ;"
"$ukha}$x > k\u0323 ;"
"$ukha > k\u0323a ;"
"$ugha}$x > g\u0307 ;"
"$ugha > g\u0307a ;"
"$ujha}$x > z ;"
"$ujha > za ;"
"$udha}$x > r\u0323h ;"
"$udha > r\u0323ha;"
"$uddha}$x> r\u0323 ;"
"$uddha > r\u0323a ;"
"$ufa}$x > f\u0323 ;"
"$ufa > f\u0323a ;"
// dependent vowels (should never occur except following consonants)
// Each dependent vowel sign maps to a Latin vowel; the long forms carry
// \u0304 (macron), the vocalic r/l forms carry \u0325 (ring below), and the
// candra forms carry \u0306 (breve). Note that $e and $o are written with a
// macron while $se and $so (short e/o) are written plain.
// NOTE(review): $lh is defined as \ue044 and $rrh as \ue062. If the InterIndic
// offsets mirror the Devanagari block (\u0944 = VOWEL SIGN VOCALIC RR,
// \u0962 = VOWEL SIGN VOCALIC L), the outputs of the $rrh and $lh rules may be
// swapped — confirm against the companion Latin-to-InterIndic rules before
// changing anything.
"$aa > a\u0304 ;"
"$ai > ai ;"
"$au > au ;"
"$ii > i\u0304 ;"
"$i > i ;"
"$uu > u\u0304 ;"
"$u > u ;"
"$rrh > r\u0325\u0304 ;"
"$rh > r\u0325 ;"
"$llh > l\u0325\u0304 ;"
"$lh > l\u0325 ;"
"$e > e\u0304 ;"
"$o > o\u0304 ;"
//extra vowels
"$ce > e\u0306 ;"
"$co > o\u0306 ;"
"$se > e ;"
"$so > o ;"
// independent vowels (when following consonants)
// These rules pair a Latin "a" or a Latin consonant letter ($z) with a
// stand-alone vowel and emit an apostrophe (written '' in rule syntax) in
// front of the vowel's Latin form, so the stand-alone vowel stays
// distinguishable from a dependent vowel.
// NOTE(review): in ICU rule syntax '}' separates the matched text from its
// trailing context, so as written "a" / $z is the matched text and the vowel
// is the context. Given the section title, a preceding-context form
// ("a{$waa", "$z{$waa") may have been intended — confirm against the ICU
// rule syntax of this release before changing.
// NOTE(review): only some vowels have an "a}" variant (none for $wrr, $wr,
// $wll, $wl, $we, $wo or the extra vowels) — confirm whether that is
// deliberate.
"a}$waa > ''a\u0304 ;"
"$z}$waa > ''a\u0304 ;"
"a}$wai > ''ai ;"
"$z}$wai > ''ai ;"
"a}$wau > ''au ;"
"$z}$wau > ''au ;"
"a}$wii > ''i\u0304 ;"
"$z}$wii > ''i\u0304 ;"
"a}$wi > ''i ;"
"$z}$wi > ''i ;"
"a}$wuu > ''u\u0304 ;"
"$z}$wuu > ''u\u0304 ;"
"a}$wu > ''u ;"
"$z}$wu > ''u ;"
"$z}$wrr > ''r\u0325\u0304 ;"
"$z}$wr > ''r\u0325 ;"
"$z}$wll > ''l\u0325\u0304 ;"
"$z}$wl > ''l\u0325 ;"
"$z}$we > ''e\u0304 ;"
"$z}$wo > ''o\u0304 ;"
"a}$wa > ''a ;"
"$z}$wa > ''a ;"
//extra vowels
"$z}$wce > ''e\u0306 ;"
"$z}$wco > ''o\u0306 ;"
"$z}$wse > ''e ;"
"$z}$wso > ''o ;"
// independent vowels (otherwise)
// Fallback for stand-alone vowels not handled by an earlier rule: each maps
// to the same Latin form as the corresponding dependent vowel sign, with no
// apostrophe. $wa, which has no dependent counterpart, maps to plain "a".
"$waa > a\u0304 ;"
"$wai > ai ;"
"$wau > au ;"
"$wii > i\u0304 ;"
"$wi > i ;"
"$wuu > u\u0304 ;"
"$wu > u ;"
"$wrr > r\u0325\u0304 ;"
"$wr > r\u0325 ;"
"$wll > l\u0325\u0304 ;"
"$wl > l\u0325 ;"
"$we > e\u0304 ;"
"$wo > o\u0304 ;"
"$wa > a ;"
//extra vowels
"$wce > e\u0306 ;"
"$wco > o\u0306 ;"
"$wse > e ;"
"$wso > o ;"
// Signs: avagraha, chandrabindu and visarga.
// The two-character sequence chandrabindu + anusvara is listed before the
// lone chandrabindu so that the longer match is tried first. '-' is a quoted
// literal hyphen in the output.
"$avagraha > \u0315;"
"$chandrabindu$anusvara>'-'\u0303;"
"$chandrabindu > '-'m\u0310;"
"$visarga>'-'h\u0323;"
// Digits map to ASCII 0-9.
"$zero > 0;"
"$one > 1;"
"$two > 2;"
"$three > 3;"
"$four > 4;"
"$five > 5;"
"$six > 6;"
"$seven > 7;"
"$eight > 8;"
"$nine > 9;"
// Blow away any remaining viramas: a virama not consumed by an earlier rule
// is replaced with nothing.
"$virama>;"
// ":: NFC;"
}
}