scuffed-code/icu4c/data/translit/ldevan.txt
2000-01-13 07:30:26 +00:00

412 lines
11 KiB
Plaintext

//--------------------------------------------------------------------
// Copyright (C) 1999, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date Name Description
// 11/17/99 aliu Creation.
//--------------------------------------------------------------------
// Latin-Devanagari
ldevan {
Rule {
//#####################################################################
// Keyboard Transliteration Table
//#####################################################################
// Conversions should be:
// 1. complete
// * convert every sequence of Latin letters (a to z plus apostrophe)
// to a sequence of Native letters
// * convert every sequence of Native letters to Latin letters
// 2. reversible
// * any string of Native converted to Latin and back should be the same
// * this is not true for English converted to Native & back, e.g.:
// k -> {kaf} -> k
// c -> {kaf} -> k
//#####################################################################
// Sequences of Latin letters may convert to a single Native letter.
// When this is the case, an apostrophe can be used to indicate separate
// letters.
// E.g. sh -> {shin}
// s'h -> {sin}{heh}
// ss -> {sad}
// s's -> {sin}{shadda}
//#####################################################################
// To Do:
// finish adding shadda, add sokoon, fix uppercase
// make two transliteration tables: one with vowels, one without
//#####################################################################
// Modifications
// Devanagari Transliterator: broken up with consonants/vowels
//#####################################################################
// Unicode character name definitions
//#####################################################################
// Name definitions, written name=value; and referenced as {name} by the
// rules that follow. Code points are from the Unicode Devanagari block.
// signs: candrabindu, bindu and visarga
"candrabindu=\u0901;"
"bindu=\u0902;"
"visarga=\u0903;"
// w<vowel> represents the stand-alone form
"wa=\u0905;"
"waa=\u0906;"
"wi=\u0907;"
"wii=\u0908;"
"wu=\u0909;"
"wuu=\u090A;"
"wr=\u090B;"
"wl=\u090C;"
"we=\u090F;"
"wai=\u0910;"
"wo=\u0913;"
"wau=\u0914;"
// consonants
"ka=\u0915;"
"kha=\u0916;"
"ga=\u0917;"
"gha=\u0918;"
"nga=\u0919;"
"ca=\u091A;"
"cha=\u091B;"
"ja=\u091C;"
"jha=\u091D;"
"nya=\u091E;"
"tta=\u091F;"
"ttha=\u0920;"
"dda=\u0921;"
"ddha=\u0922;"
"nna=\u0923;"
"ta=\u0924;"
"tha=\u0925;"
"da=\u0926;"
"dha=\u0927;"
"na=\u0928;"
"pa=\u092A;"
"pha=\u092B;"
"ba=\u092C;"
"bha=\u092D;"
"ma=\u092E;"
"ya=\u092F;"
"ra=\u0930;"
// rra is defined here but no rule in this table refers to it
"rra=\u0931;"
// U+0932 is DEVANAGARI LETTER LA. The value used previously, U+0933, is
// DEVANAGARI LETTER LLA, which left the ordinary LA without any rule.
"la=\u0932;"
"va=\u0935;"
"sha=\u0936;"
"ssa=\u0937;"
"sa=\u0938;"
"ha=\u0939;"
// <vowel> represents the dependent form
"aa=\u093E;"
"i=\u093F;"
"ii=\u0940;"
"u=\u0941;"
"uu=\u0942;"
// rh, rrh and lh are the dependent counterparts of wr, wrr and wl:
// U+0943 = VOWEL SIGN VOCALIC R, U+0944 = VOWEL SIGN VOCALIC RR,
// U+0962 = VOWEL SIGN VOCALIC L. (rrh and lh were previously swapped.)
"rh=\u0943;"
"rrh=\u0944;"
"e=\u0947;"
"ai=\u0948;"
"o=\u094B;"
"au=\u094C;"
"virama=\u094D;"
"wrr=\u0960;"
"lh=\u0962;"
"danda=\u0964;"
"doubleDanda=\u0965;"
// Character sets used as context by the ~ rules (bindu vs. candrabindu)
"depVowelAbove=[\u093E-\u0940\u0945-\u094C];"
"depVowelBelow=[\u0941-\u0944];"
// Ech: Double escape U+0000, so UnicodeString doesn't consider it
// to be the end of the string. This is only necessary for U+0000
// right now. [liu]
// endThing: danda, doubleDanda and every code point outside U+0900-U+097F
"endThing=[{danda}{doubleDanda}\\u0000-\u08FF\u0980-\uFFFF];"
// & : virama plus the dependent vowel signs; the Native-to-Latin consonant
// rules test for it after the consonant
"&=[{virama}{aa}{ai}{au}{ii}{i}{uu}{u}{rrh}{rh}{lh}{e}{o}];"
// % : Latin consonant letters; the Native-to-Latin independent vowel rules
// test for it before the vowel
"%=[bcdfghjklmnpqrstvwxyz];"
//#####################################################################
// convert from Latin letters to Native letters
//#####################################################################
//Hindi>\u092d\u093e\u0930\u0924--\u0020\u0926\u0947\u0936\u0020\u092c\u0928\u094d\u0927\u0941\u002e
// Latin -> Native rules, written latin>native;
// Longer Latin spellings are listed before their own prefixes (aa before a,
// tth before tt before t). NOTE(review): presumably the first matching rule
// wins, so this order must be preserved - confirm against the rule parser.
// special forms with no good conversion
"mm>{bindu};"
"x>{visarga};"
// convert to independent forms at start of word or syllable:
// e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
// Moved up [LIU]
"aa>{waa};"
"ai>{wai};"
"au>{wau};"
"ii>{wii};"
"i>{wi};"
"uu>{wuu};"
"u>{wu};"
"rrh>{wrr};"
"rh>{wr};"
"lh>{wl};"
"e>{we};"
"o>{wo};"
"a>{wa};"
// normal consonants
// Every consonant emits its letter followed by {virama}.
// NOTE(review): the | before {virama} presumably leaves the cursor in front
// of the virama, so that the {virama}... rules further down can combine it
// with whatever Latin text follows (see the kai example below) - confirm.
// q, f, w and z share targets with k, v, v and s, so they come back as
// k, v, v and s in the Native -> Latin direction.
"kh>{kha}|{virama};"
"k>{ka}|{virama};"
"q>{ka}|{virama};"
"gh>{gha}|{virama};"
"g>{ga}|{virama};"
"ng>{nga}|{virama};"
"ch>{cha}|{virama};"
"c>{ca}|{virama};"
"jh>{jha}|{virama};"
"j>{ja}|{virama};"
"ny>{nya}|{virama};"
"tth>{ttha}|{virama};"
"tt>{tta}|{virama};"
"ddh>{ddha}|{virama};"
"dd>{dda}|{virama};"
"nn>{nna}|{virama};"
"th>{tha}|{virama};"
"t>{ta}|{virama};"
"dh>{dha}|{virama};"
"d>{da}|{virama};"
"n>{na}|{virama};"
"ph>{pha}|{virama};"
"p>{pa}|{virama};"
"bh>{bha}|{virama};"
"b>{ba}|{virama};"
"m>{ma}|{virama};"
"y>{ya}|{virama};"
"r>{ra}|{virama};"
"l>{la}|{virama};"
"v>{va}|{virama};"
"f>{va}|{virama};"
"w>{va}|{virama};"
"sh>{sha}|{virama};"
"ss>{ssa}|{virama};"
"s>{sa}|{virama};"
"z>{sa}|{virama};"
"h>{ha}|{virama};"
// punctuation: a full stop becomes danda
".>{danda};"
// NOTE(review): unlike the ~ rules just below, there is no ) between {danda}
// and the full stop, so {danda} looks like part of the replaced text rather
// than preceding context - confirm that two full stops really yield
// doubleDanda.
"{danda}.>{doubleDanda};"
// ~ after a dependent vowel: bindu if the vowel is in depVowelAbove,
// candrabindu if it is in depVowelBelow
"{depVowelAbove})~>{bindu};"
"{depVowelBelow})~>{candrabindu};"
// convert to dependent forms after consonant with no vowel:
// e.g. kai -> {ka}{virama}ai -> {ka}{ai}
"{virama}aa>{aa};"
"{virama}ai>{ai};"
"{virama}au>{au};"
"{virama}ii>{ii};"
"{virama}i>{i};"
"{virama}uu>{uu};"
"{virama}u>{u};"
"{virama}rrh>{rrh};"
"{virama}rh>{rh};"
"{virama}lh>{lh};"
"{virama}e>{e};"
"{virama}o>{o};"
// a after a consonant: the virama and the a are both replaced by nothing,
// leaving the bare consonant letter
"{virama}a>;"
// otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
"{virama}''aa>{waa};"
"{virama}''ai>{wai};"
"{virama}''au>{wau};"
"{virama}''ii>{wii};"
"{virama}''i>{wi};"
"{virama}''uu>{wuu};"
"{virama}''u>{wu};"
"{virama}''rrh>{wrr};"
"{virama}''rh>{wr};"
"{virama}''lh>{wl};"
"{virama}''e>{we};"
"{virama}''o>{wo};"
"{virama}''a>{wa};"
// drop a virama that is followed by a character from endThing
// NOTE(review): endThing spans U+0000-U+08FF, which also contains the Latin
// letters and the apostrophe; a virama followed by a not-yet-converted Latin
// consonant would therefore seem to match this rule as well - confirm that
// this is intended.
"{virama}({endThing}>;"
// convert any left-over apostrophes used for separation
"''>;"
//#####################################################################
// convert from Native letters to Latin letters
//#####################################################################
// Native -> Latin rules, written latin<native;
// special forms with no good conversion
"mm<{bindu};"
"x<{visarga};"
// normal consonants
// Each consonant has up to three kinds of rule, in this order:
//   X''<{Xa}{virama}({Y};  consonant + virama before a letter Y whose Latin
//                          spelling would otherwise fuse with X into a
//                          different spelling (k + h -> kh, t + t -> tt, ...):
//                          emit an apostrophe separator
//   X<{Xa}(&;              consonant followed by a member of & (virama or a
//                          dependent vowel sign): emit the bare consonant
//   Xa<{Xa};               anything else: emit the consonant plus a
// NOTE(review): ( is read here as the start of the following context, which
// is tested but not replaced - confirm against the rule parser.
"kh<{kha}(&;"
"kha<{kha};"
"k''<{ka}{virama}({ha};"
"k<{ka}(&;"
"ka<{ka};"
"gh<{gha}(&;"
"gha<{gha};"
"g''<{ga}{virama}({ha};"
"g<{ga}(&;"
"ga<{ga};"
"ng<{nga}(&;"
"nga<{nga};"
"ch<{cha}(&;"
"cha<{cha};"
"c''<{ca}{virama}({ha};"
"c<{ca}(&;"
"ca<{ca};"
"jh<{jha}(&;"
"jha<{jha};"
"j''<{ja}{virama}({ha};"
"j<{ja}(&;"
"ja<{ja};"
"ny<{nya}(&;"
"nya<{nya};"
"tth<{ttha}(&;"
"ttha<{ttha};"
"tt''<{tta}{virama}({ha};"
"tt<{tta}(&;"
"tta<{tta};"
"ddh<{ddha}(&;"
"ddha<{ddha};"
// The separator applies only to dda + virama + ha, like every sibling rule.
// (This rule previously tested {dda}(&{ha}, which also fired when a vowel
// sign stood between dda and ha and did not consume the virama.)
"dd''<{dda}{virama}({ha};"
"dd<{dda}(&;"
"dda<{dda};"
"dh<{dha}(&;"
"dha<{dha};"
"d''<{da}{virama}({ha};"
"d''<{da}{virama}({ddha};"
"d''<{da}{virama}({dda};"
"d''<{da}{virama}({dha};"
"d''<{da}{virama}({da};"
"d<{da}(&;"
"da<{da};"
"th<{tha}(&;"
"tha<{tha};"
"t''<{ta}{virama}({ha};"
"t''<{ta}{virama}({ttha};"
"t''<{ta}{virama}({tta};"
"t''<{ta}{virama}({tha};"
"t''<{ta}{virama}({ta};"
"t<{ta}(&;"
"ta<{ta};"
// n fuses into ng, ny and nn, so it needs a separator before every letter
// whose Latin spelling starts with g, y or n. gha, nga, nya, nna and na were
// previously missing: e.g. na + virama + na came out as nna, which the
// Latin -> Native rules read back as nn + a.
"n''<{na}{virama}({ga};"
"n''<{na}{virama}({gha};"
"n''<{na}{virama}({ya};"
"n''<{na}{virama}({nga};"
"n''<{na}{virama}({nya};"
"n''<{na}{virama}({nna};"
"n''<{na}{virama}({na};"
"n<{na}(&;"
"na<{na};"
"ph<{pha}(&;"
"pha<{pha};"
"p''<{pa}{virama}({ha};"
"p<{pa}(&;"
"pa<{pa};"
"bh<{bha}(&;"
"bha<{bha};"
"b''<{ba}{virama}({ha};"
"b<{ba}(&;"
"ba<{ba};"
// mm is the Latin spelling of bindu, so m needs a separator before ma and
// before bindu itself
"m''<{ma}{virama}({ma};"
"m''<{ma}{virama}({bindu};"
"m<{ma}(&;"
"ma<{ma};"
"y<{ya}(&;"
"ya<{ya};"
// rh and lh are the Latin spellings of vowels, hence the separators before ha
"r''<{ra}{virama}({ha};"
"r<{ra}(&;"
"ra<{ra};"
"l''<{la}{virama}({ha};"
"l<{la}(&;"
"la<{la};"
"v<{va}(&;"
"va<{va};"
"sh<{sha}(&;"
"sha<{sha};"
"ss<{ssa}(&;"
"ssa<{ssa};"
"s''<{sa}{virama}({ha};"
"s''<{sa}{virama}({sha};"
"s''<{sa}{virama}({ssa};"
"s''<{sa}{virama}({sa};"
"s<{sa}(&;"
"sa<{sa};"
"h<{ha}(&;"
"ha<{ha};"
// dependent vowels (should never occur except following consonants)
// Each sign maps back to the Latin spelling it is produced from after a
// consonant in the Latin -> Native direction.
"aa<{aa};"
"ai<{ai};"
"au<{au};"
"ii<{ii};"
"i<{i};"
"uu<{uu};"
"u<{u};"
"rrh<{rrh};"
"rh<{rh};"
"lh<{lh};"
"e<{e};"
"o<{o};"
// independent vowels (when following consonants)
// The text before ) is preceding context: a literal a, or a Latin consonant
// from the % set. In those positions the vowel is prefixed with an
// apostrophe separator.
// rrh, rh, lh, e and o only have the % variant; there is no a) variant.
// NOTE(review): the context is Latin, so it presumably refers to text this
// direction has already produced - confirm against the rule parser.
"''aa<a){waa};"
"''aa<%){waa};"
"''ai<a){wai};"
"''ai<%){wai};"
"''au<a){wau};"
"''au<%){wau};"
"''ii<a){wii};"
"''ii<%){wii};"
"''i<a){wi};"
"''i<%){wi};"
"''uu<a){wuu};"
"''uu<%){wuu};"
"''u<a){wu};"
"''u<%){wu};"
"''rrh<%){wrr};"
"''rh<%){wr};"
"''lh<%){wl};"
"''e<%){we};"
"''o<%){wo};"
"''a<a){wa};"
"''a<%){wa};"
// independent vowels (otherwise)
// No separator: emit the plain Latin spelling.
"aa<{waa};"
"ai<{wai};"
"au<{wau};"
"ii<{wii};"
"i<{wi};"
"uu<{wuu};"
"u<{wu};"
"rrh<{wrr};"
"rh<{wr};"
"lh<{wl};"
"e<{we};"
"o<{wo};"
"a<{wa};"
// blow away any remaining viramas
// NOTE(review): the Native -> Latin rules in this table cover bindu and
// visarga but contain no rule for {candrabindu}, {danda} or {doubleDanda};
// confirm whether those are meant to pass through unchanged.
"<{virama};"
}
}