ICU-1259 check in new 2.0 rules mechanically generated from icu4j masters

X-SVN-Rev: 6453
This commit is contained in:
Alan Liu 2001-10-26 05:41:16 +00:00
parent 4853938bd8
commit 3f29a7e290
62 changed files with 11138 additions and 156 deletions

View File

@ -103,28 +103,34 @@ uk.txt uk_UA.txt\
vi.txt vi_VN.txt\
zh.txt zh__PINYIN.txt zh_CN.txt zh_HK.txt zh_SG.txt zh_TW.txt zh_TW_STROKE.txt
TRANSLIT_SOURCE=fullhalf.txt translit_index.txt kana.txt kbdescl1.txt\
larabic.txt lcyril.txt\
lgreek.txt lhebrew.txt ljamo.txt\
lkana.txt quotes.txt\
Bengali_InterIndic.txt\
Devanagari_InterIndic.txt\
Gujarati_InterIndic.txt\
Gurmukhi_InterIndic.txt\
Kannada_InterIndic.txt\
Malayalam_InterIndic.txt\
Oriya_InterIndic.txt\
Tamil_InterIndic.txt\
Telugu_InterIndic.txt\
InterIndic_Bengali.txt\
InterIndic_Devanagari.txt\
InterIndic_Gujarati.txt\
InterIndic_Gurmukhi.txt\
InterIndic_Kannada.txt\
InterIndic_Malayalam.txt\
InterIndic_Oriya.txt\
InterIndic_Tamil.txt\
InterIndic_Telugu.txt\
Latin_InterIndic.txt\
InterIndic_Latin.txt
TRANSLIT_SOURCE=translit_Any_Accents.txt\
translit_Any_Publishing.txt\
translit_Bengali_InterIndic.txt\
translit_Cyrillic_Latin.txt\
translit_Devanagari_InterIndic.txt\
translit_Fullwidth_Halfwidth.txt\
translit_Greek_Latin.txt\
translit_Gujarati_InterIndic.txt\
translit_Gurmukhi_InterIndic.txt\
translit_Hiragana_Katakana.txt\
translit_Hiragana_Latin.txt\
translit_InterIndic_Bengali.txt\
translit_InterIndic_Devanagari.txt\
translit_InterIndic_Gujarati.txt\
translit_InterIndic_Gurmukhi.txt\
translit_InterIndic_Kannada.txt\
translit_InterIndic_Latin.txt\
translit_InterIndic_Malayalam.txt\
translit_InterIndic_Oriya.txt\
translit_InterIndic_Tamil.txt\
translit_InterIndic_Telugu.txt\
translit_Kannada_InterIndic.txt\
translit_Latin_InterIndic.txt\
translit_Latin_Jamo.txt\
translit_Latin_Katakana.txt\
translit_Malayalam_InterIndic.txt\
translit_Oriya_InterIndic.txt\
translit_Tamil_InterIndic.txt\
translit_Telugu_InterIndic.txt\
translit_index.txt

View File

@ -0,0 +1,311 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Any_Accents.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Any_Accents
translit_Any_Accents {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// $Source: /xsrl/Nsvn/icu/icu/data/Attic/translit_Any_Accents.txt,v $
// $Date: 2001/10/26 05:41:15 $
// $Revision: 1.1 $
//--------------------------------------------------------------------
// Purpose: keyboard-friendly entry of diacritics and special letters.
// Every rule below has the shape
//     $pre KEYS $post <> TARGET ;
// so a short ASCII key sequence wrapped in the $pre/$post delimiters
// (defined below as '<' and '>') is paired with one combining mark or
// letter. The "<>" operator makes each rule apply in both directions.
// The rule set is bracketed by normalization directives: NFD (NFC) at
// the start and NFC (NFD) at the end.
":: NFD (NFC) ;"
// to do: make reversible
// define special conversion characters.
// variants of this could use different characters, or set one or the other to null.
"$pre = \< ;"
"$post = \> ;"
// Provide keyboard equivalents for common diacritics used in transliteration
"$pre \` $post <> \u0300 ;" // COMBINING GRAVE ACCENT
"$pre \' $post <> \u0301 ;" // COMBINING ACUTE ACCENT
"$pre \^ $post <> \u0302 ;" // COMBINING CIRCUMFLEX ACCENT
"$pre \~ $post <> \u0303 ;" // COMBINING TILDE
"$pre \- $post <> \u0304 ;" // COMBINING MACRON
"$pre \" $post <> \u0308 ;" // COMBINING DIAERESIS
"$pre \* $post <> \u030A ;" // COMBINING RING ABOVE
"$pre \, $post <> \u0327 ;" // COMBINING CEDILLA
"$pre '/' $post <> \u0338 ;" // COMBINING LONG SOLIDUS OVERLAY
"$pre \. $post <> \u0323 ;" // COMBINING DOT BELOW
// Combine common characters
"$pre AE $post <> \u00C6 ;" // LATIN CAPITAL LETTER AE
"$pre ae $post <> \u00E6 ;" // LATIN SMALL LETTER AE
"$pre D $post <> \u00D0 ;" // LATIN CAPITAL LETTER ETH
"$pre d $post <> \u00F0 ;" // LATIN SMALL LETTER ETH
"$pre O'/' $post <> \u00D8 ;" // LATIN CAPITAL LETTER O WITH STROKE
"$pre o'/' $post <> \u00F8 ;" // LATIN SMALL LETTER O WITH STROKE
"$pre TH $post <> \u00DE ;" // LATIN CAPITAL LETTER THORN
"$pre th $post <> \u00FE ;" // LATIN SMALL LETTER THORN
"$pre OE $post <> \u0152 ;" // LATIN CAPITAL LIGATURE OE
"$pre oe $post <> \u0153 ;" // LATIN SMALL LIGATURE OE
"$pre ss $post <> \u00DF ;" // LATIN SMALL LETTER SHARP S
"$pre NG $post <> \u014A ;" // LATIN CAPITAL LETTER ENG
"$pre ng $post <> \u014B ;" // LATIN SMALL LETTER ENG
"$pre T $post <> \u0398 ;" // GREEK CAPITAL LETTER THETA
"$pre t $post <> \u03B8 ;" // GREEK SMALL LETTER THETA
"$pre SH $post <> \u01A9 ;" // LATIN CAPITAL LETTER ESH
"$pre sh $post <> \u0283 ;" // LATIN SMALL LETTER ESH
"$pre ZH $post <> \u01B7 ;" // LATIN CAPITAL LETTER EZH
"$pre zh $post <> \u0292 ;" // LATIN SMALL LETTER EZH
"$pre U $post <> \u01B1 ;" // LATIN CAPITAL LETTER UPSILON
"$pre u $post <> \u028A ;" // LATIN SMALL LETTER UPSILON
"$pre A $post <> \u018F ;" // LATIN CAPITAL LETTER SCHWA
"$pre a $post <> \u0259 ;" // LATIN SMALL LETTER SCHWA
"$pre O $post <> \u0186 ;" // LATIN CAPITAL LETTER OPEN O
"$pre o $post <> \u0254 ;" // LATIN SMALL LETTER OPEN O
"$pre E $post <> \u0190 ;" // LATIN CAPITAL LETTER OPEN E
"$pre e $post <> \u025B ;" // LATIN SMALL LETTER OPEN E
// three that don't have uppercases
"$pre '?' $post <> \u0294 ;" // LATIN LETTER GLOTTAL STOP
"$pre i $post <> \u026A ;" // LATIN LETTER SMALL CAPITAL I
"$pre v $post <> \u028C ;" // LATIN SMALL LETTER TURNED V
// Additional Characters that may be added in the future.
// XXX / YYY are placeholders for key sequences that have not been chosen yet;
// none of the lines below are active rules.
// $pre XXX $post <> \u0306 ; # COMBINING BREVE
// $pre XXX $post <> \u0307 ; # COMBINING DOT ABOVE
// $pre XXX $post <> \u0309 ; # COMBINING HOOK ABOVE
// $pre XXX $post <> \u030B ; # COMBINING DOUBLE ACUTE ACCENT
// $pre XXX $post <> \u030C ; # COMBINING CARON
// $pre XXX $post <> \u030F ; # COMBINING DOUBLE GRAVE ACCENT
// $pre XXX $post <> \u0311 ; # COMBINING INVERTED BREVE
// $pre XXX $post <> \u0313 ; # COMBINING COMMA ABOVE
// $pre XXX $post <> \u0314 ; # COMBINING REVERSED COMMA ABOVE
// $pre XXX $post <> \u031B ; # COMBINING HORN
// $pre XXX $post <> \u0324 ; # COMBINING DIAERESIS BELOW
// $pre XXX $post <> \u0325 ; # COMBINING RING BELOW
// $pre XXX $post <> \u0326 ; # COMBINING COMMA BELOW
// $pre XXX $post <> \u0328 ; # COMBINING OGONEK
// $pre XXX $post <> \u032D ; # COMBINING CIRCUMFLEX ACCENT BELOW
// $pre XXX $post <> \u032E ; # COMBINING BREVE BELOW
// $pre XXX $post <> \u0330 ; # COMBINING TILDE BELOW
// $pre XXX $post <> \u0331 ; # COMBINING MACRON BELOW
// $pre YYY $post <> \u00AA ; # FEMININE ORDINAL INDICATOR
// $pre YYY $post <> \u00BA ; # MASCULINE ORDINAL INDICATOR
// $pre YYY $post <> \u0110 ; # LATIN CAPITAL LETTER D WITH STROKE
// $pre YYY $post <> \u0111 ; # LATIN SMALL LETTER D WITH STROKE
// $pre YYY $post <> \u0126 ; # LATIN CAPITAL LETTER H WITH STROKE
// $pre YYY $post <> \u0127 ; # LATIN SMALL LETTER H WITH STROKE
// $pre YYY $post <> \u0131 ; # LATIN SMALL LETTER DOTLESS I
// $pre YYY $post <> \u0138 ; # LATIN SMALL LETTER KRA
// $pre YYY $post <> \u013F ; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
// $pre YYY $post <> \u0140 ; # LATIN SMALL LETTER L WITH MIDDLE DOT
// $pre YYY $post <> \u0141 ; # LATIN CAPITAL LETTER L WITH STROKE
// $pre YYY $post <> \u0142 ; # LATIN SMALL LETTER L WITH STROKE
// $pre YYY $post <> \u0149 ; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
// $pre YYY $post <> \u0166 ; # LATIN CAPITAL LETTER T WITH STROKE
// $pre YYY $post <> \u0167 ; # LATIN SMALL LETTER T WITH STROKE
// $pre YYY $post <> \u017F ; # LATIN SMALL LETTER LONG S
// $pre YYY $post <> \u0180 ; # LATIN SMALL LETTER B WITH STROKE
// $pre YYY $post <> \u0181 ; # LATIN CAPITAL LETTER B WITH HOOK
// $pre YYY $post <> \u0182 ; # LATIN CAPITAL LETTER B WITH TOPBAR
// $pre YYY $post <> \u0183 ; # LATIN SMALL LETTER B WITH TOPBAR
// $pre YYY $post <> \u0184 ; # LATIN CAPITAL LETTER TONE SIX
// $pre YYY $post <> \u0185 ; # LATIN SMALL LETTER TONE SIX
// $pre YYY $post <> \u0187 ; # LATIN CAPITAL LETTER C WITH HOOK
// $pre YYY $post <> \u0188 ; # LATIN SMALL LETTER C WITH HOOK
// $pre YYY $post <> \u0189 ; # LATIN CAPITAL LETTER AFRICAN D
// $pre YYY $post <> \u018A ; # LATIN CAPITAL LETTER D WITH HOOK
// $pre YYY $post <> \u018B ; # LATIN CAPITAL LETTER D WITH TOPBAR
// $pre YYY $post <> \u018C ; # LATIN SMALL LETTER D WITH TOPBAR
// $pre YYY $post <> \u018D ; # LATIN SMALL LETTER TURNED DELTA
// $pre YYY $post <> \u018E ; # LATIN CAPITAL LETTER REVERSED E
// $pre YYY $post <> \u0191 ; # LATIN CAPITAL LETTER F WITH HOOK
// $pre YYY $post <> \u0192 ; # LATIN SMALL LETTER F WITH HOOK
// $pre YYY $post <> \u0193 ; # LATIN CAPITAL LETTER G WITH HOOK
// $pre YYY $post <> \u0194 ; # LATIN CAPITAL LETTER GAMMA
// $pre YYY $post <> \u0195 ; # LATIN SMALL LETTER HV
// $pre YYY $post <> \u0196 ; # LATIN CAPITAL LETTER IOTA
// $pre YYY $post <> \u0197 ; # LATIN CAPITAL LETTER I WITH STROKE
// $pre YYY $post <> \u0198 ; # LATIN CAPITAL LETTER K WITH HOOK
// $pre YYY $post <> \u0199 ; # LATIN SMALL LETTER K WITH HOOK
// $pre YYY $post <> \u019A ; # LATIN SMALL LETTER L WITH BAR
// $pre YYY $post <> \u019B ; # LATIN SMALL LETTER LAMBDA WITH STROKE
// $pre YYY $post <> \u019C ; # LATIN CAPITAL LETTER TURNED M
// $pre YYY $post <> \u019D ; # LATIN CAPITAL LETTER N WITH LEFT HOOK
// $pre YYY $post <> \u019E ; # LATIN SMALL LETTER N WITH LONG RIGHT LEG
// $pre YYY $post <> \u019F ; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
// $pre YYY $post <> \u01A2 ; # LATIN CAPITAL LETTER OI
// $pre YYY $post <> \u01A3 ; # LATIN SMALL LETTER OI
// $pre YYY $post <> \u01A4 ; # LATIN CAPITAL LETTER P WITH HOOK
// $pre YYY $post <> \u01A5 ; # LATIN SMALL LETTER P WITH HOOK
// $pre YYY $post <> \u01A6 ; # LATIN LETTER YR
// $pre YYY $post <> \u01A7 ; # LATIN CAPITAL LETTER TONE TWO
// $pre YYY $post <> \u01A8 ; # LATIN SMALL LETTER TONE TWO
// $pre YYY $post <> \u01AA ; # LATIN LETTER REVERSED ESH LOOP
// $pre YYY $post <> \u01AB ; # LATIN SMALL LETTER T WITH PALATAL HOOK
// $pre YYY $post <> \u01AC ; # LATIN CAPITAL LETTER T WITH HOOK
// $pre YYY $post <> \u01AD ; # LATIN SMALL LETTER T WITH HOOK
// $pre YYY $post <> \u01AE ; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
// $pre YYY $post <> \u01B2 ; # LATIN CAPITAL LETTER V WITH HOOK
// $pre YYY $post <> \u01B3 ; # LATIN CAPITAL LETTER Y WITH HOOK
// $pre YYY $post <> \u01B4 ; # LATIN SMALL LETTER Y WITH HOOK
// $pre YYY $post <> \u01B5 ; # LATIN CAPITAL LETTER Z WITH STROKE
// $pre YYY $post <> \u01B6 ; # LATIN SMALL LETTER Z WITH STROKE
// $pre YYY $post <> \u01B8 ; # LATIN CAPITAL LETTER EZH REVERSED
// $pre YYY $post <> \u01B9 ; # LATIN SMALL LETTER EZH REVERSED
// $pre YYY $post <> \u01BA ; # LATIN SMALL LETTER EZH WITH TAIL
// $pre YYY $post <> \u01BB ; # LATIN LETTER TWO WITH STROKE
// $pre YYY $post <> \u01BC ; # LATIN CAPITAL LETTER TONE FIVE
// $pre YYY $post <> \u01BD ; # LATIN SMALL LETTER TONE FIVE
// $pre YYY $post <> \u01BE ; # LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE
// $pre YYY $post <> \u01BF ; # LATIN LETTER WYNN
// $pre YYY $post <> \u01C0 ; # LATIN LETTER DENTAL CLICK
// $pre YYY $post <> \u01C1 ; # LATIN LETTER LATERAL CLICK
// $pre YYY $post <> \u01C2 ; # LATIN LETTER ALVEOLAR CLICK
// $pre YYY $post <> \u01C3 ; # LATIN LETTER RETROFLEX CLICK
// $pre YYY $post <> \u01C4 ; # LATIN CAPITAL LETTER DZ WITH CARON
// $pre YYY $post <> \u01C5 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
// $pre YYY $post <> \u01C6 ; # LATIN SMALL LETTER DZ WITH CARON
// $pre YYY $post <> \u01C7 ; # LATIN CAPITAL LETTER LJ
// $pre YYY $post <> \u01C8 ; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
// $pre YYY $post <> \u01C9 ; # LATIN SMALL LETTER LJ
// $pre YYY $post <> \u01CA ; # LATIN CAPITAL LETTER NJ
// $pre YYY $post <> \u01CB ; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
// $pre YYY $post <> \u01CC ; # LATIN SMALL LETTER NJ
// $pre YYY $post <> \u01DD ; # LATIN SMALL LETTER TURNED E
// $pre YYY $post <> \u01E4 ; # LATIN CAPITAL LETTER G WITH STROKE
// $pre YYY $post <> \u01E5 ; # LATIN SMALL LETTER G WITH STROKE
// $pre YYY $post <> \u01F1 ; # LATIN CAPITAL LETTER DZ
// $pre YYY $post <> \u01F2 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
// $pre YYY $post <> \u01F3 ; # LATIN SMALL LETTER DZ
// $pre YYY $post <> \u01F6 ; # LATIN CAPITAL LETTER HWAIR
// $pre YYY $post <> \u01F7 ; # LATIN CAPITAL LETTER WYNN
// $pre YYY $post <> \u021C ; # LATIN CAPITAL LETTER YOGH
// $pre YYY $post <> \u021D ; # LATIN SMALL LETTER YOGH
// $pre YYY $post <> \u0222 ; # LATIN CAPITAL LETTER OU
// $pre YYY $post <> \u0223 ; # LATIN SMALL LETTER OU
// $pre YYY $post <> \u0224 ; # LATIN CAPITAL LETTER Z WITH HOOK
// $pre YYY $post <> \u0225 ; # LATIN SMALL LETTER Z WITH HOOK
// $pre YYY $post <> \u0250 ; # LATIN SMALL LETTER TURNED A
// $pre YYY $post <> \u0251 ; # LATIN SMALL LETTER ALPHA
// $pre YYY $post <> \u0252 ; # LATIN SMALL LETTER TURNED ALPHA
// $pre YYY $post <> \u0253 ; # LATIN SMALL LETTER B WITH HOOK
// $pre YYY $post <> \u0255 ; # LATIN SMALL LETTER C WITH CURL
// $pre YYY $post <> \u0256 ; # LATIN SMALL LETTER D WITH TAIL
// $pre YYY $post <> \u0257 ; # LATIN SMALL LETTER D WITH HOOK
// $pre YYY $post <> \u0258 ; # LATIN SMALL LETTER REVERSED E
// $pre YYY $post <> \u025A ; # LATIN SMALL LETTER SCHWA WITH HOOK
// $pre YYY $post <> \u025C ; # LATIN SMALL LETTER REVERSED OPEN E
// $pre YYY $post <> \u025D ; # LATIN SMALL LETTER REVERSED OPEN E WITH HOOK
// $pre YYY $post <> \u025E ; # LATIN SMALL LETTER CLOSED REVERSED OPEN E
// $pre YYY $post <> \u025F ; # LATIN SMALL LETTER DOTLESS J WITH STROKE
// $pre YYY $post <> \u0260 ; # LATIN SMALL LETTER G WITH HOOK
// $pre YYY $post <> \u0261 ; # LATIN SMALL LETTER SCRIPT G
// $pre YYY $post <> \u0262 ; # LATIN LETTER SMALL CAPITAL G
// $pre YYY $post <> \u0263 ; # LATIN SMALL LETTER GAMMA
// $pre YYY $post <> \u0264 ; # LATIN SMALL LETTER RAMS HORN
// $pre YYY $post <> \u0265 ; # LATIN SMALL LETTER TURNED H
// $pre YYY $post <> \u0266 ; # LATIN SMALL LETTER H WITH HOOK
// $pre YYY $post <> \u0267 ; # LATIN SMALL LETTER HENG WITH HOOK
// $pre YYY $post <> \u0268 ; # LATIN SMALL LETTER I WITH STROKE
// $pre YYY $post <> \u0269 ; # LATIN SMALL LETTER IOTA
// $pre YYY $post <> \u026B ; # LATIN SMALL LETTER L WITH MIDDLE TILDE
// $pre YYY $post <> \u026C ; # LATIN SMALL LETTER L WITH BELT
// $pre YYY $post <> \u026D ; # LATIN SMALL LETTER L WITH RETROFLEX HOOK
// $pre YYY $post <> \u026E ; # LATIN SMALL LETTER LEZH
// $pre YYY $post <> \u026F ; # LATIN SMALL LETTER TURNED M
// $pre YYY $post <> \u0270 ; # LATIN SMALL LETTER TURNED M WITH LONG LEG
// $pre YYY $post <> \u0271 ; # LATIN SMALL LETTER M WITH HOOK
// $pre YYY $post <> \u0272 ; # LATIN SMALL LETTER N WITH LEFT HOOK
// $pre YYY $post <> \u0273 ; # LATIN SMALL LETTER N WITH RETROFLEX HOOK
// $pre YYY $post <> \u0274 ; # LATIN LETTER SMALL CAPITAL N
// $pre YYY $post <> \u0275 ; # LATIN SMALL LETTER BARRED O
// $pre YYY $post <> \u0276 ; # LATIN LETTER SMALL CAPITAL OE
// $pre YYY $post <> \u0277 ; # LATIN SMALL LETTER CLOSED OMEGA
// $pre YYY $post <> \u0278 ; # LATIN SMALL LETTER PHI
// $pre YYY $post <> \u0279 ; # LATIN SMALL LETTER TURNED R
// $pre YYY $post <> \u027A ; # LATIN SMALL LETTER TURNED R WITH LONG LEG
// $pre YYY $post <> \u027B ; # LATIN SMALL LETTER TURNED R WITH HOOK
// $pre YYY $post <> \u027C ; # LATIN SMALL LETTER R WITH LONG LEG
// $pre YYY $post <> \u027D ; # LATIN SMALL LETTER R WITH TAIL
// $pre YYY $post <> \u027E ; # LATIN SMALL LETTER R WITH FISHHOOK
// $pre YYY $post <> \u027F ; # LATIN SMALL LETTER REVERSED R WITH FISHHOOK
// $pre YYY $post <> \u0280 ; # LATIN LETTER SMALL CAPITAL R
// $pre YYY $post <> \u0281 ; # LATIN LETTER SMALL CAPITAL INVERTED R
// $pre YYY $post <> \u0282 ; # LATIN SMALL LETTER S WITH HOOK
// $pre YYY $post <> \u0284 ; # LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
// $pre YYY $post <> \u0285 ; # LATIN SMALL LETTER SQUAT REVERSED ESH
// $pre YYY $post <> \u0286 ; # LATIN SMALL LETTER ESH WITH CURL
// $pre YYY $post <> \u0287 ; # LATIN SMALL LETTER TURNED T
// $pre YYY $post <> \u0288 ; # LATIN SMALL LETTER T WITH RETROFLEX HOOK
// $pre YYY $post <> \u0289 ; # LATIN SMALL LETTER U BAR
// $pre YYY $post <> \u028B ; # LATIN SMALL LETTER V WITH HOOK
// $pre YYY $post <> \u028D ; # LATIN SMALL LETTER TURNED W
// $pre YYY $post <> \u028E ; # LATIN SMALL LETTER TURNED Y
// $pre YYY $post <> \u028F ; # LATIN LETTER SMALL CAPITAL Y
// $pre YYY $post <> \u0290 ; # LATIN SMALL LETTER Z WITH RETROFLEX HOOK
// $pre YYY $post <> \u0291 ; # LATIN SMALL LETTER Z WITH CURL
// $pre YYY $post <> \u0293 ; # LATIN SMALL LETTER EZH WITH CURL
// $pre YYY $post <> \u0294 ; # LATIN LETTER GLOTTAL STOP (already mapped above via '?')
// $pre YYY $post <> \u0295 ; # LATIN LETTER PHARYNGEAL VOICED FRICATIVE
// $pre YYY $post <> \u0296 ; # LATIN LETTER INVERTED GLOTTAL STOP
// $pre YYY $post <> \u0297 ; # LATIN LETTER STRETCHED C
// $pre YYY $post <> \u0298 ; # LATIN LETTER BILABIAL CLICK
// $pre YYY $post <> \u0299 ; # LATIN LETTER SMALL CAPITAL B
// $pre YYY $post <> \u029A ; # LATIN SMALL LETTER CLOSED OPEN E
// $pre YYY $post <> \u029B ; # LATIN LETTER SMALL CAPITAL G WITH HOOK
// $pre YYY $post <> \u029C ; # LATIN LETTER SMALL CAPITAL H
// $pre YYY $post <> \u029D ; # LATIN SMALL LETTER J WITH CROSSED-TAIL
// $pre YYY $post <> \u029E ; # LATIN SMALL LETTER TURNED K
// $pre YYY $post <> \u029F ; # LATIN LETTER SMALL CAPITAL L
// $pre YYY $post <> \u02A0 ; # LATIN SMALL LETTER Q WITH HOOK
// $pre YYY $post <> \u02A1 ; # LATIN LETTER GLOTTAL STOP WITH STROKE
// $pre YYY $post <> \u02A2 ; # LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE
// $pre YYY $post <> \u02A3 ; # LATIN SMALL LETTER DZ DIGRAPH
// $pre YYY $post <> \u02A4 ; # LATIN SMALL LETTER DEZH DIGRAPH
// $pre YYY $post <> \u02A5 ; # LATIN SMALL LETTER DZ DIGRAPH WITH CURL
// $pre YYY $post <> \u02A6 ; # LATIN SMALL LETTER TS DIGRAPH
// $pre YYY $post <> \u02A7 ; # LATIN SMALL LETTER TESH DIGRAPH
// $pre YYY $post <> \u02A8 ; # LATIN SMALL LETTER TC DIGRAPH WITH CURL
// $pre YYY $post <> \u02A9 ; # LATIN SMALL LETTER FENG DIGRAPH
// $pre YYY $post <> \u02AA ; # LATIN SMALL LETTER LS DIGRAPH
// $pre YYY $post <> \u02AB ; # LATIN SMALL LETTER LZ DIGRAPH
// $pre YYY $post <> \u02AC ; # LATIN LETTER BILABIAL PERCUSSIVE
// $pre YYY $post <> \u02AD ; # LATIN LETTER BIDENTAL PERCUSSIVE
// $pre YYY $post <> \u02B0 ; # MODIFIER LETTER SMALL H
// $pre YYY $post <> \u02B1 ; # MODIFIER LETTER SMALL H WITH HOOK
// $pre YYY $post <> \u02B2 ; # MODIFIER LETTER SMALL J
// $pre YYY $post <> \u02B3 ; # MODIFIER LETTER SMALL R
// $pre YYY $post <> \u02B4 ; # MODIFIER LETTER SMALL TURNED R
// $pre YYY $post <> \u02B5 ; # MODIFIER LETTER SMALL TURNED R WITH HOOK
// $pre YYY $post <> \u02B6 ; # MODIFIER LETTER SMALL CAPITAL INVERTED R
// $pre YYY $post <> \u02B7 ; # MODIFIER LETTER SMALL W
// $pre YYY $post <> \u02B8 ; # MODIFIER LETTER SMALL Y
// $pre YYY $post <> \u02E0 ; # MODIFIER LETTER SMALL GAMMA
// $pre YYY $post <> \u02E1 ; # MODIFIER LETTER SMALL L
// $pre YYY $post <> \u02E2 ; # MODIFIER LETTER SMALL S
// $pre YYY $post <> \u02E3 ; # MODIFIER LETTER SMALL X
// $pre YYY $post <> \u02E4 ; # MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
// $pre YYY $post <> \u1E9A ; # LATIN SMALL LETTER A WITH RIGHT HALF RING
// $pre YYY $post <> \u207F ; # SUPERSCRIPT LATIN SMALL LETTER N
":: NFC (NFD) ;"
}
}

View File

@ -0,0 +1,55 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Any_Publishing.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Any_Publishing
translit_Any_Publishing {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// $Source: /xsrl/Nsvn/icu/icu/data/Attic/translit_Any_Publishing.txt,v $
// $Date: 2001/10/26 05:41:16 $
// $Revision: 1.1 $
//--------------------------------------------------------------------
// Purpose: convert typewriter-style ASCII punctuation (straight quotes,
// backquotes, doubled hyphens, repeated spaces) to typographic forms.
// Rules written with "<>" also apply in the reverse direction; rules
// written with ">" are forward-only.
// Test case
// "The" "(quick)" ('brown') `fox' ` jumped -- "over?"
// Variables
"$single = \' ;"
"$space = ' ' ;"
"$double = \" ;"
"$back = \` ;"
"$tab = '\u0009' ;" // U+0009 CHARACTER TABULATION (U+0008 is BACKSPACE); not referenced by the rules below
// Context in which a quote is an opening quote: after a separator, an
// opening bracket, or an initial quote. NOTE(review): the trailing '$' in
// the set presumably also anchors at the text boundary - confirm.
"$makeRight = [[:Z:][:Ps:][:Pi:]$] ;"
// fix UNIX quotes
"$back $back > “ ;"
"$back > ‘ ;"
// fix typewriter quotes, by context
"$makeRight {$double} <> “ ;"
"$double <> ” ;"
"$makeRight {$single} <> ‘ ;"
"$single <> ’ ;"
// fix multiple spaces and hyphens
// (the empty right-hand side is intentional: a space that follows a space is deleted)
"$space {$space} > ;"
"'--' <> — ;"
}
}

View File

@ -0,0 +1,121 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Bengali_InterIndic.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Bengali_InterIndic
translit_Bengali_InterIndic {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Bengali-InterIndic
// Date: Tue Jan 23 12:41:57 2001
//--------------------------------------------------------------------
// Purpose: one-way table from Bengali code points (U+0981..U+09FA) to the
// InterIndic pivot code points in the private-use range starting at U+E000.
// Every mapped letter, sign and digit keeps its offset within the block
// (U+0980+x -> U+E000+x); the one irregular entry is ISSHAR at the end.
// Normalization is currently disabled: both ":: NFD/NFC" directives are
// commented out.
//:: NFD (NFC) ;
"\u0981>\ue001;" // SIGN CANDRABINDU
"\u0982>\ue002;" // SIGN ANUSVARA
"\u0983>\ue003;" // SIGN VISARGA
"\u0985>\ue005;" // LETTER A
"\u0986>\ue006;" // LETTER AA
"\u0987>\ue007;" // LETTER I
"\u0988>\ue008;" // LETTER II
"\u0989>\ue009;" // LETTER U
"\u098a>\ue00a;" // LETTER UU
"\u098b>\ue00b;" // LETTER VOCALIC R
"\u098c>\ue00c;" // LETTER VOCALIC L
"\u098f>\ue00f;" // LETTER E
"\u0990>\ue010;" // LETTER AI
"\u0993>\ue013;" // LETTER O
"\u0994>\ue014;" // LETTER AU
"\u0995>\ue015;" // LETTER KA
"\u0996>\ue016;" // LETTER KHA
"\u0997>\ue017;" // LETTER GA
"\u0998>\ue018;" // LETTER GHA
"\u0999>\ue019;" // LETTER NGA
"\u099a>\ue01a;" // LETTER CA
"\u099b>\ue01b;" // LETTER CHA
"\u099c>\ue01c;" // LETTER JA
"\u099d>\ue01d;" // LETTER JHA
"\u099e>\ue01e;" // LETTER NYA
"\u099f>\ue01f;" // LETTER TTA
"\u09a0>\ue020;" // LETTER TTHA
"\u09a1>\ue021;" // LETTER DDA
"\u09a2>\ue022;" // LETTER DDHA
"\u09a3>\ue023;" // LETTER NNA
"\u09a4>\ue024;" // LETTER TA
"\u09a5>\ue025;" // LETTER THA
"\u09a6>\ue026;" // LETTER DA
"\u09a7>\ue027;" // LETTER DHA
"\u09a8>\ue028;" // LETTER NA
"\u09aa>\ue02a;" // LETTER PA
"\u09ab>\ue02b;" // LETTER PHA
"\u09ac>\ue02c;" // LETTER BA
"\u09ad>\ue02d;" // LETTER BHA
"\u09ae>\ue02e;" // LETTER MA
"\u09af>\ue02f;" // LETTER YA
"\u09b0>\ue030;" // LETTER RA
"\u09b2>\ue032;" // LETTER LA
"\u09b6>\ue036;" // LETTER SHA
"\u09b7>\ue037;" // LETTER SSA
"\u09b8>\ue038;" // LETTER SA
"\u09b9>\ue039;" // LETTER HA
"\u09bc>\ue03c;" // SIGN NUKTA
"\u09be>\ue03e;" // VOWEL SIGN AA
"\u09bf>\ue03f;" // VOWEL SIGN I
"\u09c0>\ue040;" // VOWEL SIGN II
"\u09c1>\ue041;" // VOWEL SIGN U
"\u09c2>\ue042;" // VOWEL SIGN UU
"\u09c3>\ue043;" // VOWEL SIGN VOCALIC R
"\u09c4>\ue044;" // VOWEL SIGN VOCALIC RR
"\u09c7>\ue047;" // VOWEL SIGN E
"\u09c8>\ue048;" // VOWEL SIGN AI
"\u09cb>\ue04b;" // VOWEL SIGN O
"\u09cc>\ue04c;" // VOWEL SIGN AU
"\u09cd>\ue04d;" // SIGN VIRAMA
"\u09d7>\ue057;" // AU LENGTH MARK
"\u09dc>\ue05c;" // LETTER RRA (same block offset as RHA -> \ue05d on the next line)
"\u09dd>\ue05d;" // LETTER RHA
"\u09df>\ue05f;" // LETTER YYA
"\u09e0>\ue060;" // LETTER VOCALIC RR
"\u09e1>\ue061;" // LETTER VOCALIC LL
"\u09e2>\ue062;" // VOWEL SIGN VOCALIC L
"\u09e3>\ue063;" // VOWEL SIGN VOCALIC LL
"\u09e6>\ue066;" // DIGIT ZERO
"\u09e7>\ue067;" // DIGIT ONE
"\u09e8>\ue068;" // DIGIT TWO
"\u09e9>\ue069;" // DIGIT THREE
"\u09ea>\ue06a;" // DIGIT FOUR
"\u09eb>\ue06b;" // DIGIT FIVE
"\u09ec>\ue06c;" // DIGIT SIX
"\u09ed>\ue06d;" // DIGIT SEVEN
"\u09ee>\ue06e;" // DIGIT EIGHT
"\u09ef>\ue06f;" // DIGIT NINE
// The following Bengali characters have no InterIndic counterpart and are
// intentionally left without a rule.
// \u09f0>; # UNMAPPED Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL
// \u09f1>; # UNMAPPED Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL
// \u09f2>; # UNMAPPED Bengali-InterIndic: RUPEE MARK
// \u09f3>; # UNMAPPED Bengali-InterIndic: RUPEE SIGN
// \u09f4>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR ONE
// \u09f5>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR TWO
// \u09f6>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR THREE
// \u09f7>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR FOUR
// \u09f8>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
// \u09f9>; # UNMAPPED Bengali-InterIndic: CURRENCY DENOMINATOR SIXTEEN
"\u09fa>\ue070;" // ISSHAR (irregular: does not follow the +x block offset)
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,316 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Cyrillic_Latin.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Cyrillic_Latin
translit_Cyrillic_Latin {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// $Source: /xsrl/Nsvn/icu/icu/data/Attic/translit_Cyrillic_Latin.txt,v $
// $Date: 2001/10/26 05:41:16 $
// $Revision: 1.1 $
//--------------------------------------------------------------------
// TODO: add remaining characters
// Should add variants for Russian-English, Russian-German
// Those can use this as a base, and then remap cases
// like a $hat to ya or ja.
":: NFD (NFC) ;"
"$modprime = \u02B9;"
"$modprime2 = \u02BA;"
"$grave = \u0300;"
"$acute = \u0301;"
"$hat = \u0302;"
"$breve = \u0306 ;"
"$dot = \u0307 ;"
"$caron = \u030C ;"
"$comma = \u0326 ;"
// move up so not masked
"я <> a $hat ;" // CYRILLIC SMALL LETTER YA
"Я <> A $hat ;" // CYRILLIC CAPITAL LETTER YA
"ч <> c $caron ;" // CYRILLIC SMALL LETTER CHE
"Ч <> C $caron;" // CYRILLIC CAPITAL LETTER CHE
// ҷ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER
// Ҷ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
// ӌ <> XXX ; # CYRILLIC SMALL LETTER KHAKASSIAN CHE
// Ӌ <> XXX ; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
// ҹ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
// Ҹ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
"э <> e $acute;" // CYRILLIC SMALL LETTER E
"Э <> E $acute;" // CYRILLIC CAPITAL LETTER E
"є <> e $hat;" // CYRILLIC SMALL LETTER UKRAINIAN IE
"Є <> E $hat;" // CYRILLIC CAPITAL LETTER UKRAINIAN IE
"ш <> s $caron ;" // CYRILLIC SMALL LETTER SHA
"Ш <> S $caron ;" // CYRILLIC CAPITAL LETTER SHA
"щ <> s $hat ;" // CYRILLIC SMALL LETTER SHCHA
"Щ <> S $hat;" // CYRILLIC CAPITAL LETTER SHCHA
"ѕ <> z $hat ;" // CYRILLIC SMALL LETTER DZE
"Ѕ <> Z $hat;" // CYRILLIC CAPITAL LETTER DZE
// ӡ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN DZE
// Ӡ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
"ю <> u $hat ;" // CYRILLIC SMALL LETTER YU
"Ю <> U $hat ;" // CYRILLIC CAPITAL LETTER YU
"і <> i $acute;" // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
"І <> I $acute;" // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
"ј <> j $caron;" // CYRILLIC SMALL LETTER JE
"Ј <> J $caron;" // CYRILLIC CAPITAL LETTER JE
"љ <> l $hat ;" // CYRILLIC SMALL LETTER LJE
"Љ <> L $hat ;" // CYRILLIC CAPITAL LETTER LJE
"њ <> n $hat ;" // CYRILLIC SMALL LETTER NJE
"Њ <> N $hat ;" // CYRILLIC CAPITAL LETTER NJE
"ћ <> c $acute ;" // CYRILLIC SMALL LETTER TSHE
"Ћ <> C $acute ;" // CYRILLIC CAPITAL LETTER TSHE
"џ <> d $hat ;" // CYRILLIC SMALL LETTER DZHE
"Џ <> D $hat ;" // CYRILLIC CAPITAL LETTER DZHE
// Normal order
"а <> a ;" // CYRILLIC SMALL LETTER A
"А <> A ;" // CYRILLIC CAPITAL LETTER A
"ә <> \u0259 ;" // CYRILLIC SMALL LETTER SCHWA
"Ә <> \u018F ;" // CYRILLIC CAPITAL LETTER SCHWA
"ӕ <> \u00E6 ;" // CYRILLIC SMALL LIGATURE A IE
"Ӕ <> \u00C6 ;" // CYRILLIC CAPITAL LIGATURE A IE
"б <> b ;" // CYRILLIC SMALL LETTER BE
"Б <> B ;" // CYRILLIC CAPITAL LETTER BE
"в <> v ;" // CYRILLIC SMALL LETTER VE
"В <> V ;" // CYRILLIC CAPITAL LETTER VE
"ґ <> g $grave ;" // CYRILLIC SMALL LETTER GHE WITH UPTURN
"Ґ <> G $grave ;" // CYRILLIC CAPITAL LETTER GHE WITH UPTURN
"ғ <> g $dot ;" // CYRILLIC SMALL LETTER GHE WITH STROKE
"Ғ <> G $dot;" // CYRILLIC CAPITAL LETTER GHE WITH STROKE
"ҕ <> g $breve;" // CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK
"Ҕ <> G $breve;" // CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
"г <> g ;" // CYRILLIC SMALL LETTER GHE
"Г <> G ;" // CYRILLIC CAPITAL LETTER GHE
"д <> d;" // CYRILLIC SMALL LETTER DE
"Д <> D;" // CYRILLIC CAPITAL LETTER DE
"ђ <> đ ;" // CYRILLIC SMALL LETTER DJE
"Ђ <> Đ ;" // CYRILLIC CAPITAL LETTER DJE
"ҙ <> z $comma ;" // CYRILLIC SMALL LETTER ZE WITH DESCENDER
"Ҙ <> Z $comma ;" // CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
"е <> e ;" // CYRILLIC SMALL LETTER IE
"Е <> E;" // CYRILLIC CAPITAL LETTER IE
"ж <> z $caron;" // CYRILLIC SMALL LETTER ZHE
"Ж <> Z $caron;" // CYRILLIC CAPITAL LETTER ZHE
// җ <> XXX ; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER
// Җ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
"з <> z ;" // CYRILLIC SMALL LETTER ZE
"З <> Z;" // CYRILLIC CAPITAL LETTER ZE
"й <> j ;" // CYRILLIC SMALL LETTER SHORT I
"Й <> J ;" // CYRILLIC CAPITAL LETTER SHORT I
"и <> i ;" // CYRILLIC SMALL LETTER I
"И <> I ;" // CYRILLIC CAPITAL LETTER I
"к <> k ;" // CYRILLIC SMALL LETTER KA
"К <> K;" // CYRILLIC CAPITAL LETTER KA
// қ <> XXX ; # CYRILLIC SMALL LETTER KA WITH DESCENDER
// Қ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
// ӄ <> XXX ; # CYRILLIC SMALL LETTER KA WITH HOOK
// Ӄ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH HOOK
// ҡ <> XXX ; # CYRILLIC SMALL LETTER BASHKIR KA
// Ҡ <> XXX ; # CYRILLIC CAPITAL LETTER BASHKIR KA
// ҟ <> XXX ; # CYRILLIC SMALL LETTER KA WITH STROKE
// Ҟ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH STROKE
// ҝ <> XXX ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
// Ҝ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
"л <> l ;" // CYRILLIC SMALL LETTER EL
"Л <> L;" // CYRILLIC CAPITAL LETTER EL
"м <> m ;" // CYRILLIC SMALL LETTER EM
"М <> M ;" // CYRILLIC CAPITAL LETTER EM
"н <> n ;" // CYRILLIC SMALL LETTER EN
"Н <> N;" // CYRILLIC CAPITAL LETTER EN
// ң <> XXX ; # CYRILLIC SMALL LETTER EN WITH DESCENDER
// Ң <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
// ӈ <> XXX ; # CYRILLIC SMALL LETTER EN WITH HOOK
// Ӈ <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH HOOK
// ҥ <> XXX ; # CYRILLIC SMALL LIGATURE EN GHE
// Ҥ <> XXX ; # CYRILLIC CAPITAL LIGATURE EN GHE
"о <> o ;" // CYRILLIC SMALL LETTER O
"О <> O ;" // CYRILLIC CAPITAL LETTER O
// ө <> XXX ; # CYRILLIC SMALL LETTER BARRED O
// Ө <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
"п <> p ;" // CYRILLIC SMALL LETTER PE
"П <> P ;" // CYRILLIC CAPITAL LETTER PE
// ҧ <> XXX ; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK
// Ҧ <> XXX ; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
// ҁ <> XXX ; # CYRILLIC SMALL LETTER KOPPA
// Ҁ <> XXX ; # CYRILLIC CAPITAL LETTER KOPPA
"р <> r ;" // CYRILLIC SMALL LETTER ER
"Р <> R ;" // CYRILLIC CAPITAL LETTER ER
// ҏ <> XXX ; # CYRILLIC SMALL LETTER ER WITH TICK
// Ҏ <> XXX ; # CYRILLIC CAPITAL LETTER ER WITH TICK
"с <> s ;" // CYRILLIC SMALL LETTER ES
"С <> S ;" // CYRILLIC CAPITAL LETTER ES
// ҫ <> XXX ; # CYRILLIC SMALL LETTER ES WITH DESCENDER
// Ҫ <> XXX ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
"т <> t ;" // CYRILLIC SMALL LETTER TE
"Т <> T ;" // CYRILLIC CAPITAL LETTER TE
// ҭ <> XXX ; # CYRILLIC SMALL LETTER TE WITH DESCENDER
// Ҭ <> XXX ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
"у <> u ;" // CYRILLIC SMALL LETTER U
"У <> U ;" // CYRILLIC CAPITAL LETTER U
// ү <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U
// Ү <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U
// ұ <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
// Ұ <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
// ѹ <> XXX ; # CYRILLIC SMALL LETTER UK
// Ѹ <> XXX ; # CYRILLIC CAPITAL LETTER UK
"ф <> f ;" // CYRILLIC SMALL LETTER EF
"Ф <> F ;" // CYRILLIC CAPITAL LETTER EF
"х <> h ;" // CYRILLIC SMALL LETTER HA
"Х <> H;" // CYRILLIC CAPITAL LETTER HA
// ҳ <> XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER
// Ҳ <> XXX ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
// һ <> XXX ; # CYRILLIC SMALL LETTER SHHA
// Һ <> XXX ; # CYRILLIC CAPITAL LETTER SHHA
// ѡ <> XXX ; # CYRILLIC SMALL LETTER OMEGA
// Ѡ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA
// ѿ <> XXX ; # CYRILLIC SMALL LETTER OT
// Ѿ <> XXX ; # CYRILLIC CAPITAL LETTER OT
// ѽ <> XXX ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO
// Ѽ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
// ѻ <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA
// Ѻ <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA
"ц <> c ;" // CYRILLIC SMALL LETTER TSE
"Ц <> C;" // CYRILLIC CAPITAL LETTER TSE
// ҵ <> XXX ; # CYRILLIC SMALL LIGATURE TE TSE
// Ҵ <> XXX ; # CYRILLIC CAPITAL LIGATURE TE TSE
// ҽ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE
// Ҽ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
// ҿ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER
// Ҿ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
"ъ <> [:Ll:] { $modprime2 ;" // CYRILLIC SMALL LETTER HARD SIGN
"Ъ <> $modprime2 ;" // CYRILLIC CAPITAL LETTER HARD SIGN
"ы <> y ;" // CYRILLIC SMALL LETTER YERU
"Ы <> Y ;" // CYRILLIC CAPITAL LETTER YERU
"ь <> [:Ll:] { $modprime ;" // CYRILLIC SMALL LETTER SOFT SIGN
"Ь <> $modprime ;" // CYRILLIC CAPITAL LETTER SOFT SIGN
// ҍ <> XXX ; # CYRILLIC SMALL LETTER SEMISOFT SIGN
// Ҍ <> XXX ; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
// ѣ <> XXX ; # CYRILLIC SMALL LETTER YAT
// Ѣ <> XXX ; # CYRILLIC CAPITAL LETTER YAT
// ѥ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED E
// Ѥ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED E
// ѧ <> XXX ; # CYRILLIC SMALL LETTER LITTLE YUS
// Ѧ <> XXX ; # CYRILLIC CAPITAL LETTER LITTLE YUS
// ѫ <> XXX ; # CYRILLIC SMALL LETTER BIG YUS
// Ѫ <> XXX ; # CYRILLIC CAPITAL LETTER BIG YUS
// ѩ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS
// Ѩ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
// ѭ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED BIG YUS
// Ѭ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
// ѯ <> XXX ; # CYRILLIC SMALL LETTER KSI
// Ѯ <> XXX ; # CYRILLIC CAPITAL LETTER KSI
// ѱ <> XXX ; # CYRILLIC SMALL LETTER PSI
// Ѱ <> XXX ; # CYRILLIC CAPITAL LETTER PSI
// ѳ <> XXX ; # CYRILLIC SMALL LETTER FITA
// Ѳ <> XXX ; # CYRILLIC CAPITAL LETTER FITA
// ѵ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
// Ѵ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
// ҩ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN HA
// Ҩ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
// Ӏ <> XXX ; # CYRILLIC LETTER PALOCHKA
//## ӑ <> XXX ; # CYRILLIC SMALL LETTER A
//## Ӑ <> XXX ; # CYRILLIC CAPITAL LETTER A
//## ӓ <> XXX ; # CYRILLIC SMALL LETTER A
//## Ӓ <> XXX ; # CYRILLIC CAPITAL LETTER A
//## ӛ <> XXX ; # CYRILLIC SMALL LETTER SCHWA
//## Ӛ <> XXX ; # CYRILLIC CAPITAL LETTER SCHWA
//## ѓ <> XXX ; # CYRILLIC SMALL LETTER GHE
//## Ѓ <> XXX ; # CYRILLIC CAPITAL LETTER GHE
//## ѐ <> XXX ; # CYRILLIC SMALL LETTER IE
//## Ѐ <> XXX ; # CYRILLIC CAPITAL LETTER IE
//## ё <> XXX ; # CYRILLIC SMALL LETTER IE
//## Ё <> XXX ; # CYRILLIC CAPITAL LETTER IE
//## ӗ <> XXX ; # CYRILLIC SMALL LETTER IE
//## Ӗ <> XXX ; # CYRILLIC CAPITAL LETTER IE
//## ӂ <> XXX ; # CYRILLIC SMALL LETTER ZHE
//## Ӂ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
//## ӝ <> XXX ; # CYRILLIC SMALL LETTER ZHE
//## Ӝ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
//## ӟ <> XXX ; # CYRILLIC SMALL LETTER ZE
//## Ӟ <> XXX ; # CYRILLIC CAPITAL LETTER ZE
//## ѝ <> XXX ; # CYRILLIC SMALL LETTER I
//## Ѝ <> XXX ; # CYRILLIC CAPITAL LETTER I
//## ӣ <> XXX ; # CYRILLIC SMALL LETTER I
//## Ӣ <> XXX ; # CYRILLIC CAPITAL LETTER I
//## ӥ <> XXX ; # CYRILLIC SMALL LETTER I
//## Ӥ <> XXX ; # CYRILLIC CAPITAL LETTER I
//## ї <> XXX ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
//## Ї <> XXX ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
//## ӧ <> XXX ; # CYRILLIC SMALL LETTER O
//## Ӧ <> XXX ; # CYRILLIC CAPITAL LETTER O
//## ӫ <> XXX ; # CYRILLIC SMALL LETTER BARRED O
//## Ӫ <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
//## ќ <> XXX ; # CYRILLIC SMALL LETTER KA
//## Ќ <> XXX ; # CYRILLIC CAPITAL LETTER KA
//## ӯ <> XXX ; # CYRILLIC SMALL LETTER U
//## Ӯ <> XXX ; # CYRILLIC CAPITAL LETTER U
//## ў <> XXX ; # CYRILLIC SMALL LETTER U
//## Ў <> XXX ; # CYRILLIC CAPITAL LETTER U
//## ӱ <> XXX ; # CYRILLIC SMALL LETTER U
//## Ӱ <> XXX ; # CYRILLIC CAPITAL LETTER U
//## ӳ <> XXX ; # CYRILLIC SMALL LETTER U
//## Ӳ <> XXX ; # CYRILLIC CAPITAL LETTER U
//## ӵ <> XXX ; # CYRILLIC SMALL LETTER CHE
//## Ӵ <> XXX ; # CYRILLIC CAPITAL LETTER CHE
//## ӹ <> XXX ; # CYRILLIC SMALL LETTER YERU
//## Ӹ <> XXX ; # CYRILLIC CAPITAL LETTER YERU
//## ӭ <> XXX ; # CYRILLIC SMALL LETTER E
//## Ӭ <> XXX ; # CYRILLIC CAPITAL LETTER E
//## ѷ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
//## Ѷ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
// Completeness
"$ignore = [[:Mark:]''] * ;"
"| k < q ;"
"| K < Q ;"
"| u < w ;"
"| U < W ;"
"| KS < X } $ignore [:UppercaseLetter:] ;"
"| KS < [:UppercaseLetter:] $ignore { X ;"
"| Ks < X ;"
"| ks < x ;"
":: NFC (NFD) ;"
}
}

View File

@ -0,0 +1,147 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Devanagari_InterIndic.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Devanagari_InterIndic
// Transliterator rule data: Devanagari to InterIndic.
// Every rule in this bundle is one-way (source > target). It rewrites either a
// single Devanagari code point (U+09xx) or a base letter followed by the nukta
// U+093C into one code point in the U+E0xx range (inside the Unicode Private
// Use Area). The low byte of the target mirrors the low byte of the
// corresponding precomposed Devanagari code point.
translit_Devanagari_InterIndic {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Devanagari_InterIndic
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:41:57 2001
//--------------------------------------------------------------------
// Devanagari-InterIndic
// The NFD pre-pass below is commented out, so input is not decomposed first.
// :: NFD;
// Rules for decomposed characters (base letter + nukta U+093C).
// Each trailing comment names the equivalent precomposed code point.
// NOTE(review): these two-code-point rules are listed ahead of the
// single-code-point rules for the same base letters, presumably so the longer
// sequence is matched first - confirm against ICU rule-ordering semantics.
"\u0928\u093c > \ue029;" //\u0929
"\u0930\u093c > \ue031;" //\u0931
"\u0933\u093c > \ue034;" //\u0934
"\u0915\u093c > \ue058;" //\u0958 LETTER QA (For Urdu)
"\u0916\u093c > \ue059;" //\u0959 LETTER KHHA (For Urdu)
"\u0917\u093c > \ue05a;" //\u095a LETTER GHHA (For Urdu)
"\u091c\u093c > \ue05b;" //\u095b LETTER ZA (For Urdu)
"\u0921\u093c > \ue05c;" //\u095c LETTER DDDHA (pronounced RRA)
"\u0922\u093c > \ue05d;" //\u095d LETTER RHA (pronounced RRHA)
"\u092b\u093c > \ue05e;" //\u095e LETTER FA
"\u092f\u093c > \ue05f;" //\u095f LETTER YYA
// Signs
"\u0901>\ue001;" // SIGN CANDRABINDU
"\u0902>\ue002;" // SIGN ANUSVARA
"\u0903>\ue003;" // SIGN VISARGA
// Independent vowels
"\u0905>\ue005;" // LETTER A
"\u0906>\ue006;" // LETTER AA
"\u0907>\ue007;" // LETTER I
"\u0908>\ue008;" // LETTER II
"\u0909>\ue009;" // LETTER U
"\u090a>\ue00a;" // LETTER UU
"\u090b>\ue00b;" // LETTER VOCALIC R
"\u090c>\ue00c;" // LETTER VOCALIC L
"\u090d>\ue00d;" // LETTER CANDRA E (For representing English sounds)
//\u090e>\ue00e; # UNMAPPED LETTER SHORT E(For Southern Scripts)
"\u090f>\ue00f;" // LETTER E
"\u0910>\ue010;" // LETTER AI
"\u0911>\ue011;" // LETTER CANDRA O (For representing English sounds)
//\u0912>\ue012; # UNMAPPED LETTER SHORT O (For Southern Scripts)
"\u0913>\ue013;" // LETTER O
"\u0914>\ue014;" // LETTER AU
// Consonants
"\u0915>\ue015;" // LETTER KA
"\u0916>\ue016;" // LETTER KHA
"\u0917>\ue017;" // LETTER GA
"\u0918>\ue018;" // LETTER GHA
"\u0919>\ue019;" // LETTER NGA
"\u091a>\ue01a;" // LETTER CA
"\u091b>\ue01b;" // LETTER CHA
"\u091c>\ue01c;" // LETTER JA
"\u091d>\ue01d;" // LETTER JHA
"\u091e>\ue01e;" // LETTER NYA
"\u091f>\ue01f;" // LETTER TTA
"\u0920>\ue020;" // LETTER TTHA
"\u0921>\ue021;" // LETTER DDA
"\u0922>\ue022;" // LETTER DDHA
"\u0923>\ue023;" // LETTER NNA
"\u0924>\ue024;" // LETTER TA
"\u0925>\ue025;" // LETTER THA
"\u0926>\ue026;" // LETTER DA
"\u0927>\ue027;" // LETTER DHA
"\u0928>\ue028;" // LETTER NA
"\u0929>\ue029;" // LETTER NNNA
"\u092a>\ue02a;" // LETTER PA
"\u092b>\ue02b;" // LETTER PHA
"\u092c>\ue02c;" // LETTER BA
"\u092d>\ue02d;" // LETTER BHA
"\u092e>\ue02e;" // LETTER MA
"\u092f>\ue02f;" // LETTER YA
"\u0930>\ue030;" // LETTER RA
// NOTE(review): precomposed \u0931 and \u0934 are left unmapped here, while
// their decomposed forms (\u0930\u093c and \u0933\u093c) are mapped to \ue031
// and \ue034 at the top of this rule set - confirm the asymmetry is intended.
//\u0931>\ue031; # UNMAPPED LETTER RRA (Eyelash RA for Southern scripts)
"\u0932>\ue032;" // LETTER LA
"\u0933>\ue033;" // LETTER LLA
//\u0934>\ue034; # UNMAPPED LETTER LLLA (LLLA for Southern scripts)
"\u0935>\ue035;" // LETTER VA
"\u0936>\ue036;" // LETTER SHA
"\u0937>\ue037;" // LETTER SSA
"\u0938>\ue038;" // LETTER SA
"\u0939>\ue039;" // LETTER HA
// Nukta, avagraha and dependent vowel signs
"\u093c>\ue03c;" // SIGN NUKTA
"\u093d>\ue03d;" // SIGN AVAGRAHA
"\u093e>\ue03e;" // VOWEL SIGN AA
"\u093f>\ue03f;" // VOWEL SIGN I
"\u0940>\ue040;" // VOWEL SIGN II
"\u0941>\ue041;" // VOWEL SIGN U
"\u0942>\ue042;" // VOWEL SIGN UU
"\u0943>\ue043;" // VOWEL SIGN VOCALIC R
"\u0944>\ue044;" // VOWEL SIGN VOCALIC RR
"\u0945>\ue045;" // VOWEL SIGN CANDRA E
//\u0946>\ue046; # UNMAPPED VOWEL SIGN SHORT E
"\u0947>\ue047;" // VOWEL SIGN E
"\u0948>\ue048;" // VOWEL SIGN AI
"\u0949>\ue049;" // VOWEL SIGN CANDRA O
//\u094a>\ue04a; # UNMAPPED VOWEL SIGN SHORT O
"\u094b>\ue04b;" // VOWEL SIGN O
"\u094c>\ue04c;" // VOWEL SIGN AU
"\u094d>\ue04d;" // SIGN VIRAMA
"\u0950>\ue050;" // OM
// \u0951>; # UNMAPPED STRESS SIGN UDATTA
// \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
// \u0953>; # UNMAPPED GRAVE ACCENT
// \u0954>; # UNMAPPED ACUTE ACCENT
// Precomposed nukta letters; targets match the decomposed rules at the top.
"\u0958>\ue058;" // LETTER QA (For Urdu)
"\u0959>\ue059;" // LETTER KHHA (For Urdu)
"\u095a>\ue05a;" // LETTER GHHA (For Urdu)
"\u095b>\ue05b;" // LETTER ZA (For Urdu)
"\u095c>\ue05c;" // LETTER DDDHA (pronounced RRA)
"\u095d>\ue05d;" // LETTER RHA (pronounced RRHA)
"\u095e>\ue05e;" // LETTER FA
"\u095f>\ue05f;" // LETTER YYA
"\u0960>\ue060;" // LETTER VOCALIC RR
"\u0961>\ue061;" // LETTER VOCALIC LL
"\u0962>\ue062;" // VOWEL SIGN VOCALIC L
"\u0963>\ue063;" // VOWEL SIGN VOCALIC LL
// \u0964>; # UNMAPPED Devanagari-InterIndic: DANDA
// \u0965>; # UNMAPPED Devanagari-InterIndic: DOUBLE DANDA
// Digits
"\u0966>\ue066;" // DIGIT ZERO
"\u0967>\ue067;" // DIGIT ONE
"\u0968>\ue068;" // DIGIT TWO
"\u0969>\ue069;" // DIGIT THREE
"\u096a>\ue06a;" // DIGIT FOUR
"\u096b>\ue06b;" // DIGIT FIVE
"\u096c>\ue06c;" // DIGIT SIX
"\u096d>\ue06d;" // DIGIT SEVEN
"\u096e>\ue06e;" // DIGIT EIGHT
"\u096f>\ue06f;" // DIGIT NINE
// \u0970>; # UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN
// The NFC post-pass is likewise commented out.
// :: NFC (NFD) ;
}
}

View File

@ -0,0 +1,287 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Fullwidth_Halfwidth.utf8.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Fullwidth_Halfwidth
// Transliterator rule data: Fullwidth <-> Halfwidth.
// Every rule is bidirectional (<>). The left operand is the fullwidth or
// ordinary-width form; the right operand is the ASCII/Latin-1 character or the
// halfwidth form, as named in the trailing comment of each rule.
//
// Operands that belong to the Halfwidth and Fullwidth Forms block
// (U+FF00..U+FFEF) are written as \uXXXX escapes so they cannot be silently
// dropped or folded to their compatibility equivalents by an encoding
// conversion. A rule whose two sides are identical, or whose side is empty,
// does not perform the conversion its comment describes.
translit_Fullwidth_Halfwidth {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:41:57 2001
//--------------------------------------------------------------------
// Fullwidth-Halfwidth
// Mechanically generated from Unicode Character Database
// multicharacter
// Left: one precomposed katakana letter. Right: halfwidth base letter followed
// by the halfwidth voiced (U+FF9E) or semi-voiced (U+FF9F) sound mark.
// These come before the single-character rules so the two-character halfwidth
// sequences are considered first in the reverse direction.
"ガ<>\uff76\uff9e;" // to KATAKANA LETTER GA
"ギ<>\uff77\uff9e;" // to KATAKANA LETTER GI
"グ<>\uff78\uff9e;" // to KATAKANA LETTER GU
"ゲ<>\uff79\uff9e;" // to KATAKANA LETTER GE
"ゴ<>\uff7a\uff9e;" // to KATAKANA LETTER GO
"ザ<>\uff7b\uff9e;" // to KATAKANA LETTER ZA
"ジ<>\uff7c\uff9e;" // to KATAKANA LETTER ZI
"ズ<>\uff7d\uff9e;" // to KATAKANA LETTER ZU
"ゼ<>\uff7e\uff9e;" // to KATAKANA LETTER ZE
"ゾ<>\uff7f\uff9e;" // to KATAKANA LETTER ZO
"ダ<>\uff80\uff9e;" // to KATAKANA LETTER DA
"ヂ<>\uff81\uff9e;" // to KATAKANA LETTER DI
"ヅ<>\uff82\uff9e;" // to KATAKANA LETTER DU
"デ<>\uff83\uff9e;" // to KATAKANA LETTER DE
"ド<>\uff84\uff9e;" // to KATAKANA LETTER DO
"バ<>\uff8a\uff9e;" // to KATAKANA LETTER BA
"パ<>\uff8a\uff9f;" // to KATAKANA LETTER PA
"ビ<>\uff8b\uff9e;" // to KATAKANA LETTER BI
"ピ<>\uff8b\uff9f;" // to KATAKANA LETTER PI
"ブ<>\uff8c\uff9e;" // to KATAKANA LETTER BU
"プ<>\uff8c\uff9f;" // to KATAKANA LETTER PU
"ベ<>\uff8d\uff9e;" // to KATAKANA LETTER BE
"ペ<>\uff8d\uff9f;" // to KATAKANA LETTER PE
"ボ<>\uff8e\uff9e;" // to KATAKANA LETTER BO
"ポ<>\uff8e\uff9f;" // to KATAKANA LETTER PO
"ヴ<>\uff73\uff9e;" // to KATAKANA LETTER VU
"ヷ<>\uff9c\uff9e;" // to KATAKANA LETTER VA
"ヺ<>\uff66\uff9e;" // to KATAKANA LETTER VO
// single character
// Fullwidth ASCII variants U+FF01..U+FF5E (ASCII code + 0xFEE0) on the left,
// the plain ASCII character on the right. ASCII punctuation is quoted because
// it would otherwise be read as rule syntax.
"\uff01<>'!';" // from FULLWIDTH EXCLAMATION MARK
"\uff02<>'\"';" // from FULLWIDTH QUOTATION MARK
"\uff03<>'#';" // from FULLWIDTH NUMBER SIGN
"\uff04<>'$';" // from FULLWIDTH DOLLAR SIGN
"\uff05<>'%';" // from FULLWIDTH PERCENT SIGN
"\uff06<>'&';" // from FULLWIDTH AMPERSAND
"\uff07<>'';" // from FULLWIDTH APOSTROPHE
"\uff08<>'(';" // from FULLWIDTH LEFT PARENTHESIS
"\uff09<>')';" // from FULLWIDTH RIGHT PARENTHESIS
"\uff0a<>'*';" // from FULLWIDTH ASTERISK
"\uff0b<>'+';" // from FULLWIDTH PLUS SIGN
"\uff0c<>',';" // from FULLWIDTH COMMA
"\uff0d<>'-';" // from FULLWIDTH HYPHEN-MINUS
"\uff0e<>'.';" // from FULLWIDTH FULL STOP
"\uff0f<>'/';" // from FULLWIDTH SOLIDUS
"\uff10<>'0';" // from FULLWIDTH DIGIT ZERO
"\uff11<>'1';" // from FULLWIDTH DIGIT ONE
"\uff12<>'2';" // from FULLWIDTH DIGIT TWO
"\uff13<>'3';" // from FULLWIDTH DIGIT THREE
"\uff14<>'4';" // from FULLWIDTH DIGIT FOUR
"\uff15<>'5';" // from FULLWIDTH DIGIT FIVE
"\uff16<>'6';" // from FULLWIDTH DIGIT SIX
"\uff17<>'7';" // from FULLWIDTH DIGIT SEVEN
"\uff18<>'8';" // from FULLWIDTH DIGIT EIGHT
"\uff19<>'9';" // from FULLWIDTH DIGIT NINE
"\uff1a<>':';" // from FULLWIDTH COLON
"\uff1b<>';';" // from FULLWIDTH SEMICOLON
"\uff1c<>'<';" // from FULLWIDTH LESS-THAN SIGN
"\uff1d<>'=';" // from FULLWIDTH EQUALS SIGN
"\uff1e<>'>';" // from FULLWIDTH GREATER-THAN SIGN
"\uff1f<>'?';" // from FULLWIDTH QUESTION MARK
"\uff20<>'@';" // from FULLWIDTH COMMERCIAL AT
"\uff21<>A;" // from FULLWIDTH LATIN CAPITAL LETTER A
"\uff22<>B;" // from FULLWIDTH LATIN CAPITAL LETTER B
"\uff23<>C;" // from FULLWIDTH LATIN CAPITAL LETTER C
"\uff24<>D;" // from FULLWIDTH LATIN CAPITAL LETTER D
"\uff25<>E;" // from FULLWIDTH LATIN CAPITAL LETTER E
"\uff26<>F;" // from FULLWIDTH LATIN CAPITAL LETTER F
"\uff27<>G;" // from FULLWIDTH LATIN CAPITAL LETTER G
"\uff28<>H;" // from FULLWIDTH LATIN CAPITAL LETTER H
"\uff29<>I;" // from FULLWIDTH LATIN CAPITAL LETTER I
"\uff2a<>J;" // from FULLWIDTH LATIN CAPITAL LETTER J
"\uff2b<>K;" // from FULLWIDTH LATIN CAPITAL LETTER K
"\uff2c<>L;" // from FULLWIDTH LATIN CAPITAL LETTER L
"\uff2d<>M;" // from FULLWIDTH LATIN CAPITAL LETTER M
"\uff2e<>N;" // from FULLWIDTH LATIN CAPITAL LETTER N
"\uff2f<>O;" // from FULLWIDTH LATIN CAPITAL LETTER O
"\uff30<>P;" // from FULLWIDTH LATIN CAPITAL LETTER P
"\uff31<>Q;" // from FULLWIDTH LATIN CAPITAL LETTER Q
"\uff32<>R;" // from FULLWIDTH LATIN CAPITAL LETTER R
"\uff33<>S;" // from FULLWIDTH LATIN CAPITAL LETTER S
"\uff34<>T;" // from FULLWIDTH LATIN CAPITAL LETTER T
"\uff35<>U;" // from FULLWIDTH LATIN CAPITAL LETTER U
"\uff36<>V;" // from FULLWIDTH LATIN CAPITAL LETTER V
"\uff37<>W;" // from FULLWIDTH LATIN CAPITAL LETTER W
"\uff38<>X;" // from FULLWIDTH LATIN CAPITAL LETTER X
"\uff39<>Y;" // from FULLWIDTH LATIN CAPITAL LETTER Y
"\uff3a<>Z;" // from FULLWIDTH LATIN CAPITAL LETTER Z
"\uff3b<>'[';" // from FULLWIDTH LEFT SQUARE BRACKET
"\uff3c<>'\\';" // from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
"\uff3d<>']';" // from FULLWIDTH RIGHT SQUARE BRACKET
"\uff3e<>'^';" // from FULLWIDTH CIRCUMFLEX ACCENT
"\uff3f<>'_';" // from FULLWIDTH LOW LINE
"\uff40<>'`';" // from FULLWIDTH GRAVE ACCENT
"\uff41<>a;" // from FULLWIDTH LATIN SMALL LETTER A
"\uff42<>b;" // from FULLWIDTH LATIN SMALL LETTER B
"\uff43<>c;" // from FULLWIDTH LATIN SMALL LETTER C
"\uff44<>d;" // from FULLWIDTH LATIN SMALL LETTER D
"\uff45<>e;" // from FULLWIDTH LATIN SMALL LETTER E
"\uff46<>f;" // from FULLWIDTH LATIN SMALL LETTER F
"\uff47<>g;" // from FULLWIDTH LATIN SMALL LETTER G
"\uff48<>h;" // from FULLWIDTH LATIN SMALL LETTER H
"\uff49<>i;" // from FULLWIDTH LATIN SMALL LETTER I
"\uff4a<>j;" // from FULLWIDTH LATIN SMALL LETTER J
"\uff4b<>k;" // from FULLWIDTH LATIN SMALL LETTER K
"\uff4c<>l;" // from FULLWIDTH LATIN SMALL LETTER L
"\uff4d<>m;" // from FULLWIDTH LATIN SMALL LETTER M
"\uff4e<>n;" // from FULLWIDTH LATIN SMALL LETTER N
"\uff4f<>o;" // from FULLWIDTH LATIN SMALL LETTER O
"\uff50<>p;" // from FULLWIDTH LATIN SMALL LETTER P
"\uff51<>q;" // from FULLWIDTH LATIN SMALL LETTER Q
"\uff52<>r;" // from FULLWIDTH LATIN SMALL LETTER R
"\uff53<>s;" // from FULLWIDTH LATIN SMALL LETTER S
"\uff54<>t;" // from FULLWIDTH LATIN SMALL LETTER T
"\uff55<>u;" // from FULLWIDTH LATIN SMALL LETTER U
"\uff56<>v;" // from FULLWIDTH LATIN SMALL LETTER V
"\uff57<>w;" // from FULLWIDTH LATIN SMALL LETTER W
"\uff58<>x;" // from FULLWIDTH LATIN SMALL LETTER X
"\uff59<>y;" // from FULLWIDTH LATIN SMALL LETTER Y
"\uff5a<>z;" // from FULLWIDTH LATIN SMALL LETTER Z
"\uff5b<>'{';" // from FULLWIDTH LEFT CURLY BRACKET
"\uff5c<>'|';" // from FULLWIDTH VERTICAL LINE
"\uff5d<>'}';" // from FULLWIDTH RIGHT CURLY BRACKET
"\uff5e<>'~';" // from FULLWIDTH TILDE
// CJK punctuation and katakana: right operands are U+FF61..U+FF9F in order.
"。<>\uff61;" // to HALFWIDTH IDEOGRAPHIC FULL STOP
"「<>\uff62;" // to HALFWIDTH LEFT CORNER BRACKET
"」<>\uff63;" // to HALFWIDTH RIGHT CORNER BRACKET
"、<>\uff64;" // to HALFWIDTH IDEOGRAPHIC COMMA
"・<>\uff65;" // to HALFWIDTH KATAKANA MIDDLE DOT
"ヲ<>\uff66;" // to HALFWIDTH KATAKANA LETTER WO
"ァ<>\uff67;" // to HALFWIDTH KATAKANA LETTER SMALL A
"ィ<>\uff68;" // to HALFWIDTH KATAKANA LETTER SMALL I
"ゥ<>\uff69;" // to HALFWIDTH KATAKANA LETTER SMALL U
"ェ<>\uff6a;" // to HALFWIDTH KATAKANA LETTER SMALL E
"ォ<>\uff6b;" // to HALFWIDTH KATAKANA LETTER SMALL O
"ャ<>\uff6c;" // to HALFWIDTH KATAKANA LETTER SMALL YA
"ュ<>\uff6d;" // to HALFWIDTH KATAKANA LETTER SMALL YU
"ョ<>\uff6e;" // to HALFWIDTH KATAKANA LETTER SMALL YO
"ッ<>\uff6f;" // to HALFWIDTH KATAKANA LETTER SMALL TU
"ー<>\uff70;" // to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
"ア<>\uff71;" // to HALFWIDTH KATAKANA LETTER A
"イ<>\uff72;" // to HALFWIDTH KATAKANA LETTER I
"ウ<>\uff73;" // to HALFWIDTH KATAKANA LETTER U
"エ<>\uff74;" // to HALFWIDTH KATAKANA LETTER E
"オ<>\uff75;" // to HALFWIDTH KATAKANA LETTER O
"カ<>\uff76;" // to HALFWIDTH KATAKANA LETTER KA
"キ<>\uff77;" // to HALFWIDTH KATAKANA LETTER KI
"ク<>\uff78;" // to HALFWIDTH KATAKANA LETTER KU
"ケ<>\uff79;" // to HALFWIDTH KATAKANA LETTER KE
"コ<>\uff7a;" // to HALFWIDTH KATAKANA LETTER KO
"サ<>\uff7b;" // to HALFWIDTH KATAKANA LETTER SA
"シ<>\uff7c;" // to HALFWIDTH KATAKANA LETTER SI
"ス<>\uff7d;" // to HALFWIDTH KATAKANA LETTER SU
"セ<>\uff7e;" // to HALFWIDTH KATAKANA LETTER SE
"ソ<>\uff7f;" // to HALFWIDTH KATAKANA LETTER SO
"タ<>\uff80;" // to HALFWIDTH KATAKANA LETTER TA
"チ<>\uff81;" // to HALFWIDTH KATAKANA LETTER TI
"ツ<>\uff82;" // to HALFWIDTH KATAKANA LETTER TU
"テ<>\uff83;" // to HALFWIDTH KATAKANA LETTER TE
"ト<>\uff84;" // to HALFWIDTH KATAKANA LETTER TO
"ナ<>\uff85;" // to HALFWIDTH KATAKANA LETTER NA
"ニ<>\uff86;" // to HALFWIDTH KATAKANA LETTER NI
"ヌ<>\uff87;" // to HALFWIDTH KATAKANA LETTER NU
"ネ<>\uff88;" // to HALFWIDTH KATAKANA LETTER NE
"\u30ce<>\uff89;" // to HALFWIDTH KATAKANA LETTER NO
"ハ<>\uff8a;" // to HALFWIDTH KATAKANA LETTER HA
"ヒ<>\uff8b;" // to HALFWIDTH KATAKANA LETTER HI
"フ<>\uff8c;" // to HALFWIDTH KATAKANA LETTER HU
"ヘ<>\uff8d;" // to HALFWIDTH KATAKANA LETTER HE
"ホ<>\uff8e;" // to HALFWIDTH KATAKANA LETTER HO
"マ<>\uff8f;" // to HALFWIDTH KATAKANA LETTER MA
"ミ<>\uff90;" // to HALFWIDTH KATAKANA LETTER MI
"ム<>\uff91;" // to HALFWIDTH KATAKANA LETTER MU
"メ<>\uff92;" // to HALFWIDTH KATAKANA LETTER ME
"モ<>\uff93;" // to HALFWIDTH KATAKANA LETTER MO
"ヤ<>\uff94;" // to HALFWIDTH KATAKANA LETTER YA
"ユ<>\uff95;" // to HALFWIDTH KATAKANA LETTER YU
"ヨ<>\uff96;" // to HALFWIDTH KATAKANA LETTER YO
"ラ<>\uff97;" // to HALFWIDTH KATAKANA LETTER RA
"リ<>\uff98;" // to HALFWIDTH KATAKANA LETTER RI
"ル<>\uff99;" // to HALFWIDTH KATAKANA LETTER RU
"レ<>\uff9a;" // to HALFWIDTH KATAKANA LETTER RE
"ロ<>\uff9b;" // to HALFWIDTH KATAKANA LETTER RO
"ワ<>\uff9c;" // to HALFWIDTH KATAKANA LETTER WA
"ン<>\uff9d;" // to HALFWIDTH KATAKANA LETTER N
// Left operands here are the combining sound marks U+3099 / U+309A, escaped
// because a bare combining mark is easy to lose or misread.
"\u3099<>\uff9e;" // to HALFWIDTH KATAKANA VOICED SOUND MARK
"\u309a<>\uff9f;" // to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
// Hangul: right operands are the halfwidth letters U+FFA0..U+FFDC (the block
// has gaps, so the sequence is not contiguous). Left operands are conjoining
// jamo, kept as they appear in this file.
// NOTE(review): the left side of the FILLER rule was not visible in the file;
// U+1160 (HANGUL JUNGSEONG FILLER) is assumed for consistency with the
// conjoining jamo used below - confirm against the generator input
// (U+3164 HANGUL FILLER is the other candidate).
"\u1160<>\uffa0;" // to HALFWIDTH HANGUL FILLER
"ᄀ<>\uffa1;" // to HALFWIDTH HANGUL LETTER KIYEOK
"ᄁ<>\uffa2;" // to HALFWIDTH HANGUL LETTER SSANGKIYEOK
"ᆪ<>\uffa3;" // to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
"ᄂ<>\uffa4;" // to HALFWIDTH HANGUL LETTER NIEUN
"ᆬ<>\uffa5;" // to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
"ᆭ<>\uffa6;" // to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
"ᄃ<>\uffa7;" // to HALFWIDTH HANGUL LETTER TIKEUT
"ᄄ<>\uffa8;" // to HALFWIDTH HANGUL LETTER SSANGTIKEUT
"ᄅ<>\uffa9;" // to HALFWIDTH HANGUL LETTER RIEUL
"ᆰ<>\uffaa;" // to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
"ᆱ<>\uffab;" // to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
"ᆲ<>\uffac;" // to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
"ᆳ<>\uffad;" // to HALFWIDTH HANGUL LETTER RIEUL-SIOS
"ᆴ<>\uffae;" // to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
"ᆵ<>\uffaf;" // to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
"ᄚ<>\uffb0;" // to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
"ᄆ<>\uffb1;" // to HALFWIDTH HANGUL LETTER MIEUM
"ᄇ<>\uffb2;" // to HALFWIDTH HANGUL LETTER PIEUP
"ᄈ<>\uffb3;" // to HALFWIDTH HANGUL LETTER SSANGPIEUP
"ᄡ<>\uffb4;" // to HALFWIDTH HANGUL LETTER PIEUP-SIOS
"ᄉ<>\uffb5;" // to HALFWIDTH HANGUL LETTER SIOS
"ᄊ<>\uffb6;" // to HALFWIDTH HANGUL LETTER SSANGSIOS
"ᄋ<>\uffb7;" // to HALFWIDTH HANGUL LETTER IEUNG
"ᄌ<>\uffb8;" // to HALFWIDTH HANGUL LETTER CIEUC
"ᄍ<>\uffb9;" // to HALFWIDTH HANGUL LETTER SSANGCIEUC
"ᄎ<>\uffba;" // to HALFWIDTH HANGUL LETTER CHIEUCH
"ᄏ<>\uffbb;" // to HALFWIDTH HANGUL LETTER KHIEUKH
"ᄐ<>\uffbc;" // to HALFWIDTH HANGUL LETTER THIEUTH
"ᄑ<>\uffbd;" // to HALFWIDTH HANGUL LETTER PHIEUPH
"ᄒ<>\uffbe;" // to HALFWIDTH HANGUL LETTER HIEUH
"ᅡ<>\uffc2;" // to HALFWIDTH HANGUL LETTER A
"ᅢ<>\uffc3;" // to HALFWIDTH HANGUL LETTER AE
"ᅣ<>\uffc4;" // to HALFWIDTH HANGUL LETTER YA
"ᅤ<>\uffc5;" // to HALFWIDTH HANGUL LETTER YAE
"ᅥ<>\uffc6;" // to HALFWIDTH HANGUL LETTER EO
"ᅦ<>\uffc7;" // to HALFWIDTH HANGUL LETTER E
"ᅧ<>\uffca;" // to HALFWIDTH HANGUL LETTER YEO
"ᅨ<>\uffcb;" // to HALFWIDTH HANGUL LETTER YE
"ᅩ<>\uffcc;" // to HALFWIDTH HANGUL LETTER O
"ᅪ<>\uffcd;" // to HALFWIDTH HANGUL LETTER WA
"ᅫ<>\uffce;" // to HALFWIDTH HANGUL LETTER WAE
"ᅬ<>\uffcf;" // to HALFWIDTH HANGUL LETTER OE
"ᅭ<>\uffd2;" // to HALFWIDTH HANGUL LETTER YO
"ᅮ<>\uffd3;" // to HALFWIDTH HANGUL LETTER U
"ᅯ<>\uffd4;" // to HALFWIDTH HANGUL LETTER WEO
"ᅰ<>\uffd5;" // to HALFWIDTH HANGUL LETTER WE
"ᅱ<>\uffd6;" // to HALFWIDTH HANGUL LETTER WI
"ᅲ<>\uffd7;" // to HALFWIDTH HANGUL LETTER YU
"ᅳ<>\uffda;" // to HALFWIDTH HANGUL LETTER EU
"ᅴ<>\uffdb;" // to HALFWIDTH HANGUL LETTER YI
"ᅵ<>\uffdc;" // to HALFWIDTH HANGUL LETTER I
// Fullwidth symbol variants U+FFE0..U+FFE6 on the left.
"\uffe0<>'¢';" // from FULLWIDTH CENT SIGN
"\uffe1<>'£';" // from FULLWIDTH POUND SIGN
"\uffe2<>'¬';" // from FULLWIDTH NOT SIGN
// The macron maps to SPACE + combining macron (U+0304); the plain-space rule
// must stay after it so the longer sequence is tried first in reverse.
"\uffe3<>' '\u0304;" // from FULLWIDTH MACRON
"'\u3000'<>' ';" // ideographic space (place this after MACRON)
"\uffe4<>'¦';" // from FULLWIDTH BROKEN BAR
"\uffe5<>'¥';" // from FULLWIDTH YEN SIGN
"\uffe6<>₩;" // from FULLWIDTH WON SIGN
// Halfwidth symbol variants U+FFE8..U+FFEE on the right.
"│<>\uffe8;" // to HALFWIDTH FORMS LIGHT VERTICAL
"←<>\uffe9;" // to HALFWIDTH LEFTWARDS ARROW
"↑<>\uffea;" // to HALFWIDTH UPWARDS ARROW
"→<>\uffeb;" // to HALFWIDTH RIGHTWARDS ARROW
"↓<>\uffec;" // to HALFWIDTH DOWNWARDS ARROW
"■<>\uffed;" // to HALFWIDTH BLACK SQUARE
"○<>\uffee;" // to HALFWIDTH WHITE CIRCLE
// eof
}
}

View File

@ -0,0 +1,283 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Greek_Latin.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Greek_Latin
// Transliterator rule data: Greek <-> Latin.
// Rule notation used below (see the ICU transliterator rule syntax):
//   a <> b   bidirectional rule; a > b forward only; a < b reverse only
//   x { y } z   y is the text that is replaced, x and z are surrounding context
//   |   cursor position left in the output, so later rules can re-process it
//   ( ) and $1   capture a segment and refer back to it
//   $name = ... ;   variable definition; ::ID ;   invoke another transform
// Rules are order-sensitive: several comments below explain why a group has
// to appear before another.
translit_Greek_Latin {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// $Source: /xsrl/Nsvn/icu/icu/data/Attic/translit_Greek_Latin.txt,v $
// $Date: 2001/10/26 05:41:16 $
// $Revision: 1.1 $
//--------------------------------------------------------------------
// Rules are predicated on running NFD first, and NFC afterwards
// (the parenthesised ID is the transform used for the reverse direction).
"::NFD (NFC) ;"
// TEST CASES
// Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
// ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
// ᾳ ῃ ῳ ὃ ὄ
// ὠς ὡς ὢς ὣς
// Ὠς Ὡς Ὢς Ὣς
// ὨΣ ὩΣ ὪΣ ὫΣ
// Ạ, ạ, Ẹ, ẹ, Ọ, ọ
// Useful variables
"$lower = [:Ll:] ;"
"$upper = [:Lu:] ;"
"$accent = [:M:] ;"
"$macron = \u0304 ;"
"$ddot = \u0308 ;"
"$lcgvowel = [αεηιουω] ;"
"$ucgvowel = [ΑΕΗΙΟΥΩ] ;"
"$gvowel = [$lcgvowel $ucgvowel] ;"
// $lcgvowelC is defined here but not referenced by any rule in this bundle.
"$lcgvowelC = [$lcgvowel $accent] ;"
"$vowel = [ AEIOUaeiou $gvowel] ;"
"$beforeLower = $accent * $lower ;"
"$gammaLike = [ΓΚΞΧγκξχ] ;"
// The next three values are bare combining marks written literally; they are
// nearly invisible without a base character. NOTE(review): they appear to be
// U+0313 (smooth breathing), U+0314 (rough breathing) and U+0345 (iota
// subscript) - confirm with a hex dump before editing these lines.
"$smooth = ̓ ;"
"$rough = ̔ ;"
"$iotasub = ͅ ;"
// Fix punctuation
"\; <> \? ;"
"· <> \: ;"
// CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
"\u0342 <> \u0302 ;"
// IOTA: convert iota subscript to iota
// first make previous alpha long!
"Α } $accent * $iotasub > A $macron ;"
"α } $accent * $iotasub > a $macron ;"
// now convert to uppercase if after uppercase, otherwise to lowercase
"$upper $accent * { $iotasub > I ;"
"$iotasub > i ;"
// reverse direction: an i that follows a letter carrying a macron becomes an iota subscript
"| $1 $iotasub < ([:L:] $macron [:M:]*) i ;"
// BREATHING
// Convert rough breathing to h, and move before letters.
// Make A ` x => H a x  (capital vowel + rough breathing, followed by lowercase)
"Α $rough } $beforeLower > H | α ;"
"Ε $rough } $beforeLower > H | ε;"
"Η $rough } $beforeLower > H | η ;"
"Ι ($ddot?) $rough } $beforeLower > H | ι $1;"
"Ο $rough } $beforeLower > H | ο ;"
"Υ $rough } $beforeLower > H | υ ;"
"Ω ($ddot?) $rough } $beforeLower > H | ω $1;"
// Make A x ` => H a x  (rough breathing written on the second letter)
"Α ($lower) $rough > H | α $1 ;"
"Ε ($lower) $rough > H | ε $1 ;"
"Η ($lower) $rough > H | η $1 ;"
"Ι ($lower $ddot?) $rough > H | ι $1 ;"
"Ο ($lower) $rough > H | ο $1 ;"
"Υ ($lower) $rough > H | υ $1 ;"
"Ω ($lower $ddot?) $rough > H | ω $1 ;"
// Otherwise, make x ` into h x and X ` into H X
"($lcgvowel + $ddot? ) $rough > h | $1 ;"
"($gvowel + $ddot? ) $rough > H | $1 ;"
// Go backwards with H: in the reverse direction, h/H before one or two Latin
// vowels is turned back into a rough breathing placed after the vowel(s).
// The two-vowel patterns are listed before the one-vowel patterns.
"| $1 $rough < h ([aeiouyAEIOUY] $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
"| $1 $rough < h ([aeiouyAEIOUY] $macron? $ddot?) ;"
"| $1 $rough < H ([AEIOUY] $macron? $ddot?[aeiouyAEIOUY] $macron?) ;"
"| $1 $rough < H ([AEIOUY] $macron? $ddot?) ;"
// titlecase, have to fix individually
// NOTE(review): the two-vowel y rule below has no $ddot? although the other
// five vowels and the one-vowel y rule do - confirm whether that is intended.
"| $1 $rough < H (a $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
"| $1 $rough < H (e $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
"| $1 $rough < H (i $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
"| $1 $rough < H (o $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
"| $1 $rough < H (u $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
"| $1 $rough < H (y $macron? [aeiouyAEIOUY] $macron?) ;"
"| $1 $rough < H (a $macron? $ddot? ) ;"
"| $1 $rough < H (e $macron? $ddot? ) ;"
"| $1 $rough < H (i $macron? $ddot? ) ;"
"| $1 $rough < H (o $macron? $ddot? ) ;"
"| $1 $rough < H (u $macron? $ddot? ) ;"
"| $1 $rough < H (y $macron? $ddot? ) ;"
// Now do smooth
// delete smooth breathing for Latin
"$smooth > ;"
// insert in Greek: add a smooth breathing after an initial vowel sequence
// (one that is preceded by a non-letter)
"| $1 $smooth < [:^L:] { ([aeiouyAEIOUY] $macron? [aeiouyAEIOUY] $macron?) } [^[$smooth]] ;"
"| $1 $smooth < [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] ;"
// TODO: preserve smooth/rough breathing if not
// on initial vowel sequence
// These multi-character mappings need to be up here so the single-letter
// rules in the NORMAL section don't mask them.
"η <> e $macron ;"
"Η <> E $macron ;"
"φ <> ph ;"
"Ψ } $beforeLower <> Ps ;"
"Ψ <> PS ;"
"Φ } $beforeLower <> Ph ;"
"Φ <> PH ;"
"ψ <> ps ;"
"ω <> o $macron ;"
"Ω <> O $macron;"
// NORMAL
"α <> a ;"
"Α <> A ;"
"β <> b ;"
"Β <> B ;"
// gamma before a gamma-like letter is written n
"γ } $gammaLike <> n } [gkc] ;"
"γ <> g ;"
"Γ } $gammaLike <> N } [gkc] ;"
"Γ <> G ;"
"δ <> d ;"
"Δ <> D ;"
"ε <> e ;"
"Ε <> E ;"
"ζ <> z ;"
"Ζ <> Z ;"
"θ <> th ;"
"Θ } $beforeLower <> Th ;"
"Θ <> TH ;"
"ι <> i ;"
"Ι <> I ;"
"κ <> k ;"
"Κ <> K ;"
"λ <> l ;"
"Λ <> L ;"
"μ <> m ;"
"Μ <> M ;"
// nu before a gamma-like letter gets an apostrophe, keeping it distinct from
// the n produced by the gamma rules above.
// NOTE(review): the lowercase rule is forward-only (>) while the uppercase
// rule is bidirectional (<>) - confirm whether the difference is intended.
"ν } $gammaLike > n\' ;"
"ν <> n ;"
"Ν } $gammaLike <> N\' ;"
"Ν <> N ;"
"ξ <> x ;"
"Ξ <> X ;"
"ο <> o ;"
"Ο <> O ;"
"π <> p ;"
"Π <> P ;"
// rho with rough breathing must be handled before plain rho
"ρ $rough <> rh;"
"Ρ $rough } $beforeLower <> Rh ;"
"Ρ $rough <> RH ;"
"ρ <> r ;"
"Ρ <> R ;"
// sigma after pi gets an apostrophe, keeping it distinct from psi (ps)
"[Pp] {ς > \'s ;"
"[Pp] {σ > \'s ;"
// reverse direction: an isolated s (non-letters on both sides) becomes σ, and
// an s followed by a non-letter becomes final sigma ς
"σ < [:^L:] [:M:]* { s } [:^L:] ;"
"ς <> s } [:^L:] ;"
"σ <> s ;"
"[Pp] { Σ <> \'S ;"
"Σ <> S ;"
"τ <> t ;"
"Τ <> T ;"
// upsilon is u after a vowel, y otherwise
"$vowel {υ } <> u ;"
"υ <> y ;"
"$vowel { Υ <> U ;"
"Υ <> Y ;"
"χ <> ch ;"
"Χ } $beforeLower <> Ch ;"
"Χ <> CH ;"
// Completeness for ASCII: reverse-only rules that rewrite Latin letters with
// no Greek counterpart above into letters that do, leaving the cursor before
// the result so the rules above then apply to it.
"$ignore = [[:Mark:]''] * ;"
"| k < c ;"
"| ph < f ;"
"| i < j ;"
"| k < q ;"
"| u < v ;"
"| u < w ;"
"| K < C ;"
"| PH < F } $ignore [:UppercaseLetter:] ;"
"| PH < [:UppercaseLetter:] $ignore { F ;"
"| PH < F ;"
"| I < J ;"
"| K < Q ;"
"| U < V ;"
"| U < W ;"
// any remaining rough breathing <-> h/H (H when next to an uppercase letter)
"$rough } $ignore [:UppercaseLetter:] > H ;"
"$ignore [:UppercaseLetter:] { $rough > H ;"
"$rough < H ;"
"$rough <> h ;"
// Completeness for Greek: forward-only rules that replace Greek symbol
// variants with the ordinary letter, leaving the cursor before the result so
// the rules above then apply to it.
"ϐ > | β ;"
"ϑ > | θ ;"
"ϒ > | Υ ;"
"ϕ > | φ ;"
"ϖ > | π ;"
"ϰ > | κ ;"
"ϱ > | ρ ;"
"ϲ > | σ ;"
"ϳ > j ;"
"ϴ > | Θ ;"
"ϵ > | ε ;"
"ͺ > i;"
// recompose the result (NFC forward, NFD for the reverse direction)
"::NFC (NFD) ;"
}
}

View File

@ -0,0 +1,115 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Gujarati_InterIndic.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Gujarati_InterIndic
translit_Gujarati_InterIndic {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Gujarati_InterIndic.utf8.txt
// Date: Thu Mar 1 20:03:54 2001
//--------------------------------------------------------------------
// Gujarati_InterIndic
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:41:58 2001
//--------------------------------------------------------------------
// Gujarati-InterIndic
//:: NFD (NFC) ;
"\u0a81>\ue001;" // SIGN CANDRABINDU
"\u0a82>\ue002;" // SIGN ANUSVARA
"\u0a83>\ue003;" // SIGN VISARGA
"\u0a85>\ue005;" // LETTER A
"\u0a86>\ue006;" // LETTER AA
"\u0a87>\ue007;" // LETTER I
"\u0a88>\ue008;" // LETTER II
"\u0a89>\ue009;" // LETTER U
"\u0a8a>\ue00a;" // LETTER UU
"\u0a8b>\ue00b;" // LETTER VOCALIC R
// \u0a8d>; # UNMAPPED Gujarati-InterIndic: VOWEL CANDRA E
"\u0a8f>\ue00f;" // LETTER E
"\u0a90>\ue010;" // LETTER AI
// \u0a91>; # UNMAPPED Gujarati-InterIndic: VOWEL CANDRA O
"\u0a93>\ue013;" // LETTER O
"\u0a94>\ue014;" // LETTER AU
"\u0a95>\ue015;" // LETTER KA
"\u0a96>\ue016;" // LETTER KHA
"\u0a97>\ue017;" // LETTER GA
"\u0a98>\ue018;" // LETTER GHA
"\u0a99>\ue019;" // LETTER NGA
"\u0a9a>\ue01a;" // LETTER CA
"\u0a9b>\ue01b;" // LETTER CHA
"\u0a9c>\ue01c;" // LETTER JA
"\u0a9d>\ue01d;" // LETTER JHA
"\u0a9e>\ue01e;" // LETTER NYA
"\u0a9f>\ue01f;" // LETTER TTA
"\u0aa0>\ue020;" // LETTER TTHA
"\u0aa1>\ue021;" // LETTER DDA
"\u0aa2>\ue022;" // LETTER DDHA
"\u0aa3>\ue023;" // LETTER NNA
"\u0aa4>\ue024;" // LETTER TA
"\u0aa5>\ue025;" // LETTER THA
"\u0aa6>\ue026;" // LETTER DA
"\u0aa7>\ue027;" // LETTER DHA
"\u0aa8>\ue028;" // LETTER NA
"\u0aaa>\ue02a;" // LETTER PA
"\u0aab>\ue02b;" // LETTER PHA
"\u0aac>\ue02c;" // LETTER BA
"\u0aad>\ue02d;" // LETTER BHA
"\u0aae>\ue02e;" // LETTER MA
"\u0aaf>\ue02f;" // LETTER YA
"\u0ab0>\ue030;" // LETTER RA
"\u0ab2>\ue032;" // LETTER LA
"\u0ab3>\ue033;" // LETTER LLA
"\u0ab5>\ue035;" // LETTER VA
"\u0ab6>\ue036;" // LETTER SHA
"\u0ab7>\ue037;" // LETTER SSA
"\u0ab8>\ue038;" // LETTER SA
"\u0ab9>\ue039;" // LETTER HA
"\u0abc>\ue03c;" // SIGN NUKTA
"\u0abd>\ue03d;" // SIGN AVAGRAHA
"\u0abe>\ue03e;" // VOWEL SIGN AA
"\u0abf>\ue03f;" // VOWEL SIGN I
"\u0ac0>\ue040;" // VOWEL SIGN II
"\u0ac1>\ue041;" // VOWEL SIGN U
"\u0ac2>\ue042;" // VOWEL SIGN UU
"\u0ac3>\ue043;" // VOWEL SIGN VOCALIC R
"\u0ac4>\ue044;" // VOWEL SIGN VOCALIC RR
"\u0ac5>\ue045;" // VOWEL SIGN CANDRA E
"\u0ac7>\ue047;" // VOWEL SIGN E
"\u0ac8>\ue048;" // VOWEL SIGN AI
"\u0ac9>\ue049;" // VOWEL SIGN CANDRA O
"\u0acb>\ue04b;" // VOWEL SIGN O
"\u0acc>\ue04c;" // VOWEL SIGN AU
"\u0acd>\ue04d;" // SIGN VIRAMA
"\u0ad0>\ue050;" // OM
"\u0ae0>\ue060;" // LETTER VOCALIC RR
"\u0ae6>\ue066;" // DIGIT ZERO
"\u0ae7>\ue067;" // DIGIT ONE
"\u0ae8>\ue068;" // DIGIT TWO
"\u0ae9>\ue069;" // DIGIT THREE
"\u0aea>\ue06a;" // DIGIT FOUR
"\u0aeb>\ue06b;" // DIGIT FIVE
"\u0aec>\ue06c;" // DIGIT SIX
"\u0aed>\ue06d;" // DIGIT SEVEN
"\u0aee>\ue06e;" // DIGIT EIGHT
"\u0aef>\ue06f;" // DIGIT NINE
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,112 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Gurmukhi_InterIndic.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Gurmukhi_InterIndic
translit_Gurmukhi_InterIndic {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Gurmukhi_InterIndic.utf8.txt
// Date: Thu Mar 1 20:03:54 2001
//--------------------------------------------------------------------
// Gurmukhi_InterIndic
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:41:58 2001
//--------------------------------------------------------------------
// Gurmukhi-InterIndic
//:: NFD (NFC) ;
"\u0a02>\ue001;" // REMAP (indicExceptions.txt): \u0a01>\u0a02 = SIGN CANDRABINDU>SIGN BINDI
"\u0a05>\ue005;" // LETTER A
"\u0a06>\ue006;" // LETTER AA
"\u0a07>\ue007;" // LETTER I
"\u0a08>\ue008;" // LETTER II
"\u0a09>\ue009;" // LETTER U
"\u0a0a>\ue00a;" // LETTER UU
"\u0a0f>\ue00f;" // LETTER EE
"\u0a10>\ue010;" // LETTER AI
"\u0a13>\ue013;" // LETTER OO
"\u0a14>\ue014;" // LETTER AU
"\u0a15>\ue015;" // LETTER KA
"\u0a16>\ue016;" // LETTER KHA
"\u0a17>\ue017;" // LETTER GA
"\u0a18>\ue018;" // LETTER GHA
"\u0a19>\ue019;" // LETTER NGA
"\u0a1a>\ue01a;" // LETTER CA
"\u0a1b>\ue01b;" // LETTER CHA
"\u0a1c>\ue01c;" // LETTER JA
"\u0a1d>\ue01d;" // LETTER JHA
"\u0a1e>\ue01e;" // LETTER NYA
"\u0a1f>\ue01f;" // LETTER TTA
"\u0a20>\ue020;" // LETTER TTHA
"\u0a21>\ue021;" // LETTER DDA
"\u0a22>\ue022;" // LETTER DDHA
"\u0a23>\ue023;" // LETTER NNA
"\u0a24>\ue024;" // LETTER TA
"\u0a25>\ue025;" // LETTER THA
"\u0a26>\ue026;" // LETTER DA
"\u0a27>\ue027;" // LETTER DHA
"\u0a28>\ue028;" // LETTER NA
"\u0a2a>\ue02a;" // LETTER PA
"\u0a2b>\ue02b;" // LETTER PHA
"\u0a2c>\ue02c;" // LETTER BA
"\u0a2d>\ue02d;" // LETTER BHA
"\u0a2e>\ue02e;" // LETTER MA
"\u0a2f>\ue02f;" // LETTER YA
"\u0a30>\ue030;" // LETTER RA
"\u0a32>\ue032;" // LETTER LA
"\u0a33>\ue033;" // LETTER LLA
"\u0a35>\ue035;" // LETTER VA
"\u0a36>\ue036;" // LETTER SHA
"\u0a38>\ue038;" // LETTER SA
"\u0a39>\ue039;" // LETTER HA
"\u0a3c>\ue03c;" // SIGN NUKTA
"\u0a3e>\ue03e;" // VOWEL SIGN AA
"\u0a3f>\ue03f;" // VOWEL SIGN I
"\u0a40>\ue040;" // VOWEL SIGN II
"\u0a41>\ue041;" // VOWEL SIGN U
"\u0a42>\ue042;" // VOWEL SIGN UU
"\u0a47>\ue047;" // VOWEL SIGN EE
"\u0a48>\ue048;" // VOWEL SIGN AI
"\u0a4b>\ue04b;" // VOWEL SIGN OO
"\u0a4c>\ue04c;" // VOWEL SIGN AU
"\u0a4d>\ue04d;" // SIGN VIRAMA
"\u0a59>\ue059;" // LETTER KHHA
"\u0a5a>\ue05a;" // LETTER GHHA
"\u0a5b>\ue05b;" // LETTER ZA
"\u0a5c>\ue05c;" // LETTER RRA
"\u0a5e>\ue05e;" // LETTER FA
"\u0a66>\ue066;" // DIGIT ZERO
"\u0a67>\ue067;" // DIGIT ONE
"\u0a68>\ue068;" // DIGIT TWO
"\u0a69>\ue069;" // DIGIT THREE
"\u0a6a>\ue06a;" // DIGIT FOUR
"\u0a6b>\ue06b;" // DIGIT FIVE
"\u0a6c>\ue06c;" // DIGIT SIX
"\u0a6d>\ue06d;" // DIGIT SEVEN
"\u0a6e>\ue06e;" // DIGIT EIGHT
"\u0a6f>\ue06f;" // DIGIT NINE
// \u0a70>; # UNMAPPED Gurmukhi-InterIndic: TIPPI
// \u0a71>; # UNMAPPED Gurmukhi-InterIndic: ADDAK
// \u0a72>; # UNMAPPED Gurmukhi-InterIndic: IRI
// \u0a73>; # UNMAPPED Gurmukhi-InterIndic: URA
// \u0a74>; # UNMAPPED Gurmukhi-InterIndic: EK ONKAR
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,217 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Hiragana_Katakana.utf8.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Hiragana_Katakana
translit_Hiragana_Katakana {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 2001
//--------------------------------------------------------------------
// Hiragana-Katakana
// This is largely a one-to-one mapping, but it has a
// few kinks:
// 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
// Hiragana equivalents. We use Hiragana wa/wi/we/wo
// (308F-3092) with a voicing mark (3099), which is
// semantically equivalent. However, this is a non-
// roundtripping transformation.
// 2. The Katakana small ka/ke (30F5,30F6) have no
// Hiragana equivalents. We convert them to normal
// Hiragana ka/ke (304B,3051). This is a one-way
// information-losing transformation and precludes
// round-tripping of 30F5 and 30F6.
// 3. The combining marks 3099-309C are in the Hiragana
// block, but they apply to Katakana as well, so we
// leave them untouched.
// 4. The Katakana prolonged sound mark 30FC doubles the
// preceding vowel. This is a one-way information-
// losing transformation from Katakana to Hiragana.
// 5. The Katakana middle dot separates words in foreign
// expressions; we leave this unmodified.
// The above points preclude successful round-trip
// transformations of arbitrary input text. However,
// they provide naturalistic results that should conform
// to user expectations.
// Combining equivalents va/vi/ve/vo
"わ゙ <> ヷ;"
"ゐ゙ <> ヸ;"
"ゑ゙ <> ヹ;"
"を゙ <> ヺ;"
// One-to-one mappings, main block
// 3041:3094 <> 30A1:30F4
// 309D,E <> 30FD,E
"ぁ <> ァ;"
"あ <> ア;"
"ぃ <> ィ;"
"い <> イ;"
"ぅ <> ゥ;"
"う <> ウ;"
"ぇ <> ェ;"
"え <> エ;"
"ぉ <> ォ;"
"お <> オ;"
"か <> カ;"
"が <> ガ;"
"き <> キ;"
"ぎ <> ギ;"
"く <> ク;"
"ぐ <> グ;"
"け <> ケ;"
"げ <> ゲ;"
"こ <> コ;"
"ご <> ゴ;"
"さ <> サ;"
"ざ <> ザ;"
"し <> シ;"
"じ <> ジ;"
"す <> ス;"
"ず <> ズ;"
"せ <> セ;"
"ぜ <> ゼ;"
"そ <> ソ;"
"ぞ <> ゾ;"
"た <> タ;"
"だ <> ダ;"
"ち <> チ;"
"ぢ <> ヂ;"
"っ <> ッ;"
"つ <> ツ;"
"づ <> ヅ;"
"て <> テ;"
"で <> デ;"
"と <> ト;"
"ど <> ド;"
"な <> ナ;"
"に <> ニ;"
"ぬ <> ヌ;"
"ね <> ネ;"
"の <> ノ;"
"は <> ハ;"
"ば <> バ;"
"ぱ <> パ;"
"ひ <> ヒ;"
"び <> ビ;"
"ぴ <> ピ;"
"ふ <> フ;"
"ぶ <> ブ;"
"ぷ <> プ;"
"へ <> ヘ;"
"べ <> ベ;"
"ぺ <> ペ;"
"ほ <> ホ;"
"ぼ <> ボ;"
"ぽ <> ポ;"
"ま <> マ;"
"み <> ミ;"
"む <> ム;"
"め <> メ;"
"も <> モ;"
"ゃ <> ャ;"
"や <> ヤ;"
"ゅ <> ュ;"
"ゆ <> ユ;"
"ょ <> ョ;"
"よ <> ヨ;"
"ら <> ラ;"
"り <> リ;"
"る <> ル;"
"れ <> レ;"
"ろ <> ロ;"
"ゎ <> ヮ;"
"わ <> ワ;"
"ゐ <> ヰ;"
"ゑ <> ヱ;"
"を <> ヲ;"
"ん <> ン;"
"ゔ <> ヴ;"
"ゝ <> ヽ;"
"ゞ <> ヾ;"
// One-way Katakana-Hiragana xform of small K ka/ke to
// normal H ka/ke.
"か < ヵ;"
"け < ヶ;"
// Katakana followed by a prolonged sound mark 30FC has
// its final vowel doubled. This is a Katakana-Hiragana
// one-way information-losing transformation. We
// include the small kana (e.g., Hiragana small A 3041) and
// do not distinguish them from their large
// counterparts. It doesn't make sense to double a
// small counterpart vowel as a small Hiragana vowel, so
// we don't do so. In natural text this should never
// occur anyway. If a 30FC is seen without a preceding
// vowel sound (e.g., after n 30F3) we do not change it.
//## $long = ー;
// The following categories are Hiragana, not Katakana
// as might be expected, since by the time we get to the
// 30FC, the preceding character will have already been
// transformed to Hiragana.
// {The following mechanically generated from the
// Unicode 3.0 data:}
"$xa = ["
"ぁ あ か が さ ざ"
"た だ な は ば ぱ"
"ま ゃ や ら ゎ わ"
"];"
"$xi = ["
"ぃ い き ぎ し じ"
"ち ぢ に ひ び ぴ"
"み り ゐ"
"];"
"$xu = ["
"ぅ う く ぐ す ず"
"っ つ づ ぬ ふ ぶ"
"ぷ む ゅ ゆ る ゔ"
"];"
"$xe = ["
"ぇ え け げ せ ぜ"
"て で ね へ べ ぺ"
"め れ ゑ"
"];"
"$xo = ["
"ぉ お こ ご そ ぞ"
"と ど の ほ ぼ ぽ"
"も ょ よ ろ を"
"];"
"あ < $xa {ー};"
"い < $xi {ー};"
"う < $xu {ー};"
"え < $xe {ー};"
"お < $xo {ー};"
// eof
}
}

View File

@ -0,0 +1,32 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Hiragana_Latin.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Hiragana_Latin
translit_Hiragana_Latin {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// $Source: /xsrl/Nsvn/icu/icu/data/Attic/translit_Hiragana_Latin.txt,v $
// $Date: 2001/10/26 05:41:16 $
// $Revision: 1.1 $
//--------------------------------------------------------------------
":: [:^Katakana:] ;" // don't touch any katakana that was in the text!
":: Hiragana-Katakana;"
":: Katakana-Latin;"
":: ([:^Katakana:]) ;" // don't touch any katakana that was in the text!
}
}

View File

@ -0,0 +1,134 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Bengali.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// InterIndic_Bengali
translit_InterIndic_Bengali {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Bengali.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// InterIndic_Bengali
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:41:59 2001
//--------------------------------------------------------------------
// InterIndic-Bengali
//:: NFD (NFC) ;
"\ue001>\u0981;" // SIGN CANDRABINDU
"\ue002>\u0982;" // SIGN ANUSVARA
"\ue003>\u0983;" // SIGN VISARGA
"\ue005>\u0985;" // LETTER A
"\ue006>\u0986;" // LETTER AA
"\ue007>\u0987;" // LETTER I
"\ue008>\u0988;" // LETTER II
"\ue009>\u0989;" // LETTER U
"\ue00a>\u098a;" // LETTER UU
"\ue00b>\u098b;" // LETTER VOCALIC R
"\ue00c>\u098c;" // LETTER VOCALIC L
// \ue00f>; # UNMAPPED InterIndic-Bengali: LETTER EE (\u098f = LETTER E)
"\ue010>\u0990;" // LETTER AI
// \ue013>; # UNMAPPED InterIndic-Bengali: LETTER OO (\u0993 = LETTER O)
"\ue014>\u0994;" // LETTER AU
"\ue015>\u0995;" // LETTER KA
"\ue016>\u0996;" // LETTER KHA
"\ue017>\u0997;" // LETTER GA
"\ue018>\u0998;" // LETTER GHA
"\ue019>\u0999;" // LETTER NGA
"\ue01a>\u099a;" // LETTER CA
"\ue01b>\u099b;" // LETTER CHA
"\ue01c>\u099c;" // LETTER JA
"\ue01d>\u099d;" // LETTER JHA
"\ue01e>\u099e;" // LETTER NYA
"\ue01f>\u099f;" // LETTER TTA
"\ue020>\u09a0;" // LETTER TTHA
"\ue021>\u09a1;" // LETTER DDA
"\ue022>\u09a2;" // LETTER DDHA
"\ue023>\u09a3;" // LETTER NNA
"\ue024>\u09a4;" // LETTER TA
"\ue025>\u09a5;" // LETTER THA
"\ue026>\u09a6;" // LETTER DA
"\ue027>\u09a7;" // LETTER DHA
"\ue028>\u09a8;" // LETTER NA
"\ue029>\u09a8;" // REMAP (indicExceptions.txt): \u09a9>\u09a8 = LETTER NNNA>LETTER NA
"\ue02a>\u09aa;" // LETTER PA
"\ue02b>\u09ab;" // LETTER PHA
"\ue02c>\u09ac;" // LETTER BA
"\ue02d>\u09ad;" // LETTER BHA
"\ue02e>\u09ae;" // LETTER MA
"\ue02f>\u09af;" // LETTER YA
"\ue030>\u09b0;" // LETTER RA
"\ue032>\u09b2;" // LETTER LA
"\ue033>\u09b2;" // REMAP (indicExceptions.txt): \u09b3>\u09b2 = LETTER LLA>LETTER LA
"\ue034>\u09b2;" // REMAP (indicExceptions.txt): \u09b4>\u09b2 = LETTER LLLA>LETTER LA
"\ue035>\u09ac;" // REMAP (indicExceptions.txt): \u09b5>\u09ac = LETTER VA>LETTER BA
"\ue036>\u09b6;" // LETTER SHA
"\ue037>\u09b7;" // LETTER SSA
"\ue038>\u09b8;" // LETTER SA
"\ue039>\u09b9;" // LETTER HA
"\ue03c>\u09bc;" // SIGN NUKTA
// \ue03d>; # UNMAPPED InterIndic-Bengali: SIGN AVAGRAHA
"\ue03e>\u09be;" // VOWEL SIGN AA
"\ue03f>\u09bf;" // VOWEL SIGN I
"\ue040>\u09c0;" // VOWEL SIGN II
"\ue041>\u09c1;" // VOWEL SIGN U
"\ue042>\u09c2;" // VOWEL SIGN UU
"\ue043>\u09c3;" // VOWEL SIGN VOCALIC R
"\ue044>\u09c4;" // VOWEL SIGN VOCALIC RR
"\ue045>\u09c7;" // REMAP (indicExceptions.txt): \u09c5>\u09c7 = VOWEL SIGN CANDRA E>VOWEL SIGN E
// \ue047>; # UNMAPPED InterIndic-Bengali: VOWEL SIGN EE (\u09c7 = VOWEL SIGN E)
"\ue048>\u09c8;" // VOWEL SIGN AI
"\ue049>\u09cb;" // REMAP (indicExceptions.txt): \u09c9>\u09cb = VOWEL SIGN CANDRA O>VOWEL SIGN O
// \ue04b>; # UNMAPPED InterIndic-Bengali: VOWEL SIGN OO (\u09cb = VOWEL SIGN O)
"\ue04c>\u09cc;" // VOWEL SIGN AU
"\ue04d>\u09cd;" // SIGN VIRAMA
// \ue050>; # UNMAPPED InterIndic-Bengali: OM
// \ue055>; # UNMAPPED InterIndic-Bengali: LENGTH MARK
"\ue056>\u09c8;" // REMAP (indicExceptions.txt): \u09d6>\u09c8 = AI LENGTH MARK>VOWEL SIGN AI
"\ue057>\u09d7;" // AU LENGTH MARK
"\ue059>\u0996;" // REMAP (indicExceptions.txt): \u09d9>\u0996 = LETTER KHHA>LETTER KHA
"\ue05a>\u0997;" // REMAP (indicExceptions.txt): \u09da>\u0997 = LETTER GHHA>LETTER GA
"\ue05b>\u099c;" // REMAP (indicExceptions.txt): \u09db>\u099c = LETTER ZA>LETTER JA
"\ue05d>\u09dd;" // LETTER RHA
"\ue05e>\u09ab;" // REMAP (indicExceptions.txt): \u09de>\u09ab = LETTER FA>LETTER PHA
"\ue05f>\u09df;" // LETTER YYA
"\ue060>\u09e0;" // LETTER VOCALIC RR
"\ue061>\u09e1;" // LETTER VOCALIC LL
"\ue062>\u09e2;" // VOWEL SIGN VOCALIC L
"\ue063>\u09e3;" // VOWEL SIGN VOCALIC LL
"\ue066>\u09e6;" // DIGIT ZERO
"\ue067>\u09e7;" // DIGIT ONE
"\ue068>\u09e8;" // DIGIT TWO
"\ue069>\u09e9;" // DIGIT THREE
"\ue06a>\u09ea;" // DIGIT FOUR
"\ue06b>\u09eb;" // DIGIT FIVE
"\ue06c>\u09ec;" // DIGIT SIX
"\ue06d>\u09ed;" // DIGIT SEVEN
"\ue06e>\u09ee;" // DIGIT EIGHT
"\ue06f>\u09ef;" // DIGIT NINE
"\ue0fa>\u09fa;" // ISSHAR
"\ue00f>\u098f;" // LETTER E
"\ue013>\u0993;" // LETTER O
"\ue031>\u09dc;" // LETTER RRA
"\ue047>\u09c7;" // VOWEL SIGN E
"\ue04b>\u09cb;" // VOWEL SIGN O
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,159 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Devanagari.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// InterIndic_Devanagari
translit_InterIndic_Devanagari {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Devanagari.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// InterIndic_Devanagari
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:41:59 2001
//--------------------------------------------------------------------
// InterIndic-Devanagari
//:: NFD (NFC) ;
//Rules for Decomposed characters
"\ue028\ue03c > \u0929;" //\ue029
"\ue030\ue03c > \u0931;" //\ue031
"\ue033\ue03c > \u0934;" //\ue034
"\ue015\ue03c > \u0958;" //\ue058 LETTER QA (For Urdu)
"\ue016\ue03c > \u0959;" //\ue059 LETTER KHHA (For Urdu)
"\ue017\ue03c > \u095a;" //\ue05a LETTER GHHA (For Urdu)
"\ue01c\ue03c > \u095b;" //\ue05b LETTER ZA (For Urdu)
"\ue021\ue03c > \u095c;" //\ue05c LETTER DDDHA (pronounced RRA)
"\ue022\ue03c > \u095d;" //\ue05d LETTER RHA (pronounced RRHA)
"\ue02b\ue03c > \u095e;" //\ue05e LETTER FA
"\ue02f\ue03c > \u095f;" //\ue05f LETTER YYA
"\ue001 > \u0901;" // SIGN CANDRABINDU
"\ue002 > \u0902;" // SIGN ANUSVARA
"\ue003 > \u0903;" // SIGN VISARGA
"\ue005 > \u0905;" // LETTER A
"\ue006 > \u0906;" // LETTER AA
"\ue007 > \u0907;" // LETTER I
"\ue008 > \u0908;" // LETTER II
"\ue009 > \u0909;" // LETTER U
"\ue00a > \u090a;" // LETTER UU
"\ue00b > \u090b;" // LETTER VOCALIC R
"\ue00c > \u090c;" // LETTER VOCALIC L
"\ue00d > \u090d;" // LETTER CANDRA E (For representing English sounds)
//\ue00e > \u090e; # UNMAPPED LETTER SHORT E(For Southern Scripts)
"\ue00e > \u090f;"
"\ue00f > \u090f;" // LETTER E
"\ue010 > \u0910;" // LETTER AI
"\ue011 > \u0911;" // LETTER CANDRA O (For representing English sounds)
//\ue012 > \u0912; # UNMAPPED LETTER SHORT O (For Southern Scripts)
"\ue012 > \u0913;"
"\ue013 > \u0913;" // LETTER O
"\ue014 > \u0914;" // LETTER AU
"\ue015 > \u0915;" // LETTER KA
"\ue016 > \u0916;" // LETTER KHA
"\ue017 > \u0917;" // LETTER GA
"\ue018 > \u0918;" // LETTER GHA
"\ue019 > \u0919;" // LETTER NGA
"\ue01a > \u091a;" // LETTER CA
"\ue01b > \u091b;" // LETTER CHA
"\ue01c > \u091c;" // LETTER JA
"\ue01d > \u091d;" // LETTER JHA
"\ue01e > \u091e;" // LETTER NYA
"\ue01f > \u091f;" // LETTER TTA
"\ue020 > \u0920;" // LETTER TTHA
"\ue021 > \u0921;" // LETTER DDA
"\ue022 > \u0922;" // LETTER DDHA
"\ue023 > \u0923;" // LETTER NNA
"\ue024 > \u0924;" // LETTER TA
"\ue025 > \u0925;" // LETTER THA
"\ue026 > \u0926;" // LETTER DA
"\ue027 > \u0927;" // LETTER DHA
"\ue028 > \u0928;" // LETTER NA
"\ue029 > \u0929;" // LETTER NNNA
"\ue02a > \u092a;" // LETTER PA
"\ue02b > \u092b;" // LETTER PHA
"\ue02c > \u092c;" // LETTER BA
"\ue02d > \u092d;" // LETTER BHA
"\ue02e > \u092e;" // LETTER MA
"\ue02f > \u092f;" // LETTER YA
"\ue030 > \u0930;" // LETTER RA
//\ue031 > \u0931; # LETTER RRA (Eyelash RA for Southern scripts)
"\ue031 > \u0930;"
"\ue032 > \u0932;" // LETTER LA
"\ue033 > \u0933;" // LETTER LLA
//\ue034 > \u0934; # LETTER LLLA (LLLA for Southern scripts)
"\ue034 > \u0933;"
"\ue035 > \u0935;" // LETTER VA
"\ue036 > \u0936;" // LETTER SHA
"\ue037 > \u0937;" // LETTER SSA
"\ue038 > \u0938;" // LETTER SA
"\ue039 > \u0939;" // LETTER HA
"\ue03c > \u093c;" // SIGN NUKTA
"\ue03d > \u093d;" // SIGN AVAGRAHA
"\ue03e > \u093e;" // VOWEL SIGN AA
"\ue03f > \u093f;" // VOWEL SIGN I
"\ue040 > \u0940;" // VOWEL SIGN II
"\ue041 > \u0941;" // VOWEL SIGN U
"\ue042 > \u0942;" // VOWEL SIGN UU
"\ue043 > \u0943;" // VOWEL SIGN VOCALIC R
"\ue044 > \u0944;" // VOWEL SIGN VOCALIC RR
"\ue045 > \u0945;" // VOWEL SIGN CANDRA E
//\ue046 > \u0946; # UNMAPPED VOWEL SIGN SHORT E
"\ue046 > \u0947;"
"\ue047 > \u0947;" // VOWEL SIGN E
"\ue048 > \u0948;" // VOWEL SIGN AI
"\ue049 > \u0949;" // VOWEL SIGN CANDRA O
//\ue04a > \u094a; # UNMAPPED VOWEL SIGN SHORT O
"\ue04a > \u094b;"
"\ue04b > \u094b;" // VOWEL SIGN O
"\ue04c > \u094c;" // VOWEL SIGN AU
"\ue04d > \u094d;" // SIGN VIRAMA
"\ue050 > \u0950;" // OM
// \u0951 # UNMAPPED STRESS SIGN UDATTA
// \u0952 # UNMAPPED STRESS SIGN ANUDATTA
// \u0953 # UNMAPPED GRAVE ACCENT
// \u0954 # UNMAPPED ACUTE ACCENT
"\ue058 > \u0958;" // LETTER QA (For Urdu)
"\ue059 > \u0959;" // LETTER KHHA (For Urdu)
"\ue05a > \u095a;" // LETTER GHHA (For Urdu)
"\ue05b > \u095b;" // LETTER ZA (For Urdu)
"\ue05c > \u095c;" // LETTER DDDHA (pronounced RRA)
"\ue05d > \u095d;" // LETTER RHA (pronounced RRHA)
"\ue05e > \u095e;" // LETTER FA
"\ue05f > \u095f;" // LETTER YYA
"\ue060 > \u0960;" // LETTER VOCALIC RR
"\ue061 > \u0961;" // LETTER VOCALIC LL
"\ue062 > \u0962;" // VOWEL SIGN VOCALIC L
"\ue063 > \u0963;" // VOWEL SIGN VOCALIC LL
// > ; \u0964 # UNMAPPED Devanagari-InterIndic: DANDA
// > ; \u0965 # UNMAPPED Devanagari-InterIndic: DOUBLE DANDA
"\ue066 > \u0966;" // DIGIT ZERO
"\ue067 > \u0967;" // DIGIT ONE
"\ue068 > \u0968;" // DIGIT TWO
"\ue069 > \u0969;" // DIGIT THREE
"\ue06a > \u096a;" // DIGIT FOUR
"\ue06b > \u096b;" // DIGIT FIVE
"\ue06c > \u096c;" // DIGIT SIX
"\ue06d > \u096d;" // DIGIT SEVEN
"\ue06e > \u096e;" // DIGIT EIGHT
"\ue06f > \u096f;" // DIGIT NINE
// \u0970 # UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN
// :: NFC;
// eof
}
}

View File

@ -0,0 +1,134 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Gujarati.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// InterIndic_Gujarati
translit_InterIndic_Gujarati {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Gujarati.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// InterIndic_Gujarati
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:41:59 2001
//--------------------------------------------------------------------
// InterIndic-Gujarati
//:: NFD (NFC) ;
"\ue001>\u0a81;" // SIGN CANDRABINDU
"\ue002>\u0a82;" // SIGN ANUSVARA
"\ue003>\u0a83;" // SIGN VISARGA
"\ue005>\u0a85;" // LETTER A
"\ue006>\u0a86;" // LETTER AA
"\ue007>\u0a87;" // LETTER I
"\ue008>\u0a88;" // LETTER II
"\ue009>\u0a89;" // LETTER U
"\ue00a>\u0a8a;" // LETTER UU
"\ue00b>\u0a8b;" // LETTER VOCALIC R
"\ue00c>\u0ab2\u0ac3;" // REMAP (indicExceptions.txt): \u0a8c>\u0ab2\u0ac3 = LETTER VOCALIC L>LETTER LA.VOWEL SIGN VOCALIC R
// \ue00f>; # UNMAPPED InterIndic-Gujarati: LETTER EE (\u0a8f = LETTER E)
"\ue010>\u0a90;" // LETTER AI
// \ue013>; # UNMAPPED InterIndic-Gujarati: LETTER OO (\u0a93 = LETTER O)
"\ue014>\u0a94;" // LETTER AU
"\ue015>\u0a95;" // LETTER KA
"\ue016>\u0a96;" // LETTER KHA
"\ue017>\u0a97;" // LETTER GA
"\ue018>\u0a98;" // LETTER GHA
"\ue019>\u0a99;" // LETTER NGA
"\ue01a>\u0a9a;" // LETTER CA
"\ue01b>\u0a9b;" // LETTER CHA
"\ue01c>\u0a9c;" // LETTER JA
"\ue01d>\u0a9d;" // LETTER JHA
"\ue01e>\u0a9e;" // LETTER NYA
"\ue01f>\u0a9f;" // LETTER TTA
"\ue020>\u0aa0;" // LETTER TTHA
"\ue021>\u0aa1;" // LETTER DDA
"\ue022>\u0aa2;" // LETTER DDHA
"\ue023>\u0aa3;" // LETTER NNA
"\ue024>\u0aa4;" // LETTER TA
"\ue025>\u0aa5;" // LETTER THA
"\ue026>\u0aa6;" // LETTER DA
"\ue027>\u0aa7;" // LETTER DHA
"\ue028>\u0aa8;" // LETTER NA
"\ue029>\u0aa8;" // REMAP (indicExceptions.txt): \u0aa9>\u0aa8 = LETTER NNNA>LETTER NA
"\ue02a>\u0aaa;" // LETTER PA
"\ue02b>\u0aab;" // LETTER PHA
"\ue02c>\u0aac;" // LETTER BA
"\ue02d>\u0aad;" // LETTER BHA
"\ue02e>\u0aae;" // LETTER MA
"\ue02f>\u0aaf;" // LETTER YA
"\ue030>\u0ab0;" // LETTER RA
"\ue032>\u0ab2;" // LETTER LA
"\ue033>\u0ab3;" // LETTER LLA
"\ue034>\u0ab3;" // REMAP (indicExceptions.txt): \u0ab4>\u0ab3 = LETTER LLLA>LETTER LLA
"\ue035>\u0ab5;" // LETTER VA
"\ue036>\u0ab6;" // LETTER SHA
"\ue037>\u0ab7;" // LETTER SSA
"\ue038>\u0ab8;" // LETTER SA
"\ue039>\u0ab9;" // LETTER HA
"\ue03c>\u0abc;" // SIGN NUKTA
"\ue03d>\u0abd;" // SIGN AVAGRAHA
"\ue03e>\u0abe;" // VOWEL SIGN AA
"\ue03f>\u0abf;" // VOWEL SIGN I
"\ue040>\u0ac0;" // VOWEL SIGN II
"\ue041>\u0ac1;" // VOWEL SIGN U
"\ue042>\u0ac2;" // VOWEL SIGN UU
"\ue043>\u0ac3;" // VOWEL SIGN VOCALIC R
"\ue044>\u0ac4;" // VOWEL SIGN VOCALIC RR
"\ue045>\u0ac5;" // VOWEL SIGN CANDRA E
// \ue047>; # UNMAPPED InterIndic-Gujarati: VOWEL SIGN EE (\u0ac7 = VOWEL SIGN E)
"\ue048>\u0ac8;" // VOWEL SIGN AI
"\ue049>\u0ac9;" // VOWEL SIGN CANDRA O
// \ue04b>; # UNMAPPED InterIndic-Gujarati: VOWEL SIGN OO (\u0acb = VOWEL SIGN O)
"\ue04c>\u0acc;" // VOWEL SIGN AU
"\ue04d>\u0acd;" // SIGN VIRAMA
"\ue050>\u0ad0;" // OM
// \ue055>; # UNMAPPED InterIndic-Gujarati: LENGTH MARK
"\ue056>\u0ac8;" // REMAP (indicExceptions.txt): \u0ad6>\u0ac8 = AI LENGTH MARK>VOWEL SIGN AI
"\ue057>\u0acc;" // REMAP (indicExceptions.txt): \u0ad7>\u0acc = AU LENGTH MARK>VOWEL SIGN AU
"\ue059>\u0a96\u0abc;" // REMAP (indicExceptions.txt): \u0ad9>\u0a96\u0abc = LETTER KHHA>LETTER KHA.SIGN NUKTA
"\ue05a>\u0a97\u0abc;" // REMAP (indicExceptions.txt): \u0ada>\u0a97\u0abc = LETTER GHHA>LETTER GA.SIGN NUKTA
"\ue05b>\u0a9c\u0abc;" // REMAP (indicExceptions.txt): \u0adb>\u0a9c\u0abc = LETTER ZA>LETTER JA.SIGN NUKTA
"\ue05d>\u0aa2\u0abc;" // REMAP (indicExceptions.txt): \u0add>\u0aa2\u0abc = LETTER RHA>LETTER DDHA.SIGN NUKTA
"\ue05e>\u0aab\u0abc;" // REMAP (indicExceptions.txt): \u0ade>\u0aab\u0abc = LETTER FA>LETTER PHA.SIGN NUKTA
"\ue05f>\u0aaf\u0abc;" // REMAP (indicExceptions.txt): \u0adf>\u0aaf\u0abc = LETTER YYA>LETTER YA.SIGN NUKTA
"\ue060>\u0ae0;" // LETTER VOCALIC RR
"\ue061>\u0ab2\u0ac3;" // REMAP (indicExceptions.txt): \u0ae1>\u0ab2\u0ac3 = LETTER VOCALIC LL>LETTER LA.VOWEL SIGN VOCALIC R
"\ue062>\u0abf\u0abc;" // REMAP (indicExceptions.txt): \u0ae2>\u0abf\u0abc = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
"\ue063>\u0ac0\u0abc;" // REMAP (indicExceptions.txt): \u0ae3>\u0ac0\u0abc = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
"\ue066>\u0ae6;" // DIGIT ZERO
"\ue067>\u0ae7;" // DIGIT ONE
"\ue068>\u0ae8;" // DIGIT TWO
"\ue069>\u0ae9;" // DIGIT THREE
"\ue06a>\u0aea;" // DIGIT FOUR
"\ue06b>\u0aeb;" // DIGIT FIVE
"\ue06c>\u0aec;" // DIGIT SIX
"\ue06d>\u0aed;" // DIGIT SEVEN
"\ue06e>\u0aee;" // DIGIT EIGHT
"\ue06f>\u0aef;" // DIGIT NINE
// \ue080>; # UNMAPPED InterIndic-Gujarati: ISSHAR
"\ue00f>\u0a8f;" // LETTER E
"\ue013>\u0a93;" // LETTER O
// \ue083>; # UNMAPPED InterIndic-Gujarati: LETTER RRA (\u0a83 = SIGN VISARGA)
"\ue047>\u0ac7;" // VOWEL SIGN E
"\ue04b>\u0acb;" // VOWEL SIGN O
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,134 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Gurmukhi.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// InterIndic_Gurmukhi
// translit_InterIndic_Gurmukhi: rule set converting InterIndic private-use
// code points (\ue0xx) to Gurmukhi (\u0axx).
//
// Each quoted string is one rule of the form "source>target;".
// Conventions used in the trailing comments:
//   * "REMAP (indicExceptions.txt)" - the target is not the same-offset
//     Gurmukhi code point; the comment spells out the substitution.
//   * "// \ueXXX>; # UNMAPPED ..." - the line is commented out, so no rule
//     exists for that InterIndic code point.
translit_InterIndic_Gurmukhi {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Gurmukhi.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// InterIndic_Gurmukhi
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:00 2001
//--------------------------------------------------------------------
// InterIndic-Gurmukhi
//:: NFD (NFC) ;
"\ue001>\u0a02;" // REMAP (indicExceptions.txt): \u0a01>\u0a02 = SIGN CANDRABINDU>SIGN BINDI
// SIGN ANUSVARA: the same-offset Gurmukhi code point \u0a02 is named SIGN BINDI,
// which is why the generator flagged it as unmapped. Map it explicitly so that
// an InterIndic anusvara does not pass through as a private-use character.
"\ue002>\u0a02;" // SIGN ANUSVARA (\u0a02 = SIGN BINDI)
// \ue003>; # UNMAPPED InterIndic-Gurmukhi: SIGN VISARGA
"\ue005>\u0a05;" // LETTER A
"\ue006>\u0a06;" // LETTER AA
"\ue007>\u0a07;" // LETTER I
"\ue008>\u0a08;" // LETTER II
"\ue009>\u0a09;" // LETTER U
"\ue00a>\u0a0a;" // LETTER UU
"\ue00b>\u0a30\u0a3f;" // REMAP (indicExceptions.txt): \u0a0b>\u0a30\u0a3f = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
"\ue00c>\u0a07;" // REMAP (indicExceptions.txt): \u0a0c>\u0a07 = LETTER VOCALIC L>LETTER I
"\ue00f>\u0a0f;" // LETTER EE
"\ue010>\u0a10;" // LETTER AI
"\ue013>\u0a13;" // LETTER OO
"\ue014>\u0a14;" // LETTER AU
"\ue015>\u0a15;" // LETTER KA
"\ue016>\u0a16;" // LETTER KHA
"\ue017>\u0a17;" // LETTER GA
"\ue018>\u0a18;" // LETTER GHA
"\ue019>\u0a19;" // LETTER NGA
"\ue01a>\u0a1a;" // LETTER CA
"\ue01b>\u0a1b;" // LETTER CHA
"\ue01c>\u0a1c;" // LETTER JA
"\ue01d>\u0a1d;" // LETTER JHA
"\ue01e>\u0a1e;" // LETTER NYA
"\ue01f>\u0a1f;" // LETTER TTA
"\ue020>\u0a20;" // LETTER TTHA
"\ue021>\u0a21;" // LETTER DDA
"\ue022>\u0a22;" // LETTER DDHA
"\ue023>\u0a23;" // LETTER NNA
"\ue024>\u0a24;" // LETTER TA
"\ue025>\u0a25;" // LETTER THA
"\ue026>\u0a26;" // LETTER DA
"\ue027>\u0a27;" // LETTER DHA
"\ue028>\u0a28;" // LETTER NA
"\ue029>\u0a28;" // REMAP (indicExceptions.txt): \u0a29>\u0a28 = LETTER NNNA>LETTER NA
"\ue02a>\u0a2a;" // LETTER PA
"\ue02b>\u0a2b;" // LETTER PHA
"\ue02c>\u0a2c;" // LETTER BA
"\ue02d>\u0a2d;" // LETTER BHA
"\ue02e>\u0a2e;" // LETTER MA
"\ue02f>\u0a2f;" // LETTER YA
"\ue030>\u0a30;" // LETTER RA
"\ue032>\u0a32;" // LETTER LA
"\ue033>\u0a33;" // LETTER LLA
"\ue034>\u0a33;" // REMAP (indicExceptions.txt): \u0a34>\u0a33 = LETTER LLLA>LETTER LLA
"\ue035>\u0a35;" // LETTER VA
"\ue036>\u0a36;" // LETTER SHA
"\ue037>\u0a36;" // REMAP (indicExceptions.txt): \u0a37>\u0a36 = LETTER SSA>LETTER SHA
"\ue038>\u0a38;" // LETTER SA
"\ue039>\u0a39;" // LETTER HA
"\ue03c>\u0a3c;" // SIGN NUKTA
// \ue03d>; # UNMAPPED InterIndic-Gurmukhi: SIGN AVAGRAHA
"\ue03e>\u0a3e;" // VOWEL SIGN AA
"\ue03f>\u0a3f;" // VOWEL SIGN I
"\ue040>\u0a40;" // VOWEL SIGN II
"\ue041>\u0a41;" // VOWEL SIGN U
"\ue042>\u0a42;" // VOWEL SIGN UU
// \ue043>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN VOCALIC R
// \ue044>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN VOCALIC RR
"\ue045>\u0a48;" // REMAP (indicExceptions.txt): \u0a45>\u0a48 = VOWEL SIGN CANDRA E>VOWEL SIGN AI
"\ue047>\u0a47;" // VOWEL SIGN EE
"\ue048>\u0a48;" // VOWEL SIGN AI
"\ue049>\u0a4c;" // REMAP (indicExceptions.txt): \u0a49>\u0a4c = VOWEL SIGN CANDRA O>VOWEL SIGN AU
"\ue04b>\u0a4b;" // VOWEL SIGN OO
"\ue04c>\u0a4c;" // VOWEL SIGN AU
"\ue04d>\u0a4d;" // SIGN VIRAMA
// \ue050>; # UNMAPPED InterIndic-Gurmukhi: OM
// \ue055>; # UNMAPPED InterIndic-Gurmukhi: LENGTH MARK
"\ue056>\u0a48;" // REMAP (indicExceptions.txt): \u0a56>\u0a48 = AI LENGTH MARK>VOWEL SIGN AI
"\ue057>\u0a4c;" // REMAP (indicExceptions.txt): \u0a57>\u0a4c = AU LENGTH MARK>VOWEL SIGN AU
"\ue059>\u0a59;" // LETTER KHHA
"\ue05a>\u0a5a;" // LETTER GHHA
"\ue05b>\u0a5b;" // LETTER ZA
"\ue05d>\u0a22\u0a3c;" // REMAP (indicExceptions.txt): \u0a5d>\u0a22\u0a3c = LETTER RHA>LETTER DDHA.SIGN NUKTA
"\ue05e>\u0a5e;" // LETTER FA
"\ue05f>\u0a2f;" // REMAP (indicExceptions.txt): \u0a5f>\u0a2f = LETTER YYA>LETTER YA
"\ue060>\u0a30\u0a3f;" // REMAP (indicExceptions.txt): \u0a60>\u0a30\u0a3f = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
"\ue061>\u0a08\u0a3c;" // REMAP (indicExceptions.txt): \u0a61>\u0a08\u0a3c = LETTER VOCALIC LL>LETTER II.SIGN NUKTA
"\ue062>\u0a3f\u0a3c;" // REMAP (indicExceptions.txt): \u0a62>\u0a3f\u0a3c = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
"\ue063>\u0a40\u0a3c;" // REMAP (indicExceptions.txt): \u0a63>\u0a40\u0a3c = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
"\ue066>\u0a66;" // DIGIT ZERO
"\ue067>\u0a67;" // DIGIT ONE
"\ue068>\u0a68;" // DIGIT TWO
"\ue069>\u0a69;" // DIGIT THREE
"\ue06a>\u0a6a;" // DIGIT FOUR
"\ue06b>\u0a6b;" // DIGIT FIVE
"\ue06c>\u0a6c;" // DIGIT SIX
"\ue06d>\u0a6d;" // DIGIT SEVEN
"\ue06e>\u0a6e;" // DIGIT EIGHT
"\ue06f>\u0a6f;" // DIGIT NINE
// \ue080>; # UNMAPPED InterIndic-Gurmukhi: ISSHAR
// \ue081>; # UNMAPPED InterIndic-Gurmukhi: LETTER E
// \ue082>; # UNMAPPED InterIndic-Gurmukhi: LETTER O (\u0a02 = SIGN BINDI)
"\ue05c>\u0a5c;" // LETTER RRA
// \ue084>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN E
// \ue085>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN O (\u0a05 = LETTER A)
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,134 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Kannada.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// InterIndic_Kannada
// translit_InterIndic_Kannada: rule set converting InterIndic private-use
// code points (\ue0xx) to Kannada (\u0c80-\u0cff).
//
// Each quoted string is one rule of the form "source>target;".
// Conventions used in the trailing comments:
//   * "REMAP (indicExceptions.txt)" - the target is not the same-offset
//     Kannada code point; the comment spells out the substitution.
//   * "// \ueXXX>; # UNMAPPED ..." - the line is commented out, so no rule
//     exists for that InterIndic code point.
translit_InterIndic_Kannada {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Kannada.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// InterIndic_Kannada
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:00 2001
//--------------------------------------------------------------------
// InterIndic-Kannada
//:: NFD (NFC) ;
// Signs and independent vowels
"\ue001>\u0c82;" // REMAP (indicExceptions.txt): \u0c81>\u0c82 = SIGN CANDRABINDU>SIGN ANUSVARA
"\ue002>\u0c82;" // SIGN ANUSVARA
"\ue003>\u0c83;" // SIGN VISARGA
"\ue005>\u0c85;" // LETTER A
"\ue006>\u0c86;" // LETTER AA
"\ue007>\u0c87;" // LETTER I
"\ue008>\u0c88;" // LETTER II
"\ue009>\u0c89;" // LETTER U
"\ue00a>\u0c8a;" // LETTER UU
"\ue00b>\u0c8b;" // LETTER VOCALIC R
"\ue00c>\u0c8c;" // LETTER VOCALIC L
"\ue00f>\u0c8f;" // LETTER EE
"\ue010>\u0c90;" // LETTER AI
"\ue013>\u0c93;" // LETTER OO
"\ue014>\u0c94;" // LETTER AU
// Consonants
"\ue015>\u0c95;" // LETTER KA
"\ue016>\u0c96;" // LETTER KHA
"\ue017>\u0c97;" // LETTER GA
"\ue018>\u0c98;" // LETTER GHA
"\ue019>\u0c99;" // LETTER NGA
"\ue01a>\u0c9a;" // LETTER CA
"\ue01b>\u0c9b;" // LETTER CHA
"\ue01c>\u0c9c;" // LETTER JA
"\ue01d>\u0c9d;" // LETTER JHA
"\ue01e>\u0c9e;" // LETTER NYA
"\ue01f>\u0c9f;" // LETTER TTA
"\ue020>\u0ca0;" // LETTER TTHA
"\ue021>\u0ca1;" // LETTER DDA
"\ue022>\u0ca2;" // LETTER DDHA
"\ue023>\u0ca3;" // LETTER NNA
"\ue024>\u0ca4;" // LETTER TA
"\ue025>\u0ca5;" // LETTER THA
"\ue026>\u0ca6;" // LETTER DA
"\ue027>\u0ca7;" // LETTER DHA
"\ue028>\u0ca8;" // LETTER NA
"\ue029>\u0ca8;" // REMAP (indicExceptions.txt): \u0ca9>\u0ca8 = LETTER NNNA>LETTER NA
"\ue02a>\u0caa;" // LETTER PA
"\ue02b>\u0cab;" // LETTER PHA
"\ue02c>\u0cac;" // LETTER BA
"\ue02d>\u0cad;" // LETTER BHA
"\ue02e>\u0cae;" // LETTER MA
"\ue02f>\u0caf;" // LETTER YA
"\ue030>\u0cb0;" // LETTER RA
"\ue032>\u0cb2;" // LETTER LA
"\ue033>\u0cb3;" // LETTER LLA
"\ue034>\u0cb3;" // REMAP (indicExceptions.txt): \u0cb4>\u0cb3 = LETTER LLLA>LETTER LLA
"\ue035>\u0cb5;" // LETTER VA
"\ue036>\u0cb6;" // LETTER SHA
"\ue037>\u0cb7;" // LETTER SSA
"\ue038>\u0cb8;" // LETTER SA
"\ue039>\u0cb9;" // LETTER HA
// \ue03c>; # UNMAPPED InterIndic-Kannada: SIGN NUKTA
// \ue03d>; # UNMAPPED InterIndic-Kannada: SIGN AVAGRAHA
// Dependent vowel signs and virama
"\ue03e>\u0cbe;" // VOWEL SIGN AA
"\ue03f>\u0cbf;" // VOWEL SIGN I
"\ue040>\u0cc0;" // VOWEL SIGN II
"\ue041>\u0cc1;" // VOWEL SIGN U
"\ue042>\u0cc2;" // VOWEL SIGN UU
"\ue043>\u0cc3;" // VOWEL SIGN VOCALIC R
"\ue044>\u0cc4;" // VOWEL SIGN VOCALIC RR
"\ue045>\u0cc6;" // REMAP (indicExceptions.txt): \u0cc5>\u0cc6 = VOWEL SIGN CANDRA E>VOWEL SIGN E
"\ue047>\u0cc7;" // VOWEL SIGN EE
"\ue048>\u0cc8;" // VOWEL SIGN AI
"\ue049>\u0cca;" // REMAP (indicExceptions.txt): \u0cc9>\u0cca = VOWEL SIGN CANDRA O>VOWEL SIGN O
"\ue04b>\u0ccb;" // VOWEL SIGN OO
"\ue04c>\u0ccc;" // VOWEL SIGN AU
"\ue04d>\u0ccd;" // SIGN VIRAMA
"\ue050>\u0c93\u0c82;" // REMAP (indicExceptions.txt): \u0cd0>\u0c93\u0c82 = OM>LETTER OO.SIGN ANUSVARA
"\ue055>\u0cd5;" // LENGTH MARK
"\ue056>\u0cd6;" // AI LENGTH MARK
"\ue057>\u0ccc;" // REMAP (indicExceptions.txt): \u0cd7>\u0ccc = AU LENGTH MARK>VOWEL SIGN AU
// Additional consonants and vocalic letters; most fall back to a plain Kannada letter
"\ue059>\u0c96;" // REMAP (indicExceptions.txt): \u0cd9>\u0c96 = LETTER KHHA>LETTER KHA
"\ue05a>\u0c97;" // REMAP (indicExceptions.txt): \u0cda>\u0c97 = LETTER GHHA>LETTER GA
"\ue05b>\u0c9c;" // REMAP (indicExceptions.txt): \u0cdb>\u0c9c = LETTER ZA>LETTER JA
"\ue05d>\u0ca2;" // REMAP (indicExceptions.txt): \u0cdd>\u0ca2 = LETTER RHA>LETTER DDHA
"\ue05e>\u0cde;" // LETTER FA
"\ue05f>\u0caf;" // REMAP (indicExceptions.txt): \u0cdf>\u0caf = LETTER YYA>LETTER YA
"\ue060>\u0ce0;" // LETTER VOCALIC RR
"\ue061>\u0ce1;" // LETTER VOCALIC LL
"\ue062>\u0cbf;" // REMAP (indicExceptions.txt): \u0ce2>\u0cbf = VOWEL SIGN VOCALIC L>VOWEL SIGN I
"\ue063>\u0cc0;" // REMAP (indicExceptions.txt): \u0ce3>\u0cc0 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
// Digits
"\ue066>\u0ce6;" // DIGIT ZERO
"\ue067>\u0ce7;" // DIGIT ONE
"\ue068>\u0ce8;" // DIGIT TWO
"\ue069>\u0ce9;" // DIGIT THREE
"\ue06a>\u0cea;" // DIGIT FOUR
"\ue06b>\u0ceb;" // DIGIT FIVE
"\ue06c>\u0cec;" // DIGIT SIX
"\ue06d>\u0ced;" // DIGIT SEVEN
"\ue06e>\u0cee;" // DIGIT EIGHT
"\ue06f>\u0cef;" // DIGIT NINE
// \ue080>; # UNMAPPED InterIndic-Kannada: ISSHAR
// Letters and vowel signs listed out of code-point order at the end of the table
"\ue00e>\u0c8e;" // LETTER E
"\ue012>\u0c92;" // LETTER O
"\ue031>\u0cb1;" // LETTER RRA
"\ue046>\u0cc6;" // VOWEL SIGN E
"\ue04a>\u0cca;" // VOWEL SIGN O
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,385 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Latin.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// InterIndic_Latin
// translit_InterIndic_Latin: rule set converting InterIndic private-use
// code points (\ue0xx) to a Latin romanization with combining diacritics.
//
// Layout of the Rule string:
//   1. Variable definitions naming each InterIndic code point.
//   2. Character-set variables ($x, $z, $consonants, ...).
//   3. Conversion rules "source > target;". In "A } B > C" the text B is
//      look-ahead context: it must follow A but is not consumed.
// Rule order matters: for a given source character the more specific rules
// (with context) are listed before the generic fallback.
//
// A consonant followed by a member of $x (virama or a dependent vowel sign)
// is written without the inherent "a"; otherwise "a" is appended.
translit_InterIndic_Latin {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 2001-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// InterIndic-Latin
// :: NFD (NFC) ;
// \ue000 reserved
// signs (candrabindu, anusvara, visarga)
"$chandrabindu=\ue001;"
"$anusvara=\ue002;"
"$visarga=\ue003;"
// \ue004 reserved
// w<vowel> represents the stand-alone form
"$wa=\ue005;"
"$waa=\ue006;"
"$wi=\ue007;"
"$wii=\ue008;"
"$wu=\ue009;"
"$wuu=\ue00a;"
"$wr=\ue00b;"
"$wl=\ue00c;"
"$wce=\ue00d;" // LETTER CANDRA E
"$wse=\ue00e;" // LETTER SHORT E
"$we=\ue00f;" // \u090f LETTER E
"$wai=\ue010;"
"$wco=\ue011;" // LETTER CANDRA O
"$wso=\ue012;" // LETTER SHORT O
"$wo=\ue013;" // \u0913 LETTER O
"$wau=\ue014;"
// consonants
"$ka=\ue015;"
"$kha=\ue016;"
"$ga=\ue017;"
"$gha=\ue018;"
"$nga=\ue019;"
"$ca=\ue01a;"
"$cha=\ue01b;"
"$ja=\ue01c;"
"$jha=\ue01d;"
"$nya=\ue01e;"
"$tta=\ue01f;"
"$ttha=\ue020;"
"$dda=\ue021;"
"$ddha=\ue022;"
"$nna=\ue023;"
"$ta=\ue024;"
"$tha=\ue025;"
"$da=\ue026;"
"$dha=\ue027;"
"$na=\ue028;"
"$ena=\ue029;" //compatibility
"$pa=\ue02a;"
"$pha=\ue02b;"
"$ba=\ue02c;"
"$bha=\ue02d;"
"$ma=\ue02e;"
"$ya=\ue02f;"
"$ra=\ue030;"
"$rra=\ue031;"
"$la=\ue032;"
"$lla=\ue033;"
"$ela=\ue034;" //compatibility
"$va=\ue035;"
"$sha=\ue036;"
"$ssa=\ue037;"
"$sa=\ue038;"
"$ha=\ue039;"
//\u093a Reserved
//\u093b Reserved
"$nukta=\ue03c;"
"$avagraha=\ue03d;" // SIGN AVAGRAHA
// <vowel> represents the dependent form
// NOTE(review): the per-script InterIndic tables in this commit label \ue044 as
// VOWEL SIGN VOCALIC RR and \ue062 as VOWEL SIGN VOCALIC L, so $lh (below) and
// $rrh (further down) look swapped. Left unchanged here because the reverse
// Latin-InterIndic rules would have to change in step - TODO confirm.
"$aa=\ue03e;"
"$i=\ue03f;"
"$ii=\ue040;"
"$u=\ue041;"
"$uu=\ue042;"
"$rh=\ue043;"
"$lh=\ue044;"
"$ce=\ue045;" //VOWEL SIGN CANDRA E
"$se=\ue046;" //VOWEL SIGN SHORT E
"$e=\ue047;"
"$ai=\ue048;"
"$co=\ue049;" // VOWEL SIGN CANDRA O
"$so=\ue04a;" // VOWEL SIGN SHORT O
"$o=\ue04b;" // \u094b
"$au=\ue04c;"
"$virama=\ue04d;"
// \u094e Reserved
// \u094f Reserved
//\u0950>\ue050; # OM
// \u0951>; # UNMAPPED STRESS SIGN UDATTA
// \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
// \u0953>; # UNMAPPED GRAVE ACCENT
// \u0954>; # UNMAPPED ACUTE ACCENT
"$lm = \ue055;"// Telugu Length Mark
"$ailm=\ue056;"// AI Length Mark
"$aulm=\ue057;"// AU Length Mark
//urdu compatibility forms
"$uka=\ue058;"
"$ukha=\ue059;"
"$ugha=\ue05a;"
"$ujha=\ue05b;"
"$uddha=\ue05c;"
"$udha=\ue05d;"
"$ufa=\ue05e;"
"$uya=\ue05f;"
"$wrr=\ue060;"
"$wll=\ue061;"
"$rrh=\ue062;"
"$llh=\ue063;"
"$danda=\ue064;"
"$doubleDanda=\ue065;"
"$zero=\ue066;" // DIGIT ZERO
"$one=\ue067;" // DIGIT ONE
"$two=\ue068;" // DIGIT TWO
"$three=\ue069;" // DIGIT THREE
"$four=\ue06a;" // DIGIT FOUR
"$five=\ue06b;" // DIGIT FIVE
"$six=\ue06c;" // DIGIT SIX
"$seven=\ue06d;" // DIGIT SEVEN
"$eight=\ue06e;" // DIGIT EIGHT
"$nine=\ue06f;" // DIGIT NINE
// For all other scripts
"$ecp0=\ue070;"
"$ecp1=\ue071;"
"$ecp2=\ue072;"
"$ecp3=\ue073;"
"$ecp4=\ue074;"
"$ecp5=\ue075;"
"$ecp6=\ue076;"
"$ecp7=\ue077;"
"$ecp8=\ue078;"
"$ecp9=\ue079;"
"$ecpA=\ue07a;"
"$ecpB=\ue07b;"
"$ecpC=\ue07c;"
"$ecpD=\ue07d;"
"$ecpE=\ue07e;"
"$ecpF=\ue07f;"
// \u0970>; # UNMAPPED ABBREVIATION SIGN
"$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
"$depVowelBelow=[\ue041-\ue044];"
// NOTE(review): $endThing is not referenced by any rule in this rule set, and
// "\u005c\u005cu0000" looks like a doubly-escaped \u0000 produced by the dump
// tool - confirm the intended range before relying on this set.
"$endThing=[$danda$doubleDanda \u005c\u005cu0000-\udfff\ue080-\ufffd];"
// $x was originally called '&'; $z was '%'
// $x = virama plus every dependent vowel sign that has a rule below; a consonant
// followed by a member of $x is emitted without the inherent "a". $llh is
// included so that consonant + $llh does not pick up a spurious "a".
"$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$llh$lh$e$o$se$ce$so$co];"
"$z=[bcdfghjklmnpqrstvwxyz];"
"$consonants=[$ka-$ha $virama];"
//#####################################################################
// convert from Native letters to Latin letters
//#####################################################################
//transliterations for anusvara
// The nasal chosen depends on the class of the following consonant.
"$anusvara} [$ka$kha$ga$gha$nga] > n\u0307;"
"$anusvara} [$ca$cha$ja$jha$nya] > n\u0304;"
"$anusvara} [$tta$ttha$dda$ddha$nna] > n\u0323;"
"$anusvara} [$ta$tha$da$dha$na] > n ;"
"$anusvara} [$pa$pha$ba$bha$ma] > m ;"
"$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] > n ;"
"$anusvara>'-'m\u0307;"
// normal consonants
// Pattern per consonant C:
//   C virama } ha  -> c''   ('' keeps "c" + "h" distinct from the aspirate "ch")
//   C } $x         -> c     (vowel sign or virama follows: no inherent "a")
//   C              -> ca
"$cha}$x>ch;"
"$cha>cha;"
"$ca$virama}$ha>c'';"
"$ca}$x>c;"
"$ca>ca;"
"$jha}$x>jh;"
"$jha>jha;"
"$ja$virama}$ha>j'';"
"$ja}$x>j;"
"$ja>ja;"
//$nya}$x>ny;
//$nya>nya;
"$nya }$x>n\u0303 ;"
"$nya > n\u0303a ;"
"$ttha}$x>t\u0323h;"
"$tta$virama}$ha>t\u0323'';"
"$tta}$x>t\u0323;"
"$ddha}$x>d\u0323h;"
// Uses $virama like the sibling rules: with the wider $x the '' separator was
// also emitted when a vowel sign (not a virama) stood between dda and ha.
"$dda$virama}$ha>d\u0323'';"
"$dda}$x>d\u0323;"
"$dha}$x>dh;"
"$da$virama}$ha>d'';"
"$da$virama}$ddha>d'';"
"$da$virama}$dda>d'';"
"$da$virama}$dha>d'';"
//$da$virama}$da>dda;
"$da}$x>d;"
"$tha}$x>th;"
"$ta$virama}$ha>t'';"
"$ta$virama}$ttha>t'';"
"$ta$virama}$tta>t'';"
"$ta$virama}$tha>t'';"
"$tta>t\u0323a;"
"$ttha>t\u0323ha;"
//$ta$virama}$ta>tta;
"$ta}$x>t;"
"$tha>tha;"
"$ta>ta;"
"$dda>d\u0323a;"
"$dha>dha;"
"$ddha>d\u0323ha;"
"$da>da;"
"$nna}$x>n\u0323 ;"
"$nna>n\u0323a ;"
"$na$virama}$ga>n'';"
"$na$virama}$ya>n'';"
"$na}$x>n;"
"$na>na;"
"$kha}$x>kh;"
"$kha>kha;"
"$ka$virama}$ha>k'';"
"$ka}$x>k;"
"$ka>ka;"
"$gha}$x>gh;"
"$gha>gha;"
"$ga$virama}$ha>g'';"
"$ga}$x>g;"
"$ga>ga;"
//ng<$nga}$x;
//nga<$nga;
"$nga}$x>n\u0307;"
"$nga>n\u0307a ;"
"$pha}$x>ph;"
"$pha>pha;"
"$pa$virama}$ha>p'';"
"$pa}$x>p;"
"$pa>pa;"
"$bha}$x>bh;"
"$bha>bha;"
"$ba$virama}$ha>b'';"
"$ba}$x>b;"
"$ba>ba;"
"$ma$virama}$ma>m'';"
//$ma$virama}$anusvara>m'';
"$ma}$x>m;"
"$ma>ma;"
"$ya}$x>y;"
"$ya>ya;"
"$ra$virama}$ha>r'';"
"$ra}$x>r;"
"$ra>ra;"
"$la$virama}$ha>l'';"
"$la}$x>l;"
"$la>la;"
"$lla$virama}$ha>l\u0323'';"
"$lla}$x>l\u0323;"
"$lla>l\u0323a;"
"$va}$x>v;"
"$va>va;"
"$sha}$x>s\u0301;"
"$ssa}$x>s\u0323;"
"$sa$virama}$ha>s'';"
"$sa$virama}$sha>s'';"
"$sa$virama}$ssa>s'';"
"$sa$virama}$sa>s'';"
"$sa}$x>s;"
"$sha>s\u0301a;"
"$ssa>s\u0323a;"
"$sa>sa;"
"$ha}$x>h;"
"$ha>ha;"
// Urdu compatibility
"$uya}$x > y\u0307 ;"
"$uya > y\u0307a ;"
"$ela}$x > l\u0331 ;"
"$ela > l\u0331a ;"
"$ena}$x > n\u0331 ;"
"$ena > n\u0331a ;"
"$uka}$x > q ;"
"$uka > qa ;"
"$ukha}$x > k\u0323 ;"
"$ukha > k\u0323a ;"
"$ugha}$x > g\u0307 ;"
"$ugha > g\u0307a ;"
"$ujha}$x > z ;"
"$ujha > za ;"
"$udha}$x > r\u0323h ;"
"$udha > r\u0323ha;"
"$uddha}$x> r\u0323 ;"
"$uddha > r\u0323a ;"
"$ufa}$x > f\u0323 ;"
"$ufa > f\u0323a ;"
// dependent vowels (should never occur except following consonants)
"$aa > a\u0304 ;"
"$ai > ai ;"
"$au > au ;"
"$ii > i\u0304 ;"
"$i > i ;"
"$uu > u\u0304 ;"
"$u > u ;"
"$rrh > r\u0325\u0304 ;"
"$rh}$consonants>r\u0325;"
"$rh > r\u0325a ;"
"$llh > l\u0325\u0304 ;"
"$lh > l\u0325 ;"
"$e > e\u0304 ;"
"$o > o\u0304 ;"
//extra vowels
"$ce > e\u0306 ;"
"$co > o\u0306 ;"
"$se > e ;"
"$so > o ;"
// independent vowels (when following consonants)
// The leading '' separates the independent vowel from the preceding Latin
// letter ("a" or a member of $z).
"a}$waa > ''a\u0304 ;"
"$z}$waa > ''a\u0304 ;"
"a}$wai > ''ai ;"
"$z}$wai > ''ai ;"
"a}$wau > ''au ;"
"$z}$wau > ''au ;"
"a}$wii > ''i\u0304 ;"
"$z}$wii > ''i\u0304 ;"
"a}$wi > ''i ;"
"$z}$wi > ''i ;"
"a}$wuu > ''u\u0304 ;"
"$z}$wuu > ''u\u0304 ;"
"a}$wu > ''u ;"
"$z}$wu > ''u ;"
"$z}$wrr > ''r\u0325\u0304 ;"
"$z}$wr > ''r\u0325 ;"
"$z}$wll > ''l\u0325\u0304 ;"
"$z}$wl > ''l\u0325 ;"
"$z}$we > ''e\u0304 ;"
"$z}$wo > ''o\u0304 ;"
"a}$wa > ''a ;"
"$z}$wa > ''a ;"
//extra vowels
"$z}$wce > ''e\u0306 ;"
"$z}$wco > ''o\u0306 ;"
"$z}$wse > ''e ;"
"$z}$wso > ''o ;"
// independent vowels (otherwise)
"$waa > a\u0304 ;"
"$wai > ai ;"
"$wau > au ;"
"$wii > i\u0304 ;"
"$wi > i ;"
"$wuu > u\u0304 ;"
"$wu > u ;"
"$wrr > r\u0325\u0304 ;"
"$wr > r\u0325 ;"
"$wll > l\u0325\u0304 ;"
"$wl > l\u0325 ;"
"$we > e\u0304 ;"
"$wo > o\u0304 ;"
"$wa > a ;"
//extra vowels
"$wce > e\u0306 ;"
"$wco > o\u0306 ;"
"$wse > e ;"
"$wso > o ;"
//stress marks
"$avagraha > \u0315;"
"$chandrabindu$anusvara>'-'\u0303;"
"$chandrabindu > '-'m\u0310;"
"$visarga>'-'h\u0323;"
//numbers
"$zero > 0;"
"$one > 1;"
"$two > 2;"
"$three > 3;"
"$four > 4;"
"$five > 5;"
"$six > 6;"
"$seven > 7;"
"$eight > 8;"
"$nine > 9;"
// blow away any remaining viramas
"$virama>;"
// :: NFC;
}
}

View File

@ -0,0 +1,134 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Malayalam.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// InterIndic_Malayalam
// translit_InterIndic_Malayalam: rule set converting InterIndic private-use
// code points (\ue0xx) to Malayalam (\u0d00-\u0d7f).
//
// Each quoted string is one rule of the form "source>target;".
// Conventions used in the trailing comments:
//   * "REMAP (indicExceptions.txt)" - the target is not the same-offset
//     Malayalam code point; the comment spells out the substitution.
//   * "// \ueXXX>; # UNMAPPED ..." - the line is commented out, so no rule
//     exists for that InterIndic code point.
translit_InterIndic_Malayalam {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Malayalam.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// InterIndic_Malayalam
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:00 2001
//--------------------------------------------------------------------
// InterIndic-Malayalam
//:: NFD (NFC) ;
// Signs and independent vowels
"\ue001>\u0d02;" // REMAP (indicExceptions.txt): \u0d01>\u0d02 = SIGN CANDRABINDU>SIGN ANUSVARA
"\ue002>\u0d02;" // SIGN ANUSVARA
"\ue003>\u0d03;" // SIGN VISARGA
"\ue005>\u0d05;" // LETTER A
"\ue006>\u0d06;" // LETTER AA
"\ue007>\u0d07;" // LETTER I
"\ue008>\u0d08;" // LETTER II
"\ue009>\u0d09;" // LETTER U
"\ue00a>\u0d0a;" // LETTER UU
"\ue00b>\u0d0b;" // LETTER VOCALIC R
"\ue00c>\u0d0c;" // LETTER VOCALIC L
"\ue00f>\u0d0f;" // LETTER EE
"\ue010>\u0d10;" // LETTER AI
"\ue013>\u0d13;" // LETTER OO
"\ue014>\u0d14;" // LETTER AU
// Consonants
"\ue015>\u0d15;" // LETTER KA
"\ue016>\u0d16;" // LETTER KHA
"\ue017>\u0d17;" // LETTER GA
"\ue018>\u0d18;" // LETTER GHA
"\ue019>\u0d19;" // LETTER NGA
"\ue01a>\u0d1a;" // LETTER CA
"\ue01b>\u0d1b;" // LETTER CHA
"\ue01c>\u0d1c;" // LETTER JA
"\ue01d>\u0d1d;" // LETTER JHA
"\ue01e>\u0d1e;" // LETTER NYA
"\ue01f>\u0d1f;" // LETTER TTA
"\ue020>\u0d20;" // LETTER TTHA
"\ue021>\u0d21;" // LETTER DDA
"\ue022>\u0d22;" // LETTER DDHA
"\ue023>\u0d23;" // LETTER NNA
"\ue024>\u0d24;" // LETTER TA
"\ue025>\u0d25;" // LETTER THA
"\ue026>\u0d26;" // LETTER DA
"\ue027>\u0d27;" // LETTER DHA
"\ue028>\u0d28;" // LETTER NA
"\ue029>\u0d28;" // REMAP (indicExceptions.txt): \u0d29>\u0d28 = LETTER NNNA>LETTER NA
"\ue02a>\u0d2a;" // LETTER PA
"\ue02b>\u0d2b;" // LETTER PHA
"\ue02c>\u0d2c;" // LETTER BA
"\ue02d>\u0d2d;" // LETTER BHA
"\ue02e>\u0d2e;" // LETTER MA
"\ue02f>\u0d2f;" // LETTER YA
"\ue030>\u0d30;" // LETTER RA
"\ue032>\u0d32;" // LETTER LA
"\ue033>\u0d33;" // LETTER LLA
"\ue034>\u0d34;" // LETTER LLLA
"\ue035>\u0d35;" // LETTER VA
"\ue036>\u0d36;" // LETTER SHA
"\ue037>\u0d37;" // LETTER SSA
"\ue038>\u0d38;" // LETTER SA
"\ue039>\u0d39;" // LETTER HA
// \ue03c>; # UNMAPPED InterIndic-Malayalam: SIGN NUKTA
// \ue03d>; # UNMAPPED InterIndic-Malayalam: SIGN AVAGRAHA
// Dependent vowel signs and virama
"\ue03e>\u0d3e;" // VOWEL SIGN AA
"\ue03f>\u0d3f;" // VOWEL SIGN I
"\ue040>\u0d40;" // VOWEL SIGN II
"\ue041>\u0d41;" // VOWEL SIGN U
"\ue042>\u0d42;" // VOWEL SIGN UU
"\ue043>\u0d43;" // VOWEL SIGN VOCALIC R
// \ue044>; # UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC RR
"\ue045>\u0d3e;" // REMAP (indicExceptions.txt): \u0d45>\u0d3e = VOWEL SIGN CANDRA E>VOWEL SIGN AA
"\ue047>\u0d47;" // VOWEL SIGN EE
"\ue048>\u0d48;" // VOWEL SIGN AI
"\ue049>\u0d4b;" // REMAP (indicExceptions.txt): \u0d49>\u0d4b = VOWEL SIGN CANDRA O>VOWEL SIGN OO
"\ue04b>\u0d4b;" // VOWEL SIGN OO
"\ue04c>\u0d4c;" // VOWEL SIGN AU
"\ue04d>\u0d4d;" // SIGN VIRAMA
// \ue050>; # UNMAPPED InterIndic-Malayalam: OM
// \ue055>; # UNMAPPED InterIndic-Malayalam: LENGTH MARK
"\ue056>\u0d48;" // REMAP (indicExceptions.txt): \u0d56>\u0d48 = AI LENGTH MARK>VOWEL SIGN AI
"\ue057>\u0d57;" // AU LENGTH MARK
// Additional consonants; all fall back to a plain Malayalam letter
"\ue059>\u0d16;" // REMAP (indicExceptions.txt): \u0d59>\u0d16 = LETTER KHHA>LETTER KHA
"\ue05a>\u0d17;" // REMAP (indicExceptions.txt): \u0d5a>\u0d17 = LETTER GHHA>LETTER GA
"\ue05b>\u0d1c;" // REMAP (indicExceptions.txt): \u0d5b>\u0d1c = LETTER ZA>LETTER JA
"\ue05d>\u0d22;" // REMAP (indicExceptions.txt): \u0d5d>\u0d22 = LETTER RHA>LETTER DDHA
"\ue05e>\u0d2b;" // REMAP (indicExceptions.txt): \u0d5e>\u0d2b = LETTER FA>LETTER PHA
"\ue05f>\u0d2f;" // REMAP (indicExceptions.txt): \u0d5f>\u0d2f = LETTER YYA>LETTER YA
"\ue060>\u0d60;" // LETTER VOCALIC RR
"\ue061>\u0d61;" // LETTER VOCALIC LL
// \ue062>; # UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC L
// \ue063>; # UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC LL
// Digits
"\ue066>\u0d66;" // DIGIT ZERO
"\ue067>\u0d67;" // DIGIT ONE
"\ue068>\u0d68;" // DIGIT TWO
"\ue069>\u0d69;" // DIGIT THREE
"\ue06a>\u0d6a;" // DIGIT FOUR
"\ue06b>\u0d6b;" // DIGIT FIVE
"\ue06c>\u0d6c;" // DIGIT SIX
"\ue06d>\u0d6d;" // DIGIT SEVEN
"\ue06e>\u0d6e;" // DIGIT EIGHT
"\ue06f>\u0d6f;" // DIGIT NINE
// \ue080>; # UNMAPPED InterIndic-Malayalam: ISSHAR
// Letters and vowel signs listed out of code-point order at the end of the table
"\ue00e>\u0d0e;" // LETTER E
"\ue012>\u0d12;" // LETTER O
"\ue031>\u0d31;" // LETTER RRA
"\ue046>\u0d46;" // VOWEL SIGN E
"\ue04a>\u0d4a;" // VOWEL SIGN O
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,134 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Oriya.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// InterIndic_Oriya
// translit_InterIndic_Oriya: rule set converting InterIndic private-use
// code points (\ue0xx) to Oriya (\u0b00-\u0b7f).
//
// Each quoted string is one rule of the form "source>target;".
// Conventions used in the trailing comments:
//   * "REMAP (indicExceptions.txt)" - the target is not the same-offset
//     Oriya code point; the comment spells out the substitution.
//   * "// \ueXXX>; # UNMAPPED ..." - the line is commented out. For E, O and
//     their vowel signs the generator flagged a name mismatch (EE/OO vs E/O);
//     those code points are mapped explicitly at the end of the table.
translit_InterIndic_Oriya {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Oriya.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// InterIndic_Oriya
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:01 2001
//--------------------------------------------------------------------
// InterIndic-Oriya
//:: NFD (NFC) ;
"\ue001>\u0b01;" // SIGN CANDRABINDU
"\ue002>\u0b02;" // SIGN ANUSVARA
"\ue003>\u0b03;" // SIGN VISARGA
"\ue005>\u0b05;" // LETTER A
"\ue006>\u0b06;" // LETTER AA
"\ue007>\u0b07;" // LETTER I
"\ue008>\u0b08;" // LETTER II
"\ue009>\u0b09;" // LETTER U
"\ue00a>\u0b0a;" // LETTER UU
"\ue00b>\u0b0b;" // LETTER VOCALIC R
"\ue00c>\u0b0c;" // LETTER VOCALIC L
// \ue00f>; # UNMAPPED InterIndic-Oriya: LETTER EE (\u0b0f = LETTER E)
"\ue010>\u0b10;" // LETTER AI
// \ue013>; # UNMAPPED InterIndic-Oriya: LETTER OO (\u0b13 = LETTER O)
"\ue014>\u0b14;" // LETTER AU
"\ue015>\u0b15;" // LETTER KA
"\ue016>\u0b16;" // LETTER KHA
"\ue017>\u0b17;" // LETTER GA
"\ue018>\u0b18;" // LETTER GHA
"\ue019>\u0b19;" // LETTER NGA
"\ue01a>\u0b1a;" // LETTER CA
"\ue01b>\u0b1b;" // LETTER CHA
"\ue01c>\u0b1c;" // LETTER JA
"\ue01d>\u0b1d;" // LETTER JHA
"\ue01e>\u0b1e;" // LETTER NYA
"\ue01f>\u0b1f;" // LETTER TTA
"\ue020>\u0b20;" // LETTER TTHA
"\ue021>\u0b21;" // LETTER DDA
"\ue022>\u0b22;" // LETTER DDHA
"\ue023>\u0b23;" // LETTER NNA
"\ue024>\u0b24;" // LETTER TA
"\ue025>\u0b25;" // LETTER THA
"\ue026>\u0b26;" // LETTER DA
"\ue027>\u0b27;" // LETTER DHA
"\ue028>\u0b28;" // LETTER NA
"\ue029>\u0b28;" // REMAP (indicExceptions.txt): \u0b29>\u0b28 = LETTER NNNA>LETTER NA
"\ue02a>\u0b2a;" // LETTER PA
"\ue02b>\u0b2b;" // LETTER PHA
"\ue02c>\u0b2c;" // LETTER BA
"\ue02d>\u0b2d;" // LETTER BHA
"\ue02e>\u0b2e;" // LETTER MA
"\ue02f>\u0b2f;" // LETTER YA
"\ue030>\u0b30;" // LETTER RA
"\ue032>\u0b32;" // LETTER LA
"\ue033>\u0b33;" // LETTER LLA
"\ue034>\u0b33;" // REMAP (indicExceptions.txt): \u0b34>\u0b33 = LETTER LLLA>LETTER LLA
"\ue035>\u0b2c;" // REMAP (indicExceptions.txt): \u0b35>\u0b2c = LETTER VA>LETTER BA
"\ue036>\u0b36;" // LETTER SHA
"\ue037>\u0b37;" // LETTER SSA
"\ue038>\u0b38;" // LETTER SA
"\ue039>\u0b39;" // LETTER HA
"\ue03c>\u0b3c;" // SIGN NUKTA
"\ue03d>\u0b3d;" // SIGN AVAGRAHA
"\ue03e>\u0b3e;" // VOWEL SIGN AA
"\ue03f>\u0b3f;" // VOWEL SIGN I
"\ue040>\u0b40;" // VOWEL SIGN II
"\ue041>\u0b41;" // VOWEL SIGN U
"\ue042>\u0b42;" // VOWEL SIGN UU
"\ue043>\u0b43;" // VOWEL SIGN VOCALIC R
"\ue044>\u0b43\u0b3c;" // REMAP (indicExceptions.txt): \u0b44>\u0b43\u0b3c = VOWEL SIGN VOCALIC RR>VOWEL SIGN VOCALIC R.SIGN NUKTA
"\ue045>\u0b47;" // REMAP (indicExceptions.txt): \u0b45>\u0b47 = VOWEL SIGN CANDRA E>VOWEL SIGN E
// \ue047>; # UNMAPPED InterIndic-Oriya: VOWEL SIGN EE (\u0b47 = VOWEL SIGN E)
"\ue048>\u0b48;" // VOWEL SIGN AI
"\ue049>\u0b4b;" // REMAP (indicExceptions.txt): \u0b49>\u0b4b = VOWEL SIGN CANDRA O>VOWEL SIGN O
// \ue04b>; # UNMAPPED InterIndic-Oriya: VOWEL SIGN OO (\u0b4b = VOWEL SIGN O)
"\ue04c>\u0b4c;" // VOWEL SIGN AU
"\ue04d>\u0b4d;" // SIGN VIRAMA
"\ue050>\u0b13\u0b01;" // REMAP (indicExceptions.txt): \u0b50>\u0b13\u0b01 = OM>LETTER O.SIGN CANDRABINDU
// \ue055>; # UNMAPPED InterIndic-Oriya: LENGTH MARK
"\ue056>\u0b56;" // AI LENGTH MARK
"\ue057>\u0b57;" // AU LENGTH MARK
"\ue059>\u0b16\u0b3c;" // REMAP (indicExceptions.txt): \u0b59>\u0b16\u0b3c = LETTER KHHA>LETTER KHA.SIGN NUKTA
"\ue05a>\u0b17\u0b3c;" // REMAP (indicExceptions.txt): \u0b5a>\u0b17\u0b3c = LETTER GHHA>LETTER GA.SIGN NUKTA
"\ue05b>\u0b1c\u0b3c;" // REMAP (indicExceptions.txt): \u0b5b>\u0b1c\u0b3c = LETTER ZA>LETTER JA.SIGN NUKTA
"\ue05d>\u0b5d;" // LETTER RHA
"\ue05e>\u0b2b\u0b3c;" // REMAP (indicExceptions.txt): \u0b5e>\u0b2b\u0b3c = LETTER FA>LETTER PHA.SIGN NUKTA
"\ue05f>\u0b5f;" // LETTER YYA
"\ue060>\u0b60;" // LETTER VOCALIC RR
"\ue061>\u0b61;" // LETTER VOCALIC LL
"\ue062>\u0b56\u0b3c;" // REMAP (indicExceptions.txt): \u0b62>\u0b56\u0b3c = VOWEL SIGN VOCALIC L>AI LENGTH MARK.SIGN NUKTA
"\ue063>\u0b57\u0b3c;" // REMAP (indicExceptions.txt): \u0b63>\u0b57\u0b3c = VOWEL SIGN VOCALIC LL>AU LENGTH MARK.SIGN NUKTA
"\ue066>\u0b66;" // DIGIT ZERO
"\ue067>\u0b67;" // DIGIT ONE
"\ue068>\u0b68;" // DIGIT TWO
"\ue069>\u0b69;" // DIGIT THREE
"\ue06a>\u0b6a;" // DIGIT FOUR
"\ue06b>\u0b6b;" // DIGIT FIVE
"\ue06c>\u0b6c;" // DIGIT SIX
"\ue06d>\u0b6d;" // DIGIT SEVEN
"\ue06e>\u0b6e;" // DIGIT EIGHT
"\ue06f>\u0b6f;" // DIGIT NINE
"\ue070>\u0b70;" // ISSHAR
// Explicit mappings for the E/O letters and vowel signs flagged UNMAPPED above.
// \ue00f is the E slot that the UNMAPPED comment above refers to; it previously
// had no rule at all, so it is mapped here alongside \ue013, \ue047 and \ue04b.
// The \ue00e rule is kept so existing behaviour for that code point is unchanged.
"\ue00e>\u0b0f;" // LETTER E (kept: maps the \ue00e slot to the single Oriya E)
"\ue00f>\u0b0f;" // LETTER E
"\ue013>\u0b13;" // LETTER O
"\ue031>\u0b5c;" // LETTER RRA
"\ue047>\u0b47;" // VOWEL SIGN E
"\ue04b>\u0b4b;" // VOWEL SIGN O
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,134 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Tamil.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// InterIndic_Tamil
translit_InterIndic_Tamil {
// InterIndic -> Tamil rule set.  Each rule string has the form
// "source>target;": the source is a single InterIndic code point (\ue0NN)
// and the target is one or more Tamil code points.  Rules whose comment is
// just a character name map to the Tamil code point at the same offset
// (\ue0NN -> \u0b80 + NN); rules tagged REMAP substitute something else.
Rule {
// The two banners below are older generator headers (note their earlier
// dates) that are embedded here as comments.
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Tamil.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// InterIndic_Tamil
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:01 2001
//--------------------------------------------------------------------
// InterIndic-Tamil
// The normalization directive below is commented out, so it is not part of
// the rule text.
//:: NFD (NFC) ;
// Lines marked UNMAPPED are commented out: no rule is emitted for that
// InterIndic code point in this rule set.
// \ue001>; # UNMAPPED InterIndic-Tamil: SIGN CANDRABINDU
"\ue002>\u0b82;" // SIGN ANUSVARA
"\ue003>\u0b83;" // SIGN VISARGA
"\ue005>\u0b85;" // LETTER A
"\ue006>\u0b86;" // LETTER AA
"\ue007>\u0b87;" // LETTER I
"\ue008>\u0b88;" // LETTER II
"\ue009>\u0b89;" // LETTER U
"\ue00a>\u0b8a;" // LETTER UU
// In a REMAP comment, the left side of '>' is the Tamil-block position that
// a one-to-one mapping would have used; the right side is the substitute
// that the rule actually emits.
"\ue00b>\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0b8b>\u0bb0\u0bbf = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
"\ue00c>\u0b87;" // REMAP (indicExceptions.txt): \u0b8c>\u0b87 = LETTER VOCALIC L>LETTER I
"\ue00f>\u0b8f;" // LETTER EE
"\ue010>\u0b90;" // LETTER AI
"\ue013>\u0b93;" // LETTER OO
"\ue014>\u0b94;" // LETTER AU
"\ue015>\u0b95;" // LETTER KA
"\ue016>\u0b95;" // REMAP (indicExceptions.txt): \u0b96>\u0b95 = LETTER KHA>LETTER KA
"\ue017>\u0b95;" // REMAP (indicExceptions.txt): \u0b97>\u0b95 = LETTER GA>LETTER KA
"\ue018>\u0b95;" // REMAP (indicExceptions.txt): \u0b98>\u0b95 = LETTER GHA>LETTER KA
"\ue019>\u0b99;" // LETTER NGA
"\ue01a>\u0b9a;" // LETTER CA
"\ue01b>\u0b9a;" // REMAP (indicExceptions.txt): \u0b9b>\u0b9a = LETTER CHA>LETTER CA
"\ue01c>\u0b9c;" // LETTER JA
"\ue01d>\u0b9a;" // REMAP (indicExceptions.txt): \u0b9d>\u0b9a = LETTER JHA>LETTER CA
"\ue01e>\u0b9e;" // LETTER NYA
"\ue01f>\u0b9f;" // LETTER TTA
"\ue020>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba0>\u0b9f = LETTER TTHA>LETTER TTA
"\ue021>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba1>\u0b9f = LETTER DDA>LETTER TTA
"\ue022>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba2>\u0b9f = LETTER DDHA>LETTER TTA
"\ue023>\u0ba3;" // LETTER NNA
"\ue024>\u0ba4;" // LETTER TA
"\ue025>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba5>\u0ba4 = LETTER THA>LETTER TA
"\ue026>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba6>\u0ba4 = LETTER DA>LETTER TA
"\ue027>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba7>\u0ba4 = LETTER DHA>LETTER TA
"\ue028>\u0ba8;" // LETTER NA
"\ue029>\u0ba9;" // LETTER NNNA
"\ue02a>\u0baa;" // LETTER PA
"\ue02b>\u0baa;" // REMAP (indicExceptions.txt): \u0bab>\u0baa = LETTER PHA>LETTER PA
"\ue02c>\u0baa;" // REMAP (indicExceptions.txt): \u0bac>\u0baa = LETTER BA>LETTER PA
"\ue02d>\u0baa;" // REMAP (indicExceptions.txt): \u0bad>\u0baa = LETTER BHA>LETTER PA
"\ue02e>\u0bae;" // LETTER MA
"\ue02f>\u0baf;" // LETTER YA
"\ue030>\u0bb0;" // LETTER RA
"\ue032>\u0bb2;" // LETTER LA
"\ue033>\u0bb3;" // LETTER LLA
"\ue034>\u0bb4;" // LETTER LLLA
"\ue035>\u0bb5;" // LETTER VA
"\ue036>\u0bb7;" // REMAP (indicExceptions.txt): \u0bb6>\u0bb7 = LETTER SHA>LETTER SSA
"\ue037>\u0bb7;" // LETTER SSA
"\ue038>\u0bb8;" // LETTER SA
"\ue039>\u0bb9;" // LETTER HA
// \ue03c>; # UNMAPPED InterIndic-Tamil: SIGN NUKTA
// \ue03d>; # UNMAPPED InterIndic-Tamil: SIGN AVAGRAHA
"\ue03e>\u0bbe;" // VOWEL SIGN AA
"\ue03f>\u0bbf;" // VOWEL SIGN I
"\ue040>\u0bc0;" // VOWEL SIGN II
"\ue041>\u0bc1;" // VOWEL SIGN U
"\ue042>\u0bc2;" // VOWEL SIGN UU
"\ue043>\u0bcd\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0bc3>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC R>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
"\ue044>\u0bcd\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0bc4>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC RR>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
"\ue045>\u0bbe;" // REMAP (indicExceptions.txt): \u0bc5>\u0bbe = VOWEL SIGN CANDRA E>VOWEL SIGN AA
"\ue047>\u0bc7;" // VOWEL SIGN EE
"\ue048>\u0bc8;" // VOWEL SIGN AI
"\ue049>\u0bbe;" // REMAP (indicExceptions.txt): \u0bc9>\u0bbe = VOWEL SIGN CANDRA O>VOWEL SIGN AA
"\ue04b>\u0bcb;" // VOWEL SIGN OO
"\ue04c>\u0bcc;" // VOWEL SIGN AU
"\ue04d>\u0bcd;" // SIGN VIRAMA
"\ue050>\u0b93\u0bae\u0bcd;" // REMAP (indicExceptions.txt): \u0bd0>\u0b93\u0bae\u0bcd = OM>LETTER OO.LETTER MA.SIGN VIRAMA
// \ue055>; # UNMAPPED InterIndic-Tamil: LENGTH MARK
"\ue056>\u0bc8;" // REMAP (indicExceptions.txt): \u0bd6>\u0bc8 = AI LENGTH MARK>VOWEL SIGN AI
"\ue057>\u0bd7;" // AU LENGTH MARK
"\ue059>\u0b95;" // REMAP (indicExceptions.txt): \u0bd9>\u0b95 = LETTER KHHA>LETTER KA
"\ue05a>\u0b95;" // REMAP (indicExceptions.txt): \u0bda>\u0b95 = LETTER GHHA>LETTER KA
"\ue05b>\u0b9c;" // REMAP (indicExceptions.txt): \u0bdb>\u0b9c = LETTER ZA>LETTER JA
"\ue05d>\u0b9f;" // REMAP (indicExceptions.txt): \u0bdd>\u0b9f = LETTER RHA>LETTER TTA
"\ue05e>\u0baa;" // REMAP (indicExceptions.txt): \u0bde>\u0baa = LETTER FA>LETTER PA
"\ue05f>\u0baf;" // REMAP (indicExceptions.txt): \u0bdf>\u0baf = LETTER YYA>LETTER YA
"\ue060>\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0be0>\u0bb0\u0bbf = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
"\ue061>\u0b88;" // REMAP (indicExceptions.txt): \u0be1>\u0b88 = LETTER VOCALIC LL>LETTER II
// \ue062>; # UNMAPPED InterIndic-Tamil: VOWEL SIGN VOCALIC L
// \ue063>; # UNMAPPED InterIndic-Tamil: VOWEL SIGN VOCALIC LL
// Digit zero has no rule here; digits one through nine follow.
// \ue066>; # UNMAPPED InterIndic-Tamil: DIGIT ZERO
"\ue067>\u0be7;" // DIGIT ONE
"\ue068>\u0be8;" // DIGIT TWO
"\ue069>\u0be9;" // DIGIT THREE
"\ue06a>\u0bea;" // DIGIT FOUR
"\ue06b>\u0beb;" // DIGIT FIVE
"\ue06c>\u0bec;" // DIGIT SIX
"\ue06d>\u0bed;" // DIGIT SEVEN
"\ue06e>\u0bee;" // DIGIT EIGHT
"\ue06f>\u0bef;" // DIGIT NINE
// \ue080>; # UNMAPPED InterIndic-Tamil: ISSHAR
// The five rules below are listed after the digits, out of code point order.
"\ue00e>\u0b8e;" // LETTER E
"\ue012>\u0b92;" // LETTER O
"\ue031>\u0bb1;" // LETTER RRA
"\ue046>\u0bc6;" // VOWEL SIGN E
"\ue04a>\u0bca;" // VOWEL SIGN O
// Trailing normalization directive, also commented out.
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,134 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Telugu.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// InterIndic_Telugu
translit_InterIndic_Telugu {
// InterIndic -> Telugu rule set.  Each rule string has the form
// "source>target;": the source is a single InterIndic code point (\ue0NN)
// and the target is one or more Telugu code points.  Rules whose comment is
// just a character name map to the Telugu code point with the same low byte
// (\ue0NN -> \u0cNN); rules tagged REMAP substitute something else.
Rule {
// The two banners below are older generator headers (note their earlier
// dates) that are embedded here as comments.
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Telugu.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// InterIndic_Telugu
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:01 2001
//--------------------------------------------------------------------
// InterIndic-Telugu
// The normalization directive below is commented out, so it is not part of
// the rule text.
//:: NFD (NFC) ;
"\ue001>\u0c01;" // SIGN CANDRABINDU
"\ue002>\u0c02;" // SIGN ANUSVARA
"\ue003>\u0c03;" // SIGN VISARGA
"\ue005>\u0c05;" // LETTER A
"\ue006>\u0c06;" // LETTER AA
"\ue007>\u0c07;" // LETTER I
"\ue008>\u0c08;" // LETTER II
"\ue009>\u0c09;" // LETTER U
"\ue00a>\u0c0a;" // LETTER UU
"\ue00b>\u0c0b;" // LETTER VOCALIC R
"\ue00c>\u0c0c;" // LETTER VOCALIC L
"\ue00f>\u0c0f;" // LETTER EE
"\ue010>\u0c10;" // LETTER AI
"\ue013>\u0c13;" // LETTER OO
"\ue014>\u0c14;" // LETTER AU
"\ue015>\u0c15;" // LETTER KA
"\ue016>\u0c16;" // LETTER KHA
"\ue017>\u0c17;" // LETTER GA
"\ue018>\u0c18;" // LETTER GHA
"\ue019>\u0c19;" // LETTER NGA
"\ue01a>\u0c1a;" // LETTER CA
"\ue01b>\u0c1b;" // LETTER CHA
"\ue01c>\u0c1c;" // LETTER JA
"\ue01d>\u0c1d;" // LETTER JHA
"\ue01e>\u0c1e;" // LETTER NYA
"\ue01f>\u0c1f;" // LETTER TTA
"\ue020>\u0c20;" // LETTER TTHA
"\ue021>\u0c21;" // LETTER DDA
"\ue022>\u0c22;" // LETTER DDHA
"\ue023>\u0c23;" // LETTER NNA
"\ue024>\u0c24;" // LETTER TA
"\ue025>\u0c25;" // LETTER THA
"\ue026>\u0c26;" // LETTER DA
"\ue027>\u0c27;" // LETTER DHA
"\ue028>\u0c28;" // LETTER NA
// In a REMAP comment, the left side of '>' is the Telugu-block position that
// a one-to-one mapping would have used; the right side is the substitute
// that the rule actually emits.
"\ue029>\u0c28;" // REMAP (indicExceptions.txt): \u0c29>\u0c28 = LETTER NNNA>LETTER NA
"\ue02a>\u0c2a;" // LETTER PA
"\ue02b>\u0c2b;" // LETTER PHA
"\ue02c>\u0c2c;" // LETTER BA
"\ue02d>\u0c2d;" // LETTER BHA
"\ue02e>\u0c2e;" // LETTER MA
"\ue02f>\u0c2f;" // LETTER YA
"\ue030>\u0c30;" // LETTER RA
"\ue032>\u0c32;" // LETTER LA
"\ue033>\u0c33;" // LETTER LLA
"\ue034>\u0c33;" // REMAP (indicExceptions.txt): \u0c34>\u0c33 = LETTER LLLA>LETTER LLA
"\ue035>\u0c35;" // LETTER VA
"\ue036>\u0c36;" // LETTER SHA
"\ue037>\u0c37;" // LETTER SSA
"\ue038>\u0c38;" // LETTER SA
"\ue039>\u0c39;" // LETTER HA
// Lines marked UNMAPPED are commented out: no rule is emitted for that
// InterIndic code point in this rule set.
// \ue03c>; # UNMAPPED InterIndic-Telugu: SIGN NUKTA
// \ue03d>; # UNMAPPED InterIndic-Telugu: SIGN AVAGRAHA
"\ue03e>\u0c3e;" // VOWEL SIGN AA
"\ue03f>\u0c3f;" // VOWEL SIGN I
"\ue040>\u0c40;" // VOWEL SIGN II
"\ue041>\u0c41;" // VOWEL SIGN U
"\ue042>\u0c42;" // VOWEL SIGN UU
"\ue043>\u0c43;" // VOWEL SIGN VOCALIC R
"\ue044>\u0c44;" // VOWEL SIGN VOCALIC RR
"\ue045>\u0c46;" // REMAP: \u0c45>\u0c46 = VOWEL SIGN CANDRA E>VOWEL SIGN E
"\ue047>\u0c47;" // VOWEL SIGN EE
"\ue048>\u0c48;" // VOWEL SIGN AI
"\ue049>\u0c4a;" // REMAP (indicExceptions.txt): \u0c49>\u0c4a = VOWEL SIGN CANDRA O>VOWEL SIGN O
"\ue04b>\u0c4b;" // VOWEL SIGN OO
"\ue04c>\u0c4c;" // VOWEL SIGN AU
"\ue04d>\u0c4d;" // SIGN VIRAMA
"\ue050>\u0c13\u0c02;" // REMAP (indicExceptions.txt): \u0c50>\u0c13\u0c02 = OM>LETTER OO.SIGN ANUSVARA
"\ue055>\u0c55;" // LENGTH MARK
"\ue056>\u0c56;" // AI LENGTH MARK
"\ue057>\u0c4c;" // REMAP (indicExceptions.txt): \u0c57>\u0c4c = AU LENGTH MARK>VOWEL SIGN AU
"\ue059>\u0c16;" // REMAP (indicExceptions.txt): \u0c59>\u0c16 = LETTER KHHA>LETTER KHA
"\ue05a>\u0c17;" // REMAP (indicExceptions.txt): \u0c5a>\u0c17 = LETTER GHHA>LETTER GA
"\ue05b>\u0c1c;" // REMAP (indicExceptions.txt): \u0c5b>\u0c1c = LETTER ZA>LETTER JA
"\ue05d>\u0c22;" // REMAP (indicExceptions.txt): \u0c5d>\u0c22 = LETTER RHA>LETTER DDHA
"\ue05e>\u0c2b;" // REMAP (indicExceptions.txt): \u0c5e>\u0c2b = LETTER FA>LETTER PHA
"\ue05f>\u0c2f;" // REMAP (indicExceptions.txt): \u0c5f>\u0c2f = LETTER YYA>LETTER YA
"\ue060>\u0c60;" // LETTER VOCALIC RR
"\ue061>\u0c61;" // LETTER VOCALIC LL
"\ue062>\u0c3f;" // REMAP (indicExceptions.txt): \u0c62>\u0c3f = VOWEL SIGN VOCALIC L>VOWEL SIGN I
"\ue063>\u0c40;" // REMAP (indicExceptions.txt): \u0c63>\u0c40 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
"\ue066>\u0c66;" // DIGIT ZERO
"\ue067>\u0c67;" // DIGIT ONE
"\ue068>\u0c68;" // DIGIT TWO
"\ue069>\u0c69;" // DIGIT THREE
"\ue06a>\u0c6a;" // DIGIT FOUR
"\ue06b>\u0c6b;" // DIGIT FIVE
"\ue06c>\u0c6c;" // DIGIT SIX
"\ue06d>\u0c6d;" // DIGIT SEVEN
"\ue06e>\u0c6e;" // DIGIT EIGHT
"\ue06f>\u0c6f;" // DIGIT NINE
// \ue080>; # UNMAPPED InterIndic-Telugu: ISSHAR
// The five rules below are listed after the digits, out of code point order.
"\ue00e>\u0c0e;" // LETTER E
"\ue012>\u0c12;" // LETTER O
"\ue031>\u0c31;" // LETTER RRA
"\ue046>\u0c46;" // VOWEL SIGN E
"\ue04a>\u0c4a;" // VOWEL SIGN O
// Trailing normalization directive, also commented out.
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,117 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Kannada_InterIndic.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Kannada_InterIndic
translit_Kannada_InterIndic {
// Kannada -> InterIndic rule set.  Each rule string has the form
// "source>target;".  Every rule in this resource is one-to-one: a single
// Kannada code point \u0cNN (NN in 82..ef) becomes the InterIndic code point
// \ue0MM with MM = NN - 0x80 (e.g. \u0c82 -> \ue002).
Rule {
// The two banners below are older generator headers (note their earlier
// dates) that are embedded here as comments.
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Kannada_InterIndic.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// Kannada_InterIndic
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:05 2001
//--------------------------------------------------------------------
// Kannada-InterIndic
// The normalization directive below is commented out, so it is not part of
// the rule text.
//:: NFD (NFC) ;
// Signs
"\u0c82>\ue002;" // SIGN ANUSVARA
"\u0c83>\ue003;" // SIGN VISARGA
// Independent vowel letters
"\u0c85>\ue005;" // LETTER A
"\u0c86>\ue006;" // LETTER AA
"\u0c87>\ue007;" // LETTER I
"\u0c88>\ue008;" // LETTER II
"\u0c89>\ue009;" // LETTER U
"\u0c8a>\ue00a;" // LETTER UU
"\u0c8b>\ue00b;" // LETTER VOCALIC R
"\u0c8c>\ue00c;" // LETTER VOCALIC L
"\u0c8e>\ue00e;" // LETTER E
"\u0c8f>\ue00f;" // LETTER EE
"\u0c90>\ue010;" // LETTER AI
"\u0c92>\ue012;" // LETTER O
"\u0c93>\ue013;" // LETTER OO
"\u0c94>\ue014;" // LETTER AU
// Consonant letters
"\u0c95>\ue015;" // LETTER KA
"\u0c96>\ue016;" // LETTER KHA
"\u0c97>\ue017;" // LETTER GA
"\u0c98>\ue018;" // LETTER GHA
"\u0c99>\ue019;" // LETTER NGA
"\u0c9a>\ue01a;" // LETTER CA
"\u0c9b>\ue01b;" // LETTER CHA
"\u0c9c>\ue01c;" // LETTER JA
"\u0c9d>\ue01d;" // LETTER JHA
"\u0c9e>\ue01e;" // LETTER NYA
"\u0c9f>\ue01f;" // LETTER TTA
"\u0ca0>\ue020;" // LETTER TTHA
"\u0ca1>\ue021;" // LETTER DDA
"\u0ca2>\ue022;" // LETTER DDHA
"\u0ca3>\ue023;" // LETTER NNA
"\u0ca4>\ue024;" // LETTER TA
"\u0ca5>\ue025;" // LETTER THA
"\u0ca6>\ue026;" // LETTER DA
"\u0ca7>\ue027;" // LETTER DHA
"\u0ca8>\ue028;" // LETTER NA
"\u0caa>\ue02a;" // LETTER PA
"\u0cab>\ue02b;" // LETTER PHA
"\u0cac>\ue02c;" // LETTER BA
"\u0cad>\ue02d;" // LETTER BHA
"\u0cae>\ue02e;" // LETTER MA
"\u0caf>\ue02f;" // LETTER YA
"\u0cb0>\ue030;" // LETTER RA
"\u0cb1>\ue031;" // LETTER RRA
"\u0cb2>\ue032;" // LETTER LA
"\u0cb3>\ue033;" // LETTER LLA
"\u0cb5>\ue035;" // LETTER VA
"\u0cb6>\ue036;" // LETTER SHA
"\u0cb7>\ue037;" // LETTER SSA
"\u0cb8>\ue038;" // LETTER SA
"\u0cb9>\ue039;" // LETTER HA
// Dependent vowel signs and virama
"\u0cbe>\ue03e;" // VOWEL SIGN AA
"\u0cbf>\ue03f;" // VOWEL SIGN I
"\u0cc0>\ue040;" // VOWEL SIGN II
"\u0cc1>\ue041;" // VOWEL SIGN U
"\u0cc2>\ue042;" // VOWEL SIGN UU
"\u0cc3>\ue043;" // VOWEL SIGN VOCALIC R
"\u0cc4>\ue044;" // VOWEL SIGN VOCALIC RR
"\u0cc6>\ue046;" // VOWEL SIGN E
"\u0cc7>\ue047;" // VOWEL SIGN EE
"\u0cc8>\ue048;" // VOWEL SIGN AI
"\u0cca>\ue04a;" // VOWEL SIGN O
"\u0ccb>\ue04b;" // VOWEL SIGN OO
"\u0ccc>\ue04c;" // VOWEL SIGN AU
"\u0ccd>\ue04d;" // SIGN VIRAMA
// Length marks and additional letters
"\u0cd5>\ue055;" // LENGTH MARK
"\u0cd6>\ue056;" // AI LENGTH MARK
"\u0cde>\ue05e;" // LETTER FA
"\u0ce0>\ue060;" // LETTER VOCALIC RR
"\u0ce1>\ue061;" // LETTER VOCALIC LL
// Digits
"\u0ce6>\ue066;" // DIGIT ZERO
"\u0ce7>\ue067;" // DIGIT ONE
"\u0ce8>\ue068;" // DIGIT TWO
"\u0ce9>\ue069;" // DIGIT THREE
"\u0cea>\ue06a;" // DIGIT FOUR
"\u0ceb>\ue06b;" // DIGIT FIVE
"\u0cec>\ue06c;" // DIGIT SIX
"\u0ced>\ue06d;" // DIGIT SEVEN
"\u0cee>\ue06e;" // DIGIT EIGHT
"\u0cef>\ue06f;" // DIGIT NINE
// Trailing normalization directive, also commented out.
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,319 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Latin_InterIndic.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Latin_InterIndic
translit_Latin_InterIndic {
// Latin -> InterIndic rule set.  The first part of the rule text defines
// variables ("$name=\ue0NN;") naming InterIndic code points; the second part
// is the ordered list of conversion rules that use them.
//
// Rule syntax used below (ICU transliterator rules):
//   key > output;            replace key with output
//   before { key > output;   key must be preceded by 'before' (not replaced)
//   key } after > output;    key must be followed by 'after' (not replaced)
//   a | b  in the output     leaves the cursor between a and b, so b is
//                            still available as input to later matches
//   'x'                      quoted literal; '' is a literal apostrophe
// Rules are tried in the order listed, so a longer key (e.g. "aa") must
// appear before any shorter key that is its prefix (e.g. "a").
Rule {
//--------------------------------------------------------------------
// Copyright (c) 2001-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Latin-InterIndic
// The normalization directive below is commented out, so it is not part of
// the rule text.
//:: NFD;
//\ue000 reserved
//signs
"$chandrabindu=\ue001;"
"$anusvara=\ue002;"
"$visarga=\ue003;"
//\ue004 reserved
// w<vowel> represents the stand-alone form
"$wa=\ue005;"
"$waa=\ue006;"
"$wi=\ue007;"
"$wii=\ue008;"
"$wu=\ue009;"
"$wuu=\ue00a;"
"$wr=\ue00b;"
"$wl=\ue00c;"
"$wce=\ue00d;" // LETTER CANDRA E
"$wse=\ue00e;" // LETTER SHORT E
"$we=\ue00f;" // \u090f LETTER E
"$wai=\ue010;"
"$wco=\ue011;" // LETTER CANDRA O
"$wso=\ue012;" // LETTER SHORT O
"$wo=\ue013;" // \u0913 LETTER O
"$wau=\ue014;"
//consonants
"$ka=\ue015;"
"$kha=\ue016;"
"$ga=\ue017;"
"$gha=\ue018;"
"$nga=\ue019;"
"$ca=\ue01a;"
"$cha=\ue01b;"
"$ja=\ue01c;"
"$jha=\ue01d;"
"$nya=\ue01e;"
"$tta=\ue01f;"
"$ttha=\ue020;"
"$dda=\ue021;"
"$ddha=\ue022;"
"$nna=\ue023;"
"$ta=\ue024;"
"$tha=\ue025;"
"$da=\ue026;"
"$dha=\ue027;"
"$na=\ue028;"
"$ena=\ue029;" //compatibility
"$pa=\ue02a;"
"$pha=\ue02b;"
"$ba=\ue02c;"
"$bha=\ue02d;"
"$ma=\ue02e;"
"$ya=\ue02f;"
"$ra=\ue030;"
"$rra=\ue031;"
"$la=\ue032;"
"$lla=\ue033;"
"$ela=\ue034;" //compatibility
"$va=\ue035;"
"$sha=\ue036;"
"$ssa=\ue037;"
"$sa=\ue038;"
"$ha=\ue039;"
// The \u09xx references in the comments from here on name Devanagari-block
// positions; no variable is defined for the matching \ue03a / \ue03b.
//\u093a Reserved
//\u093b Reserved
"$nukta=\ue03c;"
"$avagraha=\ue03d;" // SIGN AVAGRAHA
// <vowel> represents the dependent form
"$aa=\ue03e;"
"$i=\ue03f;"
"$ii=\ue040;"
"$u=\ue041;"
"$uu=\ue042;"
"$rh=\ue043;"
"$lh=\ue044;"
"$ce=\ue045;" //VOWEL SIGN CANDRA E
"$se=\ue046;" //VOWEL SIGN SHORT E
"$e=\ue047;"
"$ai=\ue048;"
"$co=\ue049;" // VOWEL SIGN CANDRA O
"$so=\ue04a;" // VOWEL SIGN SHORT O
"$o=\ue04b;" // \u094b
"$au=\ue04c;"
"$virama=\ue04d;"
// \u094e Reserved
// \u094f Reserved
//\u0950>\ue050; # OM
// \u0951>; # UNMAPPED STRESS SIGN UDATTA
// \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
// \u0953>; # UNMAPPED GRAVE ACCENT
// \u0954>; # UNMAPPED ACUTE ACCENT
"$lm = \ue055;"// Telugu Length Mark
"$ailm=\ue056;"// AI Length Mark
"$aulm=\ue057;"// AU Length Mark
//urdu compatibility forms
"$uka=\ue058;"
"$ukha=\ue059;"
"$ugha=\ue05a;"
"$ujha=\ue05b;"
"$uddha=\ue05c;"
"$udha=\ue05d;"
"$ufa=\ue05e;"
"$uya=\ue05f;"
"$wrr=\ue060;"
"$wll=\ue061;"
"$rrh=\ue062;"
"$llh=\ue063;"
"$danda=\ue064;"
"$doubleDanda=\ue065;"
"$zero=\ue066;" // DIGIT ZERO
"$one=\ue067;" // DIGIT ONE
"$two=\ue068;" // DIGIT TWO
"$three=\ue069;" // DIGIT THREE
"$four=\ue06a;" // DIGIT FOUR
"$five=\ue06b;" // DIGIT FIVE
"$six=\ue06c;" // DIGIT SIX
"$seven=\ue06d;" // DIGIT SEVEN
"$eight=\ue06e;" // DIGIT EIGHT
"$nine=\ue06f;" // DIGIT NINE
// For all other scripts
"$ecp0=\ue070;"
"$ecp1=\ue071;"
"$ecp2=\ue072;"
"$ecp3=\ue073;"
"$ecp4=\ue074;"
"$ecp5=\ue075;"
"$ecp6=\ue076;"
"$ecp7=\ue077;"
"$ecp8=\ue078;"
"$ecp9=\ue079;"
"$ecpA=\ue07a;"
"$ecpB=\ue07b;"
"$ecpC=\ue07c;"
"$ecpD=\ue07d;"
"$ecpE=\ue07e;"
"$ecpF=\ue07f;"
// \u0970>; # UNMAPPED ABBREVIATION SIGN
// Character classes used as context in the rules below.
"$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
"$depVowelBelow=[\ue041-\ue044];"
"$endThing=[$danda$doubleDanda];"
// $x was originally called '&'; $z was '%'
// NOTE: $x is defined but not referenced by any rule in this resource;
// $z (the Latin consonant letters) is used in the virama clean-up rules.
"$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];"
"$z=[bcdfghjklmnpqrstvwxyz];"
//DEBUG: $consonants=[$ka-$ha $virama];
// --- Signs written with combining marks or a leading hyphen ---
"\u0315 > $avagraha;"
"'-'\u0303>$chandrabindu$anusvara;"
"'-'m\u0310>$chandrabindu;"
"'-'h\u0323>$visarga;"
"mm>$anusvara;"
"x>$visarga;"
// --- Vowels with no pending virama become stand-alone ($w...) vowels ---
// Two-character keys are listed before their one-character prefixes.
"aa>$waa;"
"a\u0304>$waa;"
"ai>$wai;"
"au>$wau;"
"ii>$wii;"
"i\u0304>$wii;"
"i>$wi;"
"uu>$wuu;"
"u\u0304>$wuu;"
"u>$wu;"
"rrh>$wrr;"
"r\u0325\u0304>$wrr;"
"rh>$wr;"
"r\u0325>$wr;"
"l\u0325\u0304>$wll;"
// The next string literal holds two rules (lh and l + U+0325).
"lh>$wl;l\u0325>$wl;"
"e\u0304>$we;"
"o\u0304>$wo;"
"a>$wa;"
"e\u0306>$wce;"
"o\u0306>$wco;"
"e>$wse;"
"o>$wso;"
// --- Nasal before a consonant ---
// 'n' followed by "na" is kept as a consonant; this rule is listed ahead of
// the n}[tdn] anusvara rule below, which would otherwise match first.
"n}na > $na|$virama;"
// The remaining rules here turn a nasal into anusvara when the listed
// consonant follows (the following text is context only, not consumed).
"n\u0307}[kg] > $anusvara;"
"n\u0307}n\u0303 > $anusvara;"
"n\u0304}[cj] > $anusvara;"
"n\u0304}n\u0307 > $anusvara;"
"n\u0323}[tdn]\u0323 > $anusvara;"
"n}[tdn] > $anusvara;"
"m}[pbm] > $anusvara;"
"n} [yrlvsh] > $anusvara;"
"'-'m\u0307 > $anusvara;"
// --- Consonants ---
// Each rule emits the consonant followed by a virama, with the cursor left
// before the virama ("|") so that a following vowel can be matched by the
// "$virama <vowel>" rules further down.
"y\u0307>$uya|$virama;"
"l\u0331>$ela|$virama;"
"n\u0331>$ena|$virama;"
"n\u0307>$nga|$virama;"
"n\u0303>$nya|$virama;"
"n\u0323>$nna|$virama;"
"t\u0323h>$ttha|$virama;"
"t\u0323>$tta|$virama;"
"r\u0323h>$udha|$virama;"
"r\u0323>$uddha|$virama;"
"d\u0323h>$ddha|$virama;"
"d\u0323>$dda|$virama;"
"kh>$kha|$virama;"
"k>$ka|$virama;"
"q>$ka|$virama;"
"gh>$gha|$virama;"
"g>$ga|$virama;"
"ch>$cha|$virama;"
"c>$ca|$virama;"
"jh>$jha|$virama;"
"j>$ja|$virama;"
"ny>$nya|$virama;"
"tth>$ttha|$virama;"
"ddh>$ddha|$virama;"
"th>$tha|$virama;"
"t>$ta|$virama;"
"dh>$dha|$virama;"
"d>$da|$virama;"
"n>$na|$virama;"
"ph>$pha|$virama;"
"p>$pa|$virama;"
"bh>$bha|$virama;"
"b>$ba|$virama;"
"m>$ma|$virama;"
"y>$ya|$virama;"
"r>$ra|$virama;"
// Unlike its neighbours, this rule consumes the trailing 'a' itself and
// emits no virama.
"l\u0323a>$lla;"
"l>$la|$virama;"
"v>$va|$virama;"
"f>$va|$virama;"
"w>$va|$virama;"
"sh>$sha|$virama;"
"ss>$ssa|$virama;"
"s\u0323>$ssa|$virama;"
"s\u0301>$sha|$virama;"
"s>$sa|$virama;"
"z>$sa|$virama;"
"h>$ha|$virama;"
// --- Punctuation and '~' after a dependent vowel ---
"'.'>$danda;"
"$danda'.'>$doubleDanda;"
"$depVowelAbove{'~'>$anusvara;"
"$depVowelBelow{'~'>$chandrabindu;"
// --- Virama followed by a vowel: replace both with the dependent vowel ---
"$virama aa>$aa;"
"$virama a\u0304>$aa;"
"$virama ai>$ai;"
"$virama au>$au;"
"$virama ii>$ii;"
"$virama i\u0304>$ii;"
"$virama i>$i;"
"$virama uu>$uu;"
"$virama u\u0304>$uu;"
"$virama u>$u;"
"$virama rrh>$rrh;"
"$virama r\u0325\u0304>$rrh;"
"$virama rh>$rh;"
"$virama r\u0325a>$rh;"
"$virama r\u0325>$rh;"
"$virama l\u0325\u0304>$llh;"
"$virama lh>$lh;"
"$virama l\u0325>$lh;"
"$virama e\u0304>$e;"
"$virama o\u0304>$o;"
// Virama followed by plain 'a' is simply deleted (empty output).
"$virama a>;"
"$virama e\u0306>$ce;"
"$virama o\u0306>$co;"
"$virama e>$se;"
"$virama o>$so;"
// --- Virama, a literal apostrophe (''), then a vowel ---
// The virama, the apostrophe and the vowel letters are all replaced by the
// stand-alone ($w...) form of the vowel.
"$virama''aa>$waa;"
"$virama''a\u0304>$waa;"
"$virama''ai>$wai;"
"$virama''au>$wau;"
"$virama''ii>$wii;"
"$virama''i\u0304>$wii;"
"$virama''i>$wi;"
"$virama''uu>$wuu;"
"$virama''u\u0304>$wuu;"
"$virama''u>$wu;"
"$virama''rrh>$wrr;"
"$virama''r\u0325\u0304>$wrr;"
"$virama''rh>$wr;"
"$virama''r\u0325>$wr;"
"$virama''l\u0325\u0304>$wll;"
"$virama''lh>$wl;"
"$virama''l\u0325>$wl;"
"$virama''e\u0304>$we;"
"$virama''o\u0304>$wo;"
"$virama''a>$wa;"
"$virama''e\u0306>$wce;"
"$virama''o\u0306>$wco;"
"$virama''e>$wse;"
"$virama''o>$wso;"
// --- Virama clean-up ---
// Before a Latin consonant or a space the virama is rewritten to itself,
// which moves the cursor past it; before a danda/double danda it is deleted.
"$virama } [$z] > $virama;"
"$virama } ' ' > $virama ;"
"$virama}$endThing>;"
// --- Digits ---
"0>$zero;"
"1>$one;"
"2>$two;"
"3>$three;"
"4>$four;"
"5>$five;"
"6>$six;"
"7>$seven;"
"8>$eight;"
"9>$nine;"
// Any apostrophe not consumed by an earlier rule is deleted.
"''>;"
// Trailing normalization directive, also commented out.
//:: NFC (NFD) ;
}
}

View File

@ -0,0 +1,528 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Latin_Jamo.utf8.txt
// Date: Thu Oct 25 22:17:22 2001
//--------------------------------------------------------------------
// Latin_Jamo
translit_Latin_Jamo {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Latin-Jamo
// Transliteration from Latin characters to Korean script is done in
// two steps: Latin to Jamo, then Jamo to Hangul. The Jamo-Hangul
// transliteration is done algorithmically following Unicode 3.0
// section 3.11. This file implements the Latin to Jamo
// transliteration using rules.
// Jamo occupy the block 1100-11FF. Within this block there are three
// groups of characters: initial consonants or choseong (I), medial
// vowels or jungseong (M), and trailing consonants or jongseong (F).
// Standard Korean syllables are of the form I+M+F*.
// Section 3.11 describes the use of 'filler' jamo to convert
// nonstandard syllables to standard form: the choseong filler 115F and
// the jungseong filler 1160. In this transliterator, we will not use
// 115F or 1160.
// We will, however, insert two 'null' jamo to make foreign words
// conform to Korean syllable structure. These are the null initial
// consonant 110B (IEUNG) and the null vowel 1173 (EU). In Latin text,
// we will use the hyphen in order to disambiguate strings,
// e.g. "kan-ggan" (initial GG) vs. "kanggan" (final NG + initial G).
// We will not use all of the characters in the jamo block. We will
// only use the 19 initials, 21 medials, and 27 finals possessing a
// jamo short name as defined in section 4.4 of the Unicode book.
// Rules of thumb. These guidelines provide the basic framework
// for the rules. They are phrased in terms of Latin-Jamo transliteration.
// The Jamo-Latin rules derive from these, since the Jamo-Latin rules are
// just context-free transliteration of jamo to corresponding short names,
// with the addition of hyphens to maintain round-trip integrity
// in the context of the Latin-Jamo rules.
// A sequence of vowels:
// - Take the longest sequence you can. If there are too many, or you don't
// have a starting consonant, introduce a 110B as necessary.
// A sequence of consonants.
// - First join the double consonants: G + G -> GG
// - In the remaining list,
// -- If there is no preceding vowel, take the first consonant, and insert EU
// after it. Continue with the rest of the consonants.
// -- If there is one consonant, attach to the following vowel
// -- If there are two consonants and a following vowel, attach one to the
// preceding vowel, and one to the following vowel.
// -- If there are more than two consonants, join the first two together if you
// can: L + G => LG
// -- If you still end up with more than 2 consonants, insert EU after the
// first one, and continue with the rest of the consonants.
//----------------------------------------------------------------------
// Variables
// Some latin consonants or consonant pairs only occur as initials, and
// some only as finals, but some occur as both. This makes some jamo
// consonants ambiguous when transliterated into latin.
// Initial only: IEUNG BB DD JJ R
// Final only: BS GS L LB LG LH LM LP LS LT NG NH NJ
// Initial and Final: B C D G GG H J K M N P S SS T
"$Gi = \u1100;"
"$GGi = \u1101;"
"$Ni = \u1102;"
"$Di = \u1103;"
"$DD = \u1104;"
"$R = \u1105;"
"$Mi = \u1106;"
"$Bi = \u1107;"
"$BB = \u1108;"
"$Si = \u1109;"
"$SSi = \u110A;"
"$IEUNG = \u110B;" // null initial, inserted during Latin-Jamo
"$Ji = \u110C;"
"$JJ = \u110D;"
"$Ci = \u110E;"
"$Ki = \u110F;"
"$Ti = \u1110;"
"$Pi = \u1111;"
"$Hi = \u1112;"
"$A = \u1161;"
"$AE = \u1162;"
"$YA = \u1163;"
"$YAE = \u1164;"
"$EO = \u1165;"
"$E = \u1166;"
"$YEO = \u1167;"
"$YE = \u1168;"
"$O = \u1169;"
"$WA = \u116A;"
"$WAE = \u116B;"
"$OE = \u116C;"
"$YO = \u116D;"
"$U = \u116E;"
"$WEO = \u116F;"
"$WE = \u1170;"
"$WI = \u1171;"
"$YU = \u1172;"
"$EU = \u1173;" // null medial, inserted during Latin-Jamo
"$YI = \u1174;"
"$I = \u1175;"
"$Gf = \u11A8;"
"$GGf = \u11A9;"
"$GS = \u11AA;"
"$Nf = \u11AB;"
"$NJ = \u11AC;"
"$NH = \u11AD;"
"$Df = \u11AE;"
"$L = \u11AF;"
"$LG = \u11B0;"
"$LM = \u11B1;"
"$LB = \u11B2;"
"$LS = \u11B3;"
"$LT = \u11B4;"
"$LP = \u11B5;"
"$LH = \u11B6;"
"$Mf = \u11B7;"
"$Bf = \u11B8;"
"$BS = \u11B9;"
"$Sf = \u11BA;"
"$SSf = \u11BB;"
"$NG = \u11BC;"
"$Jf = \u11BD;"
"$Cf = \u11BE;"
"$Kf = \u11BF;"
"$Tf = \u11C0;"
"$Pf = \u11C1;"
"$Hf = \u11C2;"
"$jamoInitial = [\u1100-\u1112];"
"$jamoMedial = [\u1161-\u1175];"
"$latinInitial = [bcdghjkmnprst];"
// Any character in the latin transliteration of a medial
"$latinMedial = [aeiouwy];"
// The last character of the latin transliteration of a medial
"$latinMedialEnd = [aeiou];"
//----------------------------------------------------------------------
// Jamo-Latin
// Jamo to latin is relatively simple, since it is the latin that is
// ambiguous. Most rules are straightforward, and we encode them below
// as simple add-on back rule, e.g.:
// $jamoMedial {bs} > $BS;
// becomes
// $jamoMedial {bs} <> $BS;
// Furthermore, we don't care about the ordering for Jamo-Latin because
// we are going from single characters, so we can very easily piggyback
// on the Latin-Jamo.
// The main issue with Jamo-Latin is when to insert hyphens.
// Hyphens are inserted to obtain correct round trip behavior. For
// example, the sequence Ki A Gf Gi E, if transliterated to "kagge",
// would then round trip to Ki A GGi E. To prevent this, we insert a
// hyphen: "kag-ge". IMPORTANT: The need for hyphens depends
// very specifically on the behavior of the Latin-Jamo rules. A change
// in the Latin-Jamo behavior can completely change the way the
// hyphen insertion must be done.
// First try to preserve actual hyphens in the jamo text by doubling
// them. This fixes problems like:
// (Di)(A)(Ji)(U)(NG)-(IEUNG)(YEO)(Nf)(Gi)(YEO)(L) => dajung-yeongyeol
// => (Di)(A)(Ji)(U)(NG)(IEUNG)(YEO)(Nf)(Gi)(YEO)(L). This is optional
// -- if we don't care about losing hyphens in the jamo, we can delete
// this rule.
"'--' <> '-';"
// Triple consonants. For three consonants "axxx" we insert a
// hyphen between the first and second "x" if XXf, Xf, and Xi all
// exist, and we have A Xf XXi. This prevents the reverse
// transliteration to A XXf Xi.
"'-' < $latinMedialEnd g {} $GGi;"
"'-' < $latinMedialEnd s {} $SSi;"
// For vowels the rule is similar. If there is a vowel "ae" such that
// "a" by itself and "e" by itself are vowels, then we want to map A E
// to "a-e" so as not to round trip to AE. However, in the text Ki EO
// IEUNG E we don't need to map to "keo-e". "keoe" suffices. For
// vowels of the form "aei", both "ae" + "i" and "a" + "ei" must be
// tested. NOTE: These rules used to have a left context of
// $latinInitial instead of [^$latinMedial]. The problem with this is
// sequences where an initial IEUNG is transliterated away:
// (IEUNG)(A)(IEUNG)(EO) => aeo => (IEUNG)(AE)(IEUNG)(O)
"'-' < [^$latinMedial] [y w] e {} [$O $OE];"
"'-' < [^$latinMedial] e {} [$O $OE $U];"
"'-' < [^$latinMedial] [o a] {} [$E $EO $EU];"
"'-' < [^$latinMedial] [w y] a {} [$E $EO $EU];"
// Similar to the above, but with an intervening $IEUNG.
"'-' < [^$latinMedial] [y w] e {} $IEUNG [$O $OE];"
"'-' < [^$latinMedial] e {} $IEUNG [$O $OE $U];"
"'-' < [^$latinMedial] [o a] {} $IEUNG [$E $EO $EU];"
"'-' < [^$latinMedial] [w y] a {} $IEUNG [$E $EO $EU];"
// Single finals followed by IEUNG. The jamo sequence A Xf IEUNG E,
// where Xi also exists, must be transliterated as "ax-e" to prevent
// the round trip conversion to A Xi E.
"'-' < $latinMedialEnd b {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd c {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd d {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd g {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd h {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd j {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd k {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd m {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd n {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd p {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd s {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd t {} $IEUNG $jamoMedial;"
// Double finals followed by IEUNG. Similar to the single finals
// followed by IEUNG. Any latin consonant pair X Y, between medials,
// that we would split by Latin-Jamo, we must handle when it occurs as
// part of A XYf IEUNG E, to prevent round trip conversion to A Xf Yi
// E.
"'-' < $latinMedialEnd b s {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd g g {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd g s {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd l b {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd l g {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd l h {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd l m {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd l p {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd l s {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd l t {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd n g {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd n h {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd n j {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd s s {} $IEUNG $jamoMedial;"
// Split doubles. Text of the form A Xf Xi E, where XXi also occurs,
// we transliterate as "ax-xe" to prevent round trip transliteration as
// A XXi E.
"'-' < $latinMedialEnd b {} $Bi $jamoMedial;"
"'-' < $latinMedialEnd d {} $Di $jamoMedial;"
"'-' < $latinMedialEnd j {} $Ji $jamoMedial;"
"'-' < $latinMedialEnd g {} $Gi $jamoMedial;"
"'-' < $latinMedialEnd s {} $Si $jamoMedial;"
// XYY. This corresponds to the XYY rule in Latin-Jamo. By default
// Latin-Jamo maps "xyy" to Xf YYi, to keep YY together. As a result,
// "xyy" forms that correspond to XYf Yi must be transliterated as
// "xy-y".
"'-' < $latinMedialEnd b s {} [$Si $SSi];"
"'-' < $latinMedialEnd g s {} [$Si $SSi];"
"'-' < $latinMedialEnd l b {} [$Bi $BB];"
"'-' < $latinMedialEnd l g {} [$Gi $GGi];"
"'-' < $latinMedialEnd l s {} [$Si $SSi];"
"'-' < $latinMedialEnd n g {} [$Gi $GGi];"
"'-' < $latinMedialEnd n j {} [$Ji $JJ];"
// Deletion of IEUNG is handled below.
//----------------------------------------------------------------------
// Latin-Jamo
// [Basic, context-free Jamo-Latin rules are embedded here too. See
// above.]
// Split digraphs: Text of the form 'axye', where 'xy' is a final
// digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and
// 'e' are medials, we want to transliterate this as A Xf Yi E rather
// than A XYf IEUNG E. We do NOT include text of the form "axxe",
// since that is handled differently below. These rules are generated
// programmatically from the jamo data.
"$jamoMedial {b s} $latinMedial > $Bf $Si;"
"$jamoMedial {g s} $latinMedial > $Gf $Si;"
"$jamoMedial {l b} $latinMedial > $L $Bi;"
"$jamoMedial {l g} $latinMedial > $L $Gi;"
"$jamoMedial {l h} $latinMedial > $L $Hi;"
"$jamoMedial {l m} $latinMedial > $L $Mi;"
"$jamoMedial {l p} $latinMedial > $L $Pi;"
"$jamoMedial {l s} $latinMedial > $L $Si;"
"$jamoMedial {l t} $latinMedial > $L $Ti;"
"$jamoMedial {n g} $latinMedial > $Nf $Gi;"
"$jamoMedial {n h} $latinMedial > $Nf $Hi;"
"$jamoMedial {n j} $latinMedial > $Nf $Ji;"
// Single consonants are initials: Text of the form 'axe', where 'x'
// can be an initial or a final, and 'a' and 'e' are medials, we want
// to transliterate as A Xi E rather than A Xf IEUNG E.
"$jamoMedial {b} $latinMedial > $Bi;"
"$jamoMedial {c} $latinMedial > $Ci;"
"$jamoMedial {d} $latinMedial > $Di;"
"$jamoMedial {g} $latinMedial > $Gi;"
"$jamoMedial {h} $latinMedial > $Hi;"
"$jamoMedial {j} $latinMedial > $Ji;"
"$jamoMedial {k} $latinMedial > $Ki;"
"$jamoMedial {m} $latinMedial > $Mi;"
"$jamoMedial {n} $latinMedial > $Ni;"
"$jamoMedial {p} $latinMedial > $Pi;"
"$jamoMedial {s} $latinMedial > $Si;"
"$jamoMedial {t} $latinMedial > $Ti;"
// Doubled initials. The sequence "axxe", where XX exists as an initial
// (XXi), and also Xi and Xf exist (true of all digraphs XX), we want
// to transliterate as A XXi E, rather than split to A Xf Xi E.
"$jamoMedial {b b} $latinMedial > $BB;"
"$jamoMedial {d d} $latinMedial > $DD;"
"$jamoMedial {j j} $latinMedial > $JJ;"
"$jamoMedial {g g} $latinMedial > $GGi;"
"$jamoMedial {s s} $latinMedial > $SSi;"
// XYY. Because doubled consonants bind more strongly than XY
// consonants, we must handle the sequence "axyy" specially. Here XYf
// and YYi must exist. In these cases, we map to Xf YYi rather than
// XYf.
"$jamoMedial {b} s s > $Bf;"
"$jamoMedial {g} s s > $Gf;"
"$jamoMedial {l} b b > $L;"
"$jamoMedial {l} g g > $L;"
"$jamoMedial {l} s s > $L;"
"$jamoMedial {n} g g > $Nf;"
"$jamoMedial {n} j j > $Nf;"
// Finals: Attach consonant with preceding medial to preceding medial.
// Do this BEFORE mapping consonants to initials. Longer keys must
// precede shorter keys that they start with, e.g., the rule for 'bs'
// must precede 'b'.
// [BASIC Jamo-Latin FINALS handled here. Order irrelevant within this
// block for Jamo-Latin.]
"$jamoMedial {bs} <> $BS;"
"$jamoMedial {b} <> $Bf;"
"$jamoMedial {c} <> $Cf;"
"$jamoMedial {d} <> $Df;"
"$jamoMedial {gg} <> $GGf;"
"$jamoMedial {gs} <> $GS;"
"$jamoMedial {g} <> $Gf;"
"$jamoMedial {h} <> $Hf;"
"$jamoMedial {j} <> $Jf;"
"$jamoMedial {k} <> $Kf;"
"$jamoMedial {lb} <> $LB; $jamoMedial {lg} <> $LG;"
"$jamoMedial {lh} <> $LH;"
"$jamoMedial {lm} <> $LM;"
"$jamoMedial {lp} <> $LP;"
"$jamoMedial {ls} <> $LS;"
"$jamoMedial {lt} <> $LT;"
"$jamoMedial {l} <> $L;"
"$jamoMedial {m} <> $Mf;"
"$jamoMedial {ng} <> $NG;"
"$jamoMedial {nh} <> $NH;"
"$jamoMedial {nj} <> $NJ;"
"$jamoMedial {n} <> $Nf;"
"$jamoMedial {p} <> $Pf;"
"$jamoMedial {ss} <> $SSf;"
"$jamoMedial {s} <> $Sf;"
"$jamoMedial {t} <> $Tf;"
// Initials: Attach single consonant to following medial. Do this
// AFTER mapping finals. Longer keys must precede shorter keys that
// they start with, e.g., the rule for 'gg' must precede 'g'.
// [BASIC Jamo-Latin INITIALS handled here. Order irrelevant within
// this block for Jamo-Latin.]
"{gg} $latinMedial <> $GGi;"
"{g} $latinMedial <> $Gi;"
"{n} $latinMedial <> $Ni;"
"{dd} $latinMedial <> $DD;"
"{d} $latinMedial <> $Di;"
"{r} $latinMedial <> $R;"
"{m} $latinMedial <> $Mi;"
"{bb} $latinMedial <> $BB;"
"{b} $latinMedial <> $Bi;"
"{ss} $latinMedial <> $SSi;"
"{s} $latinMedial <> $Si;"
"{jj} $latinMedial <> $JJ;"
"{j} $latinMedial <> $Ji;"
"{c} $latinMedial <> $Ci;"
"{k} $latinMedial <> $Ki;"
"{t} $latinMedial <> $Ti;"
"{p} $latinMedial <> $Pi;"
"{h} $latinMedial <> $Hi;"
// 'r' in final position. Because of the equivalency of the 'l' and
// 'r' jamo (the glyphs are the same), we try to provide the same
// equivalency in Latin-Jamo. The 'l' to 'r' conversion is handled
// below. If we see an 'r' in an apparent final position, treat it
// like 'l'. For example, "karka" => Ki A R EU Ki A without this rule.
// Instead, we want Ki A L Ki A.
"$jamoMedial {r} $latinInitial > | l;"
// Initial + Final: If we match the next rule, we have initial then
// final consonant with no intervening medial. We insert the null
// vowel BEFORE it to create a well-formed syllable. (In the next rule
// we insert a null vowel AFTER an anomalous initial.)
"$jamoInitial {} [bcdghjklmnpst] > $EU;"
// Initial + X: This block matches an initial consonant not followed by
// a medial. We insert the null vowel after it. We handle double
// initials explicitly here; for single initial consonants we insert EU
// (as Latin) after them and let standard rules do the rest.
// BREAKS ROUND TRIP INTEGRITY
"gg > $GGi $EU;"
"dd > $DD $EU;"
"bb > $BB $EU;"
"ss > $SSi $EU;"
"jj > $JJ $EU;"
"([bcdghjkmnprst]) > | $1 eu;"
// X + Final: Finally we have to deal with a consonant that can only be
// interpreted as a final (not an initial) and which is preceded
// neither by an initial nor a medial. It is the start of the
// syllable, but cannot be. Most of these will already be handled by
// the above rules. 'bs' splits into Bi EU Sf. Similar for 'gs' 'ng'
// 'nh' 'nj'. The only problem is 'l' and digraphs starting with 'l'.
// For this isolated case, we could add a null initial and medial,
// which would give "la" => IEUNG EU L IEUNG A, for example. A more
// economical solution is to transliterate isolated "l" (that is,
// initial "l") to "r". (Other similar conversions of consonants that
// occur neither as initials nor as finals are handled below.)
"l > | r;"
// Medials. If a medial is preceded by an initial, then we proceed
// normally. As usual, longer keys must precede shorter ones.
// [BASIC Jamo-Latin MEDIALS handled here. Order irrelevant within
// this block for Jamo-Latin.]
"$jamoInitial {ae} <> $AE;"
"$jamoInitial {a} <> $A;"
"$jamoInitial {eo} <> $EO;"
"$jamoInitial {eu} <> $EU;"
"$jamoInitial {e} <> $E;"
"$jamoInitial {i} <> $I;"
"$jamoInitial {oe} <> $OE;"
"$jamoInitial {o} <> $O;"
"$jamoInitial {u} <> $U;"
"$jamoInitial {wae} <> $WAE;"
"$jamoInitial {wa} <> $WA;"
"$jamoInitial {weo} <> $WEO;"
"$jamoInitial {we} <> $WE;"
"$jamoInitial {wi} <> $WI;"
"$jamoInitial {yae} <> $YAE;"
"$jamoInitial {ya} <> $YA;"
"$jamoInitial {yeo} <> $YEO;"
"$jamoInitial {ye} <> $YE;"
"$jamoInitial {yi} <> $YI;"
"$jamoInitial {yo} <> $YO;"
"$jamoInitial {yu} <> $YU;"
// We may see an anomalous isolated 'w' or 'y'. In that case, we
// interpret it as 'wi' and 'yu', respectively.
// BREAKS ROUND TRIP INTEGRITY
"$jamoInitial {w} > | wi;"
"$jamoInitial {y} > | yu;"
// Otherwise, insert a null consonant IEUNG before the medial (which is
// still an untransliterated latin vowel).
"($latinMedial) > $IEUNG | $1;"
// Convert non-jamo latin consonants to equivalents. These occur as
// neither initials nor finals in jamo. 'l' occurs as a final, but not
// an initial; it is handled above. The following letters (left hand
// side) will never be output by Jamo-Latin.
"f > | p;"
"q > | k;"
"v > | b;"
"x > | ks;"
"z > | s;"
// Delete hyphens (Latin-Jamo).
"'-' > ;"
// Delete null consonants (Jamo-Latin). Do NOT delete null EU vowels,
// since these may also occur in text.
"< $IEUNG;"
// eof
}
}

View File

@ -0,0 +1,470 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Latin_Katakana.txt
// Date: Thu Oct 25 22:17:22 2001
//--------------------------------------------------------------------
// Latin_Katakana
translit_Latin_Katakana {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// $Source: /xsrl/Nsvn/icu/icu/data/Attic/translit_Latin_Katakana.txt,v $
// $Date: 2001/10/26 05:41:16 $
// $Revision: 1.1 $
//--------------------------------------------------------------------
// Purpose: bidirectional rule set between romaji and katakana. The
// transliterator index registers it as "Latin-Katakana" (FORWARD) and
// "Katakana-Latin" (REVERSE).
// Rule operators used below:
//   a > b    forward only (Latin to Katakana)
//   a < b    reverse only (Katakana to Latin)
//   a <> b   both directions
//   x } y    x must be followed by y (y is context, not replaced)
//   |        cursor position in the output; text after it is re-scanned
// Rule order matters: longer keys precede the shorter keys they start
// with, and the catch-all single-consonant rules come last.
//--------------------------------------------------------------------
"::NFD (NFC) ;"
":: [:Latin:] Lower ();"
// Uses modified Hepburn. Small changes to make unambiguous.
// | Kunrei-shiki: Hepburn/MHepburn
// | ------------------------------
// | si: shi
// | si ~ya: sha
// | si ~yu: shu
// | si ~yo: sho
// | zi: ji
// | zi ~ya: ja
// | zi ~yu: ju
// | zi ~yo: jo
// | ti: chi
// | ti ~ya: cha
// | ti ~yu: chu
// | ti ~yo: cho
// | tu: tsu
// | di: ji/dji
// | du: zu/dzu
// | hu: fu
// | For foreign words:
// | -----------------
// | se ~i si
// | si ~e she
// |
// | ze ~i zi
// | zi ~e je
// |
// | te ~i ti
// | ti ~e che
// | te ~u tu
// |
// | de ~i di
// | de ~u du
// | de ~i di
// |
// | he ~u: hu
// | hu ~a fa
// | hu ~i fi
// | hu ~e fe
// | hu ~o fo
// Most small forms are generated, but if necessary
// explicit small forms are given with ~a, ~ya, etc.
//------------------------------------------------------
// Variables
"$vowel = [aeiou] ;"
"$macron = \u0304 ;"
// Variables used for doubled-consonants with tsu
"$kana = [\u3041-\u3094] ;"
"$voice = [\u3099\u309B];"
"$semivoice = [\u309A\u309C];"
"$k_start = [カキクケコかきくけこ] ;"
"$s_start = [サシスセソさしすせそ] ;"
"$j_start = [シし] $voice ;"
"$t_start = [タチツテトたちつてと] ;"
"$n_start = [ナニヌネノンなにぬねの] ;"
"$h_start = [ハヒヘホはひへほ] ;"
"$f_start = [フふ] ;"
"$m_start = [マミムメモまみむめも] ;"
"$y_start = [ヤユヨやゆよ] ;"
"$r_start = [ラリルレロらりるれろ] ;"
"$w_start = [ワヰヱヲわゐゑを] ;"
"$v_start = [ワヰヱヲ]゙ ;"
// if ン is followed by $n_quoter, then it needs an
// apostrophe after its romaji form to disambiguate it.
// e.g., ン ア != ナ, so represent as "n'a", not "na".
// ノ is listed with the rest of the n-row: without it ンノ would be
// romanized as "nno", which the doubled-consonant rule "n } n" below
// reads back as ッノ instead of ンノ.
"$n_quoter = [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ;"
"$small_y = [ャィュェョ] ;"
"$iteration = \u309D ;"
//------------------------------------------------------
// katakana rules
// Punctuation
"'.' <> 。;"
"',' <> 、;"
// ' ' } [a-z] > ; # delete spaces before latin
// ' ' < [^' '\u30A0-\u30ff] {} ['\u30A0-\u30ff] ; #insert spaces before hiragana
// Iteration Mark
// Copy previous letter & marks
// TODO
// | $1 $1 < ($kana [[:M:]$voice$semivoice]?) $iteration
// Specials for katakana -- not shared with hiragana
"va <> ヷ ;"
"vi <> ヸ ;"
"ve <> ヹ ;"
"vo <> ヺ ;"
"'~ka' <> ヵ ;"
"'~ke' <> ヶ ;"
// ~~~ begin shared rules ~~~
//special
"ya < '~'ャ;"
"yi < '~'ィ ;"
"yu < '~'ュ;"
"ye < '~'ェ;"
"yo < '~'ョ;"
//normal
"a <> ア ;"
"b | '~' < ヒ ゙} $small_y ;"
"by } $vowel > ビ | '~y' ;"
"ba <> バ ;"
"bi <> ビ ;"
"bu <> ブ ;"
"be <> ベ ;"
"bo <> ボ ;"
"c } i > | s ;"
"c } e > | s ;"
"da <> ダ ;"
"di <> ディ ;"
"du <> デゥ ;"
"de <> デ ;"
"do <> ド ;"
"dzu <> ヅ ;"
"dja < ヂャ ;"
"dji'~i' < ヂィ ;" // liu
"dju < ヂュ ;"
"dje < ヂェ ;"
"djo < ヂョ ;"
"dji <> ヂ ;"
"dj } $vowel > ヂ | '~y' ;"
// TODO: QUESTION: use ĵĴżŻ instead of dj, dz
"cha < チャ ;"
"chi'~i' < チィ ;" // liu
"chu < チュ ;"
"che < チェ ;"
"cho < チョ ;"
"chi <> チ ;"
"ch } $vowel > チ | '~y' ;"
"e <> エ ;"
"g | '~' < ギ} $small_y ;"
"gy } $vowel > ギ | '~y' ;"
"ga <> ガ ;"
"gi <> ギ ;"
"gu <> グ ;"
"ge <> ゲ ;"
"go <> ゴ ;"
"i <> イ ;"
// j } $vowel > ジ | '~y' ;
"ja <> ジャ ;"
"ji'~i' < ジィ ;" // liu
"ju <> ジュ ;"
"je <> ジェ ;"
"jo <> ジョ ;"
"ji <> ジ ;"
"k | '~' < キ} $small_y ;"
"ky } $vowel > キ | '~y' ;"
"ka <> カ ;"
"ki <> キ ;"
"ku <> ク ;"
"ke <> ケ ;"
"ko <> コ ;"
"m | '~' < ミ} $small_y ;"
"my } $vowel > ミ | '~y' ;"
"ma <> マ ;"
"mi <> ミ ;"
"mu <> ム ;"
"me <> メ ;"
"mo <> モ ;"
"m } [pbfv] > ン ;"
"n | '~' < ニ } $small_y ;"
"ny } $vowel > ニ | '~y' ;"
"na <> ナ ;"
"ni <> ニ ;"
"nu <> ヌ ;"
"ne <> ネ ;"
// The katakana side must be ノ; an empty right-hand side would delete
// "no" going forward and leave ノ without a romanization going back.
"no <> ノ ;"
"o <> オ ;"
"p | '~' < ピ } $small_y ;"
"py } $vowel > ピ | '~y' ;"
"pa <> パ ;"
"pi <> ピ ;"
"pu <> プ ;"
"pe <> ペ ;"
"po <> ポ ;"
"h | '~' < ヒ } $small_y ;"
"hy } $vowel > ヒ | '~y' ;"
"ha <> ハ ;"
"hi <> ヒ ;"
"hu <> ヘゥ ;"
"he <> ヘ ;"
"ho <> ホ ;"
// f | '~' < フ } $small_y ;
// f } $vowel > フ | '~' ;
"fa <> ファ ;"
"fi <> フィ ;"
"fe <> フェ ;"
"fo <> フォ ;"
"fu <> フ ;"
"r | '~' < リ } $small_y ;"
"ry } $vowel > リ | '~y' ;"
"ra <> ラ ;"
"ri <> リ ;"
"ru <> ル ;"
"re <> レ ;"
"ro <> ロ ;"
"za <> ザ ;"
"zi <> ゼィ ;"
"zu <> ズ ;"
"ze <> ゼ ;"
"zo <> ゾ ;"
"sa <> サ ;"
"si <> セィ ;"
"su <> ス ;"
"se <> セ ;"
"so <> ソ ;"
"sha < シャ ;"
"shi'~i' < シィ ;" // liu
"shu < シュ ;"
"she < シェ ;"
"sho < ショ ;"
"shi <> シ ;"
"sh } $vowel > シ | '~y' ;"
"ta <> タ ;"
"ti <> ティ ;"
"tu <> テゥ ;"
"te <> テ ;"
"to <> ト ;"
"tsu <> ツ ;"
// v } $vowel > ヴ | '~' ;
//'v~a' < ヴァ ; # liu
//'v~i' < ヴィ ; # liu
//'v~e' < ヴェ ; # liu
//'v~o' < ヴォ ; # liu
"vu <> ヴ ;"
"u <> ウ ;"
// w } $vowel > ウ | '~' ;
"wa <> ワ ;"
"wi <> ヰ ;"
"wu > ウ ;"
"we <> ヱ ;"
"wo <> ヲ ;"
"ya <> ヤ ;"
"yi > イ ;"
"yu <> ユ ;"
"ye > エ ;"
"yo <> ヨ ;"
// double consonants
//specials
"s } sh > ッ ;"
"t } ch > ッ ;"
//voiced
"j } j <> ッ } $j_start ;"
"b } b <> ッ } [$h_start$f_start] $voice;"
"d } d <> ッ } $t_start $voice;"
"g } g <> ッ } $k_start $voice;"
"p } p <> ッ } [$h_start$f_start] $semivoice;"
// v } v <> ッ } [ワヰウヱヲう] $voice ;
"z } z <> ッ } $s_start $voice;"
"v } v <> ッ } $v_start;"
// normal
"k } k <> ッ } $k_start ;"
"m } m <> ッ } $m_start ;"
"n } n <> ッ } $n_start ;"
"h } h <> ッ } $h_start ;"
"f } f <> ッ } $f_start ;"
"r } r <> ッ } $r_start ;"
"t } t <> ッ } $t_start ;"
"s } s <> ッ } $s_start ;"
"w } w <> ッ } $w_start;"
"y } y <> ッ } $y_start;"
// completeness
"x } x > ッ ;"
"c } k > ッ ;"
"c } c > ッ ;"
"c } q > ッ ;"
"l } l > ッ ;"
"q } q > ッ ;"
// y } y > ッ ;
// w } w > ッ ;
// prolonged vowel mark. this indicates a doubling of
// the preceding vowel sound
//a < a { ー ; # liu
//e < e { ー ; # liu
//i < i { ー ; # liu
//o < o { ー ; # liu
//u < u { ー ; # liu
"$macron <> ー ;"
// small forms
"'~a' <> ァ ;"
"'~i' <> ィ ;"
"'~u' <> ゥ ;"
"'~e' <> ェ ;"
"'~o' <> ォ ;"
"'~tsu' <> ッ ;"
"'~wa' <> ヮ ;"
"'~ya' <> ャ ;"
"'~yi' > ィ ;"
"'~yu' <> ュ ;"
"'~ye' > ェ ;"
"'~yo' <> ョ ;"
// h- rule: lengthens vowel if not followed by a vowel
"[aeiou] } h > ー ;"
// one-way latin- > kana rules. these do not occur in
// well-formed romaji representing actual japanese text.
// their purpose is to make all romaji map to kana of
// some sort.
// the following are not really necessary, but produce
// slightly more natural results.
"cy > セィ ;"
"dy > ディ ;"
"hy > ヒ ;"
"sy > セィ ;"
"ty > ティ ;"
"zy > ゼィ ;"
"h > ヘ ;"
// isolated consonants listed here so as not to mask
// longer rules above.
"ch > チ;"
"sh > シ ;"
"dz > ヅ ;"
"dj > ヂ;"
"b > ブ ;"
"d > デ ;"
"g > グ ;"
"k > ク ;"
"m > ム ;"
"n'' < ン } $n_quoter ;"
"n <> ン ;"
"p > プ ;"
"r > ル ;"
"s > ス ;"
"t > テ ;"
"y > イ ;"
"z > ズ ;"
"v > ヴ ;"
"f > フ;"
"j > ジ;"
"w > ウ;"
// simple substitutions using backup
"c > | k ;"
"l > | r ;"
"q > | k ;"
"x > | ks ;"
// ~~~ END shared rules ~~~
//------------------------------------------------------
// Final cleanup
"'~' > ;" // delete stray tildes between letters
"[:Katakana:] { '' } [:Latin:] > ;" // delete stray quotes between letters
":: NFC (NFD) ;"
// eof
}
}

View File

@ -0,0 +1,115 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Malayalam_InterIndic.txt
// Date: Thu Oct 25 22:17:22 2001
//--------------------------------------------------------------------
// Malayalam_InterIndic
translit_Malayalam_InterIndic {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Malayalam_InterIndic.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// Malayalam_InterIndic
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:06 2001
//--------------------------------------------------------------------
// Malayalam-InterIndic
// Purpose: one-way (">" only) table that rewrites each listed Malayalam
// code point U+0Dxx to the private-use code point U+E0xx with the same
// low byte. Code points with no rule here are left untouched.
// NOTE(review): U+E0xx is presumably the shared InterIndic pivot
// encoding consumed by the InterIndic_* rule files -- confirm there.
// The NFD/NFC normalization passes below are commented out, so no
// normalization is applied by this rule set.
//:: NFD (NFC) ;
"\u0d02>\ue002;" // SIGN ANUSVARA
"\u0d03>\ue003;" // SIGN VISARGA
"\u0d05>\ue005;" // LETTER A
"\u0d06>\ue006;" // LETTER AA
"\u0d07>\ue007;" // LETTER I
"\u0d08>\ue008;" // LETTER II
"\u0d09>\ue009;" // LETTER U
"\u0d0a>\ue00a;" // LETTER UU
"\u0d0b>\ue00b;" // LETTER VOCALIC R
"\u0d0c>\ue00c;" // LETTER VOCALIC L
"\u0d0e>\ue00e;" // LETTER E
"\u0d0f>\ue00f;" // LETTER EE
"\u0d10>\ue010;" // LETTER AI
"\u0d12>\ue012;" // LETTER O
"\u0d13>\ue013;" // LETTER OO
"\u0d14>\ue014;" // LETTER AU
"\u0d15>\ue015;" // LETTER KA
"\u0d16>\ue016;" // LETTER KHA
"\u0d17>\ue017;" // LETTER GA
"\u0d18>\ue018;" // LETTER GHA
"\u0d19>\ue019;" // LETTER NGA
"\u0d1a>\ue01a;" // LETTER CA
"\u0d1b>\ue01b;" // LETTER CHA
"\u0d1c>\ue01c;" // LETTER JA
"\u0d1d>\ue01d;" // LETTER JHA
"\u0d1e>\ue01e;" // LETTER NYA
"\u0d1f>\ue01f;" // LETTER TTA
"\u0d20>\ue020;" // LETTER TTHA
"\u0d21>\ue021;" // LETTER DDA
"\u0d22>\ue022;" // LETTER DDHA
"\u0d23>\ue023;" // LETTER NNA
"\u0d24>\ue024;" // LETTER TA
"\u0d25>\ue025;" // LETTER THA
"\u0d26>\ue026;" // LETTER DA
"\u0d27>\ue027;" // LETTER DHA
"\u0d28>\ue028;" // LETTER NA
"\u0d2a>\ue02a;" // LETTER PA
"\u0d2b>\ue02b;" // LETTER PHA
"\u0d2c>\ue02c;" // LETTER BA
"\u0d2d>\ue02d;" // LETTER BHA
"\u0d2e>\ue02e;" // LETTER MA
"\u0d2f>\ue02f;" // LETTER YA
"\u0d30>\ue030;" // LETTER RA
"\u0d31>\ue031;" // LETTER RRA
"\u0d32>\ue032;" // LETTER LA
"\u0d33>\ue033;" // LETTER LLA
"\u0d34>\ue034;" // LETTER LLLA
"\u0d35>\ue035;" // LETTER VA
"\u0d36>\ue036;" // LETTER SHA
"\u0d37>\ue037;" // LETTER SSA
"\u0d38>\ue038;" // LETTER SA
"\u0d39>\ue039;" // LETTER HA
"\u0d3e>\ue03e;" // VOWEL SIGN AA
"\u0d3f>\ue03f;" // VOWEL SIGN I
"\u0d40>\ue040;" // VOWEL SIGN II
"\u0d41>\ue041;" // VOWEL SIGN U
"\u0d42>\ue042;" // VOWEL SIGN UU
"\u0d43>\ue043;" // VOWEL SIGN VOCALIC R
"\u0d46>\ue046;" // VOWEL SIGN E
"\u0d47>\ue047;" // VOWEL SIGN EE
"\u0d48>\ue048;" // VOWEL SIGN AI
"\u0d4a>\ue04a;" // VOWEL SIGN O
"\u0d4b>\ue04b;" // VOWEL SIGN OO
"\u0d4c>\ue04c;" // VOWEL SIGN AU
"\u0d4d>\ue04d;" // SIGN VIRAMA
"\u0d57>\ue057;" // AU LENGTH MARK
"\u0d60>\ue060;" // LETTER VOCALIC RR
"\u0d61>\ue061;" // LETTER VOCALIC LL
"\u0d66>\ue066;" // DIGIT ZERO
"\u0d67>\ue067;" // DIGIT ONE
"\u0d68>\ue068;" // DIGIT TWO
"\u0d69>\ue069;" // DIGIT THREE
"\u0d6a>\ue06a;" // DIGIT FOUR
"\u0d6b>\ue06b;" // DIGIT FIVE
"\u0d6c>\ue06c;" // DIGIT SIX
"\u0d6d>\ue06d;" // DIGIT SEVEN
"\u0d6e>\ue06e;" // DIGIT EIGHT
"\u0d6f>\ue06f;" // DIGIT NINE
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,116 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Oriya_InterIndic.txt
// Date: Thu Oct 25 22:17:22 2001
//--------------------------------------------------------------------
// Oriya_InterIndic
translit_Oriya_InterIndic {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Oriya_InterIndic.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// Oriya_InterIndic
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:07 2001
//--------------------------------------------------------------------
// Oriya-InterIndic
// Purpose: one-way (">" only) table that rewrites each listed Oriya
// code point U+0Bxx (0B01-0B70) to the private-use code point U+E0xx
// with the same low byte. Code points with no rule here are left
// untouched.
// NOTE(review): U+E0xx is presumably the shared InterIndic pivot
// encoding consumed by the InterIndic_* rule files -- confirm there.
// The NFD/NFC normalization passes below are commented out, so no
// normalization is applied by this rule set.
//:: NFD (NFC) ;
"\u0b01>\ue001;" // SIGN CANDRABINDU
"\u0b02>\ue002;" // SIGN ANUSVARA
"\u0b03>\ue003;" // SIGN VISARGA
"\u0b05>\ue005;" // LETTER A
"\u0b06>\ue006;" // LETTER AA
"\u0b07>\ue007;" // LETTER I
"\u0b08>\ue008;" // LETTER II
"\u0b09>\ue009;" // LETTER U
"\u0b0a>\ue00a;" // LETTER UU
"\u0b0b>\ue00b;" // LETTER VOCALIC R
"\u0b0c>\ue00c;" // LETTER VOCALIC L
"\u0b0f>\ue00f;" // LETTER E
"\u0b10>\ue010;" // LETTER AI
"\u0b13>\ue013;" // LETTER O
"\u0b14>\ue014;" // LETTER AU
"\u0b15>\ue015;" // LETTER KA
"\u0b16>\ue016;" // LETTER KHA
"\u0b17>\ue017;" // LETTER GA
"\u0b18>\ue018;" // LETTER GHA
"\u0b19>\ue019;" // LETTER NGA
"\u0b1a>\ue01a;" // LETTER CA
"\u0b1b>\ue01b;" // LETTER CHA
"\u0b1c>\ue01c;" // LETTER JA
"\u0b1d>\ue01d;" // LETTER JHA
"\u0b1e>\ue01e;" // LETTER NYA
"\u0b1f>\ue01f;" // LETTER TTA
"\u0b20>\ue020;" // LETTER TTHA
"\u0b21>\ue021;" // LETTER DDA
"\u0b22>\ue022;" // LETTER DDHA
"\u0b23>\ue023;" // LETTER NNA
"\u0b24>\ue024;" // LETTER TA
"\u0b25>\ue025;" // LETTER THA
"\u0b26>\ue026;" // LETTER DA
"\u0b27>\ue027;" // LETTER DHA
"\u0b28>\ue028;" // LETTER NA
"\u0b2a>\ue02a;" // LETTER PA
"\u0b2b>\ue02b;" // LETTER PHA
"\u0b2c>\ue02c;" // LETTER BA
"\u0b2d>\ue02d;" // LETTER BHA
"\u0b2e>\ue02e;" // LETTER MA
"\u0b2f>\ue02f;" // LETTER YA
"\u0b30>\ue030;" // LETTER RA
"\u0b32>\ue032;" // LETTER LA
"\u0b33>\ue033;" // LETTER LLA
"\u0b36>\ue036;" // LETTER SHA
"\u0b37>\ue037;" // LETTER SSA
"\u0b38>\ue038;" // LETTER SA
"\u0b39>\ue039;" // LETTER HA
"\u0b3c>\ue03c;" // SIGN NUKTA
"\u0b3d>\ue03d;" // SIGN AVAGRAHA
"\u0b3e>\ue03e;" // VOWEL SIGN AA
"\u0b3f>\ue03f;" // VOWEL SIGN I
"\u0b40>\ue040;" // VOWEL SIGN II
"\u0b41>\ue041;" // VOWEL SIGN U
"\u0b42>\ue042;" // VOWEL SIGN UU
"\u0b43>\ue043;" // VOWEL SIGN VOCALIC R
"\u0b47>\ue047;" // VOWEL SIGN E
"\u0b48>\ue048;" // VOWEL SIGN AI
"\u0b4b>\ue04b;" // VOWEL SIGN O
"\u0b4c>\ue04c;" // VOWEL SIGN AU
"\u0b4d>\ue04d;" // SIGN VIRAMA
"\u0b56>\ue056;" // AI LENGTH MARK
"\u0b57>\ue057;" // AU LENGTH MARK
"\u0b5c>\ue05c;" // LETTER RRA
"\u0b5d>\ue05d;" // LETTER RHA
"\u0b5f>\ue05f;" // LETTER YYA
"\u0b60>\ue060;" // LETTER VOCALIC RR
"\u0b61>\ue061;" // LETTER VOCALIC LL
"\u0b66>\ue066;" // DIGIT ZERO
"\u0b67>\ue067;" // DIGIT ONE
"\u0b68>\ue068;" // DIGIT TWO
"\u0b69>\ue069;" // DIGIT THREE
"\u0b6a>\ue06a;" // DIGIT FOUR
"\u0b6b>\ue06b;" // DIGIT FIVE
"\u0b6c>\ue06c;" // DIGIT SIX
"\u0b6d>\ue06d;" // DIGIT SEVEN
"\u0b6e>\ue06e;" // DIGIT EIGHT
"\u0b6f>\ue06f;" // DIGIT NINE
"\u0b70>\ue070;" // ISSHAR
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,98 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Tamil_InterIndic.txt
// Date: Thu Oct 25 22:17:22 2001
//--------------------------------------------------------------------
// Tamil_InterIndic
translit_Tamil_InterIndic {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Tamil_InterIndic.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// Tamil_InterIndic
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:07 2001
//--------------------------------------------------------------------
// Tamil-InterIndic
// Purpose: one-way (">" only) table that rewrites each listed Tamil
// code point U+0Bxx (0B82-0BEF) to the private-use code point U+E0xx
// obtained by subtracting 0x80 from the low byte (0B82 -> E002, ...,
// 0BEF -> E06F). Code points with no rule here are left untouched.
// NOTE(review): U+E0xx is presumably the shared InterIndic pivot
// encoding consumed by the InterIndic_* rule files -- confirm there.
// The digit rules start at DIGIT ONE (0BE7); there is no rule for a
// Tamil digit zero, and the number signs 0BF0-0BF2 are deliberately
// left unmapped (see the commented-out rules at the end).
// The NFD/NFC normalization passes below are commented out, so no
// normalization is applied by this rule set.
//:: NFD (NFC) ;
"\u0b82>\ue002;" // SIGN ANUSVARA
"\u0b83>\ue003;" // SIGN VISARGA
"\u0b85>\ue005;" // LETTER A
"\u0b86>\ue006;" // LETTER AA
"\u0b87>\ue007;" // LETTER I
"\u0b88>\ue008;" // LETTER II
"\u0b89>\ue009;" // LETTER U
"\u0b8a>\ue00a;" // LETTER UU
"\u0b8e>\ue00e;" // LETTER E
"\u0b8f>\ue00f;" // LETTER EE
"\u0b90>\ue010;" // LETTER AI
"\u0b92>\ue012;" // LETTER O
"\u0b93>\ue013;" // LETTER OO
"\u0b94>\ue014;" // LETTER AU
"\u0b95>\ue015;" // LETTER KA
"\u0b99>\ue019;" // LETTER NGA
"\u0b9a>\ue01a;" // LETTER CA
"\u0b9c>\ue01c;" // LETTER JA
"\u0b9e>\ue01e;" // LETTER NYA
"\u0b9f>\ue01f;" // LETTER TTA
"\u0ba3>\ue023;" // LETTER NNA
"\u0ba4>\ue024;" // LETTER TA
"\u0ba8>\ue028;" // LETTER NA
"\u0ba9>\ue029;" // LETTER NNNA
"\u0baa>\ue02a;" // LETTER PA
"\u0bae>\ue02e;" // LETTER MA
"\u0baf>\ue02f;" // LETTER YA
"\u0bb0>\ue030;" // LETTER RA
"\u0bb1>\ue031;" // LETTER RRA
"\u0bb2>\ue032;" // LETTER LA
"\u0bb3>\ue033;" // LETTER LLA
"\u0bb4>\ue034;" // LETTER LLLA
"\u0bb5>\ue035;" // LETTER VA
"\u0bb7>\ue037;" // LETTER SSA
"\u0bb8>\ue038;" // LETTER SA
"\u0bb9>\ue039;" // LETTER HA
"\u0bbe>\ue03e;" // VOWEL SIGN AA
"\u0bbf>\ue03f;" // VOWEL SIGN I
"\u0bc0>\ue040;" // VOWEL SIGN II
"\u0bc1>\ue041;" // VOWEL SIGN U
"\u0bc2>\ue042;" // VOWEL SIGN UU
"\u0bc6>\ue046;" // VOWEL SIGN E
"\u0bc7>\ue047;" // VOWEL SIGN EE
"\u0bc8>\ue048;" // VOWEL SIGN AI
"\u0bca>\ue04a;" // VOWEL SIGN O
"\u0bcb>\ue04b;" // VOWEL SIGN OO
"\u0bcc>\ue04c;" // VOWEL SIGN AU
"\u0bcd>\ue04d;" // SIGN VIRAMA
"\u0bd7>\ue057;" // AU LENGTH MARK
"\u0be7>\ue067;" // DIGIT ONE
"\u0be8>\ue068;" // DIGIT TWO
"\u0be9>\ue069;" // DIGIT THREE
"\u0bea>\ue06a;" // DIGIT FOUR
"\u0beb>\ue06b;" // DIGIT FIVE
"\u0bec>\ue06c;" // DIGIT SIX
"\u0bed>\ue06d;" // DIGIT SEVEN
"\u0bee>\ue06e;" // DIGIT EIGHT
"\u0bef>\ue06f;" // DIGIT NINE
// \u0bf0>; # UNMAPPED Tamil-InterIndic: NUMBER TEN
// \u0bf1>; # UNMAPPED Tamil-InterIndic: NUMBER ONE HUNDRED
// \u0bf2>; # UNMAPPED Tamil-InterIndic: NUMBER ONE THOUSAND
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,117 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Telugu_InterIndic.txt
// Date: Thu Oct 25 22:17:22 2001
//--------------------------------------------------------------------
// Telugu_InterIndic
translit_Telugu_InterIndic {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Telugu_InterIndic.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// Telugu_InterIndic
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:07 2001
//--------------------------------------------------------------------
// Telugu-InterIndic
// Purpose: one-way (">" only) table that rewrites each listed Telugu
// code point U+0Cxx to the private-use code point U+E0xx with the same
// low byte. Code points with no rule here are left untouched.
// NOTE(review): U+E0xx is presumably the shared InterIndic pivot
// encoding consumed by the InterIndic_* rule files -- confirm there.
// The NFD/NFC normalization passes below are commented out, so no
// normalization is applied by this rule set.
//:: NFD (NFC) ;
"\u0c01>\ue001;" // SIGN CANDRABINDU
"\u0c02>\ue002;" // SIGN ANUSVARA
"\u0c03>\ue003;" // SIGN VISARGA
"\u0c05>\ue005;" // LETTER A
"\u0c06>\ue006;" // LETTER AA
"\u0c07>\ue007;" // LETTER I
"\u0c08>\ue008;" // LETTER II
"\u0c09>\ue009;" // LETTER U
"\u0c0a>\ue00a;" // LETTER UU
"\u0c0b>\ue00b;" // LETTER VOCALIC R
"\u0c0c>\ue00c;" // LETTER VOCALIC L
"\u0c0e>\ue00e;" // LETTER E
"\u0c0f>\ue00f;" // LETTER EE
"\u0c10>\ue010;" // LETTER AI
"\u0c12>\ue012;" // LETTER O
"\u0c13>\ue013;" // LETTER OO
"\u0c14>\ue014;" // LETTER AU
"\u0c15>\ue015;" // LETTER KA
"\u0c16>\ue016;" // LETTER KHA
"\u0c17>\ue017;" // LETTER GA
"\u0c18>\ue018;" // LETTER GHA
"\u0c19>\ue019;" // LETTER NGA
"\u0c1a>\ue01a;" // LETTER CA
"\u0c1b>\ue01b;" // LETTER CHA
"\u0c1c>\ue01c;" // LETTER JA
"\u0c1d>\ue01d;" // LETTER JHA
"\u0c1e>\ue01e;" // LETTER NYA
"\u0c1f>\ue01f;" // LETTER TTA
"\u0c20>\ue020;" // LETTER TTHA
"\u0c21>\ue021;" // LETTER DDA
"\u0c22>\ue022;" // LETTER DDHA
"\u0c23>\ue023;" // LETTER NNA
"\u0c24>\ue024;" // LETTER TA
"\u0c25>\ue025;" // LETTER THA
"\u0c26>\ue026;" // LETTER DA
"\u0c27>\ue027;" // LETTER DHA
"\u0c28>\ue028;" // LETTER NA
"\u0c2a>\ue02a;" // LETTER PA
"\u0c2b>\ue02b;" // LETTER PHA
"\u0c2c>\ue02c;" // LETTER BA
"\u0c2d>\ue02d;" // LETTER BHA
"\u0c2e>\ue02e;" // LETTER MA
"\u0c2f>\ue02f;" // LETTER YA
"\u0c30>\ue030;" // LETTER RA
"\u0c31>\ue031;" // LETTER RRA
"\u0c32>\ue032;" // LETTER LA
"\u0c33>\ue033;" // LETTER LLA
"\u0c35>\ue035;" // LETTER VA
"\u0c36>\ue036;" // LETTER SHA
"\u0c37>\ue037;" // LETTER SSA
"\u0c38>\ue038;" // LETTER SA
"\u0c39>\ue039;" // LETTER HA
"\u0c3e>\ue03e;" // VOWEL SIGN AA
"\u0c3f>\ue03f;" // VOWEL SIGN I
"\u0c40>\ue040;" // VOWEL SIGN II
"\u0c41>\ue041;" // VOWEL SIGN U
"\u0c42>\ue042;" // VOWEL SIGN UU
"\u0c43>\ue043;" // VOWEL SIGN VOCALIC R
"\u0c44>\ue044;" // VOWEL SIGN VOCALIC RR
"\u0c46>\ue046;" // VOWEL SIGN E
"\u0c47>\ue047;" // VOWEL SIGN EE
"\u0c48>\ue048;" // VOWEL SIGN AI
"\u0c4a>\ue04a;" // VOWEL SIGN O
"\u0c4b>\ue04b;" // VOWEL SIGN OO
"\u0c4c>\ue04c;" // VOWEL SIGN AU
"\u0c4d>\ue04d;" // SIGN VIRAMA
"\u0c55>\ue055;" // LENGTH MARK
"\u0c56>\ue056;" // AI LENGTH MARK
"\u0c60>\ue060;" // LETTER VOCALIC RR
"\u0c61>\ue061;" // LETTER VOCALIC LL
"\u0c66>\ue066;" // DIGIT ZERO
"\u0c67>\ue067;" // DIGIT ONE
"\u0c68>\ue068;" // DIGIT TWO
"\u0c69>\ue069;" // DIGIT THREE
"\u0c6a>\ue06a;" // DIGIT FOUR
"\u0c6b>\ue06b;" // DIGIT FIVE
"\u0c6c>\ue06c;" // DIGIT SIX
"\u0c6d>\ue06d;" // DIGIT SEVEN
"\u0c6e>\ue06e;" // DIGIT EIGHT
"\u0c6f>\ue06f;" // DIGIT NINE
// :: NFC (NFD) ;
// eof
}
}

View File

@ -5,7 +5,7 @@
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: Transliterator_index.txt
// Date: Fri Mar 2 12:50:49 2001
// Date: Thu Oct 25 22:17:22 2001
//--------------------------------------------------------------------
//--------------------------------------------------------------------
@ -61,77 +61,70 @@ translit_index {
// Bidirectional rule files
{ "Fullwidth-Halfwidth", "file", "fullhalf", "FORWARD" },
{ "Halfwidth-Fullwidth", "file", "fullhalf", "REVERSE" },
{ "Fullwidth-Halfwidth", "file", "translit_Fullwidth_Halfwidth", "FORWARD" },
{ "Halfwidth-Fullwidth", "file", "translit_Fullwidth_Halfwidth", "REVERSE" },
{ "Latin-Arabic", "file", "larabic", "FORWARD" },
{ "Arabic-Latin", "file", "larabic", "REVERSE" },
{ "Latin-Cyrillic", "file", "translit_Cyrillic_Latin", "REVERSE" },
{ "Cyrillic-Latin", "file", "translit_Cyrillic_Latin", "FORWARD" },
{ "Latin-Cyrillic", "file", "lcyril", "FORWARD" },
{ "Cyrillic-Latin", "file", "lcyril", "REVERSE" },
{ "Latin-Greek", "file", "translit_Greek_Latin", "REVERSE" },
{ "Greek-Latin", "file", "translit_Greek_Latin", "FORWARD" },
//{ "Latin-Devanagari", "file", "ldevan", "FORWARD" },
//{ "Devanagari-Latin", "file", "ldevan", "REVERSE" },
{ "LowerLatin-Jamo", "internal", "translit_Latin_Jamo", "FORWARD" },
{ "Latin-Jamo", "alias", "Any-Lower;LowerLatin-Jamo", "" },
{ "Jamo-Latin", "file", "translit_Latin_Jamo", "REVERSE" },
{ "Latin-Greek", "file", "lgreek", "FORWARD" },
{ "Greek-Latin", "file", "lgreek", "REVERSE" },
{ "Latin-Katakana", "file", "translit_Latin_Katakana", "FORWARD" },
{ "Katakana-Latin", "file", "translit_Latin_Katakana", "REVERSE" },
{ "Latin-Hebrew", "file", "lhebrew", "FORWARD" },
{ "Hebrew-Latin", "file", "lhebrew", "REVERSE" },
{ "Latin-Hiragana", "file", "translit_Hiragana_Latin", "REVERSE" },
{ "Hiragana-Latin", "file", "translit_Hiragana_Latin", "FORWARD" },
{ "Latin-Jamo", "file", "ljamo", "FORWARD" },
{ "Jamo-Latin", "file", "ljamo", "REVERSE" },
{ "Hiragana-Katakana", "file", "translit_Hiragana_Katakana", "FORWARD" },
{ "Katakana-Hiragana", "file", "translit_Hiragana_Katakana", "REVERSE" },
{ "Latin-Kana", "file", "lkana", "FORWARD" },
{ "Kana-Latin", "file", "lkana", "REVERSE" },
{ "Any-Accents", "file", "translit_Any_Accents", "FORWARD" },
{ "Accents-Any", "file", "translit_Any_Accents", "REVERSE" },
{ "Hiragana-Katakana", "file", "kana", "FORWARD" },
{ "Katakana-Hiragana", "file", "kana", "REVERSE" },
{ "StraightQuotes-CurlyQuotes", "file", "quotes", "FORWARD" },
{ "CurlyQuotes-StraightQuotes", "file", "quotes", "REVERSE" },
{ "Any-Publishing", "file", "translit_Any_Publishing", "FORWARD" },
{ "Publishing-Any", "file", "translit_Any_Publishing", "REVERSE" },
// One way rules (forward only)
// Java only: { "Han-Pinyin", "file", "-", "FORWARD" },
// Java only: { "Kanji-English", "file", "-", "FORWARD" },
// Java only: { "Kanji-OnRomaji", "file", "-", "FORWARD" },
{ "KeyboardEscape-Latin1", "file", "kbdescl1", "FORWARD" },
// Replaced by algorithmic transliterator:
// { "UnicodeName-UnicodeChar", "file", "ucname", "FORWARD" },
// Compound rules
/// TODO
{ "Latin-Hangul", "alias", "[:Latin:];Latin-Jamo;[\u1100-\u11FF]NFC", "" },
{ "Latin-Hangul", "alias", "[\p{Latin}];Latin-Jamo;[\u1100-\u11FF]NFC", "" },
{ "Hangul-Latin", "alias", "[\uAC00-\uD7AF];NFD;Jamo-Latin", "" },
// Inter-Indic composed rules
{ "Latin-InterIndic", "internal", "Latin_InterIndic", "FORWARD" },
{ "Devanagari-InterIndic", "internal", "Devanagari_InterIndic", "FORWARD" },
{ "Bengali-InterIndic", "internal", "Bengali_InterIndic", "FORWARD" },
{ "Gurmukhi-InterIndic", "internal", "Gurmukhi_InterIndic", "FORWARD" },
{ "Gujarati-InterIndic", "internal", "Gujarati_InterIndic", "FORWARD" },
{ "Oriya-InterIndic", "internal", "Oriya_InterIndic", "FORWARD" },
{ "Tamil-InterIndic", "internal", "Tamil_InterIndic", "FORWARD" },
{ "Telugu-InterIndic", "internal", "Telugu_InterIndic", "FORWARD" },
{ "Kannada-InterIndic", "internal", "Kannada_InterIndic", "FORWARD" },
{ "Malayalam-InterIndic", "internal", "Malayalam_InterIndic", "FORWARD" },
{ "Latin-InterIndic", "internal", "translit_Latin_InterIndic", "FORWARD" },
{ "Devanagari-InterIndic", "internal", "translit_Devanagari_InterIndic", "FORWARD" },
{ "Bengali-InterIndic", "internal", "translit_Bengali_InterIndic", "FORWARD" },
{ "Gurmukhi-InterIndic", "internal", "translit_Gurmukhi_InterIndic", "FORWARD" },
{ "Gujarati-InterIndic", "internal", "translit_Gujarati_InterIndic", "FORWARD" },
{ "Oriya-InterIndic", "internal", "translit_Oriya_InterIndic", "FORWARD" },
{ "Tamil-InterIndic", "internal", "translit_Tamil_InterIndic", "FORWARD" },
{ "Telugu-InterIndic", "internal", "translit_Telugu_InterIndic", "FORWARD" },
{ "Kannada-InterIndic", "internal", "translit_Kannada_InterIndic", "FORWARD" },
{ "Malayalam-InterIndic", "internal", "translit_Malayalam_InterIndic", "FORWARD" },
{ "InterIndic-Latin", "internal", "InterIndic_Latin", "FORWARD" },
{ "InterIndic-Devanagari", "internal", "InterIndic_Devanagari", "FORWARD" },
{ "InterIndic-Bengali", "internal", "InterIndic_Bengali", "FORWARD" },
{ "InterIndic-Gurmukhi", "internal", "InterIndic_Gurmukhi", "FORWARD" },
{ "InterIndic-Gujarati", "internal", "InterIndic_Gujarati", "FORWARD" },
{ "InterIndic-Oriya", "internal", "InterIndic_Oriya", "FORWARD" },
{ "InterIndic-Tamil", "internal", "InterIndic_Tamil", "FORWARD" },
{ "InterIndic-Telugu", "internal", "InterIndic_Telugu", "FORWARD" },
{ "InterIndic-Kannada", "internal", "InterIndic_Kannada", "FORWARD" },
{ "InterIndic-Malayalam", "internal", "InterIndic_Malayalam", "FORWARD" },
{ "InterIndic-Latin", "internal", "translit_InterIndic_Latin", "FORWARD" },
{ "InterIndic-Devanagari", "internal", "translit_InterIndic_Devanagari", "FORWARD" },
{ "InterIndic-Bengali", "internal", "translit_InterIndic_Bengali", "FORWARD" },
{ "InterIndic-Gurmukhi", "internal", "translit_InterIndic_Gurmukhi", "FORWARD" },
{ "InterIndic-Gujarati", "internal", "translit_InterIndic_Gujarati", "FORWARD" },
{ "InterIndic-Oriya", "internal", "translit_InterIndic_Oriya", "FORWARD" },
{ "InterIndic-Tamil", "internal", "translit_InterIndic_Tamil", "FORWARD" },
{ "InterIndic-Telugu", "internal", "translit_InterIndic_Telugu", "FORWARD" },
{ "InterIndic-Kannada", "internal", "translit_InterIndic_Kannada", "FORWARD" },
{ "InterIndic-Malayalam", "internal", "translit_InterIndic_Malayalam", "FORWARD" },
//Latin-X transliterators
{ "Latin-Devanagari", "alias", "NFD;Latin-InterIndic;InterIndic-Devanagari;NFC", "" },
//Latin-Indic transliterators
{ "Latin-Devanagari", "alias", "NFD;Latin-InterIndic;InterIndic-Devanagari;NFC", "" },
{ "Latin-Bengali", "alias", "NFD;Latin-InterIndic;InterIndic-Bengali;NFC", "" },
{ "Latin-Gurmukhi", "alias", "NFD;Latin-InterIndic;InterIndic-Gurmukhi;NFC", "" },
{ "Latin-Gujarati", "alias", "NFD;Latin-InterIndic;InterIndic-Gujarati;NFC", "" },
@ -141,8 +134,8 @@ translit_index {
{ "Latin-Kannada", "alias", "NFD;Latin-InterIndic;InterIndic-Kannada;NFC", "" },
{ "Latin-Malayalam", "alias", "NFD;Latin-InterIndic;InterIndic-Malayalam;NFC", "" },
//X-Latin transliterators
{ "Devanagari-Latin","alias", "NFD;Devanagari-InterIndic;InterIndic-Latin;NFC", "" },
//Indic-Latin transliterators
{ "Devanagari-Latin", "alias", "NFD;Devanagari-InterIndic;InterIndic-Latin;NFC", "" },
{ "Bengali-Latin", "alias", "NFD;Bengali-InterIndic;InterIndic-Latin;NFC", "" },
// Gurmukhi text must go through the Gurmukhi (not Bengali) InterIndic mapping.
{ "Gurmukhi-Latin", "alias", "NFD;Gurmukhi-InterIndic;InterIndic-Latin;NFC", "" },
{ "Gujarati-Latin", "alias", "NFD;Gujarati-InterIndic;InterIndic-Latin;NFC", "" },
@ -152,7 +145,6 @@ translit_index {
{ "Kannada-Latin", "alias", "NFD;Kannada-InterIndic;InterIndic-Latin;NFC", "" },
{ "Malayalam-Latin", "alias", "NFD;Malayalam-InterIndic;InterIndic-Latin;NFC", "" },
{ "Devanagari-Bengali", "alias", "NFD;Devanagari-InterIndic;InterIndic-Bengali;NFC", "" },
{ "Devanagari-Gurmukhi", "alias", "NFD;Devanagari-InterIndic;InterIndic-Gurmukhi;NFC", "" },
{ "Devanagari-Gujarati", "alias", "NFD;Devanagari-InterIndic;InterIndic-Gujarati;NFC", "" },
@ -177,7 +169,7 @@ translit_index {
{ "Gurmukhi-Telugu", "alias", "NFD;Gurmukhi-InterIndic;InterIndic-Telugu;NFC", "" },
{ "Gurmukhi-Kannada", "alias", "NFD;Gurmukhi-InterIndic;InterIndic-Kannada;NFC", "" },
{ "Gurmukhi-Malayalam", "alias", "NFD;Gurmukhi-InterIndic;InterIndic-Malayalam;NFC", "" },
{ "Gujarati-Devanagari", "alias", "NFD;Gujarati-InterIndic;InterIndic-Devanagari;NFC", "" },
// Same NFD -> X-InterIndic -> InterIndic-Y -> NFC chain as every other Indic-Indic alias.
{ "Gujarati-Devanagari", "alias", "NFD;Gujarati-InterIndic;InterIndic-Devanagari;NFC", "" },
{ "Gujarati-Bengali", "alias", "NFD;Gujarati-InterIndic;InterIndic-Bengali;NFC", "" },
{ "Gujarati-Gurmukhi", "alias", "NFD;Gujarati-InterIndic;InterIndic-Gurmukhi;NFC", "" },
{ "Gujarati-Oriya", "alias", "NFD;Gujarati-InterIndic;InterIndic-Oriya;NFC", "" },
@ -227,5 +219,6 @@ translit_index {
{ "Malayalam-Kannada", "alias", "NFD;Malayalam-InterIndic;InterIndic-Kannada;NFC", "" },
// eof
}
}

View File

@ -103,28 +103,34 @@ uk.txt uk_UA.txt\
vi.txt vi_VN.txt\
zh.txt zh__PINYIN.txt zh_CN.txt zh_HK.txt zh_SG.txt zh_TW.txt zh_TW_STROKE.txt
TRANSLIT_SOURCE=fullhalf.txt translit_index.txt kana.txt kbdescl1.txt\
larabic.txt lcyril.txt\
lgreek.txt lhebrew.txt ljamo.txt\
lkana.txt quotes.txt\
Bengali_InterIndic.txt\
Devanagari_InterIndic.txt\
Gujarati_InterIndic.txt\
Gurmukhi_InterIndic.txt\
Kannada_InterIndic.txt\
Malayalam_InterIndic.txt\
Oriya_InterIndic.txt\
Tamil_InterIndic.txt\
Telugu_InterIndic.txt\
InterIndic_Bengali.txt\
InterIndic_Devanagari.txt\
InterIndic_Gujarati.txt\
InterIndic_Gurmukhi.txt\
InterIndic_Kannada.txt\
InterIndic_Malayalam.txt\
InterIndic_Oriya.txt\
InterIndic_Tamil.txt\
InterIndic_Telugu.txt\
Latin_InterIndic.txt\
InterIndic_Latin.txt
TRANSLIT_SOURCE=translit_Any_Accents.txt\
translit_Any_Publishing.txt\
translit_Bengali_InterIndic.txt\
translit_Cyrillic_Latin.txt\
translit_Devanagari_InterIndic.txt\
translit_Fullwidth_Halfwidth.txt\
translit_Greek_Latin.txt\
translit_Gujarati_InterIndic.txt\
translit_Gurmukhi_InterIndic.txt\
translit_Hiragana_Katakana.txt\
translit_Hiragana_Latin.txt\
translit_InterIndic_Bengali.txt\
translit_InterIndic_Devanagari.txt\
translit_InterIndic_Gujarati.txt\
translit_InterIndic_Gurmukhi.txt\
translit_InterIndic_Kannada.txt\
translit_InterIndic_Latin.txt\
translit_InterIndic_Malayalam.txt\
translit_InterIndic_Oriya.txt\
translit_InterIndic_Tamil.txt\
translit_InterIndic_Telugu.txt\
translit_Kannada_InterIndic.txt\
translit_Latin_InterIndic.txt\
translit_Latin_Jamo.txt\
translit_Latin_Katakana.txt\
translit_Malayalam_InterIndic.txt\
translit_Oriya_InterIndic.txt\
translit_Tamil_InterIndic.txt\
translit_Telugu_InterIndic.txt\
translit_index.txt

View File

@ -0,0 +1,311 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Any_Accents.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Any_Accents
translit_Any_Accents {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// $Source: /xsrl/Nsvn/icu/icu/source/data/translit/Attic/t_Any_Accents.txt,v $
// $Date: 2001/10/26 05:41:15 $
// $Revision: 1.1 $
//--------------------------------------------------------------------
// Purpose: lets a user type diacritics and a few special letters from a
// plain keyboard.  An ASCII mnemonic wrapped in $pre ... $post (defined
// below as '<' and '>') is paired with one Unicode character; every rule
// in this bundle uses the two-way '<>' operator.

// Global transform line.  Per ICU rule syntax this requests NFD for the
// forward direction and names NFC (in parentheses) for the reverse one.
":: NFD (NFC) ;"
// to do: make reversible
// define special conversion characters.
// variants of this could use different characters, or set one or the other to null.
"$pre = \< ;"
"$post = \> ;"
// Provide keyboard equivalents for common diacritics used in transliteration
"$pre \` $post <> \u0300 ;" // COMBINING GRAVE ACCENT
"$pre \' $post <> \u0301 ;" // COMBINING ACUTE ACCENT
"$pre \^ $post <> \u0302 ;" // COMBINING CIRCUMFLEX ACCENT
"$pre \~ $post <> \u0303 ;" // COMBINING TILDE
"$pre \- $post <> \u0304 ;" // COMBINING MACRON
"$pre \" $post <> \u0308 ;" // COMBINING DIAERESIS
"$pre \* $post <> \u030A ;" // COMBINING RING ABOVE
"$pre \, $post <> \u0327 ;" // COMBINING CEDILLA
"$pre '/' $post <> \u0338 ;" // COMBINING LONG SOLIDUS OVERLAY
"$pre \. $post <> \u0323 ;" // COMBINING DOT BELOW
// Combine common characters
"$pre AE $post <> \u00C6 ;" // LATIN CAPITAL LETTER AE
"$pre ae $post <> \u00E6 ;" // LATIN SMALL LETTER AE
"$pre D $post <> \u00D0 ;" // LATIN CAPITAL LETTER ETH
"$pre d $post <> \u00F0 ;" // LATIN SMALL LETTER ETH
"$pre O'/' $post <> \u00D8 ;" // LATIN CAPITAL LETTER O WITH STROKE
"$pre o'/' $post <> \u00F8 ;" // LATIN SMALL LETTER O WITH STROKE
"$pre TH $post <> \u00DE ;" // LATIN CAPITAL LETTER THORN
"$pre th $post <> \u00FE ;" // LATIN SMALL LETTER THORN
"$pre OE $post <> \u0152 ;" // LATIN CAPITAL LIGATURE OE
"$pre oe $post <> \u0153 ;" // LATIN SMALL LIGATURE OE
"$pre ss $post <> \u00DF ;" // LATIN SMALL LETTER SHARP S
"$pre NG $post <> \u014A ;" // LATIN CAPITAL LETTER ENG
"$pre ng $post <> \u014B ;" // LATIN SMALL LETTER ENG
"$pre T $post <> \u0398 ;" // GREEK CAPITAL LETTER THETA
"$pre t $post <> \u03B8 ;" // GREEK SMALL LETTER THETA
"$pre SH $post <> \u01A9 ;" // LATIN CAPITAL LETTER ESH
"$pre sh $post <> \u0283 ;" // LATIN SMALL LETTER ESH
"$pre ZH $post <> \u01B7 ;" // LATIN CAPITAL LETTER EZH
"$pre zh $post <> \u0292 ;" // LATIN SMALL LETTER EZH
"$pre U $post <> \u01B1 ;" // LATIN CAPITAL LETTER UPSILON
"$pre u $post <> \u028A ;" // LATIN SMALL LETTER UPSILON
"$pre A $post <> \u018F ;" // LATIN CAPITAL LETTER SCHWA
"$pre a $post <> \u0259 ;" // LATIN SMALL LETTER SCHWA
"$pre O $post <> \u0186 ;" // LATIN CAPITAL LETTER OPEN O
"$pre o $post <> \u0254 ;" // LATIN SMALL LETTER OPEN O
"$pre E $post <> \u0190 ;" // LATIN CAPITAL LETTER OPEN E
"$pre e $post <> \u025B ;" // LATIN SMALL LETTER OPEN E
// three that don't have uppercases
"$pre '?' $post <> \u0294 ;" // LATIN LETTER GLOTTAL STOP
"$pre i $post <> \u026A ;" // LATIN LETTER SMALL CAPITAL I
"$pre v $post <> \u028C ;" // LATIN SMALL LETTER TURNED V
// Additional Characters that may be added in the future
// (XXX / YYY are placeholders: no keyboard mnemonic has been chosen yet.)
// NOTE(review): \u0294 (GLOTTAL STOP) is listed below although the '?' rule
// above already maps it -- that entry looks stale.
// $pre XXX $post <> \u0306 ; # COMBINING BREVE
// $pre XXX $post <> \u0307 ; # COMBINING DOT ABOVE
// $pre XXX $post <> \u0309 ; # COMBINING HOOK ABOVE
// $pre XXX $post <> \u030B ; # COMBINING DOUBLE ACUTE ACCENT
// $pre XXX $post <> \u030C ; # COMBINING CARON
// $pre XXX $post <> \u030F ; # COMBINING DOUBLE GRAVE ACCENT
// $pre XXX $post <> \u0311 ; # COMBINING INVERTED BREVE
// $pre XXX $post <> \u0313 ; # COMBINING COMMA ABOVE
// $pre XXX $post <> \u0314 ; # COMBINING REVERSED COMMA ABOVE
// $pre XXX $post <> \u031B ; # COMBINING HORN
// $pre XXX $post <> \u0324 ; # COMBINING DIAERESIS BELOW
// $pre XXX $post <> \u0325 ; # COMBINING RING BELOW
// $pre XXX $post <> \u0326 ; # COMBINING COMMA BELOW
// $pre XXX $post <> \u0328 ; # COMBINING OGONEK
// $pre XXX $post <> \u032D ; # COMBINING CIRCUMFLEX ACCENT BELOW
// $pre XXX $post <> \u032E ; # COMBINING BREVE BELOW
// $pre XXX $post <> \u0330 ; # COMBINING TILDE BELOW
// $pre XXX $post <> \u0331 ; # COMBINING MACRON BELOW
// $pre YYY $post <> \u00AA ; # FEMININE ORDINAL INDICATOR
// $pre YYY $post <> \u00BA ; # MASCULINE ORDINAL INDICATOR
// $pre YYY $post <> \u0110 ; # LATIN CAPITAL LETTER D WITH STROKE
// $pre YYY $post <> \u0111 ; # LATIN SMALL LETTER D WITH STROKE
// $pre YYY $post <> \u0126 ; # LATIN CAPITAL LETTER H WITH STROKE
// $pre YYY $post <> \u0127 ; # LATIN SMALL LETTER H WITH STROKE
// $pre YYY $post <> \u0131 ; # LATIN SMALL LETTER DOTLESS I
// $pre YYY $post <> \u0138 ; # LATIN SMALL LETTER KRA
// $pre YYY $post <> \u013F ; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
// $pre YYY $post <> \u0140 ; # LATIN SMALL LETTER L WITH MIDDLE DOT
// $pre YYY $post <> \u0141 ; # LATIN CAPITAL LETTER L WITH STROKE
// $pre YYY $post <> \u0142 ; # LATIN SMALL LETTER L WITH STROKE
// $pre YYY $post <> \u0149 ; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
// $pre YYY $post <> \u0166 ; # LATIN CAPITAL LETTER T WITH STROKE
// $pre YYY $post <> \u0167 ; # LATIN SMALL LETTER T WITH STROKE
// $pre YYY $post <> \u017F ; # LATIN SMALL LETTER LONG S
// $pre YYY $post <> \u0180 ; # LATIN SMALL LETTER B WITH STROKE
// $pre YYY $post <> \u0181 ; # LATIN CAPITAL LETTER B WITH HOOK
// $pre YYY $post <> \u0182 ; # LATIN CAPITAL LETTER B WITH TOPBAR
// $pre YYY $post <> \u0183 ; # LATIN SMALL LETTER B WITH TOPBAR
// $pre YYY $post <> \u0184 ; # LATIN CAPITAL LETTER TONE SIX
// $pre YYY $post <> \u0185 ; # LATIN SMALL LETTER TONE SIX
// $pre YYY $post <> \u0187 ; # LATIN CAPITAL LETTER C WITH HOOK
// $pre YYY $post <> \u0188 ; # LATIN SMALL LETTER C WITH HOOK
// $pre YYY $post <> \u0189 ; # LATIN CAPITAL LETTER AFRICAN D
// $pre YYY $post <> \u018A ; # LATIN CAPITAL LETTER D WITH HOOK
// $pre YYY $post <> \u018B ; # LATIN CAPITAL LETTER D WITH TOPBAR
// $pre YYY $post <> \u018C ; # LATIN SMALL LETTER D WITH TOPBAR
// $pre YYY $post <> \u018D ; # LATIN SMALL LETTER TURNED DELTA
// $pre YYY $post <> \u018E ; # LATIN CAPITAL LETTER REVERSED E
// $pre YYY $post <> \u0191 ; # LATIN CAPITAL LETTER F WITH HOOK
// $pre YYY $post <> \u0192 ; # LATIN SMALL LETTER F WITH HOOK
// $pre YYY $post <> \u0193 ; # LATIN CAPITAL LETTER G WITH HOOK
// $pre YYY $post <> \u0194 ; # LATIN CAPITAL LETTER GAMMA
// $pre YYY $post <> \u0195 ; # LATIN SMALL LETTER HV
// $pre YYY $post <> \u0196 ; # LATIN CAPITAL LETTER IOTA
// $pre YYY $post <> \u0197 ; # LATIN CAPITAL LETTER I WITH STROKE
// $pre YYY $post <> \u0198 ; # LATIN CAPITAL LETTER K WITH HOOK
// $pre YYY $post <> \u0199 ; # LATIN SMALL LETTER K WITH HOOK
// $pre YYY $post <> \u019A ; # LATIN SMALL LETTER L WITH BAR
// $pre YYY $post <> \u019B ; # LATIN SMALL LETTER LAMBDA WITH STROKE
// $pre YYY $post <> \u019C ; # LATIN CAPITAL LETTER TURNED M
// $pre YYY $post <> \u019D ; # LATIN CAPITAL LETTER N WITH LEFT HOOK
// $pre YYY $post <> \u019E ; # LATIN SMALL LETTER N WITH LONG RIGHT LEG
// $pre YYY $post <> \u019F ; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
// $pre YYY $post <> \u01A2 ; # LATIN CAPITAL LETTER OI
// $pre YYY $post <> \u01A3 ; # LATIN SMALL LETTER OI
// $pre YYY $post <> \u01A4 ; # LATIN CAPITAL LETTER P WITH HOOK
// $pre YYY $post <> \u01A5 ; # LATIN SMALL LETTER P WITH HOOK
// $pre YYY $post <> \u01A6 ; # LATIN LETTER YR
// $pre YYY $post <> \u01A7 ; # LATIN CAPITAL LETTER TONE TWO
// $pre YYY $post <> \u01A8 ; # LATIN SMALL LETTER TONE TWO
// $pre YYY $post <> \u01AA ; # LATIN LETTER REVERSED ESH LOOP
// $pre YYY $post <> \u01AB ; # LATIN SMALL LETTER T WITH PALATAL HOOK
// $pre YYY $post <> \u01AC ; # LATIN CAPITAL LETTER T WITH HOOK
// $pre YYY $post <> \u01AD ; # LATIN SMALL LETTER T WITH HOOK
// $pre YYY $post <> \u01AE ; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
// $pre YYY $post <> \u01B2 ; # LATIN CAPITAL LETTER V WITH HOOK
// $pre YYY $post <> \u01B3 ; # LATIN CAPITAL LETTER Y WITH HOOK
// $pre YYY $post <> \u01B4 ; # LATIN SMALL LETTER Y WITH HOOK
// $pre YYY $post <> \u01B5 ; # LATIN CAPITAL LETTER Z WITH STROKE
// $pre YYY $post <> \u01B6 ; # LATIN SMALL LETTER Z WITH STROKE
// $pre YYY $post <> \u01B8 ; # LATIN CAPITAL LETTER EZH REVERSED
// $pre YYY $post <> \u01B9 ; # LATIN SMALL LETTER EZH REVERSED
// $pre YYY $post <> \u01BA ; # LATIN SMALL LETTER EZH WITH TAIL
// $pre YYY $post <> \u01BB ; # LATIN LETTER TWO WITH STROKE
// $pre YYY $post <> \u01BC ; # LATIN CAPITAL LETTER TONE FIVE
// $pre YYY $post <> \u01BD ; # LATIN SMALL LETTER TONE FIVE
// $pre YYY $post <> \u01BE ; # LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE
// $pre YYY $post <> \u01BF ; # LATIN LETTER WYNN
// $pre YYY $post <> \u01C0 ; # LATIN LETTER DENTAL CLICK
// $pre YYY $post <> \u01C1 ; # LATIN LETTER LATERAL CLICK
// $pre YYY $post <> \u01C2 ; # LATIN LETTER ALVEOLAR CLICK
// $pre YYY $post <> \u01C3 ; # LATIN LETTER RETROFLEX CLICK
// $pre YYY $post <> \u01C4 ; # LATIN CAPITAL LETTER DZ WITH CARON
// $pre YYY $post <> \u01C5 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
// $pre YYY $post <> \u01C6 ; # LATIN SMALL LETTER DZ WITH CARON
// $pre YYY $post <> \u01C7 ; # LATIN CAPITAL LETTER LJ
// $pre YYY $post <> \u01C8 ; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
// $pre YYY $post <> \u01C9 ; # LATIN SMALL LETTER LJ
// $pre YYY $post <> \u01CA ; # LATIN CAPITAL LETTER NJ
// $pre YYY $post <> \u01CB ; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
// $pre YYY $post <> \u01CC ; # LATIN SMALL LETTER NJ
// $pre YYY $post <> \u01DD ; # LATIN SMALL LETTER TURNED E
// $pre YYY $post <> \u01E4 ; # LATIN CAPITAL LETTER G WITH STROKE
// $pre YYY $post <> \u01E5 ; # LATIN SMALL LETTER G WITH STROKE
// $pre YYY $post <> \u01F1 ; # LATIN CAPITAL LETTER DZ
// $pre YYY $post <> \u01F2 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
// $pre YYY $post <> \u01F3 ; # LATIN SMALL LETTER DZ
// $pre YYY $post <> \u01F6 ; # LATIN CAPITAL LETTER HWAIR
// $pre YYY $post <> \u01F7 ; # LATIN CAPITAL LETTER WYNN
// $pre YYY $post <> \u021C ; # LATIN CAPITAL LETTER YOGH
// $pre YYY $post <> \u021D ; # LATIN SMALL LETTER YOGH
// $pre YYY $post <> \u0222 ; # LATIN CAPITAL LETTER OU
// $pre YYY $post <> \u0223 ; # LATIN SMALL LETTER OU
// $pre YYY $post <> \u0224 ; # LATIN CAPITAL LETTER Z WITH HOOK
// $pre YYY $post <> \u0225 ; # LATIN SMALL LETTER Z WITH HOOK
// $pre YYY $post <> \u0250 ; # LATIN SMALL LETTER TURNED A
// $pre YYY $post <> \u0251 ; # LATIN SMALL LETTER ALPHA
// $pre YYY $post <> \u0252 ; # LATIN SMALL LETTER TURNED ALPHA
// $pre YYY $post <> \u0253 ; # LATIN SMALL LETTER B WITH HOOK
// $pre YYY $post <> \u0255 ; # LATIN SMALL LETTER C WITH CURL
// $pre YYY $post <> \u0256 ; # LATIN SMALL LETTER D WITH TAIL
// $pre YYY $post <> \u0257 ; # LATIN SMALL LETTER D WITH HOOK
// $pre YYY $post <> \u0258 ; # LATIN SMALL LETTER REVERSED E
// $pre YYY $post <> \u025A ; # LATIN SMALL LETTER SCHWA WITH HOOK
// $pre YYY $post <> \u025C ; # LATIN SMALL LETTER REVERSED OPEN E
// $pre YYY $post <> \u025D ; # LATIN SMALL LETTER REVERSED OPEN E WITH HOOK
// $pre YYY $post <> \u025E ; # LATIN SMALL LETTER CLOSED REVERSED OPEN E
// $pre YYY $post <> \u025F ; # LATIN SMALL LETTER DOTLESS J WITH STROKE
// $pre YYY $post <> \u0260 ; # LATIN SMALL LETTER G WITH HOOK
// $pre YYY $post <> \u0261 ; # LATIN SMALL LETTER SCRIPT G
// $pre YYY $post <> \u0262 ; # LATIN LETTER SMALL CAPITAL G
// $pre YYY $post <> \u0263 ; # LATIN SMALL LETTER GAMMA
// $pre YYY $post <> \u0264 ; # LATIN SMALL LETTER RAMS HORN
// $pre YYY $post <> \u0265 ; # LATIN SMALL LETTER TURNED H
// $pre YYY $post <> \u0266 ; # LATIN SMALL LETTER H WITH HOOK
// $pre YYY $post <> \u0267 ; # LATIN SMALL LETTER HENG WITH HOOK
// $pre YYY $post <> \u0268 ; # LATIN SMALL LETTER I WITH STROKE
// $pre YYY $post <> \u0269 ; # LATIN SMALL LETTER IOTA
// $pre YYY $post <> \u026B ; # LATIN SMALL LETTER L WITH MIDDLE TILDE
// $pre YYY $post <> \u026C ; # LATIN SMALL LETTER L WITH BELT
// $pre YYY $post <> \u026D ; # LATIN SMALL LETTER L WITH RETROFLEX HOOK
// $pre YYY $post <> \u026E ; # LATIN SMALL LETTER LEZH
// $pre YYY $post <> \u026F ; # LATIN SMALL LETTER TURNED M
// $pre YYY $post <> \u0270 ; # LATIN SMALL LETTER TURNED M WITH LONG LEG
// $pre YYY $post <> \u0271 ; # LATIN SMALL LETTER M WITH HOOK
// $pre YYY $post <> \u0272 ; # LATIN SMALL LETTER N WITH LEFT HOOK
// $pre YYY $post <> \u0273 ; # LATIN SMALL LETTER N WITH RETROFLEX HOOK
// $pre YYY $post <> \u0274 ; # LATIN LETTER SMALL CAPITAL N
// $pre YYY $post <> \u0275 ; # LATIN SMALL LETTER BARRED O
// $pre YYY $post <> \u0276 ; # LATIN LETTER SMALL CAPITAL OE
// $pre YYY $post <> \u0277 ; # LATIN SMALL LETTER CLOSED OMEGA
// $pre YYY $post <> \u0278 ; # LATIN SMALL LETTER PHI
// $pre YYY $post <> \u0279 ; # LATIN SMALL LETTER TURNED R
// $pre YYY $post <> \u027A ; # LATIN SMALL LETTER TURNED R WITH LONG LEG
// $pre YYY $post <> \u027B ; # LATIN SMALL LETTER TURNED R WITH HOOK
// $pre YYY $post <> \u027C ; # LATIN SMALL LETTER R WITH LONG LEG
// $pre YYY $post <> \u027D ; # LATIN SMALL LETTER R WITH TAIL
// $pre YYY $post <> \u027E ; # LATIN SMALL LETTER R WITH FISHHOOK
// $pre YYY $post <> \u027F ; # LATIN SMALL LETTER REVERSED R WITH FISHHOOK
// $pre YYY $post <> \u0280 ; # LATIN LETTER SMALL CAPITAL R
// $pre YYY $post <> \u0281 ; # LATIN LETTER SMALL CAPITAL INVERTED R
// $pre YYY $post <> \u0282 ; # LATIN SMALL LETTER S WITH HOOK
// $pre YYY $post <> \u0284 ; # LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
// $pre YYY $post <> \u0285 ; # LATIN SMALL LETTER SQUAT REVERSED ESH
// $pre YYY $post <> \u0286 ; # LATIN SMALL LETTER ESH WITH CURL
// $pre YYY $post <> \u0287 ; # LATIN SMALL LETTER TURNED T
// $pre YYY $post <> \u0288 ; # LATIN SMALL LETTER T WITH RETROFLEX HOOK
// $pre YYY $post <> \u0289 ; # LATIN SMALL LETTER U BAR
// $pre YYY $post <> \u028B ; # LATIN SMALL LETTER V WITH HOOK
// $pre YYY $post <> \u028D ; # LATIN SMALL LETTER TURNED W
// $pre YYY $post <> \u028E ; # LATIN SMALL LETTER TURNED Y
// $pre YYY $post <> \u028F ; # LATIN LETTER SMALL CAPITAL Y
// $pre YYY $post <> \u0290 ; # LATIN SMALL LETTER Z WITH RETROFLEX HOOK
// $pre YYY $post <> \u0291 ; # LATIN SMALL LETTER Z WITH CURL
// $pre YYY $post <> \u0293 ; # LATIN SMALL LETTER EZH WITH CURL
// $pre YYY $post <> \u0294 ; # LATIN LETTER GLOTTAL STOP
// $pre YYY $post <> \u0295 ; # LATIN LETTER PHARYNGEAL VOICED FRICATIVE
// $pre YYY $post <> \u0296 ; # LATIN LETTER INVERTED GLOTTAL STOP
// $pre YYY $post <> \u0297 ; # LATIN LETTER STRETCHED C
// $pre YYY $post <> \u0298 ; # LATIN LETTER BILABIAL CLICK
// $pre YYY $post <> \u0299 ; # LATIN LETTER SMALL CAPITAL B
// $pre YYY $post <> \u029A ; # LATIN SMALL LETTER CLOSED OPEN E
// $pre YYY $post <> \u029B ; # LATIN LETTER SMALL CAPITAL G WITH HOOK
// $pre YYY $post <> \u029C ; # LATIN LETTER SMALL CAPITAL H
// $pre YYY $post <> \u029D ; # LATIN SMALL LETTER J WITH CROSSED-TAIL
// $pre YYY $post <> \u029E ; # LATIN SMALL LETTER TURNED K
// $pre YYY $post <> \u029F ; # LATIN LETTER SMALL CAPITAL L
// $pre YYY $post <> \u02A0 ; # LATIN SMALL LETTER Q WITH HOOK
// $pre YYY $post <> \u02A1 ; # LATIN LETTER GLOTTAL STOP WITH STROKE
// $pre YYY $post <> \u02A2 ; # LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE
// $pre YYY $post <> \u02A3 ; # LATIN SMALL LETTER DZ DIGRAPH
// $pre YYY $post <> \u02A4 ; # LATIN SMALL LETTER DEZH DIGRAPH
// $pre YYY $post <> \u02A5 ; # LATIN SMALL LETTER DZ DIGRAPH WITH CURL
// $pre YYY $post <> \u02A6 ; # LATIN SMALL LETTER TS DIGRAPH
// $pre YYY $post <> \u02A7 ; # LATIN SMALL LETTER TESH DIGRAPH
// $pre YYY $post <> \u02A8 ; # LATIN SMALL LETTER TC DIGRAPH WITH CURL
// $pre YYY $post <> \u02A9 ; # LATIN SMALL LETTER FENG DIGRAPH
// $pre YYY $post <> \u02AA ; # LATIN SMALL LETTER LS DIGRAPH
// $pre YYY $post <> \u02AB ; # LATIN SMALL LETTER LZ DIGRAPH
// $pre YYY $post <> \u02AC ; # LATIN LETTER BILABIAL PERCUSSIVE
// $pre YYY $post <> \u02AD ; # LATIN LETTER BIDENTAL PERCUSSIVE
// $pre YYY $post <> \u02B0 ; # MODIFIER LETTER SMALL H
// $pre YYY $post <> \u02B1 ; # MODIFIER LETTER SMALL H WITH HOOK
// $pre YYY $post <> \u02B2 ; # MODIFIER LETTER SMALL J
// $pre YYY $post <> \u02B3 ; # MODIFIER LETTER SMALL R
// $pre YYY $post <> \u02B4 ; # MODIFIER LETTER SMALL TURNED R
// $pre YYY $post <> \u02B5 ; # MODIFIER LETTER SMALL TURNED R WITH HOOK
// $pre YYY $post <> \u02B6 ; # MODIFIER LETTER SMALL CAPITAL INVERTED R
// $pre YYY $post <> \u02B7 ; # MODIFIER LETTER SMALL W
// $pre YYY $post <> \u02B8 ; # MODIFIER LETTER SMALL Y
// $pre YYY $post <> \u02E0 ; # MODIFIER LETTER SMALL GAMMA
// $pre YYY $post <> \u02E1 ; # MODIFIER LETTER SMALL L
// $pre YYY $post <> \u02E2 ; # MODIFIER LETTER SMALL S
// $pre YYY $post <> \u02E3 ; # MODIFIER LETTER SMALL X
// $pre YYY $post <> \u02E4 ; # MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
// $pre YYY $post <> \u1E9A ; # LATIN SMALL LETTER A WITH RIGHT HALF RING
// $pre YYY $post <> \u207F ; # SUPERSCRIPT LATIN SMALL LETTER N
// Closing counterpart of the opening ":: NFD (NFC) ;" line: NFC for the
// forward direction, NFD named for the reverse one.
":: NFC (NFD) ;"
}
}

View File

@ -0,0 +1,55 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Any_Publishing.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Any_Publishing
translit_Any_Publishing {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// $Source: /xsrl/Nsvn/icu/icu/source/data/translit/Attic/t_Any_Publishing.txt,v $
// $Date: 2001/10/26 05:41:16 $
// $Revision: 1.1 $
//--------------------------------------------------------------------
// Purpose: turns typewriter-style punctuation (straight quotes, UNIX
// back-quotes, runs of spaces, double hyphens) into publishing
// punctuation.  Rules written with '<>' also apply in the reverse
// direction; rules written with '>' are forward-only.
//
// Test case
// "The" "(quick)" ('brown') `fox' ` jumped -- "over?"
// Variables
"$single = \' ;"
"$space = ' ' ;"
"$double = \" ;"
"$back = \` ;"
// U+0009 CHARACTER TABULATION (was U+0008, which is BACKSPACE).
// $tab is not referenced by any rule in this bundle.
"$tab = '\u0009' ;"
// Context after which a straight quote is treated as an opening quote.
"$makeRight = [[:Z:][:Ps:][:Pi:]$] ;"
// fix UNIX quotes
"$back $back > “ ;"
// The single curly quotes are written as escapes because the literal
// characters had been lost from this file, leaving rules with an empty
// right-hand side that deleted back-quotes and apostrophes.
"$back > \u2018 ;" // LEFT SINGLE QUOTATION MARK
// fix typewriter quotes, by context
"$makeRight {$double} <> “ ;"
"$double <> ” ;"
"$makeRight {$single} <> \u2018 ;" // LEFT SINGLE QUOTATION MARK
"$single <> \u2019 ;" // RIGHT SINGLE QUOTATION MARK
// fix multiple spaces and hyphens
// (empty right-hand side is intentional here: drop a space that follows a space)
"$space {$space} > ;"
"'--' <> — ;"
}
}

View File

@ -0,0 +1,121 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Bengali_InterIndic.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Bengali_InterIndic
translit_Bengali_InterIndic {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Bengali_InterIndic
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:41:57 2001
//--------------------------------------------------------------------
// Bengali-InterIndic
//
// One-way table from Bengali code points to the private-use "InterIndic"
// pivot range.  Every mapped character keeps its offset within the block:
// U+0980+n -> U+E000+n.  The single exception in this table is ISSHAR
// (U+09FA), which is sent to U+E070.
//:: NFD (NFC) ;
"\u0981>\ue001;" // SIGN CANDRABINDU
"\u0982>\ue002;" // SIGN ANUSVARA
"\u0983>\ue003;" // SIGN VISARGA
"\u0985>\ue005;" // LETTER A
"\u0986>\ue006;" // LETTER AA
"\u0987>\ue007;" // LETTER I
"\u0988>\ue008;" // LETTER II
"\u0989>\ue009;" // LETTER U
"\u098a>\ue00a;" // LETTER UU
"\u098b>\ue00b;" // LETTER VOCALIC R
"\u098c>\ue00c;" // LETTER VOCALIC L
"\u098f>\ue00f;" // LETTER E
"\u0990>\ue010;" // LETTER AI
"\u0993>\ue013;" // LETTER O
"\u0994>\ue014;" // LETTER AU
"\u0995>\ue015;" // LETTER KA
"\u0996>\ue016;" // LETTER KHA
"\u0997>\ue017;" // LETTER GA
"\u0998>\ue018;" // LETTER GHA
"\u0999>\ue019;" // LETTER NGA
"\u099a>\ue01a;" // LETTER CA
"\u099b>\ue01b;" // LETTER CHA
"\u099c>\ue01c;" // LETTER JA
"\u099d>\ue01d;" // LETTER JHA
"\u099e>\ue01e;" // LETTER NYA
"\u099f>\ue01f;" // LETTER TTA
"\u09a0>\ue020;" // LETTER TTHA
"\u09a1>\ue021;" // LETTER DDA
"\u09a2>\ue022;" // LETTER DDHA
"\u09a3>\ue023;" // LETTER NNA
"\u09a4>\ue024;" // LETTER TA
"\u09a5>\ue025;" // LETTER THA
"\u09a6>\ue026;" // LETTER DA
"\u09a7>\ue027;" // LETTER DHA
"\u09a8>\ue028;" // LETTER NA
"\u09aa>\ue02a;" // LETTER PA
"\u09ab>\ue02b;" // LETTER PHA
"\u09ac>\ue02c;" // LETTER BA
"\u09ad>\ue02d;" // LETTER BHA
"\u09ae>\ue02e;" // LETTER MA
"\u09af>\ue02f;" // LETTER YA
"\u09b0>\ue030;" // LETTER RA
"\u09b2>\ue032;" // LETTER LA
"\u09b6>\ue036;" // LETTER SHA
"\u09b7>\ue037;" // LETTER SSA
"\u09b8>\ue038;" // LETTER SA
"\u09b9>\ue039;" // LETTER HA
"\u09bc>\ue03c;" // SIGN NUKTA
"\u09be>\ue03e;" // VOWEL SIGN AA
"\u09bf>\ue03f;" // VOWEL SIGN I
"\u09c0>\ue040;" // VOWEL SIGN II
"\u09c1>\ue041;" // VOWEL SIGN U
"\u09c2>\ue042;" // VOWEL SIGN UU
"\u09c3>\ue043;" // VOWEL SIGN VOCALIC R
"\u09c4>\ue044;" // VOWEL SIGN VOCALIC RR
"\u09c7>\ue047;" // VOWEL SIGN E
"\u09c8>\ue048;" // VOWEL SIGN AI
"\u09cb>\ue04b;" // VOWEL SIGN O
"\u09cc>\ue04c;" // VOWEL SIGN AU
"\u09cd>\ue04d;" // SIGN VIRAMA
"\u09d7>\ue057;" // AU LENGTH MARK
// Was \ue053, which broke the fixed U+0980 -> U+E000 offset used by every
// other letter in this table.
"\u09dc>\ue05c;" // LETTER RRA
"\u09dd>\ue05d;" // LETTER RHA
"\u09df>\ue05f;" // LETTER YYA
"\u09e0>\ue060;" // LETTER VOCALIC RR
"\u09e1>\ue061;" // LETTER VOCALIC LL
"\u09e2>\ue062;" // VOWEL SIGN VOCALIC L
"\u09e3>\ue063;" // VOWEL SIGN VOCALIC LL
"\u09e6>\ue066;" // DIGIT ZERO
"\u09e7>\ue067;" // DIGIT ONE
"\u09e8>\ue068;" // DIGIT TWO
"\u09e9>\ue069;" // DIGIT THREE
"\u09ea>\ue06a;" // DIGIT FOUR
"\u09eb>\ue06b;" // DIGIT FIVE
"\u09ec>\ue06c;" // DIGIT SIX
"\u09ed>\ue06d;" // DIGIT SEVEN
"\u09ee>\ue06e;" // DIGIT EIGHT
"\u09ef>\ue06f;" // DIGIT NINE
// \u09f0>; # UNMAPPED Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL
// \u09f1>; # UNMAPPED Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL
// \u09f2>; # UNMAPPED Bengali-InterIndic: RUPEE MARK
// \u09f3>; # UNMAPPED Bengali-InterIndic: RUPEE SIGN
// \u09f4>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR ONE
// \u09f5>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR TWO
// \u09f6>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR THREE
// \u09f7>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR FOUR
// \u09f8>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
// \u09f9>; # UNMAPPED Bengali-InterIndic: CURRENCY DENOMINATOR SIXTEEN
"\u09fa>\ue070;" // ISSHAR
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,316 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Cyrillic_Latin.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Cyrillic_Latin
// Resource bundle entry holding the Cyrillic <-> Latin transliteration rules.
// Each quoted string inside Rule { } is one rule statement. Almost all rules
// use "<>", i.e. they are written once and serve both directions; the rules
// in the final "Completeness" section use "<" only.
// Rule order is significant: see the "move up so not masked" note below.
translit_Cyrillic_Latin {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// $Source: /xsrl/Nsvn/icu/icu/source/data/translit/Attic/t_Cyrl_Latn.txt,v $
// $Date: 2001/10/26 05:41:16 $
// $Revision: 1.1 $
//--------------------------------------------------------------------
// TODO: add remaining characters
// Should add variants for Russian-English, Russian-German
// Those can use this as a base, and then remap cases
// like a $hat to ya or ja.
// Normalization directive: the rule set opens with NFD (NFC) and closes with
// NFC (NFD) at the bottom of this block. Presumably the parenthesized form is
// the one used when the rules run in the reverse direction - confirm against
// the ICU transliterator rule syntax.
":: NFD (NFC) ;"
// Variables naming the modifier letters and combining marks that appear on the
// Latin side of the rules below (U+02B9/U+02BA primes, U+03xx combining marks).
"$modprime = \u02B9;"
"$modprime2 = \u02BA;"
"$grave = \u0300;"
"$acute = \u0301;"
"$hat = \u0302;"
"$breve = \u0306 ;"
"$dot = \u0307 ;"
"$caron = \u030C ;"
"$comma = \u0326 ;"
// move up so not masked
// (The rules in this first group all have a Latin side made of a base letter
// plus a combining-mark variable. They are listed ahead of the single-letter
// rules in the "Normal order" group, presumably so that e.g. "a $hat" is
// tried before the plain "a" rule can consume the base letter.)
"я <> a $hat ;" // CYRILLIC SMALL LETTER YA
"Я <> A $hat ;" // CYRILLIC CAPITAL LETTER YA
"ч <> c $caron ;" // CYRILLIC SMALL LETTER CHE
"Ч <> C $caron;" // CYRILLIC CAPITAL LETTER CHE
// Lines of the form "X <> XXX ;" are commented-out placeholders for letters
// that have no Latin mapping assigned yet (see the TODO above).
// ҷ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER
// Ҷ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
// ӌ <> XXX ; # CYRILLIC SMALL LETTER KHAKASSIAN CHE
// Ӌ <> XXX ; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
// ҹ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
// Ҹ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
"э <> e $acute;" // CYRILLIC SMALL LETTER E
"Э <> E $acute;" // CYRILLIC CAPITAL LETTER E
"є <> e $hat;" // CYRILLIC SMALL LETTER UKRAINIAN IE
"Є <> E $hat;" // CYRILLIC CAPITAL LETTER UKRAINIAN IE
"ш <> s $caron ;" // CYRILLIC SMALL LETTER SHA
"Ш <> S $caron ;" // CYRILLIC CAPITAL LETTER SHA
"щ <> s $hat ;" // CYRILLIC SMALL LETTER SHCHA
"Щ <> S $hat;" // CYRILLIC CAPITAL LETTER SHCHA
"ѕ <> z $hat ;" // CYRILLIC SMALL LETTER DZE
"Ѕ <> Z $hat;" // CYRILLIC CAPITAL LETTER DZE
// ӡ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN DZE
// Ӡ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
"ю <> u $hat ;" // CYRILLIC SMALL LETTER YU
"Ю <> U $hat ;" // CYRILLIC CAPITAL LETTER YU
"і <> i $acute;" // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
"І <> I $acute;" // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
"ј <> j $caron;" // CYRILLIC SMALL LETTER JE
"Ј <> J $caron;" // CYRILLIC CAPITAL LETTER JE
"љ <> l $hat ;" // CYRILLIC SMALL LETTER LJE
"Љ <> L $hat ;" // CYRILLIC CAPITAL LETTER LJE
"њ <> n $hat ;" // CYRILLIC SMALL LETTER NJE
"Њ <> N $hat ;" // CYRILLIC CAPITAL LETTER NJE
"ћ <> c $acute ;" // CYRILLIC SMALL LETTER TSHE
"Ћ <> C $acute ;" // CYRILLIC CAPITAL LETTER TSHE
"џ <> d $hat ;" // CYRILLIC SMALL LETTER DZHE
"Џ <> D $hat ;" // CYRILLIC CAPITAL LETTER DZHE
// Normal order
// (From here on the letters follow roughly alphabetical order. Within a group,
// rules whose Latin side carries a diacritic still precede the bare-letter
// rule, e.g. the three "g + mark" rules come before "г <> g".)
"а <> a ;" // CYRILLIC SMALL LETTER A
"А <> A ;" // CYRILLIC CAPITAL LETTER A
"ә <> \u0259 ;" // CYRILLIC SMALL LETTER SCHWA
"Ә <> \u018F ;" // CYRILLIC CAPITAL LETTER SCHWA
"ӕ <> \u00E6 ;" // CYRILLIC SMALL LIGATURE A IE
"Ӕ <> \u00C6 ;" // CYRILLIC CAPITAL LIGATURE A IE
"б <> b ;" // CYRILLIC SMALL LETTER BE
"Б <> B ;" // CYRILLIC CAPITAL LETTER BE
"в <> v ;" // CYRILLIC SMALL LETTER VE
"В <> V ;" // CYRILLIC CAPITAL LETTER VE
"ґ <> g $grave ;" // CYRILLIC SMALL LETTER GHE WITH UPTURN
"Ґ <> G $grave ;" // CYRILLIC CAPITAL LETTER GHE WITH UPTURN
"ғ <> g $dot ;" // CYRILLIC SMALL LETTER GHE WITH STROKE
"Ғ <> G $dot;" // CYRILLIC CAPITAL LETTER GHE WITH STROKE
"ҕ <> g $breve;" // CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK
"Ҕ <> G $breve;" // CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
"г <> g ;" // CYRILLIC SMALL LETTER GHE
"Г <> G ;" // CYRILLIC CAPITAL LETTER GHE
"д <> d;" // CYRILLIC SMALL LETTER DE
"Д <> D;" // CYRILLIC CAPITAL LETTER DE
"ђ <> đ ;" // CYRILLIC SMALL LETTER DJE
"Ђ <> Đ ;" // CYRILLIC CAPITAL LETTER DJE
"ҙ <> z $comma ;" // CYRILLIC SMALL LETTER ZE WITH DESCENDER
"Ҙ <> Z $comma ;" // CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
"е <> e ;" // CYRILLIC SMALL LETTER IE
"Е <> E;" // CYRILLIC CAPITAL LETTER IE
"ж <> z $caron;" // CYRILLIC SMALL LETTER ZHE
"Ж <> Z $caron;" // CYRILLIC CAPITAL LETTER ZHE
// җ <> XXX ; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER
// Җ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
"з <> z ;" // CYRILLIC SMALL LETTER ZE
"З <> Z;" // CYRILLIC CAPITAL LETTER ZE
// NOTE(review): the rule set starts with NFD, and SHORT I has a canonical
// decomposition (I + combining breve); verify that the two SHORT I rules
// below can still match in the Cyrillic-to-Latin direction.
"й <> j ;" // CYRILLIC SMALL LETTER SHORT I
"Й <> J ;" // CYRILLIC CAPITAL LETTER SHORT I
"и <> i ;" // CYRILLIC SMALL LETTER I
"И <> I ;" // CYRILLIC CAPITAL LETTER I
"к <> k ;" // CYRILLIC SMALL LETTER KA
"К <> K;" // CYRILLIC CAPITAL LETTER KA
// қ <> XXX ; # CYRILLIC SMALL LETTER KA WITH DESCENDER
// Қ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
// ӄ <> XXX ; # CYRILLIC SMALL LETTER KA WITH HOOK
// Ӄ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH HOOK
// ҡ <> XXX ; # CYRILLIC SMALL LETTER BASHKIR KA
// Ҡ <> XXX ; # CYRILLIC CAPITAL LETTER BASHKIR KA
// ҟ <> XXX ; # CYRILLIC SMALL LETTER KA WITH STROKE
// Ҟ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH STROKE
// ҝ <> XXX ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
// Ҝ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
"л <> l ;" // CYRILLIC SMALL LETTER EL
"Л <> L;" // CYRILLIC CAPITAL LETTER EL
"м <> m ;" // CYRILLIC SMALL LETTER EM
"М <> M ;" // CYRILLIC CAPITAL LETTER EM
"н <> n ;" // CYRILLIC SMALL LETTER EN
"Н <> N;" // CYRILLIC CAPITAL LETTER EN
// ң <> XXX ; # CYRILLIC SMALL LETTER EN WITH DESCENDER
// Ң <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
// ӈ <> XXX ; # CYRILLIC SMALL LETTER EN WITH HOOK
// Ӈ <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH HOOK
// ҥ <> XXX ; # CYRILLIC SMALL LIGATURE EN GHE
// Ҥ <> XXX ; # CYRILLIC CAPITAL LIGATURE EN GHE
"о <> o ;" // CYRILLIC SMALL LETTER O
"О <> O ;" // CYRILLIC CAPITAL LETTER O
// ө <> XXX ; # CYRILLIC SMALL LETTER BARRED O
// Ө <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
"п <> p ;" // CYRILLIC SMALL LETTER PE
"П <> P ;" // CYRILLIC CAPITAL LETTER PE
// ҧ <> XXX ; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK
// Ҧ <> XXX ; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
// ҁ <> XXX ; # CYRILLIC SMALL LETTER KOPPA
// Ҁ <> XXX ; # CYRILLIC CAPITAL LETTER KOPPA
"р <> r ;" // CYRILLIC SMALL LETTER ER
"Р <> R ;" // CYRILLIC CAPITAL LETTER ER
// ҏ <> XXX ; # CYRILLIC SMALL LETTER ER WITH TICK
// Ҏ <> XXX ; # CYRILLIC CAPITAL LETTER ER WITH TICK
"с <> s ;" // CYRILLIC SMALL LETTER ES
"С <> S ;" // CYRILLIC CAPITAL LETTER ES
// ҫ <> XXX ; # CYRILLIC SMALL LETTER ES WITH DESCENDER
// Ҫ <> XXX ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
"т <> t ;" // CYRILLIC SMALL LETTER TE
"Т <> T ;" // CYRILLIC CAPITAL LETTER TE
// ҭ <> XXX ; # CYRILLIC SMALL LETTER TE WITH DESCENDER
// Ҭ <> XXX ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
"у <> u ;" // CYRILLIC SMALL LETTER U
"У <> U ;" // CYRILLIC CAPITAL LETTER U
// ү <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U
// Ү <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U
// ұ <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
// Ұ <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
// ѹ <> XXX ; # CYRILLIC SMALL LETTER UK
// Ѹ <> XXX ; # CYRILLIC CAPITAL LETTER UK
"ф <> f ;" // CYRILLIC SMALL LETTER EF
"Ф <> F ;" // CYRILLIC CAPITAL LETTER EF
"х <> h ;" // CYRILLIC SMALL LETTER HA
"Х <> H;" // CYRILLIC CAPITAL LETTER HA
// ҳ <> XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER
// Ҳ <> XXX ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
// һ <> XXX ; # CYRILLIC SMALL LETTER SHHA
// Һ <> XXX ; # CYRILLIC CAPITAL LETTER SHHA
// ѡ <> XXX ; # CYRILLIC SMALL LETTER OMEGA
// Ѡ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA
// ѿ <> XXX ; # CYRILLIC SMALL LETTER OT
// Ѿ <> XXX ; # CYRILLIC CAPITAL LETTER OT
// ѽ <> XXX ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO
// Ѽ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
// ѻ <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA
// Ѻ <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA
"ц <> c ;" // CYRILLIC SMALL LETTER TSE
"Ц <> C;" // CYRILLIC CAPITAL LETTER TSE
// ҵ <> XXX ; # CYRILLIC SMALL LIGATURE TE TSE
// Ҵ <> XXX ; # CYRILLIC CAPITAL LIGATURE TE TSE
// ҽ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE
// Ҽ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
// ҿ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER
// Ҿ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
// Hard and soft signs map to the double-prime / prime modifier letters. Both
// cases share the same Latin character, so the small-letter rules carry a
// "[:Ll:] {" context on the Latin side: presumably the small sign is produced
// in reverse only after a lowercase letter, and the capital-sign rule below
// it picks up every other occurrence - confirm against the rule syntax.
"ъ <> [:Ll:] { $modprime2 ;" // CYRILLIC SMALL LETTER HARD SIGN
"Ъ <> $modprime2 ;" // CYRILLIC CAPITAL LETTER HARD SIGN
"ы <> y ;" // CYRILLIC SMALL LETTER YERU
"Ы <> Y ;" // CYRILLIC CAPITAL LETTER YERU
"ь <> [:Ll:] { $modprime ;" // CYRILLIC SMALL LETTER SOFT SIGN
"Ь <> $modprime ;" // CYRILLIC CAPITAL LETTER SOFT SIGN
// ҍ <> XXX ; # CYRILLIC SMALL LETTER SEMISOFT SIGN
// Ҍ <> XXX ; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
// ѣ <> XXX ; # CYRILLIC SMALL LETTER YAT
// Ѣ <> XXX ; # CYRILLIC CAPITAL LETTER YAT
// ѥ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED E
// Ѥ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED E
// ѧ <> XXX ; # CYRILLIC SMALL LETTER LITTLE YUS
// Ѧ <> XXX ; # CYRILLIC CAPITAL LETTER LITTLE YUS
// ѫ <> XXX ; # CYRILLIC SMALL LETTER BIG YUS
// Ѫ <> XXX ; # CYRILLIC CAPITAL LETTER BIG YUS
// ѩ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS
// Ѩ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
// ѭ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED BIG YUS
// Ѭ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
// ѯ <> XXX ; # CYRILLIC SMALL LETTER KSI
// Ѯ <> XXX ; # CYRILLIC CAPITAL LETTER KSI
// ѱ <> XXX ; # CYRILLIC SMALL LETTER PSI
// Ѱ <> XXX ; # CYRILLIC CAPITAL LETTER PSI
// ѳ <> XXX ; # CYRILLIC SMALL LETTER FITA
// Ѳ <> XXX ; # CYRILLIC CAPITAL LETTER FITA
// ѵ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
// Ѵ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
// ҩ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN HA
// Ҩ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
// Ӏ <> XXX ; # CYRILLIC LETTER PALOCHKA
// The "//##" placeholders below are letters that carry a diacritic (breve,
// diaeresis, grave, ...). Their names are abbreviated here to the base letter
// name only, which is why several entries show the same name.
//## ӑ <> XXX ; # CYRILLIC SMALL LETTER A
//## Ӑ <> XXX ; # CYRILLIC CAPITAL LETTER A
//## ӓ <> XXX ; # CYRILLIC SMALL LETTER A
//## Ӓ <> XXX ; # CYRILLIC CAPITAL LETTER A
//## ӛ <> XXX ; # CYRILLIC SMALL LETTER SCHWA
//## Ӛ <> XXX ; # CYRILLIC CAPITAL LETTER SCHWA
//## ѓ <> XXX ; # CYRILLIC SMALL LETTER GHE
//## Ѓ <> XXX ; # CYRILLIC CAPITAL LETTER GHE
//## ѐ <> XXX ; # CYRILLIC SMALL LETTER IE
//## Ѐ <> XXX ; # CYRILLIC CAPITAL LETTER IE
//## ё <> XXX ; # CYRILLIC SMALL LETTER IE
//## Ё <> XXX ; # CYRILLIC CAPITAL LETTER IE
//## ӗ <> XXX ; # CYRILLIC SMALL LETTER IE
//## Ӗ <> XXX ; # CYRILLIC CAPITAL LETTER IE
//## ӂ <> XXX ; # CYRILLIC SMALL LETTER ZHE
//## Ӂ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
//## ӝ <> XXX ; # CYRILLIC SMALL LETTER ZHE
//## Ӝ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
//## ӟ <> XXX ; # CYRILLIC SMALL LETTER ZE
//## Ӟ <> XXX ; # CYRILLIC CAPITAL LETTER ZE
//## ѝ <> XXX ; # CYRILLIC SMALL LETTER I
//## Ѝ <> XXX ; # CYRILLIC CAPITAL LETTER I
//## ӣ <> XXX ; # CYRILLIC SMALL LETTER I
//## Ӣ <> XXX ; # CYRILLIC CAPITAL LETTER I
//## ӥ <> XXX ; # CYRILLIC SMALL LETTER I
//## Ӥ <> XXX ; # CYRILLIC CAPITAL LETTER I
//## ї <> XXX ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
//## Ї <> XXX ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
//## ӧ <> XXX ; # CYRILLIC SMALL LETTER O
//## Ӧ <> XXX ; # CYRILLIC CAPITAL LETTER O
//## ӫ <> XXX ; # CYRILLIC SMALL LETTER BARRED O
//## Ӫ <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
//## ќ <> XXX ; # CYRILLIC SMALL LETTER KA
//## Ќ <> XXX ; # CYRILLIC CAPITAL LETTER KA
//## ӯ <> XXX ; # CYRILLIC SMALL LETTER U
//## Ӯ <> XXX ; # CYRILLIC CAPITAL LETTER U
//## ў <> XXX ; # CYRILLIC SMALL LETTER U
//## Ў <> XXX ; # CYRILLIC CAPITAL LETTER U
//## ӱ <> XXX ; # CYRILLIC SMALL LETTER U
//## Ӱ <> XXX ; # CYRILLIC CAPITAL LETTER U
//## ӳ <> XXX ; # CYRILLIC SMALL LETTER U
//## Ӳ <> XXX ; # CYRILLIC CAPITAL LETTER U
//## ӵ <> XXX ; # CYRILLIC SMALL LETTER CHE
//## Ӵ <> XXX ; # CYRILLIC CAPITAL LETTER CHE
//## ӹ <> XXX ; # CYRILLIC SMALL LETTER YERU
//## Ӹ <> XXX ; # CYRILLIC CAPITAL LETTER YERU
//## ӭ <> XXX ; # CYRILLIC SMALL LETTER E
//## Ӭ <> XXX ; # CYRILLIC CAPITAL LETTER E
//## ѷ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
//## Ѷ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
// Completeness
// One-directional ("<") rules for the Latin letters q, w and x, which no rule
// above has on its Latin side. Each rewrites the letter to k, u or ks with a
// "|" in front of the replacement - presumably a cursor marker so that the
// rewritten Latin text is then processed by the k/u/s rules above (confirm
// against the ICU rule syntax). X becomes "KS" when an uppercase letter is
// adjacent (optionally separated by the marks/apostrophes in $ignore), and
// "Ks" otherwise.
"$ignore = [[:Mark:]''] * ;"
"| k < q ;"
"| K < Q ;"
"| u < w ;"
"| U < W ;"
"| KS < X } $ignore [:UppercaseLetter:] ;"
"| KS < [:UppercaseLetter:] $ignore { X ;"
"| Ks < X ;"
"| ks < x ;"
// Closing normalization directive, the mirror of the one at the top.
":: NFC (NFD) ;"
}
}

View File

@ -0,0 +1,147 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Devanagari_InterIndic.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Devanagari_InterIndic
// Resource bundle entry holding the one-way (">") Devanagari -> InterIndic
// rules. Every rule maps Devanagari code point(s) in the U+09xx block to a
// single code point in the U+E0xx range; for the single-character rules the
// low byte is kept (\u09NN > \ue0NN).
translit_Devanagari_InterIndic {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Devanagari_InterIndic
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:41:57 2001
//--------------------------------------------------------------------
// Devanagari-InterIndic
// The NFD directive below (and the NFC (NFD) directive at the end of the
// block) is commented out, so no normalization is requested by this rule set;
// both the decomposed (letter + nukta) and the precomposed forms are instead
// listed as explicit rules.
// :: NFD;
//Rules for Decomposed characters
// Each rule matches a base letter followed by SIGN NUKTA (\u093c) and emits
// the InterIndic value of the precomposed letter named in the trailing
// comment. They come before the single-character rules, presumably so that
// the two-character sequence is matched before the base letter alone.
"\u0928\u093c > \ue029;" //\u0929
"\u0930\u093c > \ue031;" //\u0931
"\u0933\u093c > \ue034;" //\u0934
"\u0915\u093c > \ue058;" //\u0958 LETTER QA (For Urdu)
"\u0916\u093c > \ue059;" //\u0959 LETTER KHHA (For Urdu)
"\u0917\u093c > \ue05a;" //\u095a LETTER GHHA (For Urdu)
"\u091c\u093c > \ue05b;" //\u095b LETTER ZA (For Urdu)
"\u0921\u093c > \ue05c;" //\u095c LETTER DDDHA (pronounced RRA)
"\u0922\u093c > \ue05d;" //\u095d LETTER RHA (pronounced RRHA)
"\u092b\u093c > \ue05e;" //\u095e LETTER FA
"\u092f\u093c > \ue05f;" //\u095f LETTER YYA
// Single-character rules. Commented-out lines marked UNMAPPED are code points
// for which no rule is emitted.
"\u0901>\ue001;" // SIGN CANDRABINDU
"\u0902>\ue002;" // SIGN ANUSVARA
"\u0903>\ue003;" // SIGN VISARGA
"\u0905>\ue005;" // LETTER A
"\u0906>\ue006;" // LETTER AA
"\u0907>\ue007;" // LETTER I
"\u0908>\ue008;" // LETTER II
"\u0909>\ue009;" // LETTER U
"\u090a>\ue00a;" // LETTER UU
"\u090b>\ue00b;" // LETTER VOCALIC R
"\u090c>\ue00c;" // LETTER VOCALIC L
"\u090d>\ue00d;" // LETTER CANDRA E (For representing English sounds)
//\u090e>\ue00e; # UNMAPPED LETTER SHORT E(For Southern Scripts)
"\u090f>\ue00f;" // LETTER E
"\u0910>\ue010;" // LETTER AI
"\u0911>\ue011;" // LETTER CANDRA O (For representing English sounds)
//\u0912>\ue012; # UNMAPPED LETTER SHORT O (For Southern Scripts)
"\u0913>\ue013;" // LETTER O
"\u0914>\ue014;" // LETTER AU
"\u0915>\ue015;" // LETTER KA
"\u0916>\ue016;" // LETTER KHA
"\u0917>\ue017;" // LETTER GA
"\u0918>\ue018;" // LETTER GHA
"\u0919>\ue019;" // LETTER NGA
"\u091a>\ue01a;" // LETTER CA
"\u091b>\ue01b;" // LETTER CHA
"\u091c>\ue01c;" // LETTER JA
"\u091d>\ue01d;" // LETTER JHA
"\u091e>\ue01e;" // LETTER NYA
"\u091f>\ue01f;" // LETTER TTA
"\u0920>\ue020;" // LETTER TTHA
"\u0921>\ue021;" // LETTER DDA
"\u0922>\ue022;" // LETTER DDHA
"\u0923>\ue023;" // LETTER NNA
"\u0924>\ue024;" // LETTER TA
"\u0925>\ue025;" // LETTER THA
"\u0926>\ue026;" // LETTER DA
"\u0927>\ue027;" // LETTER DHA
"\u0928>\ue028;" // LETTER NA
"\u0929>\ue029;" // LETTER NNNA
"\u092a>\ue02a;" // LETTER PA
"\u092b>\ue02b;" // LETTER PHA
"\u092c>\ue02c;" // LETTER BA
"\u092d>\ue02d;" // LETTER BHA
"\u092e>\ue02e;" // LETTER MA
"\u092f>\ue02f;" // LETTER YA
"\u0930>\ue030;" // LETTER RA
// NOTE(review): precomposed \u0931 and \u0934 are left unmapped here, although
// the decomposed sequences \u0930\u093c and \u0933\u093c are mapped to
// \ue031 / \ue034 above and precomposed \u0929 is mapped - confirm this
// asymmetry is intended.
//\u0931>\ue031; # UNMAPPED LETTER RRA (Eyelash RA for Southern scripts)
"\u0932>\ue032;" // LETTER LA
"\u0933>\ue033;" // LETTER LLA
//\u0934>\ue034; # UNMAPPED LETTER LLLA (LLLA for Southern scripts)
"\u0935>\ue035;" // LETTER VA
"\u0936>\ue036;" // LETTER SHA
"\u0937>\ue037;" // LETTER SSA
"\u0938>\ue038;" // LETTER SA
"\u0939>\ue039;" // LETTER HA
"\u093c>\ue03c;" // SIGN NUKTA
"\u093d>\ue03d;" // SIGN AVAGRAHA
"\u093e>\ue03e;" // VOWEL SIGN AA
"\u093f>\ue03f;" // VOWEL SIGN I
"\u0940>\ue040;" // VOWEL SIGN II
"\u0941>\ue041;" // VOWEL SIGN U
"\u0942>\ue042;" // VOWEL SIGN UU
"\u0943>\ue043;" // VOWEL SIGN VOCALIC R
"\u0944>\ue044;" // VOWEL SIGN VOCALIC RR
"\u0945>\ue045;" // VOWEL SIGN CANDRA E
//\u0946>\ue046; # UNMAPPED VOWEL SIGN SHORT E
"\u0947>\ue047;" // VOWEL SIGN E
"\u0948>\ue048;" // VOWEL SIGN AI
"\u0949>\ue049;" // VOWEL SIGN CANDRA O
//\u094a>\ue04a; # UNMAPPED VOWEL SIGN SHORT O
"\u094b>\ue04b;" // VOWEL SIGN O
"\u094c>\ue04c;" // VOWEL SIGN AU
"\u094d>\ue04d;" // SIGN VIRAMA
"\u0950>\ue050;" // OM
// \u0951>; # UNMAPPED STRESS SIGN UDATTA
// \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
// \u0953>; # UNMAPPED GRAVE ACCENT
// \u0954>; # UNMAPPED ACUTE ACCENT
// Precomposed nukta letters; same targets as the decomposed rules at the top.
"\u0958>\ue058;" // LETTER QA (For Urdu)
"\u0959>\ue059;" // LETTER KHHA (For Urdu)
"\u095a>\ue05a;" // LETTER GHHA (For Urdu)
"\u095b>\ue05b;" // LETTER ZA (For Urdu)
"\u095c>\ue05c;" // LETTER DDDHA (pronounced RRA)
"\u095d>\ue05d;" // LETTER RHA (pronounced RRHA)
"\u095e>\ue05e;" // LETTER FA
"\u095f>\ue05f;" // LETTER YYA
"\u0960>\ue060;" // LETTER VOCALIC RR
"\u0961>\ue061;" // LETTER VOCALIC LL
"\u0962>\ue062;" // VOWEL SIGN VOCALIC L
"\u0963>\ue063;" // VOWEL SIGN VOCALIC LL
// \u0964>; # UNMAPPED Devanagari-InterIndic: DANDA
// \u0965>; # UNMAPPED Devanagari-InterIndic: DOUBLE DANDA
"\u0966>\ue066;" // DIGIT ZERO
"\u0967>\ue067;" // DIGIT ONE
"\u0968>\ue068;" // DIGIT TWO
"\u0969>\ue069;" // DIGIT THREE
"\u096a>\ue06a;" // DIGIT FOUR
"\u096b>\ue06b;" // DIGIT FIVE
"\u096c>\ue06c;" // DIGIT SIX
"\u096d>\ue06d;" // DIGIT SEVEN
"\u096e>\ue06e;" // DIGIT EIGHT
"\u096f>\ue06f;" // DIGIT NINE
// \u0970>; # UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN
// :: NFC (NFD) ;
}
}

View File

@ -0,0 +1,287 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Fullwidth_Halfwidth.utf8.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Fullwidth_Halfwidth
// Resource bundle entry holding the Fullwidth <-> Halfwidth rules. Every rule
// uses "<>" and pairs one wide form with its narrow counterpart; ASCII
// punctuation and digits on the right-hand side are single-quoted.
// Trailing comments starting with "to" name the right-hand character, and
// those starting with "from" name the left-hand character.
// NOTE(review): in this copy many rules show an empty side (e.g. the ASCII
// rules start directly with "<>", and the LIGHT VERTICAL rule ends in "<>;")
// and the kana/hangul rules show visually identical characters on both sides.
// The original code points may have been normalized or dropped somewhere
// between the icu4j master and this text - confirm against the generator
// output before relying on these strings.
translit_Fullwidth_Halfwidth {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:41:57 2001
//--------------------------------------------------------------------
// Fullwidth-Halfwidth
// Mechanically generated from Unicode Character Database
// multicharacter
// (Voiced/semi-voiced katakana. These come before the single-character rules,
// presumably so the multi-character form is matched before its base letter.)
"ガ<>ガ;" // to KATAKANA LETTER GA
"ギ<>ギ;" // to KATAKANA LETTER GI
"グ<>グ;" // to KATAKANA LETTER GU
"ゲ<>ゲ;" // to KATAKANA LETTER GE
"ゴ<>ゴ;" // to KATAKANA LETTER GO
"ザ<>ザ;" // to KATAKANA LETTER ZA
"ジ<>ジ;" // to KATAKANA LETTER ZI
"ズ<>ズ;" // to KATAKANA LETTER ZU
"ゼ<>ゼ;" // to KATAKANA LETTER ZE
"ゾ<>ゾ;" // to KATAKANA LETTER ZO
"ダ<>ダ;" // to KATAKANA LETTER DA
"ヂ<>ヂ;" // to KATAKANA LETTER DI
"ヅ<>ヅ;" // to KATAKANA LETTER DU
"デ<>デ;" // to KATAKANA LETTER DE
"ド<>ド;" // to KATAKANA LETTER DO
"バ<>バ;" // to KATAKANA LETTER BA
"パ<>パ;" // to KATAKANA LETTER PA
"ビ<>ビ;" // to KATAKANA LETTER BI
"ピ<>ピ;" // to KATAKANA LETTER PI
"ブ<>ブ;" // to KATAKANA LETTER BU
"プ<>プ;" // to KATAKANA LETTER PU
"ベ<>ベ;" // to KATAKANA LETTER BE
"ペ<>ペ;" // to KATAKANA LETTER PE
"ボ<>ボ;" // to KATAKANA LETTER BO
"ポ<>ポ;" // to KATAKANA LETTER PO
"ヴ<>ヴ;" // to KATAKANA LETTER VU
"ヷ<>ヷ;" // to KATAKANA LETTER VA
"ヺ<>ヺ;" // to KATAKANA LETTER VO
// single character
// (ASCII range first: punctuation and digits are quoted, letters are not.)
"<>'!';" // from FULLWIDTH EXCLAMATION MARK
"<>'\"';" // from FULLWIDTH QUOTATION MARK
"<>'#';" // from FULLWIDTH NUMBER SIGN
"<>'$';" // from FULLWIDTH DOLLAR SIGN
"<>'%';" // from FULLWIDTH PERCENT SIGN
"<>'&';" // from FULLWIDTH AMPERSAND
"<>'';" // from FULLWIDTH APOSTROPHE
"<>'(';" // from FULLWIDTH LEFT PARENTHESIS
"<>')';" // from FULLWIDTH RIGHT PARENTHESIS
"<>'*';" // from FULLWIDTH ASTERISK
"<>'+';" // from FULLWIDTH PLUS SIGN
"<>',';" // from FULLWIDTH COMMA
"<>'-';" // from FULLWIDTH HYPHEN-MINUS
"<>'.';" // from FULLWIDTH FULL STOP
"<>'/';" // from FULLWIDTH SOLIDUS
"<>'0';" // from FULLWIDTH DIGIT ZERO
"<>'1';" // from FULLWIDTH DIGIT ONE
"<>'2';" // from FULLWIDTH DIGIT TWO
"<>'3';" // from FULLWIDTH DIGIT THREE
"<>'4';" // from FULLWIDTH DIGIT FOUR
"<>'5';" // from FULLWIDTH DIGIT FIVE
"<>'6';" // from FULLWIDTH DIGIT SIX
"<>'7';" // from FULLWIDTH DIGIT SEVEN
"<>'8';" // from FULLWIDTH DIGIT EIGHT
"<>'9';" // from FULLWIDTH DIGIT NINE
"<>':';" // from FULLWIDTH COLON
"<>';';" // from FULLWIDTH SEMICOLON
"<>'<';" // from FULLWIDTH LESS-THAN SIGN
"<>'=';" // from FULLWIDTH EQUALS SIGN
"<>'>';" // from FULLWIDTH GREATER-THAN SIGN
"<>'?';" // from FULLWIDTH QUESTION MARK
"<>'@';" // from FULLWIDTH COMMERCIAL AT
"<>A;" // from FULLWIDTH LATIN CAPITAL LETTER A
"<>B;" // from FULLWIDTH LATIN CAPITAL LETTER B
"<>C;" // from FULLWIDTH LATIN CAPITAL LETTER C
"<>D;" // from FULLWIDTH LATIN CAPITAL LETTER D
"<>E;" // from FULLWIDTH LATIN CAPITAL LETTER E
"<>F;" // from FULLWIDTH LATIN CAPITAL LETTER F
"<>G;" // from FULLWIDTH LATIN CAPITAL LETTER G
"<>H;" // from FULLWIDTH LATIN CAPITAL LETTER H
"<>I;" // from FULLWIDTH LATIN CAPITAL LETTER I
"<>J;" // from FULLWIDTH LATIN CAPITAL LETTER J
"<>K;" // from FULLWIDTH LATIN CAPITAL LETTER K
"<>L;" // from FULLWIDTH LATIN CAPITAL LETTER L
"<>M;" // from FULLWIDTH LATIN CAPITAL LETTER M
"<>N;" // from FULLWIDTH LATIN CAPITAL LETTER N
"<>O;" // from FULLWIDTH LATIN CAPITAL LETTER O
"<>P;" // from FULLWIDTH LATIN CAPITAL LETTER P
"<>Q;" // from FULLWIDTH LATIN CAPITAL LETTER Q
"<>R;" // from FULLWIDTH LATIN CAPITAL LETTER R
"<>S;" // from FULLWIDTH LATIN CAPITAL LETTER S
"<>T;" // from FULLWIDTH LATIN CAPITAL LETTER T
"<>U;" // from FULLWIDTH LATIN CAPITAL LETTER U
"<>V;" // from FULLWIDTH LATIN CAPITAL LETTER V
"<>W;" // from FULLWIDTH LATIN CAPITAL LETTER W
"<>X;" // from FULLWIDTH LATIN CAPITAL LETTER X
"<>Y;" // from FULLWIDTH LATIN CAPITAL LETTER Y
"<>Z;" // from FULLWIDTH LATIN CAPITAL LETTER Z
"<>'[';" // from FULLWIDTH LEFT SQUARE BRACKET
"<>'\\';" // from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
"<>']';" // from FULLWIDTH RIGHT SQUARE BRACKET
"<>'^';" // from FULLWIDTH CIRCUMFLEX ACCENT
"_<>'_';" // from FULLWIDTH LOW LINE
"<>'`';" // from FULLWIDTH GRAVE ACCENT
"<>a;" // from FULLWIDTH LATIN SMALL LETTER A
"<>b;" // from FULLWIDTH LATIN SMALL LETTER B
"<>c;" // from FULLWIDTH LATIN SMALL LETTER C
"<>d;" // from FULLWIDTH LATIN SMALL LETTER D
"<>e;" // from FULLWIDTH LATIN SMALL LETTER E
"<>f;" // from FULLWIDTH LATIN SMALL LETTER F
"<>g;" // from FULLWIDTH LATIN SMALL LETTER G
"<>h;" // from FULLWIDTH LATIN SMALL LETTER H
"<>i;" // from FULLWIDTH LATIN SMALL LETTER I
"<>j;" // from FULLWIDTH LATIN SMALL LETTER J
"<>k;" // from FULLWIDTH LATIN SMALL LETTER K
"<>l;" // from FULLWIDTH LATIN SMALL LETTER L
"<>m;" // from FULLWIDTH LATIN SMALL LETTER M
"<>n;" // from FULLWIDTH LATIN SMALL LETTER N
"<>o;" // from FULLWIDTH LATIN SMALL LETTER O
"<>p;" // from FULLWIDTH LATIN SMALL LETTER P
"<>q;" // from FULLWIDTH LATIN SMALL LETTER Q
"<>r;" // from FULLWIDTH LATIN SMALL LETTER R
"<>s;" // from FULLWIDTH LATIN SMALL LETTER S
"<>t;" // from FULLWIDTH LATIN SMALL LETTER T
"<>u;" // from FULLWIDTH LATIN SMALL LETTER U
"<>v;" // from FULLWIDTH LATIN SMALL LETTER V
"<>w;" // from FULLWIDTH LATIN SMALL LETTER W
"<>x;" // from FULLWIDTH LATIN SMALL LETTER X
"<>y;" // from FULLWIDTH LATIN SMALL LETTER Y
"<>z;" // from FULLWIDTH LATIN SMALL LETTER Z
"<>'{';" // from FULLWIDTH LEFT CURLY BRACKET
"<>'|';" // from FULLWIDTH VERTICAL LINE
"<>'}';" // from FULLWIDTH RIGHT CURLY BRACKET
"<>'~';" // from FULLWIDTH TILDE
// CJK punctuation and unvoiced katakana.
"。<>。;" // to HALFWIDTH IDEOGRAPHIC FULL STOP
"「<>「;" // to HALFWIDTH LEFT CORNER BRACKET
"」<>」;" // to HALFWIDTH RIGHT CORNER BRACKET
"、<>、;" // to HALFWIDTH IDEOGRAPHIC COMMA
"・<>・;" // to HALFWIDTH KATAKANA MIDDLE DOT
"ヲ<>ヲ;" // to HALFWIDTH KATAKANA LETTER WO
"ァ<>ァ;" // to HALFWIDTH KATAKANA LETTER SMALL A
"ィ<>ィ;" // to HALFWIDTH KATAKANA LETTER SMALL I
"ゥ<>ゥ;" // to HALFWIDTH KATAKANA LETTER SMALL U
"ェ<>ェ;" // to HALFWIDTH KATAKANA LETTER SMALL E
"ォ<>ォ;" // to HALFWIDTH KATAKANA LETTER SMALL O
"ャ<>ャ;" // to HALFWIDTH KATAKANA LETTER SMALL YA
"ュ<>ュ;" // to HALFWIDTH KATAKANA LETTER SMALL YU
"ョ<>ョ;" // to HALFWIDTH KATAKANA LETTER SMALL YO
"ッ<>ッ;" // to HALFWIDTH KATAKANA LETTER SMALL TU
"ー<>ー;" // to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
"ア<>ア;" // to HALFWIDTH KATAKANA LETTER A
"イ<>イ;" // to HALFWIDTH KATAKANA LETTER I
"ウ<>ウ;" // to HALFWIDTH KATAKANA LETTER U
"エ<>エ;" // to HALFWIDTH KATAKANA LETTER E
"オ<>オ;" // to HALFWIDTH KATAKANA LETTER O
"カ<>カ;" // to HALFWIDTH KATAKANA LETTER KA
"キ<>キ;" // to HALFWIDTH KATAKANA LETTER KI
"ク<>ク;" // to HALFWIDTH KATAKANA LETTER KU
"ケ<>ケ;" // to HALFWIDTH KATAKANA LETTER KE
"コ<>コ;" // to HALFWIDTH KATAKANA LETTER KO
"サ<>サ;" // to HALFWIDTH KATAKANA LETTER SA
"シ<>シ;" // to HALFWIDTH KATAKANA LETTER SI
"ス<>ス;" // to HALFWIDTH KATAKANA LETTER SU
"セ<>セ;" // to HALFWIDTH KATAKANA LETTER SE
"ソ<>ソ;" // to HALFWIDTH KATAKANA LETTER SO
"タ<>タ;" // to HALFWIDTH KATAKANA LETTER TA
"チ<>チ;" // to HALFWIDTH KATAKANA LETTER TI
"ツ<>ツ;" // to HALFWIDTH KATAKANA LETTER TU
"テ<>テ;" // to HALFWIDTH KATAKANA LETTER TE
"ト<>ト;" // to HALFWIDTH KATAKANA LETTER TO
"ナ<>ナ;" // to HALFWIDTH KATAKANA LETTER NA
"ニ<>ニ;" // to HALFWIDTH KATAKANA LETTER NI
"ヌ<>ヌ;" // to HALFWIDTH KATAKANA LETTER NU
"ネ<>ネ;" // to HALFWIDTH KATAKANA LETTER NE
"<>ノ;" // to HALFWIDTH KATAKANA LETTER NO
"ハ<>ハ;" // to HALFWIDTH KATAKANA LETTER HA
"ヒ<>ヒ;" // to HALFWIDTH KATAKANA LETTER HI
"フ<>フ;" // to HALFWIDTH KATAKANA LETTER HU
"ヘ<>ヘ;" // to HALFWIDTH KATAKANA LETTER HE
"ホ<>ホ;" // to HALFWIDTH KATAKANA LETTER HO
"マ<>マ;" // to HALFWIDTH KATAKANA LETTER MA
"ミ<>ミ;" // to HALFWIDTH KATAKANA LETTER MI
"ム<>ム;" // to HALFWIDTH KATAKANA LETTER MU
"メ<>メ;" // to HALFWIDTH KATAKANA LETTER ME
"モ<>モ;" // to HALFWIDTH KATAKANA LETTER MO
"ヤ<>ヤ;" // to HALFWIDTH KATAKANA LETTER YA
"ユ<>ユ;" // to HALFWIDTH KATAKANA LETTER YU
"ヨ<>ヨ;" // to HALFWIDTH KATAKANA LETTER YO
"ラ<>ラ;" // to HALFWIDTH KATAKANA LETTER RA
"リ<>リ;" // to HALFWIDTH KATAKANA LETTER RI
"ル<>ル;" // to HALFWIDTH KATAKANA LETTER RU
"レ<>レ;" // to HALFWIDTH KATAKANA LETTER RE
"ロ<>ロ;" // to HALFWIDTH KATAKANA LETTER RO
"ワ<>ワ;" // to HALFWIDTH KATAKANA LETTER WA
"ン<>ン;" // to HALFWIDTH KATAKANA LETTER N
"゙<>゙;" // to HALFWIDTH KATAKANA VOICED SOUND MARK
"゚<>゚;" // to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
// Hangul letters.
"<>;" // to HALFWIDTH HANGUL FILLER
"ᄀ<>ᄀ;" // to HALFWIDTH HANGUL LETTER KIYEOK
"ᄁ<>ᄁ;" // to HALFWIDTH HANGUL LETTER SSANGKIYEOK
"ᆪ<>ᆪ;" // to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
"ᄂ<>ᄂ;" // to HALFWIDTH HANGUL LETTER NIEUN
"ᆬ<>ᆬ;" // to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
"ᆭ<>ᆭ;" // to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
"ᄃ<>ᄃ;" // to HALFWIDTH HANGUL LETTER TIKEUT
"ᄄ<>ᄄ;" // to HALFWIDTH HANGUL LETTER SSANGTIKEUT
"ᄅ<>ᄅ;" // to HALFWIDTH HANGUL LETTER RIEUL
"ᆰ<>ᆰ;" // to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
"ᆱ<>ᆱ;" // to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
"ᆲ<>ᆲ;" // to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
"ᆳ<>ᆳ;" // to HALFWIDTH HANGUL LETTER RIEUL-SIOS
"ᆴ<>ᆴ;" // to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
"ᆵ<>ᆵ;" // to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
"ᄚ<>ᄚ;" // to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
"ᄆ<>ᄆ;" // to HALFWIDTH HANGUL LETTER MIEUM
"ᄇ<>ᄇ;" // to HALFWIDTH HANGUL LETTER PIEUP
"ᄈ<>ᄈ;" // to HALFWIDTH HANGUL LETTER SSANGPIEUP
"ᄡ<>ᄡ;" // to HALFWIDTH HANGUL LETTER PIEUP-SIOS
"ᄉ<>ᄉ;" // to HALFWIDTH HANGUL LETTER SIOS
"ᄊ<>ᄊ;" // to HALFWIDTH HANGUL LETTER SSANGSIOS
"ᄋ<>ᄋ;" // to HALFWIDTH HANGUL LETTER IEUNG
"ᄌ<>ᄌ;" // to HALFWIDTH HANGUL LETTER CIEUC
"ᄍ<>ᄍ;" // to HALFWIDTH HANGUL LETTER SSANGCIEUC
"ᄎ<>ᄎ;" // to HALFWIDTH HANGUL LETTER CHIEUCH
"ᄏ<>ᄏ;" // to HALFWIDTH HANGUL LETTER KHIEUKH
"ᄐ<>ᄐ;" // to HALFWIDTH HANGUL LETTER THIEUTH
"ᄑ<>ᄑ;" // to HALFWIDTH HANGUL LETTER PHIEUPH
"ᄒ<>ᄒ;" // to HALFWIDTH HANGUL LETTER HIEUH
"ᅡ<>ᅡ;" // to HALFWIDTH HANGUL LETTER A
"ᅢ<>ᅢ;" // to HALFWIDTH HANGUL LETTER AE
"ᅣ<>ᅣ;" // to HALFWIDTH HANGUL LETTER YA
"ᅤ<>ᅤ;" // to HALFWIDTH HANGUL LETTER YAE
"ᅥ<>ᅥ;" // to HALFWIDTH HANGUL LETTER EO
"ᅦ<>ᅦ;" // to HALFWIDTH HANGUL LETTER E
"ᅧ<>ᅧ;" // to HALFWIDTH HANGUL LETTER YEO
"ᅨ<>ᅨ;" // to HALFWIDTH HANGUL LETTER YE
"ᅩ<>ᅩ;" // to HALFWIDTH HANGUL LETTER O
"ᅪ<>ᅪ;" // to HALFWIDTH HANGUL LETTER WA
"ᅫ<>ᅫ;" // to HALFWIDTH HANGUL LETTER WAE
"ᅬ<>ᅬ;" // to HALFWIDTH HANGUL LETTER OE
"ᅭ<>ᅭ;" // to HALFWIDTH HANGUL LETTER YO
"ᅮ<>ᅮ;" // to HALFWIDTH HANGUL LETTER U
"ᅯ<>ᅯ;" // to HALFWIDTH HANGUL LETTER WEO
"ᅰ<>ᅰ;" // to HALFWIDTH HANGUL LETTER WE
"ᅱ<>ᅱ;" // to HALFWIDTH HANGUL LETTER WI
"ᅲ<>ᅲ;" // to HALFWIDTH HANGUL LETTER YU
"ᅳ<>ᅳ;" // to HALFWIDTH HANGUL LETTER EU
"ᅴ<>ᅴ;" // to HALFWIDTH HANGUL LETTER YI
"ᅵ<>ᅵ;" // to HALFWIDTH HANGUL LETTER I
// Currency signs, symbols, arrows and shapes.
"¢<>'¢';" // from FULLWIDTH CENT SIGN
"£<>'£';" // from FULLWIDTH POUND SIGN
"¬<>'¬';" // from FULLWIDTH NOT SIGN
" ̄<>' '̄;" // from FULLWIDTH MACRON
"' '<>' ';" // ideographic space (place this after MACRON)
"¦<>'¦';" // from FULLWIDTH BROKEN BAR
"¥<>'¥';" // from FULLWIDTH YEN SIGN
"₩<>₩;" // from FULLWIDTH WON SIGN
"│<>;" // to HALFWIDTH FORMS LIGHT VERTICAL
"←<>←;" // to HALFWIDTH LEFTWARDS ARROW
"↑<>↑;" // to HALFWIDTH UPWARDS ARROW
"→<>→;" // to HALFWIDTH RIGHTWARDS ARROW
"↓<>↓;" // to HALFWIDTH DOWNWARDS ARROW
"■<>■;" // to HALFWIDTH BLACK SQUARE
"○<>○;" // to HALFWIDTH WHITE CIRCLE
// eof
}
}

View File

@ -0,0 +1,283 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Greek_Latin.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Greek_Latin
translit_Greek_Latin {
Rule {
// Greek <-> Latin transliteration rules.
// Reading guide (per ICU transliterator rule syntax): ">" rules apply in the
// forward (Greek to Latin) direction, "<" rules in the reverse direction and
// "<>" rules in both. The sections below are, in order: variables,
// punctuation, circumflex, iota subscript, rough/smooth breathing, the
// letter-by-letter mappings, and completeness rules for ASCII and for
// Greek symbol variants.
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// $Source: /xsrl/Nsvn/icu/icu/source/data/translit/Attic/t_Grek_Latn.txt,v $
// $Date: 2001/10/26 05:41:16 $
// $Revision: 1.1 $
//--------------------------------------------------------------------
// Rules are predicated on running NFD first, and NFC afterwards
"::NFD (NFC) ;"
// TEST CASES
// Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
// ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
// ᾳ ῃ ῳ ὃ ὄ
// ὠς ὡς ὢς ὣς
// Ὠς Ὡς Ὢς Ὣς
// ὨΣ ὩΣ ὪΣ ὫΣ
// Ạ, ạ, Ẹ, ẹ, Ọ, ọ
// Useful variables
"$lower = [:Ll:] ;"
"$upper = [:Lu:] ;"
"$accent = [:M:] ;"
"$macron = \u0304 ;"
"$ddot = \u0308 ;"
"$lcgvowel = [αεηιουω] ;"
"$ucgvowel = [ΑΕΗΙΟΥΩ] ;"
"$gvowel = [$lcgvowel $ucgvowel] ;"
"$lcgvowelC = [$lcgvowel $accent] ;"
"$vowel = [ AEIOUaeiou $gvowel] ;"
"$beforeLower = $accent * $lower ;"
"$gammaLike = [ΓΚΞΧγκξχ] ;"
"$smooth = ̓ ;"
"$rough = ̔ ;"
"$iotasub = ͅ ;"
// Fix punctuation
"\; <> \? ;"
"· <> \: ;"
// CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
"\u0342 <> \u0302 ;"
// IOTA: convert iota subscript to iota
// first make previous alpha long!
"Α } $accent * $iotasub > A $macron ;"
"α } $accent * $iotasub > a $macron ;"
// now convert to uppercase if after uppercase, otherwise to lowercase
"$upper $accent * { $iotasub > I ;"
"$iotasub > i ;"
"| $1 $iotasub < ([:L:] $macron [:M:]*) i ;"
// BREATHING
// Convert rough breathing to h, and move before letters.
// Make A ` x = > H a x
"Α $rough } $beforeLower > H | α ;"
"Ε $rough } $beforeLower > H | ε;"
"Η $rough } $beforeLower > H | η ;"
"Ι ($ddot?) $rough } $beforeLower > H | ι $1;"
"Ο $rough } $beforeLower > H | ο ;"
"Υ $rough } $beforeLower > H | υ ;"
"Ω ($ddot?) $rough } $beforeLower > H | ω $1;"
// Make A x ` = > H a x
"Α ($lower) $rough > H | α $1 ;"
"Ε ($lower) $rough > H | ε $1 ;"
"Η ($lower) $rough > H | η $1 ;"
"Ι ($lower $ddot?) $rough > H | ι $1 ;"
"Ο ($lower) $rough > H | ο $1 ;"
"Υ ($lower) $rough > H | υ $1 ;"
"Ω ($lower $ddot?) $rough > H | ω $1 ;"
//Otherwise, make x ` into h x and X ` into H X
"($lcgvowel + $ddot? ) $rough > h | $1 ;"
"($gvowel + $ddot? ) $rough > H | $1 ;"
// Go backwards with H
"| $1 $rough < h ([aeiouyAEIOUY] $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
"| $1 $rough < h ([aeiouyAEIOUY] $macron? $ddot?) ;"
"| $1 $rough < H ([AEIOUY] $macron? $ddot?[aeiouyAEIOUY] $macron?) ;"
"| $1 $rough < H ([AEIOUY] $macron? $ddot?) ;"
// titlecase, have to fix individually
"| $1 $rough < H (a $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
"| $1 $rough < H (e $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
"| $1 $rough < H (i $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
"| $1 $rough < H (o $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
"| $1 $rough < H (u $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
// NOTE(review): unlike the a/e/i/o/u rules above and the single-vowel y rule
// below, this y rule has no "$ddot?" -- confirm that is intended.
"| $1 $rough < H (y $macron? [aeiouyAEIOUY] $macron?) ;"
"| $1 $rough < H (a $macron? $ddot? ) ;"
"| $1 $rough < H (e $macron? $ddot? ) ;"
"| $1 $rough < H (i $macron? $ddot? ) ;"
"| $1 $rough < H (o $macron? $ddot? ) ;"
"| $1 $rough < H (u $macron? $ddot? ) ;"
"| $1 $rough < H (y $macron? $ddot? ) ;"
// Now do smooth
//delete smooth breathing for Latin
"$smooth > ;"
// insert in Greek
"| $1 $smooth < [:^L:] { ([aeiouyAEIOUY] $macron? [aeiouyAEIOUY] $macron?) } [^[$smooth]] ;"
"| $1 $smooth < [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] ;"
// TODO: preserve smooth/rough breathing if not
// on initial vowel sequence
// need to have these up here so the rules don't mask
"η <> e $macron ;"
"Η <> E $macron ;"
"φ <> ph ;"
"Ψ } $beforeLower <> Ps ;"
"Ψ <> PS ;"
"Φ } $beforeLower <> Ph ;"
"Φ <> PH ;"
"ψ <> ps ;"
"ω <> o $macron ;"
"Ω <> O $macron;"
// NORMAL
"α <> a ;"
"Α <> A ;"
"β <> b ;"
"Β <> B ;"
"γ } $gammaLike <> n } [gkc] ;"
"γ <> g ;"
"Γ } $gammaLike <> N } [gkc] ;"
"Γ <> G ;"
"δ <> d ;"
"Δ <> D ;"
"ε <> e ;"
"Ε <> E ;"
"ζ <> z ;"
"Ζ <> Z ;"
"θ <> th ;"
"Θ } $beforeLower <> Th ;"
"Θ <> TH ;"
"ι <> i ;"
"Ι <> I ;"
"κ <> k ;"
"Κ <> K ;"
"λ <> l ;"
"Λ <> L ;"
"μ <> m ;"
"Μ <> M ;"
// NOTE(review): the lowercase rule below is one-way (">") while its uppercase
// counterpart two lines further down is two-way ("<>") -- confirm the
// asymmetry is intended.
"ν } $gammaLike > n\' ;"
"ν <> n ;"
"Ν } $gammaLike <> N\' ;"
"Ν <> N ;"
"ξ <> x ;"
"Ξ <> X ;"
"ο <> o ;"
"Ο <> O ;"
"π <> p ;"
"Π <> P ;"
"ρ $rough <> rh;"
"Ρ $rough } $beforeLower <> Rh ;"
"Ρ $rough <> RH ;"
"ρ <> r ;"
"Ρ <> R ;"
"[Pp] {ς > \'s ;"
"[Pp] {σ > \'s ;"
"σ < [:^L:] [:M:]* { s } [:^L:] ;"
"ς <> s } [:^L:] ;"
"σ <> s ;"
"[Pp] { Σ <> \'S ;"
"Σ <> S ;"
"τ <> t ;"
"Τ <> T ;"
"$vowel {υ } <> u ;"
"υ <> y ;"
"$vowel { Υ <> U ;"
"Υ <> Y ;"
"χ <> ch ;"
"Χ } $beforeLower <> Ch ;"
"Χ <> CH ;"
// Completeness for ASCII
// (reverse-only rules: Latin letters that no Greek letter produces are
// rewritten to a letter or digraph that the rules above do handle)
"$ignore = [[:Mark:]''] * ;"
"| k < c ;"
"| ph < f ;"
"| i < j ;"
"| k < q ;"
"| u < v ;"
"| u < w ;"
"| K < C ;"
"| PH < F } $ignore [:UppercaseLetter:] ;"
"| PH < [:UppercaseLetter:] $ignore { F ;"
"| PH < F ;"
"| I < J ;"
"| K < Q ;"
"| U < V ;"
"| U < W ;"
"$rough } $ignore [:UppercaseLetter:] > H ;"
"$ignore [:UppercaseLetter:] { $rough > H ;"
"$rough < H ;"
"$rough <> h ;"
// Completeness for Greek
// (forward-only rules: Greek symbol/variant letter forms are rewritten to
// the basic letter, which the rules above then transliterate)
"ϐ > | β ;"
"ϑ > | θ ;"
"ϒ > | Υ ;"
"ϕ > | φ ;"
"ϖ > | π ;"
"ϰ > | κ ;"
"ϱ > | ρ ;"
"ϲ > | σ ;"
"ϳ > j ;"
"ϴ > | Θ ;"
"ϵ > | ε ;"
"ͺ > i;"
"::NFC (NFD) ;"
}
}

View File

@ -0,0 +1,115 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Gujarati_InterIndic.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Gujarati_InterIndic
translit_Gujarati_InterIndic {
Rule {
// One-way (">") rules from Gujarati code points to InterIndic private-use
// code points. Every target below is U+E000 plus the low byte of the source
// minus 0x80 (e.g. \u0a81 > \ue001, \u0aef > \ue06f). Entries left as
// "UNMAPPED" comments have no rule, and the NFD/NFC directives at the top
// and bottom are commented out.
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Gujarati_InterIndic.utf8.txt
// Date: Thu Mar 1 20:03:54 2001
//--------------------------------------------------------------------
// Gujarati_InterIndic
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:41:58 2001
//--------------------------------------------------------------------
// Gujarati-InterIndic
//:: NFD (NFC) ;
"\u0a81>\ue001;" // SIGN CANDRABINDU
"\u0a82>\ue002;" // SIGN ANUSVARA
"\u0a83>\ue003;" // SIGN VISARGA
"\u0a85>\ue005;" // LETTER A
"\u0a86>\ue006;" // LETTER AA
"\u0a87>\ue007;" // LETTER I
"\u0a88>\ue008;" // LETTER II
"\u0a89>\ue009;" // LETTER U
"\u0a8a>\ue00a;" // LETTER UU
"\u0a8b>\ue00b;" // LETTER VOCALIC R
// \u0a8d>; # UNMAPPED Gujarati-InterIndic: VOWEL CANDRA E
"\u0a8f>\ue00f;" // LETTER E
"\u0a90>\ue010;" // LETTER AI
// \u0a91>; # UNMAPPED Gujarati-InterIndic: VOWEL CANDRA O
"\u0a93>\ue013;" // LETTER O
"\u0a94>\ue014;" // LETTER AU
"\u0a95>\ue015;" // LETTER KA
"\u0a96>\ue016;" // LETTER KHA
"\u0a97>\ue017;" // LETTER GA
"\u0a98>\ue018;" // LETTER GHA
"\u0a99>\ue019;" // LETTER NGA
"\u0a9a>\ue01a;" // LETTER CA
"\u0a9b>\ue01b;" // LETTER CHA
"\u0a9c>\ue01c;" // LETTER JA
"\u0a9d>\ue01d;" // LETTER JHA
"\u0a9e>\ue01e;" // LETTER NYA
"\u0a9f>\ue01f;" // LETTER TTA
"\u0aa0>\ue020;" // LETTER TTHA
"\u0aa1>\ue021;" // LETTER DDA
"\u0aa2>\ue022;" // LETTER DDHA
"\u0aa3>\ue023;" // LETTER NNA
"\u0aa4>\ue024;" // LETTER TA
"\u0aa5>\ue025;" // LETTER THA
"\u0aa6>\ue026;" // LETTER DA
"\u0aa7>\ue027;" // LETTER DHA
"\u0aa8>\ue028;" // LETTER NA
"\u0aaa>\ue02a;" // LETTER PA
"\u0aab>\ue02b;" // LETTER PHA
"\u0aac>\ue02c;" // LETTER BA
"\u0aad>\ue02d;" // LETTER BHA
"\u0aae>\ue02e;" // LETTER MA
"\u0aaf>\ue02f;" // LETTER YA
"\u0ab0>\ue030;" // LETTER RA
"\u0ab2>\ue032;" // LETTER LA
"\u0ab3>\ue033;" // LETTER LLA
"\u0ab5>\ue035;" // LETTER VA
"\u0ab6>\ue036;" // LETTER SHA
"\u0ab7>\ue037;" // LETTER SSA
"\u0ab8>\ue038;" // LETTER SA
"\u0ab9>\ue039;" // LETTER HA
"\u0abc>\ue03c;" // SIGN NUKTA
"\u0abd>\ue03d;" // SIGN AVAGRAHA
"\u0abe>\ue03e;" // VOWEL SIGN AA
"\u0abf>\ue03f;" // VOWEL SIGN I
"\u0ac0>\ue040;" // VOWEL SIGN II
"\u0ac1>\ue041;" // VOWEL SIGN U
"\u0ac2>\ue042;" // VOWEL SIGN UU
"\u0ac3>\ue043;" // VOWEL SIGN VOCALIC R
"\u0ac4>\ue044;" // VOWEL SIGN VOCALIC RR
"\u0ac5>\ue045;" // VOWEL SIGN CANDRA E
"\u0ac7>\ue047;" // VOWEL SIGN E
"\u0ac8>\ue048;" // VOWEL SIGN AI
"\u0ac9>\ue049;" // VOWEL SIGN CANDRA O
"\u0acb>\ue04b;" // VOWEL SIGN O
"\u0acc>\ue04c;" // VOWEL SIGN AU
"\u0acd>\ue04d;" // SIGN VIRAMA
"\u0ad0>\ue050;" // OM
"\u0ae0>\ue060;" // LETTER VOCALIC RR
"\u0ae6>\ue066;" // DIGIT ZERO
"\u0ae7>\ue067;" // DIGIT ONE
"\u0ae8>\ue068;" // DIGIT TWO
"\u0ae9>\ue069;" // DIGIT THREE
"\u0aea>\ue06a;" // DIGIT FOUR
"\u0aeb>\ue06b;" // DIGIT FIVE
"\u0aec>\ue06c;" // DIGIT SIX
"\u0aed>\ue06d;" // DIGIT SEVEN
"\u0aee>\ue06e;" // DIGIT EIGHT
"\u0aef>\ue06f;" // DIGIT NINE
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,112 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Gurmukhi_InterIndic.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Gurmukhi_InterIndic
translit_Gurmukhi_InterIndic {
Rule {
// One-way (">") rules from Gurmukhi code points to InterIndic private-use
// code points. Apart from the first rule (a REMAP: \u0a02 > \ue001), every
// target below is U+E000 plus the low byte of the source (e.g.
// \u0a05 > \ue005, \u0a6f > \ue06f). Entries left as "UNMAPPED" comments
// have no rule, and the NFD/NFC directives at the top and bottom are
// commented out.
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Gurmukhi_InterIndic.utf8.txt
// Date: Thu Mar 1 20:03:54 2001
//--------------------------------------------------------------------
// Gurmukhi_InterIndic
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:41:58 2001
//--------------------------------------------------------------------
// Gurmukhi-InterIndic
//:: NFD (NFC) ;
"\u0a02>\ue001;" // REMAP (indicExceptions.txt): \u0a01>\u0a02 = SIGN CANDRABINDU>SIGN BINDI
"\u0a05>\ue005;" // LETTER A
"\u0a06>\ue006;" // LETTER AA
"\u0a07>\ue007;" // LETTER I
"\u0a08>\ue008;" // LETTER II
"\u0a09>\ue009;" // LETTER U
"\u0a0a>\ue00a;" // LETTER UU
"\u0a0f>\ue00f;" // LETTER EE
"\u0a10>\ue010;" // LETTER AI
"\u0a13>\ue013;" // LETTER OO
"\u0a14>\ue014;" // LETTER AU
"\u0a15>\ue015;" // LETTER KA
"\u0a16>\ue016;" // LETTER KHA
"\u0a17>\ue017;" // LETTER GA
"\u0a18>\ue018;" // LETTER GHA
"\u0a19>\ue019;" // LETTER NGA
"\u0a1a>\ue01a;" // LETTER CA
"\u0a1b>\ue01b;" // LETTER CHA
"\u0a1c>\ue01c;" // LETTER JA
"\u0a1d>\ue01d;" // LETTER JHA
"\u0a1e>\ue01e;" // LETTER NYA
"\u0a1f>\ue01f;" // LETTER TTA
"\u0a20>\ue020;" // LETTER TTHA
"\u0a21>\ue021;" // LETTER DDA
"\u0a22>\ue022;" // LETTER DDHA
"\u0a23>\ue023;" // LETTER NNA
"\u0a24>\ue024;" // LETTER TA
"\u0a25>\ue025;" // LETTER THA
"\u0a26>\ue026;" // LETTER DA
"\u0a27>\ue027;" // LETTER DHA
"\u0a28>\ue028;" // LETTER NA
"\u0a2a>\ue02a;" // LETTER PA
"\u0a2b>\ue02b;" // LETTER PHA
"\u0a2c>\ue02c;" // LETTER BA
"\u0a2d>\ue02d;" // LETTER BHA
"\u0a2e>\ue02e;" // LETTER MA
"\u0a2f>\ue02f;" // LETTER YA
"\u0a30>\ue030;" // LETTER RA
"\u0a32>\ue032;" // LETTER LA
"\u0a33>\ue033;" // LETTER LLA
"\u0a35>\ue035;" // LETTER VA
"\u0a36>\ue036;" // LETTER SHA
"\u0a38>\ue038;" // LETTER SA
"\u0a39>\ue039;" // LETTER HA
"\u0a3c>\ue03c;" // SIGN NUKTA
"\u0a3e>\ue03e;" // VOWEL SIGN AA
"\u0a3f>\ue03f;" // VOWEL SIGN I
"\u0a40>\ue040;" // VOWEL SIGN II
"\u0a41>\ue041;" // VOWEL SIGN U
"\u0a42>\ue042;" // VOWEL SIGN UU
"\u0a47>\ue047;" // VOWEL SIGN EE
"\u0a48>\ue048;" // VOWEL SIGN AI
"\u0a4b>\ue04b;" // VOWEL SIGN OO
"\u0a4c>\ue04c;" // VOWEL SIGN AU
"\u0a4d>\ue04d;" // SIGN VIRAMA
"\u0a59>\ue059;" // LETTER KHHA
"\u0a5a>\ue05a;" // LETTER GHHA
"\u0a5b>\ue05b;" // LETTER ZA
"\u0a5c>\ue05c;" // LETTER RRA
"\u0a5e>\ue05e;" // LETTER FA
"\u0a66>\ue066;" // DIGIT ZERO
"\u0a67>\ue067;" // DIGIT ONE
"\u0a68>\ue068;" // DIGIT TWO
"\u0a69>\ue069;" // DIGIT THREE
"\u0a6a>\ue06a;" // DIGIT FOUR
"\u0a6b>\ue06b;" // DIGIT FIVE
"\u0a6c>\ue06c;" // DIGIT SIX
"\u0a6d>\ue06d;" // DIGIT SEVEN
"\u0a6e>\ue06e;" // DIGIT EIGHT
"\u0a6f>\ue06f;" // DIGIT NINE
// \u0a70>; # UNMAPPED Gurmukhi-InterIndic: TIPPI
// \u0a71>; # UNMAPPED Gurmukhi-InterIndic: ADDAK
// \u0a72>; # UNMAPPED Gurmukhi-InterIndic: IRI
// \u0a73>; # UNMAPPED Gurmukhi-InterIndic: URA
// \u0a74>; # UNMAPPED Gurmukhi-InterIndic: EK ONKAR
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,217 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Hiragana_Katakana.utf8.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Hiragana_Katakana
translit_Hiragana_Katakana {
Rule {
// Hiragana <-> Katakana rules. "<>" rules apply in both directions; the
// "<" rules near the end apply only when converting Katakana to Hiragana.
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 2001
//--------------------------------------------------------------------
// Hiragana-Katakana
// This is largely a one-to-one mapping, but it has a
// few kinks:
// 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
// Hiragana equivalents. We use Hiragana wa/wi/we/wo
// (308F-3092) with a voicing mark (3099), which is
// semantically equivalent. However, this is a non-
// roundtripping transformation.
// 2. The Katakana small ka/ke (30F5,30F6) have no
// Hiragana equivalents. We convert them to normal
// Hiragana ka/ke (304B,3051). This is a one-way
// information-losing transformation and precludes
// round-tripping of 30F5 and 30F6.
// 3. The combining marks 3099-309C are in the Hiragana
// block, but they apply to Katakana as well, so we
// leave them untouched.
// 4. The Katakana prolonged sound mark 30FC doubles the
// preceding vowel. This is a one-way information-
// losing transformation from Katakana to Hiragana.
// 5. The Katakana middle dot separates words in foreign
// expressions; we leave this unmodified.
// The above points preclude successful round-trip
// transformations of arbitrary input text. However,
// they provide naturalistic results that should conform
// to user expectations.
// Combining equivalents va/vi/ve/vo
"わ゙ <> ヷ;"
"ゐ゙ <> ヸ;"
"ゑ゙ <> ヹ;"
"を゙ <> ヺ;"
// One-to-one mappings, main block
// 3041:3094 <> 30A1:30F4
// 309D,E <> 30FD,E
"ぁ <> ァ;"
"あ <> ア;"
"ぃ <> ィ;"
"い <> イ;"
"ぅ <> ゥ;"
"う <> ウ;"
"ぇ <> ェ;"
"え <> エ;"
"ぉ <> ォ;"
"お <> オ;"
"か <> カ;"
"が <> ガ;"
"き <> キ;"
"ぎ <> ギ;"
"く <> ク;"
"ぐ <> グ;"
"け <> ケ;"
"げ <> ゲ;"
"こ <> コ;"
"ご <> ゴ;"
"さ <> サ;"
"ざ <> ザ;"
"し <> シ;"
"じ <> ジ;"
"す <> ス;"
"ず <> ズ;"
"せ <> セ;"
"ぜ <> ゼ;"
"そ <> ソ;"
"ぞ <> ゾ;"
"た <> タ;"
"だ <> ダ;"
"ち <> チ;"
"ぢ <> ヂ;"
"っ <> ッ;"
"つ <> ツ;"
"づ <> ヅ;"
"て <> テ;"
"で <> デ;"
"と <> ト;"
"ど <> ド;"
"な <> ナ;"
"に <> ニ;"
"ぬ <> ヌ;"
"ね <> ネ;"
// Hiragana NO (306E) pairs with Katakana NO (30CE); the Katakana side must
// not be empty, or the forward rule deletes the character.
"の <> ノ;"
"は <> ハ;"
"ば <> バ;"
"ぱ <> パ;"
"ひ <> ヒ;"
"び <> ビ;"
"ぴ <> ピ;"
"ふ <> フ;"
"ぶ <> ブ;"
"ぷ <> プ;"
"へ <> ヘ;"
"べ <> ベ;"
"ぺ <> ペ;"
"ほ <> ホ;"
"ぼ <> ボ;"
"ぽ <> ポ;"
"ま <> マ;"
"み <> ミ;"
"む <> ム;"
"め <> メ;"
"も <> モ;"
"ゃ <> ャ;"
"や <> ヤ;"
"ゅ <> ュ;"
"ゆ <> ユ;"
"ょ <> ョ;"
"よ <> ヨ;"
"ら <> ラ;"
"り <> リ;"
"る <> ル;"
"れ <> レ;"
"ろ <> ロ;"
"ゎ <> ヮ;"
"わ <> ワ;"
"ゐ <> ヰ;"
"ゑ <> ヱ;"
"を <> ヲ;"
"ん <> ン;"
"ゔ <> ヴ;"
"ゝ <> ヽ;"
"ゞ <> ヾ;"
// One-way Katakana-Hiragana xform of small K ka/ke to
// normal H ka/ke.
"か < ヵ;"
"け < ヶ;"
// Katakana followed by a prolonged sound mark 30FC has
// its final vowel doubled. This is a Katakana-Hiragana
// one-way information-losing transformation. We
// include the small Katakana (e.g., small A 3041) and
// do not distinguish them from their large
// counterparts. It doesn't make sense to double a
// small counterpart vowel as a small Hiragana vowel, so
// we don't do so. In natural text this should never
// occur anyway. If a 30FC is seen without a preceding
// vowel sound (e.g., after n 30F3) we do not change it.
//## $long = ー;
// The following categories are Hiragana, not Katakana
// as might be expected, since by the time we get to the
// 30FC, the preceding character will have already been
// transformed to Hiragana.
// {The following mechanically generated from the
// Unicode 3.0 data:}
"$xa = ["
"ぁ あ か が さ ざ"
"た だ な は ば ぱ"
"ま ゃ や ら ゎ わ"
"];"
"$xi = ["
"ぃ い き ぎ し じ"
"ち ぢ に ひ び ぴ"
"み り ゐ"
"];"
"$xu = ["
"ぅ う く ぐ す ず"
"っ つ づ ぬ ふ ぶ"
"ぷ む ゅ ゆ る ゔ"
"];"
"$xe = ["
"ぇ え け げ せ ぜ"
"て で ね へ べ ぺ"
"め れ ゑ"
"];"
"$xo = ["
"ぉ お こ ご そ ぞ"
"と ど の ほ ぼ ぽ"
"も ょ よ ろ を"
"];"
"あ < $xa {ー};"
"い < $xi {ー};"
"う < $xu {ー};"
"え < $xe {ー};"
"お < $xo {ー};"
// eof
}
}

View File

@ -0,0 +1,32 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Hiragana_Latin.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Hiragana_Latin
translit_Hiragana_Latin {
Rule {
// Compound transform with no conversion rules of its own: it chains the
// Hiragana-Katakana and Katakana-Latin transforms named below, bracketed
// by [:^Katakana:] filters.
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// $Source: /xsrl/Nsvn/icu/icu/source/data/translit/Attic/t_Hira_Latn.txt,v $
// $Date: 2001/10/26 05:41:16 $
// $Revision: 1.1 $
//--------------------------------------------------------------------
":: [:^Katakana:] ;" // don't touch any katakana that was in the text!
// Step 1: Hiragana to Katakana; step 2: the resulting Katakana to Latin.
":: Hiragana-Katakana;"
":: Katakana-Latin;"
// NOTE(review): the parenthesized form is presumably the filter for the
// reverse (Latin to Hiragana) direction -- confirm against the ICU rule syntax.
":: ([:^Katakana:]) ;" // don't touch any katakana that was in the text!
}
}

View File

@ -0,0 +1,134 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Bengali.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// InterIndic_Bengali
translit_InterIndic_Bengali {
Rule {
// One-way (">") rules from InterIndic private-use code points to Bengali.
// Most targets are U+0980 plus the low byte of the source (e.g.
// \ue001 > \u0981); rules marked REMAP send an InterIndic code point to a
// different Bengali character instead. Five rules for \ue00f, \ue013,
// \ue031, \ue047 and \ue04b are listed out of numeric order at the end of
// the table. The NFD/NFC directives at the top and bottom are commented out.
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Bengali.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// InterIndic_Bengali
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:41:59 2001
//--------------------------------------------------------------------
// InterIndic-Bengali
//:: NFD (NFC) ;
"\ue001>\u0981;" // SIGN CANDRABINDU
"\ue002>\u0982;" // SIGN ANUSVARA
"\ue003>\u0983;" // SIGN VISARGA
"\ue005>\u0985;" // LETTER A
"\ue006>\u0986;" // LETTER AA
"\ue007>\u0987;" // LETTER I
"\ue008>\u0988;" // LETTER II
"\ue009>\u0989;" // LETTER U
"\ue00a>\u098a;" // LETTER UU
"\ue00b>\u098b;" // LETTER VOCALIC R
"\ue00c>\u098c;" // LETTER VOCALIC L
// \ue00f>; # UNMAPPED InterIndic-Bengali: LETTER EE (\u098f = LETTER E)
// (stale: \ue00f is mapped to \u098f by an explicit rule at the end of this table)
"\ue010>\u0990;" // LETTER AI
// \ue013>; # UNMAPPED InterIndic-Bengali: LETTER OO (\u0993 = LETTER O)
// (stale: \ue013 is mapped to \u0993 by an explicit rule at the end of this table)
"\ue014>\u0994;" // LETTER AU
"\ue015>\u0995;" // LETTER KA
"\ue016>\u0996;" // LETTER KHA
"\ue017>\u0997;" // LETTER GA
"\ue018>\u0998;" // LETTER GHA
"\ue019>\u0999;" // LETTER NGA
"\ue01a>\u099a;" // LETTER CA
"\ue01b>\u099b;" // LETTER CHA
"\ue01c>\u099c;" // LETTER JA
"\ue01d>\u099d;" // LETTER JHA
"\ue01e>\u099e;" // LETTER NYA
"\ue01f>\u099f;" // LETTER TTA
"\ue020>\u09a0;" // LETTER TTHA
"\ue021>\u09a1;" // LETTER DDA
"\ue022>\u09a2;" // LETTER DDHA
"\ue023>\u09a3;" // LETTER NNA
"\ue024>\u09a4;" // LETTER TA
"\ue025>\u09a5;" // LETTER THA
"\ue026>\u09a6;" // LETTER DA
"\ue027>\u09a7;" // LETTER DHA
"\ue028>\u09a8;" // LETTER NA
"\ue029>\u09a8;" // REMAP (indicExceptions.txt): \u09a9>\u09a8 = LETTER NNNA>LETTER NA
"\ue02a>\u09aa;" // LETTER PA
"\ue02b>\u09ab;" // LETTER PHA
"\ue02c>\u09ac;" // LETTER BA
"\ue02d>\u09ad;" // LETTER BHA
"\ue02e>\u09ae;" // LETTER MA
"\ue02f>\u09af;" // LETTER YA
"\ue030>\u09b0;" // LETTER RA
"\ue032>\u09b2;" // LETTER LA
"\ue033>\u09b2;" // REMAP (indicExceptions.txt): \u09b3>\u09b2 = LETTER LLA>LETTER LA
"\ue034>\u09b2;" // REMAP (indicExceptions.txt): \u09b4>\u09b2 = LETTER LLLA>LETTER LA
"\ue035>\u09ac;" // REMAP (indicExceptions.txt): \u09b5>\u09ac = LETTER VA>LETTER BA
"\ue036>\u09b6;" // LETTER SHA
"\ue037>\u09b7;" // LETTER SSA
"\ue038>\u09b8;" // LETTER SA
"\ue039>\u09b9;" // LETTER HA
"\ue03c>\u09bc;" // SIGN NUKTA
// \ue03d>; # UNMAPPED InterIndic-Bengali: SIGN AVAGRAHA
"\ue03e>\u09be;" // VOWEL SIGN AA
"\ue03f>\u09bf;" // VOWEL SIGN I
"\ue040>\u09c0;" // VOWEL SIGN II
"\ue041>\u09c1;" // VOWEL SIGN U
"\ue042>\u09c2;" // VOWEL SIGN UU
"\ue043>\u09c3;" // VOWEL SIGN VOCALIC R
"\ue044>\u09c4;" // VOWEL SIGN VOCALIC RR
"\ue045>\u09c7;" // REMAP (indicExceptions.txt): \u09c5>\u09c7 = VOWEL SIGN CANDRA E>VOWEL SIGN E
// \ue047>; # UNMAPPED InterIndic-Bengali: VOWEL SIGN EE (\u09c7 = VOWEL SIGN E)
// (stale: \ue047 is mapped to \u09c7 by an explicit rule at the end of this table)
"\ue048>\u09c8;" // VOWEL SIGN AI
"\ue049>\u09cb;" // REMAP (indicExceptions.txt): \u09c9>\u09cb = VOWEL SIGN CANDRA O>VOWEL SIGN O
// \ue04b>; # UNMAPPED InterIndic-Bengali: VOWEL SIGN OO (\u09cb = VOWEL SIGN O)
// (stale: \ue04b is mapped to \u09cb by an explicit rule at the end of this table)
"\ue04c>\u09cc;" // VOWEL SIGN AU
"\ue04d>\u09cd;" // SIGN VIRAMA
// \ue050>; # UNMAPPED InterIndic-Bengali: OM
// \ue055>; # UNMAPPED InterIndic-Bengali: LENGTH MARK
"\ue056>\u09c8;" // REMAP (indicExceptions.txt): \u09d6>\u09c8 = AI LENGTH MARK>VOWEL SIGN AI
"\ue057>\u09d7;" // AU LENGTH MARK
"\ue059>\u0996;" // REMAP (indicExceptions.txt): \u09d9>\u0996 = LETTER KHHA>LETTER KHA
"\ue05a>\u0997;" // REMAP (indicExceptions.txt): \u09da>\u0997 = LETTER GHHA>LETTER GA
"\ue05b>\u099c;" // REMAP (indicExceptions.txt): \u09db>\u099c = LETTER ZA>LETTER JA
"\ue05d>\u09dd;" // LETTER RHA
"\ue05e>\u09ab;" // REMAP (indicExceptions.txt): \u09de>\u09ab = LETTER FA>LETTER PHA
"\ue05f>\u09df;" // LETTER YYA
"\ue060>\u09e0;" // LETTER VOCALIC RR
"\ue061>\u09e1;" // LETTER VOCALIC LL
"\ue062>\u09e2;" // VOWEL SIGN VOCALIC L
"\ue063>\u09e3;" // VOWEL SIGN VOCALIC LL
"\ue066>\u09e6;" // DIGIT ZERO
"\ue067>\u09e7;" // DIGIT ONE
"\ue068>\u09e8;" // DIGIT TWO
"\ue069>\u09e9;" // DIGIT THREE
"\ue06a>\u09ea;" // DIGIT FOUR
"\ue06b>\u09eb;" // DIGIT FIVE
"\ue06c>\u09ec;" // DIGIT SIX
"\ue06d>\u09ed;" // DIGIT SEVEN
"\ue06e>\u09ee;" // DIGIT EIGHT
"\ue06f>\u09ef;" // DIGIT NINE
"\ue0fa>\u09fa;" // ISSHAR
// Explicit rules for code points that the comments earlier in the table
// list as UNMAPPED (plus \ue031, which has no earlier entry).
"\ue00f>\u098f;" // LETTER E
"\ue013>\u0993;" // LETTER O
"\ue031>\u09dc;" // LETTER RRA
"\ue047>\u09c7;" // VOWEL SIGN E
"\ue04b>\u09cb;" // VOWEL SIGN O
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,159 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Devanagari.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// InterIndic_Devanagari
translit_InterIndic_Devanagari {
Rule {
// One-way (">") rules from InterIndic private-use code points to
// Devanagari. Most targets are U+0900 plus the low byte of the source
// (e.g. \ue001 > \u0901). Two groups deviate from that pattern:
//  - the first eleven rules match a base letter followed by the InterIndic
//    nukta (\ue03c) and emit a single Devanagari letter; they are listed
//    ahead of the single-code-point rules for the same base letters
//    (presumably so the two-character sequence is matched first);
//  - where a commented-out rule is followed by an active rule for the same
//    source, the active rule substitutes a neighbouring Devanagari
//    character (noted on each such line).
// The NFD/NFC directives at the top and bottom are commented out.
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Devanagari.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// InterIndic_Devanagari
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:41:59 2001
//--------------------------------------------------------------------
// InterIndic-Devanagari
//:: NFD (NFC) ;
//Rules for Decomposed characters
"\ue028\ue03c > \u0929;" //\ue029
"\ue030\ue03c > \u0931;" //\ue031
"\ue033\ue03c > \u0934;" //\ue034
"\ue015\ue03c > \u0958;" //\ue058 LETTER QA (For Urdu)
"\ue016\ue03c > \u0959;" //\ue059 LETTER KHHA (For Urdu)
"\ue017\ue03c > \u095a;" //\ue05a LETTER GHHA (For Urdu)
"\ue01c\ue03c > \u095b;" //\ue05b LETTER ZA (For Urdu)
"\ue021\ue03c > \u095c;" //\ue05c LETTER DDDHA (pronounced RRA)
"\ue022\ue03c > \u095d;" //\ue05d LETTER RHA (pronounced RRHA)
"\ue02b\ue03c > \u095e;" //\ue05e LETTER FA
"\ue02f\ue03c > \u095f;" //\ue05f LETTER YYA
"\ue001 > \u0901;" // SIGN CANDRABINDU
"\ue002 > \u0902;" // SIGN ANUSVARA
"\ue003 > \u0903;" // SIGN VISARGA
"\ue005 > \u0905;" // LETTER A
"\ue006 > \u0906;" // LETTER AA
"\ue007 > \u0907;" // LETTER I
"\ue008 > \u0908;" // LETTER II
"\ue009 > \u0909;" // LETTER U
"\ue00a > \u090a;" // LETTER UU
"\ue00b > \u090b;" // LETTER VOCALIC R
"\ue00c > \u090c;" // LETTER VOCALIC L
"\ue00d > \u090d;" // LETTER CANDRA E (For representing English sounds)
//\ue00e > \u090e; # UNMAPPED LETTER SHORT E(For Southern Scripts)
"\ue00e > \u090f;" // substitute: same target as LETTER E below
"\ue00f > \u090f;" // LETTER E
"\ue010 > \u0910;" // LETTER AI
"\ue011 > \u0911;" // LETTER CANDRA O (For representing English sounds)
//\ue012 > \u0912; # UNMAPPED LETTER SHORT O (For Southern Scripts)
"\ue012 > \u0913;" // substitute: same target as LETTER O below
"\ue013 > \u0913;" // LETTER O
"\ue014 > \u0914;" // LETTER AU
"\ue015 > \u0915;" // LETTER KA
"\ue016 > \u0916;" // LETTER KHA
"\ue017 > \u0917;" // LETTER GA
"\ue018 > \u0918;" // LETTER GHA
"\ue019 > \u0919;" // LETTER NGA
"\ue01a > \u091a;" // LETTER CA
"\ue01b > \u091b;" // LETTER CHA
"\ue01c > \u091c;" // LETTER JA
"\ue01d > \u091d;" // LETTER JHA
"\ue01e > \u091e;" // LETTER NYA
"\ue01f > \u091f;" // LETTER TTA
"\ue020 > \u0920;" // LETTER TTHA
"\ue021 > \u0921;" // LETTER DDA
"\ue022 > \u0922;" // LETTER DDHA
"\ue023 > \u0923;" // LETTER NNA
"\ue024 > \u0924;" // LETTER TA
"\ue025 > \u0925;" // LETTER THA
"\ue026 > \u0926;" // LETTER DA
"\ue027 > \u0927;" // LETTER DHA
"\ue028 > \u0928;" // LETTER NA
"\ue029 > \u0929;" // LETTER NNNA
"\ue02a > \u092a;" // LETTER PA
"\ue02b > \u092b;" // LETTER PHA
"\ue02c > \u092c;" // LETTER BA
"\ue02d > \u092d;" // LETTER BHA
"\ue02e > \u092e;" // LETTER MA
"\ue02f > \u092f;" // LETTER YA
"\ue030 > \u0930;" // LETTER RA
//\ue031 > \u0931; # LETTER RRA (Eyelash RA for Southern scripts)
"\ue031 > \u0930;" // substitute: same target as LETTER RA above
"\ue032 > \u0932;" // LETTER LA
"\ue033 > \u0933;" // LETTER LLA
//\ue034 > \u0934; # LETTER LLLA (LLLA for Southern scripts)
"\ue034 > \u0933;" // substitute: same target as LETTER LLA above
"\ue035 > \u0935;" // LETTER VA
"\ue036 > \u0936;" // LETTER SHA
"\ue037 > \u0937;" // LETTER SSA
"\ue038 > \u0938;" // LETTER SA
"\ue039 > \u0939;" // LETTER HA
"\ue03c > \u093c;" // SIGN NUKTA
"\ue03d > \u093d;" // SIGN AVAGRAHA
"\ue03e > \u093e;" // VOWEL SIGN AA
"\ue03f > \u093f;" // VOWEL SIGN I
"\ue040 > \u0940;" // VOWEL SIGN II
"\ue041 > \u0941;" // VOWEL SIGN U
"\ue042 > \u0942;" // VOWEL SIGN UU
"\ue043 > \u0943;" // VOWEL SIGN VOCALIC R
"\ue044 > \u0944;" // VOWEL SIGN VOCALIC RR
"\ue045 > \u0945;" // VOWEL SIGN CANDRA E
//\ue046 > \u0946; # UNMAPPED VOWEL SIGN SHORT E
"\ue046 > \u0947;" // substitute: same target as VOWEL SIGN E below
"\ue047 > \u0947;" // VOWEL SIGN E
"\ue048 > \u0948;" // VOWEL SIGN AI
"\ue049 > \u0949;" // VOWEL SIGN CANDRA O
//\ue04a > \u094a; # UNMAPPED VOWEL SIGN SHORT O
"\ue04a > \u094b;" // substitute: same target as VOWEL SIGN O below
"\ue04b > \u094b;" // VOWEL SIGN O
"\ue04c > \u094c;" // VOWEL SIGN AU
"\ue04d > \u094d;" // SIGN VIRAMA
"\ue050 > \u0950;" // OM
// \u0951 # UNMAPPED STRESS SIGN UDATTA
// \u0952 # UNMAPPED STRESS SIGN ANUDATTA
// \u0953 # UNMAPPED GRAVE ACCENT
// \u0954 # UNMAPPED ACUTE ACCENT
"\ue058 > \u0958;" // LETTER QA (For Urdu)
"\ue059 > \u0959;" // LETTER KHHA (For Urdu)
"\ue05a > \u095a;" // LETTER GHHA (For Urdu)
"\ue05b > \u095b;" // LETTER ZA (For Urdu)
"\ue05c > \u095c;" // LETTER DDDHA (pronounced RRA)
"\ue05d > \u095d;" // LETTER RHA (pronounced RRHA)
"\ue05e > \u095e;" // LETTER FA
"\ue05f > \u095f;" // LETTER YYA
"\ue060 > \u0960;" // LETTER VOCALIC RR
"\ue061 > \u0961;" // LETTER VOCALIC LL
"\ue062 > \u0962;" // VOWEL SIGN VOCALIC L
"\ue063 > \u0963;" // VOWEL SIGN VOCALIC LL
// > ; \u0964 # UNMAPPED Devanagari-InterIndic: DANDA
// > ; \u0965 # UNMAPPED Devanagari-InterIndic: DOUBLE DANDA
"\ue066 > \u0966;" // DIGIT ZERO
"\ue067 > \u0967;" // DIGIT ONE
"\ue068 > \u0968;" // DIGIT TWO
"\ue069 > \u0969;" // DIGIT THREE
"\ue06a > \u096a;" // DIGIT FOUR
"\ue06b > \u096b;" // DIGIT FIVE
"\ue06c > \u096c;" // DIGIT SIX
"\ue06d > \u096d;" // DIGIT SEVEN
"\ue06e > \u096e;" // DIGIT EIGHT
"\ue06f > \u096f;" // DIGIT NINE
// \u0970 # UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN
// :: NFC;
// eof
}
}

View File

@ -0,0 +1,134 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Gujarati.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// InterIndic_Gujarati
translit_InterIndic_Gujarati {
// One-way (">") rule set that converts InterIndic private-use code points
// (\ue0xx) into Gujarati code points (\u0a81-\u0aef).
Rule {
// Reading guide for the table below:
//  * Every quoted string is one "source>target;" rule; the trailing comment
//    names the character being mapped.
//  * REMAP rows have a target that differs from the same-offset Gujarati
//    code point; the comment shows the substitution that was applied.
//  * UNMAPPED rows are comment-only: no rule is emitted for that source.
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Gujarati.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// InterIndic_Gujarati
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:41:59 2001
//--------------------------------------------------------------------
// InterIndic-Gujarati
//:: NFD (NFC) ;
// --- Signs ---
"\ue001>\u0a81;" // SIGN CANDRABINDU
"\ue002>\u0a82;" // SIGN ANUSVARA
"\ue003>\u0a83;" // SIGN VISARGA
// --- Independent vowels ---
"\ue005>\u0a85;" // LETTER A
"\ue006>\u0a86;" // LETTER AA
"\ue007>\u0a87;" // LETTER I
"\ue008>\u0a88;" // LETTER II
"\ue009>\u0a89;" // LETTER U
"\ue00a>\u0a8a;" // LETTER UU
"\ue00b>\u0a8b;" // LETTER VOCALIC R
"\ue00c>\u0ab2\u0ac3;" // REMAP (indicExceptions.txt): \u0a8c>\u0ab2\u0ac3 = LETTER VOCALIC L>LETTER LA.VOWEL SIGN VOCALIC R
// \ue00f>; # UNMAPPED InterIndic-Gujarati: LETTER EE (\u0a8f = LETTER E)
"\ue010>\u0a90;" // LETTER AI
// \ue013>; # UNMAPPED InterIndic-Gujarati: LETTER OO (\u0a93 = LETTER O)
"\ue014>\u0a94;" // LETTER AU
// --- Consonants ---
"\ue015>\u0a95;" // LETTER KA
"\ue016>\u0a96;" // LETTER KHA
"\ue017>\u0a97;" // LETTER GA
"\ue018>\u0a98;" // LETTER GHA
"\ue019>\u0a99;" // LETTER NGA
"\ue01a>\u0a9a;" // LETTER CA
"\ue01b>\u0a9b;" // LETTER CHA
"\ue01c>\u0a9c;" // LETTER JA
"\ue01d>\u0a9d;" // LETTER JHA
"\ue01e>\u0a9e;" // LETTER NYA
"\ue01f>\u0a9f;" // LETTER TTA
"\ue020>\u0aa0;" // LETTER TTHA
"\ue021>\u0aa1;" // LETTER DDA
"\ue022>\u0aa2;" // LETTER DDHA
"\ue023>\u0aa3;" // LETTER NNA
"\ue024>\u0aa4;" // LETTER TA
"\ue025>\u0aa5;" // LETTER THA
"\ue026>\u0aa6;" // LETTER DA
"\ue027>\u0aa7;" // LETTER DHA
"\ue028>\u0aa8;" // LETTER NA
"\ue029>\u0aa8;" // REMAP (indicExceptions.txt): \u0aa9>\u0aa8 = LETTER NNNA>LETTER NA
"\ue02a>\u0aaa;" // LETTER PA
"\ue02b>\u0aab;" // LETTER PHA
"\ue02c>\u0aac;" // LETTER BA
"\ue02d>\u0aad;" // LETTER BHA
"\ue02e>\u0aae;" // LETTER MA
"\ue02f>\u0aaf;" // LETTER YA
"\ue030>\u0ab0;" // LETTER RA
"\ue032>\u0ab2;" // LETTER LA
"\ue033>\u0ab3;" // LETTER LLA
"\ue034>\u0ab3;" // REMAP (indicExceptions.txt): \u0ab4>\u0ab3 = LETTER LLLA>LETTER LLA
"\ue035>\u0ab5;" // LETTER VA
"\ue036>\u0ab6;" // LETTER SHA
"\ue037>\u0ab7;" // LETTER SSA
"\ue038>\u0ab8;" // LETTER SA
"\ue039>\u0ab9;" // LETTER HA
// --- Nukta, avagraha and dependent vowel signs ---
"\ue03c>\u0abc;" // SIGN NUKTA
"\ue03d>\u0abd;" // SIGN AVAGRAHA
"\ue03e>\u0abe;" // VOWEL SIGN AA
"\ue03f>\u0abf;" // VOWEL SIGN I
"\ue040>\u0ac0;" // VOWEL SIGN II
"\ue041>\u0ac1;" // VOWEL SIGN U
"\ue042>\u0ac2;" // VOWEL SIGN UU
"\ue043>\u0ac3;" // VOWEL SIGN VOCALIC R
"\ue044>\u0ac4;" // VOWEL SIGN VOCALIC RR
"\ue045>\u0ac5;" // VOWEL SIGN CANDRA E
// \ue047>; # UNMAPPED InterIndic-Gujarati: VOWEL SIGN EE (\u0ac7 = VOWEL SIGN E)
"\ue048>\u0ac8;" // VOWEL SIGN AI
"\ue049>\u0ac9;" // VOWEL SIGN CANDRA O
// \ue04b>; # UNMAPPED InterIndic-Gujarati: VOWEL SIGN OO (\u0acb = VOWEL SIGN O)
"\ue04c>\u0acc;" // VOWEL SIGN AU
"\ue04d>\u0acd;" // SIGN VIRAMA
"\ue050>\u0ad0;" // OM
// --- Length marks and additional letters (mostly REMAP rows) ---
// \ue055>; # UNMAPPED InterIndic-Gujarati: LENGTH MARK
"\ue056>\u0ac8;" // REMAP (indicExceptions.txt): \u0ad6>\u0ac8 = AI LENGTH MARK>VOWEL SIGN AI
"\ue057>\u0acc;" // REMAP (indicExceptions.txt): \u0ad7>\u0acc = AU LENGTH MARK>VOWEL SIGN AU
"\ue059>\u0a96\u0abc;" // REMAP (indicExceptions.txt): \u0ad9>\u0a96\u0abc = LETTER KHHA>LETTER KHA.SIGN NUKTA
"\ue05a>\u0a97\u0abc;" // REMAP (indicExceptions.txt): \u0ada>\u0a97\u0abc = LETTER GHHA>LETTER GA.SIGN NUKTA
"\ue05b>\u0a9c\u0abc;" // REMAP (indicExceptions.txt): \u0adb>\u0a9c\u0abc = LETTER ZA>LETTER JA.SIGN NUKTA
"\ue05d>\u0aa2\u0abc;" // REMAP (indicExceptions.txt): \u0add>\u0aa2\u0abc = LETTER RHA>LETTER DDHA.SIGN NUKTA
"\ue05e>\u0aab\u0abc;" // REMAP (indicExceptions.txt): \u0ade>\u0aab\u0abc = LETTER FA>LETTER PHA.SIGN NUKTA
"\ue05f>\u0aaf\u0abc;" // REMAP (indicExceptions.txt): \u0adf>\u0aaf\u0abc = LETTER YYA>LETTER YA.SIGN NUKTA
"\ue060>\u0ae0;" // LETTER VOCALIC RR
"\ue061>\u0ab2\u0ac3;" // REMAP (indicExceptions.txt): \u0ae1>\u0ab2\u0ac3 = LETTER VOCALIC LL>LETTER LA.VOWEL SIGN VOCALIC R
"\ue062>\u0abf\u0abc;" // REMAP (indicExceptions.txt): \u0ae2>\u0abf\u0abc = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
"\ue063>\u0ac0\u0abc;" // REMAP (indicExceptions.txt): \u0ae3>\u0ac0\u0abc = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
// --- Digits ---
"\ue066>\u0ae6;" // DIGIT ZERO
"\ue067>\u0ae7;" // DIGIT ONE
"\ue068>\u0ae8;" // DIGIT TWO
"\ue069>\u0ae9;" // DIGIT THREE
"\ue06a>\u0aea;" // DIGIT FOUR
"\ue06b>\u0aeb;" // DIGIT FIVE
"\ue06c>\u0aec;" // DIGIT SIX
"\ue06d>\u0aed;" // DIGIT SEVEN
"\ue06e>\u0aee;" // DIGIT EIGHT
"\ue06f>\u0aef;" // DIGIT NINE
// --- Script-specific forms ---
// The rules in this section supply the targets for \ue00f, \ue013, \ue047
// and \ue04b, which the rows earlier in this table list as UNMAPPED.
// \ue080>; # UNMAPPED InterIndic-Gujarati: ISSHAR
"\ue00f>\u0a8f;" // LETTER E
"\ue013>\u0a93;" // LETTER O
// \ue083>; # UNMAPPED InterIndic-Gujarati: LETTER RRA (\u0a83 = SIGN VISARGA)
"\ue047>\u0ac7;" // VOWEL SIGN E
"\ue04b>\u0acb;" // VOWEL SIGN O
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,134 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Gurmukhi.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// InterIndic_Gurmukhi
translit_InterIndic_Gurmukhi {
// One-way (">") rule set that converts InterIndic private-use code points
// (\ue0xx) into Gurmukhi code points (\u0a02-\u0a6f).
Rule {
// Reading guide for the table below:
//  * Every quoted string is one "source>target;" rule; the trailing comment
//    names the character being mapped.
//  * REMAP rows have a target that differs from the same-offset Gurmukhi
//    code point; the comment shows the substitution that was applied.
//  * UNMAPPED rows are comment-only: no rule is emitted for that source.
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Gurmukhi.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// InterIndic_Gurmukhi
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:00 2001
//--------------------------------------------------------------------
// InterIndic-Gurmukhi
//:: NFD (NFC) ;
// --- Signs ---
"\ue001>\u0a02;" // REMAP (indicExceptions.txt): \u0a01>\u0a02 = SIGN CANDRABINDU>SIGN BINDI
// \ue002>; # UNMAPPED InterIndic-Gurmukhi: SIGN ANUSVARA (\u0a02 = SIGN BINDI)
// \ue003>; # UNMAPPED InterIndic-Gurmukhi: SIGN VISARGA
// --- Independent vowels ---
"\ue005>\u0a05;" // LETTER A
"\ue006>\u0a06;" // LETTER AA
"\ue007>\u0a07;" // LETTER I
"\ue008>\u0a08;" // LETTER II
"\ue009>\u0a09;" // LETTER U
"\ue00a>\u0a0a;" // LETTER UU
"\ue00b>\u0a30\u0a3f;" // REMAP (indicExceptions.txt): \u0a0b>\u0a30\u0a3f = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
"\ue00c>\u0a07;" // REMAP (indicExceptions.txt): \u0a0c>\u0a07 = LETTER VOCALIC L>LETTER I
"\ue00f>\u0a0f;" // LETTER EE
"\ue010>\u0a10;" // LETTER AI
"\ue013>\u0a13;" // LETTER OO
"\ue014>\u0a14;" // LETTER AU
// --- Consonants ---
"\ue015>\u0a15;" // LETTER KA
"\ue016>\u0a16;" // LETTER KHA
"\ue017>\u0a17;" // LETTER GA
"\ue018>\u0a18;" // LETTER GHA
"\ue019>\u0a19;" // LETTER NGA
"\ue01a>\u0a1a;" // LETTER CA
"\ue01b>\u0a1b;" // LETTER CHA
"\ue01c>\u0a1c;" // LETTER JA
"\ue01d>\u0a1d;" // LETTER JHA
"\ue01e>\u0a1e;" // LETTER NYA
"\ue01f>\u0a1f;" // LETTER TTA
"\ue020>\u0a20;" // LETTER TTHA
"\ue021>\u0a21;" // LETTER DDA
"\ue022>\u0a22;" // LETTER DDHA
"\ue023>\u0a23;" // LETTER NNA
"\ue024>\u0a24;" // LETTER TA
"\ue025>\u0a25;" // LETTER THA
"\ue026>\u0a26;" // LETTER DA
"\ue027>\u0a27;" // LETTER DHA
"\ue028>\u0a28;" // LETTER NA
"\ue029>\u0a28;" // REMAP (indicExceptions.txt): \u0a29>\u0a28 = LETTER NNNA>LETTER NA
"\ue02a>\u0a2a;" // LETTER PA
"\ue02b>\u0a2b;" // LETTER PHA
"\ue02c>\u0a2c;" // LETTER BA
"\ue02d>\u0a2d;" // LETTER BHA
"\ue02e>\u0a2e;" // LETTER MA
"\ue02f>\u0a2f;" // LETTER YA
"\ue030>\u0a30;" // LETTER RA
"\ue032>\u0a32;" // LETTER LA
"\ue033>\u0a33;" // LETTER LLA
"\ue034>\u0a33;" // REMAP (indicExceptions.txt): \u0a34>\u0a33 = LETTER LLLA>LETTER LLA
"\ue035>\u0a35;" // LETTER VA
"\ue036>\u0a36;" // LETTER SHA
"\ue037>\u0a36;" // REMAP (indicExceptions.txt): \u0a37>\u0a36 = LETTER SSA>LETTER SHA
"\ue038>\u0a38;" // LETTER SA
"\ue039>\u0a39;" // LETTER HA
// --- Nukta and dependent vowel signs ---
"\ue03c>\u0a3c;" // SIGN NUKTA
// \ue03d>; # UNMAPPED InterIndic-Gurmukhi: SIGN AVAGRAHA
"\ue03e>\u0a3e;" // VOWEL SIGN AA
"\ue03f>\u0a3f;" // VOWEL SIGN I
"\ue040>\u0a40;" // VOWEL SIGN II
"\ue041>\u0a41;" // VOWEL SIGN U
"\ue042>\u0a42;" // VOWEL SIGN UU
// \ue043>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN VOCALIC R
// \ue044>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN VOCALIC RR
"\ue045>\u0a48;" // REMAP (indicExceptions.txt): \u0a45>\u0a48 = VOWEL SIGN CANDRA E>VOWEL SIGN AI
"\ue047>\u0a47;" // VOWEL SIGN EE
"\ue048>\u0a48;" // VOWEL SIGN AI
"\ue049>\u0a4c;" // REMAP (indicExceptions.txt): \u0a49>\u0a4c = VOWEL SIGN CANDRA O>VOWEL SIGN AU
"\ue04b>\u0a4b;" // VOWEL SIGN OO
"\ue04c>\u0a4c;" // VOWEL SIGN AU
"\ue04d>\u0a4d;" // SIGN VIRAMA
// \ue050>; # UNMAPPED InterIndic-Gurmukhi: OM
// --- Length marks and additional letters ---
// \ue055>; # UNMAPPED InterIndic-Gurmukhi: LENGTH MARK
"\ue056>\u0a48;" // REMAP (indicExceptions.txt): \u0a56>\u0a48 = AI LENGTH MARK>VOWEL SIGN AI
"\ue057>\u0a4c;" // REMAP (indicExceptions.txt): \u0a57>\u0a4c = AU LENGTH MARK>VOWEL SIGN AU
"\ue059>\u0a59;" // LETTER KHHA
"\ue05a>\u0a5a;" // LETTER GHHA
"\ue05b>\u0a5b;" // LETTER ZA
"\ue05d>\u0a22\u0a3c;" // REMAP (indicExceptions.txt): \u0a5d>\u0a22\u0a3c = LETTER RHA>LETTER DDHA.SIGN NUKTA
"\ue05e>\u0a5e;" // LETTER FA
"\ue05f>\u0a2f;" // REMAP (indicExceptions.txt): \u0a5f>\u0a2f = LETTER YYA>LETTER YA
"\ue060>\u0a30\u0a3f;" // REMAP (indicExceptions.txt): \u0a60>\u0a30\u0a3f = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
"\ue061>\u0a08\u0a3c;" // REMAP (indicExceptions.txt): \u0a61>\u0a08\u0a3c = LETTER VOCALIC LL>LETTER II.SIGN NUKTA
"\ue062>\u0a3f\u0a3c;" // REMAP (indicExceptions.txt): \u0a62>\u0a3f\u0a3c = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
"\ue063>\u0a40\u0a3c;" // REMAP (indicExceptions.txt): \u0a63>\u0a40\u0a3c = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
// --- Digits ---
"\ue066>\u0a66;" // DIGIT ZERO
"\ue067>\u0a67;" // DIGIT ONE
"\ue068>\u0a68;" // DIGIT TWO
"\ue069>\u0a69;" // DIGIT THREE
"\ue06a>\u0a6a;" // DIGIT FOUR
"\ue06b>\u0a6b;" // DIGIT FIVE
"\ue06c>\u0a6c;" // DIGIT SIX
"\ue06d>\u0a6d;" // DIGIT SEVEN
"\ue06e>\u0a6e;" // DIGIT EIGHT
"\ue06f>\u0a6f;" // DIGIT NINE
// --- Script-specific forms ---
// Only LETTER RRA (\ue05c) receives a rule in this section; the \ue080-\ue085
// rows are listed as UNMAPPED.
// \ue080>; # UNMAPPED InterIndic-Gurmukhi: ISSHAR
// \ue081>; # UNMAPPED InterIndic-Gurmukhi: LETTER E
// \ue082>; # UNMAPPED InterIndic-Gurmukhi: LETTER O (\u0a02 = SIGN BINDI)
"\ue05c>\u0a5c;" // LETTER RRA
// \ue084>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN E
// \ue085>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN O (\u0a05 = LETTER A)
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,134 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Kannada.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// InterIndic_Kannada
translit_InterIndic_Kannada {
// One-way (">") rule set that converts InterIndic private-use code points
// (\ue0xx) into Kannada code points (\u0c82-\u0cef).
Rule {
// Reading guide for the table below:
//  * Every quoted string is one "source>target;" rule; the trailing comment
//    names the character being mapped.
//  * REMAP rows have a target that differs from the same-offset Kannada
//    code point; the comment shows the substitution that was applied.
//  * UNMAPPED rows are comment-only: no rule is emitted for that source.
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Kannada.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// InterIndic_Kannada
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:00 2001
//--------------------------------------------------------------------
// InterIndic-Kannada
//:: NFD (NFC) ;
// --- Signs ---
"\ue001>\u0c82;" // REMAP (indicExceptions.txt): \u0c81>\u0c82 = SIGN CANDRABINDU>SIGN ANUSVARA
"\ue002>\u0c82;" // SIGN ANUSVARA
"\ue003>\u0c83;" // SIGN VISARGA
// --- Independent vowels ---
"\ue005>\u0c85;" // LETTER A
"\ue006>\u0c86;" // LETTER AA
"\ue007>\u0c87;" // LETTER I
"\ue008>\u0c88;" // LETTER II
"\ue009>\u0c89;" // LETTER U
"\ue00a>\u0c8a;" // LETTER UU
"\ue00b>\u0c8b;" // LETTER VOCALIC R
"\ue00c>\u0c8c;" // LETTER VOCALIC L
"\ue00f>\u0c8f;" // LETTER EE
"\ue010>\u0c90;" // LETTER AI
"\ue013>\u0c93;" // LETTER OO
"\ue014>\u0c94;" // LETTER AU
// --- Consonants ---
"\ue015>\u0c95;" // LETTER KA
"\ue016>\u0c96;" // LETTER KHA
"\ue017>\u0c97;" // LETTER GA
"\ue018>\u0c98;" // LETTER GHA
"\ue019>\u0c99;" // LETTER NGA
"\ue01a>\u0c9a;" // LETTER CA
"\ue01b>\u0c9b;" // LETTER CHA
"\ue01c>\u0c9c;" // LETTER JA
"\ue01d>\u0c9d;" // LETTER JHA
"\ue01e>\u0c9e;" // LETTER NYA
"\ue01f>\u0c9f;" // LETTER TTA
"\ue020>\u0ca0;" // LETTER TTHA
"\ue021>\u0ca1;" // LETTER DDA
"\ue022>\u0ca2;" // LETTER DDHA
"\ue023>\u0ca3;" // LETTER NNA
"\ue024>\u0ca4;" // LETTER TA
"\ue025>\u0ca5;" // LETTER THA
"\ue026>\u0ca6;" // LETTER DA
"\ue027>\u0ca7;" // LETTER DHA
"\ue028>\u0ca8;" // LETTER NA
"\ue029>\u0ca8;" // REMAP (indicExceptions.txt): \u0ca9>\u0ca8 = LETTER NNNA>LETTER NA
"\ue02a>\u0caa;" // LETTER PA
"\ue02b>\u0cab;" // LETTER PHA
"\ue02c>\u0cac;" // LETTER BA
"\ue02d>\u0cad;" // LETTER BHA
"\ue02e>\u0cae;" // LETTER MA
"\ue02f>\u0caf;" // LETTER YA
"\ue030>\u0cb0;" // LETTER RA
"\ue032>\u0cb2;" // LETTER LA
"\ue033>\u0cb3;" // LETTER LLA
"\ue034>\u0cb3;" // REMAP (indicExceptions.txt): \u0cb4>\u0cb3 = LETTER LLLA>LETTER LLA
"\ue035>\u0cb5;" // LETTER VA
"\ue036>\u0cb6;" // LETTER SHA
"\ue037>\u0cb7;" // LETTER SSA
"\ue038>\u0cb8;" // LETTER SA
"\ue039>\u0cb9;" // LETTER HA
// --- Dependent vowel signs ---
// \ue03c>; # UNMAPPED InterIndic-Kannada: SIGN NUKTA
// \ue03d>; # UNMAPPED InterIndic-Kannada: SIGN AVAGRAHA
"\ue03e>\u0cbe;" // VOWEL SIGN AA
"\ue03f>\u0cbf;" // VOWEL SIGN I
"\ue040>\u0cc0;" // VOWEL SIGN II
"\ue041>\u0cc1;" // VOWEL SIGN U
"\ue042>\u0cc2;" // VOWEL SIGN UU
"\ue043>\u0cc3;" // VOWEL SIGN VOCALIC R
"\ue044>\u0cc4;" // VOWEL SIGN VOCALIC RR
"\ue045>\u0cc6;" // REMAP (indicExceptions.txt): \u0cc5>\u0cc6 = VOWEL SIGN CANDRA E>VOWEL SIGN E
"\ue047>\u0cc7;" // VOWEL SIGN EE
"\ue048>\u0cc8;" // VOWEL SIGN AI
"\ue049>\u0cca;" // REMAP (indicExceptions.txt): \u0cc9>\u0cca = VOWEL SIGN CANDRA O>VOWEL SIGN O
"\ue04b>\u0ccb;" // VOWEL SIGN OO
"\ue04c>\u0ccc;" // VOWEL SIGN AU
"\ue04d>\u0ccd;" // SIGN VIRAMA
"\ue050>\u0c93\u0c82;" // REMAP (indicExceptions.txt): \u0cd0>\u0c93\u0c82 = OM>LETTER OO.SIGN ANUSVARA
// --- Length marks and additional letters ---
"\ue055>\u0cd5;" // LENGTH MARK
"\ue056>\u0cd6;" // AI LENGTH MARK
"\ue057>\u0ccc;" // REMAP (indicExceptions.txt): \u0cd7>\u0ccc = AU LENGTH MARK>VOWEL SIGN AU
"\ue059>\u0c96;" // REMAP (indicExceptions.txt): \u0cd9>\u0c96 = LETTER KHHA>LETTER KHA
"\ue05a>\u0c97;" // REMAP (indicExceptions.txt): \u0cda>\u0c97 = LETTER GHHA>LETTER GA
"\ue05b>\u0c9c;" // REMAP (indicExceptions.txt): \u0cdb>\u0c9c = LETTER ZA>LETTER JA
"\ue05d>\u0ca2;" // REMAP (indicExceptions.txt): \u0cdd>\u0ca2 = LETTER RHA>LETTER DDHA
"\ue05e>\u0cde;" // LETTER FA
"\ue05f>\u0caf;" // REMAP (indicExceptions.txt): \u0cdf>\u0caf = LETTER YYA>LETTER YA
"\ue060>\u0ce0;" // LETTER VOCALIC RR
"\ue061>\u0ce1;" // LETTER VOCALIC LL
"\ue062>\u0cbf;" // REMAP (indicExceptions.txt): \u0ce2>\u0cbf = VOWEL SIGN VOCALIC L>VOWEL SIGN I
"\ue063>\u0cc0;" // REMAP (indicExceptions.txt): \u0ce3>\u0cc0 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
// --- Digits ---
"\ue066>\u0ce6;" // DIGIT ZERO
"\ue067>\u0ce7;" // DIGIT ONE
"\ue068>\u0ce8;" // DIGIT TWO
"\ue069>\u0ce9;" // DIGIT THREE
"\ue06a>\u0cea;" // DIGIT FOUR
"\ue06b>\u0ceb;" // DIGIT FIVE
"\ue06c>\u0cec;" // DIGIT SIX
"\ue06d>\u0ced;" // DIGIT SEVEN
"\ue06e>\u0cee;" // DIGIT EIGHT
"\ue06f>\u0cef;" // DIGIT NINE
// --- Script-specific forms ---
// These rules cover \ue00e, \ue012, \ue031, \ue046 and \ue04a, none of which
// has a row in the main table above.
// \ue080>; # UNMAPPED InterIndic-Kannada: ISSHAR
"\ue00e>\u0c8e;" // LETTER E
"\ue012>\u0c92;" // LETTER O
"\ue031>\u0cb1;" // LETTER RRA
"\ue046>\u0cc6;" // VOWEL SIGN E
"\ue04a>\u0cca;" // VOWEL SIGN O
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,385 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Latin.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// InterIndic_Latin
translit_InterIndic_Latin {
// One-way rule set that romanizes InterIndic private-use code points (\ue0xx)
// into Latin letters plus combining diacritics.
Rule {
// Reading guide (ICU transliterator rule syntax):
//  * "$name=value;" defines a variable; "[...]" is a character set.
//  * "key } after > output;" replaces key only when it is followed by the
//    text after "}"; that following text is context and is not consumed.
//  * '' inside a rule is a literal apostrophe.
//  * Within each group the context-dependent rule is listed before the
//    general one (e.g. "$cha}$x" before "$cha"), so rule order matters.
//--------------------------------------------------------------------
// Copyright (c) 2001-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// InterIndic-Latin
// :: NFD (NFC) ;
// \ue000 reserved
// signs (candrabindu, anusvara, visarga)
"$chandrabindu=\ue001;"
"$anusvara=\ue002;"
"$visarga=\ue003;"
// \ue004 reserved
// w<vowel> represents the stand-alone form
"$wa=\ue005;"
"$waa=\ue006;"
"$wi=\ue007;"
"$wii=\ue008;"
"$wu=\ue009;"
"$wuu=\ue00a;"
"$wr=\ue00b;"
"$wl=\ue00c;"
"$wce=\ue00d;" // LETTER CANDRA E
"$wse=\ue00e;" // LETTER SHORT E
"$we=\ue00f;" // \u090f LETTER E
"$wai=\ue010;"
"$wco=\ue011;" // LETTER CANDRA O
"$wso=\ue012;" // LETTER SHORT O
"$wo=\ue013;" // \u0913 LETTER O
"$wau=\ue014;"
// consonants
"$ka=\ue015;"
"$kha=\ue016;"
"$ga=\ue017;"
"$gha=\ue018;"
"$nga=\ue019;"
"$ca=\ue01a;"
"$cha=\ue01b;"
"$ja=\ue01c;"
"$jha=\ue01d;"
"$nya=\ue01e;"
"$tta=\ue01f;"
"$ttha=\ue020;"
"$dda=\ue021;"
"$ddha=\ue022;"
"$nna=\ue023;"
"$ta=\ue024;"
"$tha=\ue025;"
"$da=\ue026;"
"$dha=\ue027;"
"$na=\ue028;"
"$ena=\ue029;" //compatibility
"$pa=\ue02a;"
"$pha=\ue02b;"
"$ba=\ue02c;"
"$bha=\ue02d;"
"$ma=\ue02e;"
"$ya=\ue02f;"
"$ra=\ue030;"
"$rra=\ue031;"
"$la=\ue032;"
"$lla=\ue033;"
"$ela=\ue034;" //compatibility
"$va=\ue035;"
"$sha=\ue036;"
"$ssa=\ue037;"
"$sa=\ue038;"
"$ha=\ue039;"
//\u093a Reserved
//\u093b Reserved
"$nukta=\ue03c;"
"$avagraha=\ue03d;" // SIGN AVAGRAHA
// <vowel> represents the dependent form
// NOTE(review): the sibling InterIndic_<script> tables in this file label
// \ue044 as VOWEL SIGN VOCALIC RR and \ue062 as VOWEL SIGN VOCALIC L, whereas
// here \ue044 is named $lh and \ue062 is named $rrh (see below) -- confirm
// against the icu4j master whether these two bindings are swapped.
"$aa=\ue03e;"
"$i=\ue03f;"
"$ii=\ue040;"
"$u=\ue041;"
"$uu=\ue042;"
"$rh=\ue043;"
"$lh=\ue044;"
"$ce=\ue045;" //VOWEL SIGN CANDRA E
"$se=\ue046;" //VOWEL SIGN SHORT E
"$e=\ue047;"
"$ai=\ue048;"
"$co=\ue049;" // VOWEL SIGN CANDRA O
"$so=\ue04a;" // VOWEL SIGN SHORT O
"$o=\ue04b;" // \u094b
"$au=\ue04c;"
"$virama=\ue04d;"
// \u094e Reserved
// \u094f Reserved
//\u0950>\ue050; # OM
// \u0951>; # UNMAPPED STRESS SIGN UDATTA
// \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
// \u0953>; # UNMAPPED GRAVE ACCENT
// \u0954>; # UNMAPPED ACUTE ACCENT
"$lm = \ue055;"// Telugu Length Mark
"$ailm=\ue056;"// AI Length Mark
"$aulm=\ue057;"// AU Length Mark
// Urdu compatibility forms
"$uka=\ue058;"
"$ukha=\ue059;"
"$ugha=\ue05a;"
"$ujha=\ue05b;"
"$uddha=\ue05c;"
"$udha=\ue05d;"
"$ufa=\ue05e;"
"$uya=\ue05f;"
"$wrr=\ue060;"
"$wll=\ue061;"
"$rrh=\ue062;"
"$llh=\ue063;"
"$danda=\ue064;"
"$doubleDanda=\ue065;"
"$zero=\ue066;" // DIGIT ZERO
"$one=\ue067;" // DIGIT ONE
"$two=\ue068;" // DIGIT TWO
"$three=\ue069;" // DIGIT THREE
"$four=\ue06a;" // DIGIT FOUR
"$five=\ue06b;" // DIGIT FIVE
"$six=\ue06c;" // DIGIT SIX
"$seven=\ue06d;" // DIGIT SEVEN
"$eight=\ue06e;" // DIGIT EIGHT
"$nine=\ue06f;" // DIGIT NINE
// For all other scripts
"$ecp0=\ue070;"
"$ecp1=\ue071;"
"$ecp2=\ue072;"
"$ecp3=\ue073;"
"$ecp4=\ue074;"
"$ecp5=\ue075;"
"$ecp6=\ue076;"
"$ecp7=\ue077;"
"$ecp8=\ue078;"
"$ecp9=\ue079;"
"$ecpA=\ue07a;"
"$ecpB=\ue07b;"
"$ecpC=\ue07c;"
"$ecpD=\ue07d;"
"$ecpE=\ue07e;"
"$ecpF=\ue07f;"
// \u0970>; # UNMAPPED ABBREVIATION SIGN
// Helper sets. $depVowelAbove, $depVowelBelow and $endThing are defined here
// but are not referenced by any rule in this bundle.
"$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
"$depVowelBelow=[\ue041-\ue044];"
"$endThing=[$danda$doubleDanda \u005c\u005cu0000-\udfff\ue080-\ufffd];"
// $x was originally called '&'; $z was '%'
// $x: the virama or a dependent vowel sign. A consonant followed by $x is
//     emitted without its inherent "a" (compare "$ka}$x>k;" with "$ka>ka;").
//     $llh is not a member of this set.
// $z: the Latin consonant letters.
"$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];"
"$z=[bcdfghjklmnpqrstvwxyz];"
"$consonants=[$ka-$ha $virama];"
//#####################################################################
// convert from Native letters to Latin letters
//#####################################################################
//transliterations for anusvara
// The anusvara becomes the nasal matching the class of the consonant that
// follows it; the last rule is the fallback when no listed consonant follows.
"$anusvara} [$ka$kha$ga$gha$nga] > n\u0307;"
"$anusvara} [$ca$cha$ja$jha$nya] > n\u0304;"
"$anusvara} [$tta$ttha$dda$ddha$nna] > n\u0323;"
"$anusvara} [$ta$tha$da$dha$na] > n ;"
"$anusvara} [$pa$pha$ba$bha$ma] > m ;"
"$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] > n ;"
"$anusvara>'-'m\u0307;"
// normal consonants
// Pattern used throughout this section:
//   <cons>$virama}$ha  -> letter + apostrophe, keeping e.g. "c" + "h" apart
//                         from the digraph produced for $cha
//   <cons>}$x          -> bare letter (a vowel sign or virama follows)
//   <cons>             -> letter + inherent "a"
"$cha}$x>ch;"
"$cha>cha;"
"$ca$virama}$ha>c'';"
"$ca}$x>c;"
"$ca>ca;"
"$jha}$x>jh;"
"$jha>jha;"
"$ja$virama}$ha>j'';"
"$ja}$x>j;"
"$ja>ja;"
//$nya}$x>ny;
//$nya>nya;
"$nya }$x>n\u0303 ;"
"$nya > n\u0303a ;"
"$ttha}$x>t\u0323h;"
"$tta$virama}$ha>t\u0323'';"
"$tta}$x>t\u0323;"
"$ddha}$x>d\u0323h;"
// NOTE(review): unlike the sibling rules ("$tta$virama}$ha", "$da$virama}$ha")
// the next rule matches $dda alone with "$x$ha" as following context, so it
// also applies when a vowel sign (not only a virama) precedes $ha. Possibly
// "$dda$virama}$ha" was intended -- confirm against the icu4j master.
"$dda}$x$ha>d\u0323'';"
"$dda}$x>d\u0323;"
"$dha}$x>dh;"
"$da$virama}$ha>d'';"
"$da$virama}$ddha>d'';"
"$da$virama}$dda>d'';"
"$da$virama}$dha>d'';"
//$da$virama}$da>dda;
"$da}$x>d;"
"$tha}$x>th;"
"$ta$virama}$ha>t'';"
"$ta$virama}$ttha>t'';"
"$ta$virama}$tta>t'';"
"$ta$virama}$tha>t'';"
"$tta>t\u0323a;"
"$ttha>t\u0323ha;"
//$ta$virama}$ta>tta;
"$ta}$x>t;"
"$tha>tha;"
"$ta>ta;"
"$dda>d\u0323a;"
"$dha>dha;"
"$ddha>d\u0323ha;"
"$da>da;"
"$nna}$x>n\u0323 ;"
"$nna>n\u0323a ;"
"$na$virama}$ga>n'';"
"$na$virama}$ya>n'';"
"$na}$x>n;"
"$na>na;"
"$kha}$x>kh;"
"$kha>kha;"
"$ka$virama}$ha>k'';"
"$ka}$x>k;"
"$ka>ka;"
"$gha}$x>gh;"
"$gha>gha;"
"$ga$virama}$ha>g'';"
"$ga}$x>g;"
"$ga>ga;"
//ng<$nga}$x;
//nga<$nga;
"$nga}$x>n\u0307;"
"$nga>n\u0307a ;"
"$pha}$x>ph;"
"$pha>pha;"
"$pa$virama}$ha>p'';"
"$pa}$x>p;"
"$pa>pa;"
"$bha}$x>bh;"
"$bha>bha;"
"$ba$virama}$ha>b'';"
"$ba}$x>b;"
"$ba>ba;"
"$ma$virama}$ma>m'';"
//$ma$virama}$anusvara>m'';
"$ma}$x>m;"
"$ma>ma;"
"$ya}$x>y;"
"$ya>ya;"
"$ra$virama}$ha>r'';"
"$ra}$x>r;"
"$ra>ra;"
"$la$virama}$ha>l'';"
"$la}$x>l;"
"$la>la;"
"$lla$virama}$ha>l\u0323'';"
"$lla}$x>l\u0323;"
"$lla>l\u0323a;"
"$va}$x>v;"
"$va>va;"
"$sha}$x>s\u0301;"
"$ssa}$x>s\u0323;"
"$sa$virama}$ha>s'';"
"$sa$virama}$sha>s'';"
"$sa$virama}$ssa>s'';"
"$sa$virama}$sa>s'';"
"$sa}$x>s;"
"$sha>s\u0301a;"
"$ssa>s\u0323a;"
"$sa>sa;"
"$ha}$x>h;"
"$ha>ha;"
// Urdu compatibility
"$uya}$x > y\u0307 ;"
"$uya > y\u0307a ;"
"$ela}$x > l\u0331 ;"
"$ela > l\u0331a ;"
"$ena}$x > n\u0331 ;"
"$ena > n\u0331a ;"
"$uka}$x > q ;"
"$uka > qa ;"
"$ukha}$x > k\u0323 ;"
"$ukha > k\u0323a ;"
"$ugha}$x > g\u0307 ;"
"$ugha > g\u0307a ;"
"$ujha}$x > z ;"
"$ujha > za ;"
"$udha}$x > r\u0323h ;"
"$udha > r\u0323ha;"
"$uddha}$x> r\u0323 ;"
"$uddha > r\u0323a ;"
"$ufa}$x > f\u0323 ;"
"$ufa > f\u0323a ;"
// dependent vowels (should never occur except following consonants)
"$aa > a\u0304 ;"
"$ai > ai ;"
"$au > au ;"
"$ii > i\u0304 ;"
"$i > i ;"
"$uu > u\u0304 ;"
"$u > u ;"
"$rrh > r\u0325\u0304 ;"
"$rh}$consonants>r\u0325;"
"$rh > r\u0325a ;"
"$llh > l\u0325\u0304 ;"
"$lh > l\u0325 ;"
"$e > e\u0304 ;"
"$o > o\u0304 ;"
//extra vowels
"$ce > e\u0306 ;"
"$co > o\u0306 ;"
"$se > e ;"
"$so > o ;"
// independent vowels (when following consonants)
// NOTE(review): in these rules the Latin text ("a" or $z) stands to the left
// of "}", which makes it the matched key and the vowel only following
// context. If the intent is "vowel preceded by a Latin letter", the
// before-context operator "{" would be expected -- confirm against the
// icu4j master.
"a}$waa > ''a\u0304 ;"
"$z}$waa > ''a\u0304 ;"
"a}$wai > ''ai ;"
"$z}$wai > ''ai ;"
"a}$wau > ''au ;"
"$z}$wau > ''au ;"
"a}$wii > ''i\u0304 ;"
"$z}$wii > ''i\u0304 ;"
"a}$wi > ''i ;"
"$z}$wi > ''i ;"
"a}$wuu > ''u\u0304 ;"
"$z}$wuu > ''u\u0304 ;"
"a}$wu > ''u ;"
"$z}$wu > ''u ;"
"$z}$wrr > ''r\u0325\u0304 ;"
"$z}$wr > ''r\u0325 ;"
"$z}$wll > ''l\u0325\u0304 ;"
"$z}$wl > ''l\u0325 ;"
"$z}$we > ''e\u0304 ;"
"$z}$wo > ''o\u0304 ;"
"a}$wa > ''a ;"
"$z}$wa > ''a ;"
//extra vowels
"$z}$wce > ''e\u0306 ;"
"$z}$wco > ''o\u0306 ;"
"$z}$wse > ''e ;"
"$z}$wso > ''o ;"
// independent vowels (otherwise)
"$waa > a\u0304 ;"
"$wai > ai ;"
"$wau > au ;"
"$wii > i\u0304 ;"
"$wi > i ;"
"$wuu > u\u0304 ;"
"$wu > u ;"
"$wrr > r\u0325\u0304 ;"
"$wr > r\u0325 ;"
"$wll > l\u0325\u0304 ;"
"$wl > l\u0325 ;"
"$we > e\u0304 ;"
"$wo > o\u0304 ;"
"$wa > a ;"
//extra vowels
"$wce > e\u0306 ;"
"$wco > o\u0306 ;"
"$wse > e ;"
"$wso > o ;"
//stress marks
"$avagraha > \u0315;"
"$chandrabindu$anusvara>'-'\u0303;"
"$chandrabindu > '-'m\u0310;"
"$visarga>'-'h\u0323;"
//numbers
"$zero > 0;"
"$one > 1;"
"$two > 2;"
"$three > 3;"
"$four > 4;"
"$five > 5;"
"$six > 6;"
"$seven > 7;"
"$eight > 8;"
"$nine > 9;"
// blow away any remaining viramas
"$virama>;"
// :: NFC;
}
}

View File

@ -0,0 +1,134 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Malayalam.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// InterIndic_Malayalam
translit_InterIndic_Malayalam {
// One-way (">") rule set that converts InterIndic private-use code points
// (\ue0xx) into Malayalam code points (\u0d02-\u0d6f).
Rule {
// Reading guide for the table below:
//  * Every quoted string is one "source>target;" rule; the trailing comment
//    names the character being mapped.
//  * REMAP rows have a target that differs from the same-offset Malayalam
//    code point; the comment shows the substitution that was applied.
//  * UNMAPPED rows are comment-only: no rule is emitted for that source.
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Malayalam.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// InterIndic_Malayalam
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:00 2001
//--------------------------------------------------------------------
// InterIndic-Malayalam
//:: NFD (NFC) ;
// --- Signs ---
"\ue001>\u0d02;" // REMAP (indicExceptions.txt): \u0d01>\u0d02 = SIGN CANDRABINDU>SIGN ANUSVARA
"\ue002>\u0d02;" // SIGN ANUSVARA
"\ue003>\u0d03;" // SIGN VISARGA
// --- Independent vowels ---
"\ue005>\u0d05;" // LETTER A
"\ue006>\u0d06;" // LETTER AA
"\ue007>\u0d07;" // LETTER I
"\ue008>\u0d08;" // LETTER II
"\ue009>\u0d09;" // LETTER U
"\ue00a>\u0d0a;" // LETTER UU
"\ue00b>\u0d0b;" // LETTER VOCALIC R
"\ue00c>\u0d0c;" // LETTER VOCALIC L
"\ue00f>\u0d0f;" // LETTER EE
"\ue010>\u0d10;" // LETTER AI
"\ue013>\u0d13;" // LETTER OO
"\ue014>\u0d14;" // LETTER AU
// --- Consonants ---
"\ue015>\u0d15;" // LETTER KA
"\ue016>\u0d16;" // LETTER KHA
"\ue017>\u0d17;" // LETTER GA
"\ue018>\u0d18;" // LETTER GHA
"\ue019>\u0d19;" // LETTER NGA
"\ue01a>\u0d1a;" // LETTER CA
"\ue01b>\u0d1b;" // LETTER CHA
"\ue01c>\u0d1c;" // LETTER JA
"\ue01d>\u0d1d;" // LETTER JHA
"\ue01e>\u0d1e;" // LETTER NYA
"\ue01f>\u0d1f;" // LETTER TTA
"\ue020>\u0d20;" // LETTER TTHA
"\ue021>\u0d21;" // LETTER DDA
"\ue022>\u0d22;" // LETTER DDHA
"\ue023>\u0d23;" // LETTER NNA
"\ue024>\u0d24;" // LETTER TA
"\ue025>\u0d25;" // LETTER THA
"\ue026>\u0d26;" // LETTER DA
"\ue027>\u0d27;" // LETTER DHA
"\ue028>\u0d28;" // LETTER NA
"\ue029>\u0d28;" // REMAP (indicExceptions.txt): \u0d29>\u0d28 = LETTER NNNA>LETTER NA
"\ue02a>\u0d2a;" // LETTER PA
"\ue02b>\u0d2b;" // LETTER PHA
"\ue02c>\u0d2c;" // LETTER BA
"\ue02d>\u0d2d;" // LETTER BHA
"\ue02e>\u0d2e;" // LETTER MA
"\ue02f>\u0d2f;" // LETTER YA
"\ue030>\u0d30;" // LETTER RA
"\ue032>\u0d32;" // LETTER LA
"\ue033>\u0d33;" // LETTER LLA
"\ue034>\u0d34;" // LETTER LLLA
"\ue035>\u0d35;" // LETTER VA
"\ue036>\u0d36;" // LETTER SHA
"\ue037>\u0d37;" // LETTER SSA
"\ue038>\u0d38;" // LETTER SA
"\ue039>\u0d39;" // LETTER HA
// --- Dependent vowel signs ---
// \ue03c>; # UNMAPPED InterIndic-Malayalam: SIGN NUKTA
// \ue03d>; # UNMAPPED InterIndic-Malayalam: SIGN AVAGRAHA
"\ue03e>\u0d3e;" // VOWEL SIGN AA
"\ue03f>\u0d3f;" // VOWEL SIGN I
"\ue040>\u0d40;" // VOWEL SIGN II
"\ue041>\u0d41;" // VOWEL SIGN U
"\ue042>\u0d42;" // VOWEL SIGN UU
"\ue043>\u0d43;" // VOWEL SIGN VOCALIC R
// \ue044>; # UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC RR
"\ue045>\u0d3e;" // REMAP (indicExceptions.txt): \u0d45>\u0d3e = VOWEL SIGN CANDRA E>VOWEL SIGN AA
"\ue047>\u0d47;" // VOWEL SIGN EE
"\ue048>\u0d48;" // VOWEL SIGN AI
"\ue049>\u0d4b;" // REMAP (indicExceptions.txt): \u0d49>\u0d4b = VOWEL SIGN CANDRA O>VOWEL SIGN OO
"\ue04b>\u0d4b;" // VOWEL SIGN OO
"\ue04c>\u0d4c;" // VOWEL SIGN AU
"\ue04d>\u0d4d;" // SIGN VIRAMA
// \ue050>; # UNMAPPED InterIndic-Malayalam: OM
// --- Length marks and additional letters ---
// \ue055>; # UNMAPPED InterIndic-Malayalam: LENGTH MARK
"\ue056>\u0d48;" // REMAP (indicExceptions.txt): \u0d56>\u0d48 = AI LENGTH MARK>VOWEL SIGN AI
"\ue057>\u0d57;" // AU LENGTH MARK
"\ue059>\u0d16;" // REMAP (indicExceptions.txt): \u0d59>\u0d16 = LETTER KHHA>LETTER KHA
"\ue05a>\u0d17;" // REMAP (indicExceptions.txt): \u0d5a>\u0d17 = LETTER GHHA>LETTER GA
"\ue05b>\u0d1c;" // REMAP (indicExceptions.txt): \u0d5b>\u0d1c = LETTER ZA>LETTER JA
"\ue05d>\u0d22;" // REMAP (indicExceptions.txt): \u0d5d>\u0d22 = LETTER RHA>LETTER DDHA
"\ue05e>\u0d2b;" // REMAP (indicExceptions.txt): \u0d5e>\u0d2b = LETTER FA>LETTER PHA
"\ue05f>\u0d2f;" // REMAP (indicExceptions.txt): \u0d5f>\u0d2f = LETTER YYA>LETTER YA
"\ue060>\u0d60;" // LETTER VOCALIC RR
"\ue061>\u0d61;" // LETTER VOCALIC LL
// \ue062>; # UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC L
// \ue063>; # UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC LL
// --- Digits ---
"\ue066>\u0d66;" // DIGIT ZERO
"\ue067>\u0d67;" // DIGIT ONE
"\ue068>\u0d68;" // DIGIT TWO
"\ue069>\u0d69;" // DIGIT THREE
"\ue06a>\u0d6a;" // DIGIT FOUR
"\ue06b>\u0d6b;" // DIGIT FIVE
"\ue06c>\u0d6c;" // DIGIT SIX
"\ue06d>\u0d6d;" // DIGIT SEVEN
"\ue06e>\u0d6e;" // DIGIT EIGHT
"\ue06f>\u0d6f;" // DIGIT NINE
// --- Script-specific forms ---
// These rules cover \ue00e, \ue012, \ue031, \ue046 and \ue04a, none of which
// has a row in the main table above.
// \ue080>; # UNMAPPED InterIndic-Malayalam: ISSHAR
"\ue00e>\u0d0e;" // LETTER E
"\ue012>\u0d12;" // LETTER O
"\ue031>\u0d31;" // LETTER RRA
"\ue046>\u0d46;" // VOWEL SIGN E
"\ue04a>\u0d4a;" // VOWEL SIGN O
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,134 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Oriya.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// InterIndic_Oriya
translit_InterIndic_Oriya {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Oriya.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// InterIndic_Oriya
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:01 2001
//--------------------------------------------------------------------
// InterIndic-Oriya
//
// Purpose: one-way conversion table from the InterIndic interchange
// code points (\ue001-\ue070, Unicode private-use area) to Oriya
// (\u0b01-\u0b70). Each quoted string is one rule "source>target;".
//
// Reading the comments:
//   - A bare character name means the source maps to the Oriya
//     character at the same offset within its block.
//   - "REMAP (indicExceptions.txt)" means the same-offset Oriya
//     character is replaced by the listed substitute sequence.
//   - A commented-out "\ueXXX>;" line documents a source code point
//     that gets no rule at that position in the table.
//
// Every rule is keyed on a single, distinct source character, so the
// order of the rules does not affect the result.
//
// The normalization directives below and at the end are commented out,
// so this table applies no NFD/NFC pass of its own.
//:: NFD (NFC) ;
"\ue001>\u0b01;" // SIGN CANDRABINDU
"\ue002>\u0b02;" // SIGN ANUSVARA
"\ue003>\u0b03;" // SIGN VISARGA
"\ue005>\u0b05;" // LETTER A
"\ue006>\u0b06;" // LETTER AA
"\ue007>\u0b07;" // LETTER I
"\ue008>\u0b08;" // LETTER II
"\ue009>\u0b09;" // LETTER U
"\ue00a>\u0b0a;" // LETTER UU
"\ue00b>\u0b0b;" // LETTER VOCALIC R
"\ue00c>\u0b0c;" // LETTER VOCALIC L
// \ue00f (InterIndic LETTER EE; \u0b0f is named LETTER E): name mismatch,
// so no rule here; mapped explicitly at the end of this table.
"\ue010>\u0b10;" // LETTER AI
// \ue013 (InterIndic LETTER OO; \u0b13 is named LETTER O): name mismatch,
// so no rule here; mapped explicitly at the end of this table.
"\ue014>\u0b14;" // LETTER AU
"\ue015>\u0b15;" // LETTER KA
"\ue016>\u0b16;" // LETTER KHA
"\ue017>\u0b17;" // LETTER GA
"\ue018>\u0b18;" // LETTER GHA
"\ue019>\u0b19;" // LETTER NGA
"\ue01a>\u0b1a;" // LETTER CA
"\ue01b>\u0b1b;" // LETTER CHA
"\ue01c>\u0b1c;" // LETTER JA
"\ue01d>\u0b1d;" // LETTER JHA
"\ue01e>\u0b1e;" // LETTER NYA
"\ue01f>\u0b1f;" // LETTER TTA
"\ue020>\u0b20;" // LETTER TTHA
"\ue021>\u0b21;" // LETTER DDA
"\ue022>\u0b22;" // LETTER DDHA
"\ue023>\u0b23;" // LETTER NNA
"\ue024>\u0b24;" // LETTER TA
"\ue025>\u0b25;" // LETTER THA
"\ue026>\u0b26;" // LETTER DA
"\ue027>\u0b27;" // LETTER DHA
"\ue028>\u0b28;" // LETTER NA
"\ue029>\u0b28;" // REMAP (indicExceptions.txt): \u0b29>\u0b28 = LETTER NNNA>LETTER NA
"\ue02a>\u0b2a;" // LETTER PA
"\ue02b>\u0b2b;" // LETTER PHA
"\ue02c>\u0b2c;" // LETTER BA
"\ue02d>\u0b2d;" // LETTER BHA
"\ue02e>\u0b2e;" // LETTER MA
"\ue02f>\u0b2f;" // LETTER YA
"\ue030>\u0b30;" // LETTER RA
"\ue032>\u0b32;" // LETTER LA
"\ue033>\u0b33;" // LETTER LLA
"\ue034>\u0b33;" // REMAP (indicExceptions.txt): \u0b34>\u0b33 = LETTER LLLA>LETTER LLA
"\ue035>\u0b2c;" // REMAP (indicExceptions.txt): \u0b35>\u0b2c = LETTER VA>LETTER BA
"\ue036>\u0b36;" // LETTER SHA
"\ue037>\u0b37;" // LETTER SSA
"\ue038>\u0b38;" // LETTER SA
"\ue039>\u0b39;" // LETTER HA
"\ue03c>\u0b3c;" // SIGN NUKTA
"\ue03d>\u0b3d;" // SIGN AVAGRAHA
"\ue03e>\u0b3e;" // VOWEL SIGN AA
"\ue03f>\u0b3f;" // VOWEL SIGN I
"\ue040>\u0b40;" // VOWEL SIGN II
"\ue041>\u0b41;" // VOWEL SIGN U
"\ue042>\u0b42;" // VOWEL SIGN UU
"\ue043>\u0b43;" // VOWEL SIGN VOCALIC R
"\ue044>\u0b43\u0b3c;" // REMAP (indicExceptions.txt): \u0b44>\u0b43\u0b3c = VOWEL SIGN VOCALIC RR>VOWEL SIGN VOCALIC R.SIGN NUKTA
"\ue045>\u0b47;" // REMAP (indicExceptions.txt): \u0b45>\u0b47 = VOWEL SIGN CANDRA E>VOWEL SIGN E
// \ue047 (InterIndic VOWEL SIGN EE; \u0b47 is named VOWEL SIGN E): name
// mismatch, so no rule here; mapped explicitly at the end of this table.
"\ue048>\u0b48;" // VOWEL SIGN AI
"\ue049>\u0b4b;" // REMAP (indicExceptions.txt): \u0b49>\u0b4b = VOWEL SIGN CANDRA O>VOWEL SIGN O
// \ue04b (InterIndic VOWEL SIGN OO; \u0b4b is named VOWEL SIGN O): name
// mismatch, so no rule here; mapped explicitly at the end of this table.
"\ue04c>\u0b4c;" // VOWEL SIGN AU
"\ue04d>\u0b4d;" // SIGN VIRAMA
"\ue050>\u0b13\u0b01;" // REMAP (indicExceptions.txt): \u0b50>\u0b13\u0b01 = OM>LETTER O.SIGN CANDRABINDU
// \ue055>; # UNMAPPED InterIndic-Oriya: LENGTH MARK
"\ue056>\u0b56;" // AI LENGTH MARK
"\ue057>\u0b57;" // AU LENGTH MARK
"\ue059>\u0b16\u0b3c;" // REMAP (indicExceptions.txt): \u0b59>\u0b16\u0b3c = LETTER KHHA>LETTER KHA.SIGN NUKTA
"\ue05a>\u0b17\u0b3c;" // REMAP (indicExceptions.txt): \u0b5a>\u0b17\u0b3c = LETTER GHHA>LETTER GA.SIGN NUKTA
"\ue05b>\u0b1c\u0b3c;" // REMAP (indicExceptions.txt): \u0b5b>\u0b1c\u0b3c = LETTER ZA>LETTER JA.SIGN NUKTA
"\ue05d>\u0b5d;" // LETTER RHA
"\ue05e>\u0b2b\u0b3c;" // REMAP (indicExceptions.txt): \u0b5e>\u0b2b\u0b3c = LETTER FA>LETTER PHA.SIGN NUKTA
"\ue05f>\u0b5f;" // LETTER YYA
"\ue060>\u0b60;" // LETTER VOCALIC RR
"\ue061>\u0b61;" // LETTER VOCALIC LL
"\ue062>\u0b56\u0b3c;" // REMAP (indicExceptions.txt): \u0b62>\u0b56\u0b3c = VOWEL SIGN VOCALIC L>AI LENGTH MARK.SIGN NUKTA
"\ue063>\u0b57\u0b3c;" // REMAP (indicExceptions.txt): \u0b63>\u0b57\u0b3c = VOWEL SIGN VOCALIC LL>AU LENGTH MARK.SIGN NUKTA
"\ue066>\u0b66;" // DIGIT ZERO
"\ue067>\u0b67;" // DIGIT ONE
"\ue068>\u0b68;" // DIGIT TWO
"\ue069>\u0b69;" // DIGIT THREE
"\ue06a>\u0b6a;" // DIGIT FOUR
"\ue06b>\u0b6b;" // DIGIT FIVE
"\ue06c>\u0b6c;" // DIGIT SIX
"\ue06d>\u0b6d;" // DIGIT SEVEN
"\ue06e>\u0b6e;" // DIGIT EIGHT
"\ue06f>\u0b6f;" // DIGIT NINE
"\ue070>\u0b70;" // ISSHAR
// Explicit mappings for the code points skipped above because the
// InterIndic and Oriya character names differ (EE/E, OO/O), plus RRA.
// Oriya has a single E letter (\u0b0f), so both the InterIndic short E
// (\ue00e) and the InterIndic E that corresponds to Devanagari \u090f
// (\ue00f) must land on it. Previously only \ue00e had a rule, which left
// \ue00f without any Oriya target even though \ue013, \ue047 and \ue04b
// (the analogous O / vowel-sign cases) were covered.
"\ue00e>\u0b0f;" // LETTER E (from InterIndic short E)
"\ue00f>\u0b0f;" // LETTER E
"\ue013>\u0b13;" // LETTER O
"\ue031>\u0b5c;" // LETTER RRA
"\ue047>\u0b47;" // VOWEL SIGN E
"\ue04b>\u0b4b;" // VOWEL SIGN O
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,134 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Tamil.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// InterIndic_Tamil
translit_InterIndic_Tamil {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Tamil.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// InterIndic_Tamil
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:01 2001
//--------------------------------------------------------------------
// InterIndic-Tamil
//
// Purpose: one-way conversion table from the InterIndic interchange
// code points (\ue0xx, Unicode private-use area) to Tamil (\u0b8x-\u0bex).
// Each quoted string is one rule "source>target;".
//
// Reading the comments:
//   - A bare character name means the source maps to the Tamil
//     character at the same offset within its block.
//   - "REMAP (indicExceptions.txt)" means the same-offset Tamil
//     character is replaced by the listed substitute sequence.
//   - A commented-out "\ueXXX>; # UNMAPPED" line documents a source code
//     point for which this table defines no rule.
//     NOTE(review): such characters presumably pass through unchanged;
//     confirm against the rule engine.
//
// Every rule is keyed on a single, distinct source character, so the
// order of the rules does not affect the result.
//
// The normalization directives below and at the end are commented out,
// so this table applies no NFD/NFC pass of its own.
//:: NFD (NFC) ;
// \ue001>; # UNMAPPED InterIndic-Tamil: SIGN CANDRABINDU
"\ue002>\u0b82;" // SIGN ANUSVARA
"\ue003>\u0b83;" // SIGN VISARGA
"\ue005>\u0b85;" // LETTER A
"\ue006>\u0b86;" // LETTER AA
"\ue007>\u0b87;" // LETTER I
"\ue008>\u0b88;" // LETTER II
"\ue009>\u0b89;" // LETTER U
"\ue00a>\u0b8a;" // LETTER UU
"\ue00b>\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0b8b>\u0bb0\u0bbf = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
"\ue00c>\u0b87;" // REMAP (indicExceptions.txt): \u0b8c>\u0b87 = LETTER VOCALIC L>LETTER I
"\ue00f>\u0b8f;" // LETTER EE
"\ue010>\u0b90;" // LETTER AI
"\ue013>\u0b93;" // LETTER OO
"\ue014>\u0b94;" // LETTER AU
"\ue015>\u0b95;" // LETTER KA
"\ue016>\u0b95;" // REMAP (indicExceptions.txt): \u0b96>\u0b95 = LETTER KHA>LETTER KA
"\ue017>\u0b95;" // REMAP (indicExceptions.txt): \u0b97>\u0b95 = LETTER GA>LETTER KA
"\ue018>\u0b95;" // REMAP (indicExceptions.txt): \u0b98>\u0b95 = LETTER GHA>LETTER KA
"\ue019>\u0b99;" // LETTER NGA
"\ue01a>\u0b9a;" // LETTER CA
"\ue01b>\u0b9a;" // REMAP (indicExceptions.txt): \u0b9b>\u0b9a = LETTER CHA>LETTER CA
"\ue01c>\u0b9c;" // LETTER JA
"\ue01d>\u0b9a;" // REMAP (indicExceptions.txt): \u0b9d>\u0b9a = LETTER JHA>LETTER CA
"\ue01e>\u0b9e;" // LETTER NYA
"\ue01f>\u0b9f;" // LETTER TTA
"\ue020>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba0>\u0b9f = LETTER TTHA>LETTER TTA
"\ue021>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba1>\u0b9f = LETTER DDA>LETTER TTA
"\ue022>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba2>\u0b9f = LETTER DDHA>LETTER TTA
"\ue023>\u0ba3;" // LETTER NNA
"\ue024>\u0ba4;" // LETTER TA
"\ue025>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba5>\u0ba4 = LETTER THA>LETTER TA
"\ue026>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba6>\u0ba4 = LETTER DA>LETTER TA
"\ue027>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba7>\u0ba4 = LETTER DHA>LETTER TA
"\ue028>\u0ba8;" // LETTER NA
"\ue029>\u0ba9;" // LETTER NNNA
"\ue02a>\u0baa;" // LETTER PA
"\ue02b>\u0baa;" // REMAP (indicExceptions.txt): \u0bab>\u0baa = LETTER PHA>LETTER PA
"\ue02c>\u0baa;" // REMAP (indicExceptions.txt): \u0bac>\u0baa = LETTER BA>LETTER PA
"\ue02d>\u0baa;" // REMAP (indicExceptions.txt): \u0bad>\u0baa = LETTER BHA>LETTER PA
"\ue02e>\u0bae;" // LETTER MA
"\ue02f>\u0baf;" // LETTER YA
"\ue030>\u0bb0;" // LETTER RA
"\ue032>\u0bb2;" // LETTER LA
"\ue033>\u0bb3;" // LETTER LLA
"\ue034>\u0bb4;" // LETTER LLLA
"\ue035>\u0bb5;" // LETTER VA
"\ue036>\u0bb7;" // REMAP (indicExceptions.txt): \u0bb6>\u0bb7 = LETTER SHA>LETTER SSA
"\ue037>\u0bb7;" // LETTER SSA
"\ue038>\u0bb8;" // LETTER SA
"\ue039>\u0bb9;" // LETTER HA
// \ue03c>; # UNMAPPED InterIndic-Tamil: SIGN NUKTA
// \ue03d>; # UNMAPPED InterIndic-Tamil: SIGN AVAGRAHA
"\ue03e>\u0bbe;" // VOWEL SIGN AA
"\ue03f>\u0bbf;" // VOWEL SIGN I
"\ue040>\u0bc0;" // VOWEL SIGN II
"\ue041>\u0bc1;" // VOWEL SIGN U
"\ue042>\u0bc2;" // VOWEL SIGN UU
"\ue043>\u0bcd\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0bc3>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC R>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
"\ue044>\u0bcd\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0bc4>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC RR>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
"\ue045>\u0bbe;" // REMAP (indicExceptions.txt): \u0bc5>\u0bbe = VOWEL SIGN CANDRA E>VOWEL SIGN AA
"\ue047>\u0bc7;" // VOWEL SIGN EE
"\ue048>\u0bc8;" // VOWEL SIGN AI
"\ue049>\u0bbe;" // REMAP (indicExceptions.txt): \u0bc9>\u0bbe = VOWEL SIGN CANDRA O>VOWEL SIGN AA
"\ue04b>\u0bcb;" // VOWEL SIGN OO
"\ue04c>\u0bcc;" // VOWEL SIGN AU
"\ue04d>\u0bcd;" // SIGN VIRAMA
"\ue050>\u0b93\u0bae\u0bcd;" // REMAP (indicExceptions.txt): \u0bd0>\u0b93\u0bae\u0bcd = OM>LETTER OO.LETTER MA.SIGN VIRAMA
// \ue055>; # UNMAPPED InterIndic-Tamil: LENGTH MARK
"\ue056>\u0bc8;" // REMAP (indicExceptions.txt): \u0bd6>\u0bc8 = AI LENGTH MARK>VOWEL SIGN AI
"\ue057>\u0bd7;" // AU LENGTH MARK
"\ue059>\u0b95;" // REMAP (indicExceptions.txt): \u0bd9>\u0b95 = LETTER KHHA>LETTER KA
"\ue05a>\u0b95;" // REMAP (indicExceptions.txt): \u0bda>\u0b95 = LETTER GHHA>LETTER KA
"\ue05b>\u0b9c;" // REMAP (indicExceptions.txt): \u0bdb>\u0b9c = LETTER ZA>LETTER JA
"\ue05d>\u0b9f;" // REMAP (indicExceptions.txt): \u0bdd>\u0b9f = LETTER RHA>LETTER TTA
"\ue05e>\u0baa;" // REMAP (indicExceptions.txt): \u0bde>\u0baa = LETTER FA>LETTER PA
"\ue05f>\u0baf;" // REMAP (indicExceptions.txt): \u0bdf>\u0baf = LETTER YYA>LETTER YA
"\ue060>\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0be0>\u0bb0\u0bbf = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
"\ue061>\u0b88;" // REMAP (indicExceptions.txt): \u0be1>\u0b88 = LETTER VOCALIC LL>LETTER II
// \ue062>; # UNMAPPED InterIndic-Tamil: VOWEL SIGN VOCALIC L
// \ue063>; # UNMAPPED InterIndic-Tamil: VOWEL SIGN VOCALIC LL
// \ue066>; # UNMAPPED InterIndic-Tamil: DIGIT ZERO
"\ue067>\u0be7;" // DIGIT ONE
"\ue068>\u0be8;" // DIGIT TWO
"\ue069>\u0be9;" // DIGIT THREE
"\ue06a>\u0bea;" // DIGIT FOUR
"\ue06b>\u0beb;" // DIGIT FIVE
"\ue06c>\u0bec;" // DIGIT SIX
"\ue06d>\u0bed;" // DIGIT SEVEN
"\ue06e>\u0bee;" // DIGIT EIGHT
"\ue06f>\u0bef;" // DIGIT NINE
// \ue080>; # UNMAPPED InterIndic-Tamil: ISSHAR
// The remaining rules are listed out of code-point order; they cover the
// E / O letters and vowel signs at \ue00e, \ue012, \ue046, \ue04a and
// LETTER RRA at \ue031, all mapped to the same-offset Tamil characters.
"\ue00e>\u0b8e;" // LETTER E
"\ue012>\u0b92;" // LETTER O
"\ue031>\u0bb1;" // LETTER RRA
"\ue046>\u0bc6;" // VOWEL SIGN E
"\ue04a>\u0bca;" // VOWEL SIGN O
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,134 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_InterIndic_Telugu.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// InterIndic_Telugu
translit_InterIndic_Telugu {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Telugu.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// InterIndic_Telugu
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:01 2001
//--------------------------------------------------------------------
// InterIndic-Telugu
//
// Purpose: one-way conversion table from the InterIndic interchange
// code points (\ue0xx, Unicode private-use area) to Telugu (\u0c0x-\u0c6x).
// Each quoted string is one rule "source>target;".
//
// Reading the comments:
//   - A bare character name means the source maps to the Telugu
//     character at the same offset within its block.
//   - "REMAP" means the same-offset Telugu character is replaced by the
//     listed substitute.
//   - A commented-out "\ueXXX>; # UNMAPPED" line documents a source code
//     point for which this table defines no rule.
//     NOTE(review): such characters presumably pass through unchanged;
//     confirm against the rule engine.
//
// Every rule is keyed on a single, distinct source character, so the
// order of the rules does not affect the result.
//
// The normalization directives below and at the end are commented out,
// so this table applies no NFD/NFC pass of its own.
//:: NFD (NFC) ;
"\ue001>\u0c01;" // SIGN CANDRABINDU
"\ue002>\u0c02;" // SIGN ANUSVARA
"\ue003>\u0c03;" // SIGN VISARGA
"\ue005>\u0c05;" // LETTER A
"\ue006>\u0c06;" // LETTER AA
"\ue007>\u0c07;" // LETTER I
"\ue008>\u0c08;" // LETTER II
"\ue009>\u0c09;" // LETTER U
"\ue00a>\u0c0a;" // LETTER UU
"\ue00b>\u0c0b;" // LETTER VOCALIC R
"\ue00c>\u0c0c;" // LETTER VOCALIC L
"\ue00f>\u0c0f;" // LETTER EE
"\ue010>\u0c10;" // LETTER AI
"\ue013>\u0c13;" // LETTER OO
"\ue014>\u0c14;" // LETTER AU
"\ue015>\u0c15;" // LETTER KA
"\ue016>\u0c16;" // LETTER KHA
"\ue017>\u0c17;" // LETTER GA
"\ue018>\u0c18;" // LETTER GHA
"\ue019>\u0c19;" // LETTER NGA
"\ue01a>\u0c1a;" // LETTER CA
"\ue01b>\u0c1b;" // LETTER CHA
"\ue01c>\u0c1c;" // LETTER JA
"\ue01d>\u0c1d;" // LETTER JHA
"\ue01e>\u0c1e;" // LETTER NYA
"\ue01f>\u0c1f;" // LETTER TTA
"\ue020>\u0c20;" // LETTER TTHA
"\ue021>\u0c21;" // LETTER DDA
"\ue022>\u0c22;" // LETTER DDHA
"\ue023>\u0c23;" // LETTER NNA
"\ue024>\u0c24;" // LETTER TA
"\ue025>\u0c25;" // LETTER THA
"\ue026>\u0c26;" // LETTER DA
"\ue027>\u0c27;" // LETTER DHA
"\ue028>\u0c28;" // LETTER NA
"\ue029>\u0c28;" // REMAP (indicExceptions.txt): \u0c29>\u0c28 = LETTER NNNA>LETTER NA
"\ue02a>\u0c2a;" // LETTER PA
"\ue02b>\u0c2b;" // LETTER PHA
"\ue02c>\u0c2c;" // LETTER BA
"\ue02d>\u0c2d;" // LETTER BHA
"\ue02e>\u0c2e;" // LETTER MA
"\ue02f>\u0c2f;" // LETTER YA
"\ue030>\u0c30;" // LETTER RA
"\ue032>\u0c32;" // LETTER LA
"\ue033>\u0c33;" // LETTER LLA
"\ue034>\u0c33;" // REMAP (indicExceptions.txt): \u0c34>\u0c33 = LETTER LLLA>LETTER LLA
"\ue035>\u0c35;" // LETTER VA
"\ue036>\u0c36;" // LETTER SHA
"\ue037>\u0c37;" // LETTER SSA
"\ue038>\u0c38;" // LETTER SA
"\ue039>\u0c39;" // LETTER HA
// \ue03c>; # UNMAPPED InterIndic-Telugu: SIGN NUKTA
// \ue03d>; # UNMAPPED InterIndic-Telugu: SIGN AVAGRAHA
"\ue03e>\u0c3e;" // VOWEL SIGN AA
"\ue03f>\u0c3f;" // VOWEL SIGN I
"\ue040>\u0c40;" // VOWEL SIGN II
"\ue041>\u0c41;" // VOWEL SIGN U
"\ue042>\u0c42;" // VOWEL SIGN UU
"\ue043>\u0c43;" // VOWEL SIGN VOCALIC R
"\ue044>\u0c44;" // VOWEL SIGN VOCALIC RR
"\ue045>\u0c46;" // REMAP: \u0c45>\u0c46 = VOWEL SIGN CANDRA E>VOWEL SIGN E
"\ue047>\u0c47;" // VOWEL SIGN EE
"\ue048>\u0c48;" // VOWEL SIGN AI
"\ue049>\u0c4a;" // REMAP (indicExceptions.txt): \u0c49>\u0c4a = VOWEL SIGN CANDRA O>VOWEL SIGN O
"\ue04b>\u0c4b;" // VOWEL SIGN OO
"\ue04c>\u0c4c;" // VOWEL SIGN AU
"\ue04d>\u0c4d;" // SIGN VIRAMA
"\ue050>\u0c13\u0c02;" // REMAP (indicExceptions.txt): \u0c50>\u0c13\u0c02 = OM>LETTER OO.SIGN ANUSVARA
"\ue055>\u0c55;" // LENGTH MARK
"\ue056>\u0c56;" // AI LENGTH MARK
"\ue057>\u0c4c;" // REMAP (indicExceptions.txt): \u0c57>\u0c4c = AU LENGTH MARK>VOWEL SIGN AU
"\ue059>\u0c16;" // REMAP (indicExceptions.txt): \u0c59>\u0c16 = LETTER KHHA>LETTER KHA
"\ue05a>\u0c17;" // REMAP (indicExceptions.txt): \u0c5a>\u0c17 = LETTER GHHA>LETTER GA
"\ue05b>\u0c1c;" // REMAP (indicExceptions.txt): \u0c5b>\u0c1c = LETTER ZA>LETTER JA
"\ue05d>\u0c22;" // REMAP (indicExceptions.txt): \u0c5d>\u0c22 = LETTER RHA>LETTER DDHA
"\ue05e>\u0c2b;" // REMAP (indicExceptions.txt): \u0c5e>\u0c2b = LETTER FA>LETTER PHA
"\ue05f>\u0c2f;" // REMAP (indicExceptions.txt): \u0c5f>\u0c2f = LETTER YYA>LETTER YA
"\ue060>\u0c60;" // LETTER VOCALIC RR
"\ue061>\u0c61;" // LETTER VOCALIC LL
"\ue062>\u0c3f;" // REMAP (indicExceptions.txt): \u0c62>\u0c3f = VOWEL SIGN VOCALIC L>VOWEL SIGN I
"\ue063>\u0c40;" // REMAP (indicExceptions.txt): \u0c63>\u0c40 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
"\ue066>\u0c66;" // DIGIT ZERO
"\ue067>\u0c67;" // DIGIT ONE
"\ue068>\u0c68;" // DIGIT TWO
"\ue069>\u0c69;" // DIGIT THREE
"\ue06a>\u0c6a;" // DIGIT FOUR
"\ue06b>\u0c6b;" // DIGIT FIVE
"\ue06c>\u0c6c;" // DIGIT SIX
"\ue06d>\u0c6d;" // DIGIT SEVEN
"\ue06e>\u0c6e;" // DIGIT EIGHT
"\ue06f>\u0c6f;" // DIGIT NINE
// \ue080>; # UNMAPPED InterIndic-Telugu: ISSHAR
// The remaining rules are listed out of code-point order; they cover the
// E / O letters and vowel signs at \ue00e, \ue012, \ue046, \ue04a and
// LETTER RRA at \ue031, all mapped to the same-offset Telugu characters.
// Note that \ue045 (above) and \ue046 (below) both produce \u0c46.
"\ue00e>\u0c0e;" // LETTER E
"\ue012>\u0c12;" // LETTER O
"\ue031>\u0c31;" // LETTER RRA
"\ue046>\u0c46;" // VOWEL SIGN E
"\ue04a>\u0c4a;" // VOWEL SIGN O
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,117 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Kannada_InterIndic.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Kannada_InterIndic
translit_Kannada_InterIndic {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Kannada_InterIndic.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// Kannada_InterIndic
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:05 2001
//--------------------------------------------------------------------
// Kannada-InterIndic
//
// Purpose: one-way conversion table from Kannada (\u0c82-\u0cef) to the
// InterIndic interchange code points (\ue002-\ue06f, Unicode private-use
// area). Each quoted string is one rule "source>target;".
//
// Every rule in this table preserves the offset within the block, i.e.
// \u0c80+n maps to \ue000+n; there are no remapped or multi-character
// targets. Kannada code points that do not appear as a source below have
// no rule in this table.
//
// Every rule is keyed on a single, distinct source character, so the
// order of the rules does not affect the result.
//
// The normalization directives below and at the end are commented out,
// so this table applies no NFD/NFC pass of its own.
//:: NFD (NFC) ;
"\u0c82>\ue002;" // SIGN ANUSVARA
"\u0c83>\ue003;" // SIGN VISARGA
"\u0c85>\ue005;" // LETTER A
"\u0c86>\ue006;" // LETTER AA
"\u0c87>\ue007;" // LETTER I
"\u0c88>\ue008;" // LETTER II
"\u0c89>\ue009;" // LETTER U
"\u0c8a>\ue00a;" // LETTER UU
"\u0c8b>\ue00b;" // LETTER VOCALIC R
"\u0c8c>\ue00c;" // LETTER VOCALIC L
"\u0c8e>\ue00e;" // LETTER E
"\u0c8f>\ue00f;" // LETTER EE
"\u0c90>\ue010;" // LETTER AI
"\u0c92>\ue012;" // LETTER O
"\u0c93>\ue013;" // LETTER OO
"\u0c94>\ue014;" // LETTER AU
"\u0c95>\ue015;" // LETTER KA
"\u0c96>\ue016;" // LETTER KHA
"\u0c97>\ue017;" // LETTER GA
"\u0c98>\ue018;" // LETTER GHA
"\u0c99>\ue019;" // LETTER NGA
"\u0c9a>\ue01a;" // LETTER CA
"\u0c9b>\ue01b;" // LETTER CHA
"\u0c9c>\ue01c;" // LETTER JA
"\u0c9d>\ue01d;" // LETTER JHA
"\u0c9e>\ue01e;" // LETTER NYA
"\u0c9f>\ue01f;" // LETTER TTA
"\u0ca0>\ue020;" // LETTER TTHA
"\u0ca1>\ue021;" // LETTER DDA
"\u0ca2>\ue022;" // LETTER DDHA
"\u0ca3>\ue023;" // LETTER NNA
"\u0ca4>\ue024;" // LETTER TA
"\u0ca5>\ue025;" // LETTER THA
"\u0ca6>\ue026;" // LETTER DA
"\u0ca7>\ue027;" // LETTER DHA
"\u0ca8>\ue028;" // LETTER NA
"\u0caa>\ue02a;" // LETTER PA
"\u0cab>\ue02b;" // LETTER PHA
"\u0cac>\ue02c;" // LETTER BA
"\u0cad>\ue02d;" // LETTER BHA
"\u0cae>\ue02e;" // LETTER MA
"\u0caf>\ue02f;" // LETTER YA
"\u0cb0>\ue030;" // LETTER RA
"\u0cb1>\ue031;" // LETTER RRA
"\u0cb2>\ue032;" // LETTER LA
"\u0cb3>\ue033;" // LETTER LLA
"\u0cb5>\ue035;" // LETTER VA
"\u0cb6>\ue036;" // LETTER SHA
"\u0cb7>\ue037;" // LETTER SSA
"\u0cb8>\ue038;" // LETTER SA
"\u0cb9>\ue039;" // LETTER HA
"\u0cbe>\ue03e;" // VOWEL SIGN AA
"\u0cbf>\ue03f;" // VOWEL SIGN I
"\u0cc0>\ue040;" // VOWEL SIGN II
"\u0cc1>\ue041;" // VOWEL SIGN U
"\u0cc2>\ue042;" // VOWEL SIGN UU
"\u0cc3>\ue043;" // VOWEL SIGN VOCALIC R
"\u0cc4>\ue044;" // VOWEL SIGN VOCALIC RR
"\u0cc6>\ue046;" // VOWEL SIGN E
"\u0cc7>\ue047;" // VOWEL SIGN EE
"\u0cc8>\ue048;" // VOWEL SIGN AI
"\u0cca>\ue04a;" // VOWEL SIGN O
"\u0ccb>\ue04b;" // VOWEL SIGN OO
"\u0ccc>\ue04c;" // VOWEL SIGN AU
"\u0ccd>\ue04d;" // SIGN VIRAMA
"\u0cd5>\ue055;" // LENGTH MARK
"\u0cd6>\ue056;" // AI LENGTH MARK
"\u0cde>\ue05e;" // LETTER FA
"\u0ce0>\ue060;" // LETTER VOCALIC RR
"\u0ce1>\ue061;" // LETTER VOCALIC LL
"\u0ce6>\ue066;" // DIGIT ZERO
"\u0ce7>\ue067;" // DIGIT ONE
"\u0ce8>\ue068;" // DIGIT TWO
"\u0ce9>\ue069;" // DIGIT THREE
"\u0cea>\ue06a;" // DIGIT FOUR
"\u0ceb>\ue06b;" // DIGIT FIVE
"\u0cec>\ue06c;" // DIGIT SIX
"\u0ced>\ue06d;" // DIGIT SEVEN
"\u0cee>\ue06e;" // DIGIT EIGHT
"\u0cef>\ue06f;" // DIGIT NINE
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,319 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Latin_InterIndic.txt
// Date: Thu Oct 25 22:17:21 2001
//--------------------------------------------------------------------
// Latin_InterIndic
translit_Latin_InterIndic {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 2001-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Latin-InterIndic
//
// Purpose: rules converting romanized (Latin) Indic text into the
// InterIndic interchange code points (\ue001-\ue07f, Unicode
// private-use area). The block has two parts:
//   1. variable definitions ("$name=...;") naming each InterIndic code
//      point and a few character sets;
//   2. conversion rules ("pattern>replacement;").
// Unlike the single-character lookup tables, several rules here share
// prefixes (e.g. "aa" / "a", "kh" / "k"), so the longer pattern is
// always listed first. Rule order is significant and must be preserved.
//
// Rule-syntax reminders used below (see the ICU transform rule docs):
//   x } y   match x only when followed by y (y is not consumed)
//   y { x   match x only when preceded by y
//   |       in a replacement, marks where the cursor is left
//   'c'     quoted literal; '' inside a rule is a literal apostrophe
//
// The normalization directives here and at the end are commented out.
//:: NFD;
// \ue000 reserved
// signs
"$chandrabindu=\ue001;"
"$anusvara=\ue002;"
"$visarga=\ue003;"
// \ue004 reserved
// w<vowel> represents the stand-alone form
"$wa=\ue005;"
"$waa=\ue006;"
"$wi=\ue007;"
"$wii=\ue008;"
"$wu=\ue009;"
"$wuu=\ue00a;"
"$wr=\ue00b;"
"$wl=\ue00c;"
"$wce=\ue00d;" // LETTER CANDRA E
"$wse=\ue00e;" // LETTER SHORT E
"$we=\ue00f;" // \u090f LETTER E
"$wai=\ue010;"
"$wco=\ue011;" // LETTER CANDRA O
"$wso=\ue012;" // LETTER SHORT O
"$wo=\ue013;" // \u0913 LETTER O
"$wau=\ue014;"
// consonants (each name includes the inherent vowel "a")
"$ka=\ue015;"
"$kha=\ue016;"
"$ga=\ue017;"
"$gha=\ue018;"
"$nga=\ue019;"
"$ca=\ue01a;"
"$cha=\ue01b;"
"$ja=\ue01c;"
"$jha=\ue01d;"
"$nya=\ue01e;"
"$tta=\ue01f;"
"$ttha=\ue020;"
"$dda=\ue021;"
"$ddha=\ue022;"
"$nna=\ue023;"
"$ta=\ue024;"
"$tha=\ue025;"
"$da=\ue026;"
"$dha=\ue027;"
"$na=\ue028;"
"$ena=\ue029;" //compatibility
"$pa=\ue02a;"
"$pha=\ue02b;"
"$ba=\ue02c;"
"$bha=\ue02d;"
"$ma=\ue02e;"
"$ya=\ue02f;"
"$ra=\ue030;"
"$rra=\ue031;"
"$la=\ue032;"
"$lla=\ue033;"
"$ela=\ue034;" //compatibility
"$va=\ue035;"
"$sha=\ue036;"
"$ssa=\ue037;"
"$sa=\ue038;"
"$ha=\ue039;"
//\u093a Reserved
//\u093b Reserved
"$nukta=\ue03c;"
"$avagraha=\ue03d;" // SIGN AVAGRAHA
// <vowel> represents the dependent form
"$aa=\ue03e;"
"$i=\ue03f;"
"$ii=\ue040;"
"$u=\ue041;"
"$uu=\ue042;"
"$rh=\ue043;"
"$lh=\ue044;"
"$ce=\ue045;" //VOWEL SIGN CANDRA E
"$se=\ue046;" //VOWEL SIGN SHORT E
"$e=\ue047;"
"$ai=\ue048;"
"$co=\ue049;" // VOWEL SIGN CANDRA O
"$so=\ue04a;" // VOWEL SIGN SHORT O
"$o=\ue04b;" // \u094b
"$au=\ue04c;"
"$virama=\ue04d;"
// \u094e Reserved
// \u094f Reserved
//\u0950>\ue050; # OM
// \u0951>; # UNMAPPED STRESS SIGN UDATTA
// \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
// \u0953>; # UNMAPPED GRAVE ACCENT
// \u0954>; # UNMAPPED ACUTE ACCENT
"$lm = \ue055;" // Telugu Length Mark
"$ailm=\ue056;" // AI Length Mark
"$aulm=\ue057;" // AU Length Mark
// Urdu compatibility forms
"$uka=\ue058;"
"$ukha=\ue059;"
"$ugha=\ue05a;"
"$ujha=\ue05b;"
"$uddha=\ue05c;"
"$udha=\ue05d;"
"$ufa=\ue05e;"
"$uya=\ue05f;"
"$wrr=\ue060;"
"$wll=\ue061;"
"$rrh=\ue062;"
"$llh=\ue063;"
"$danda=\ue064;"
"$doubleDanda=\ue065;"
"$zero=\ue066;" // DIGIT ZERO
"$one=\ue067;" // DIGIT ONE
"$two=\ue068;" // DIGIT TWO
"$three=\ue069;" // DIGIT THREE
"$four=\ue06a;" // DIGIT FOUR
"$five=\ue06b;" // DIGIT FIVE
"$six=\ue06c;" // DIGIT SIX
"$seven=\ue06d;" // DIGIT SEVEN
"$eight=\ue06e;" // DIGIT EIGHT
"$nine=\ue06f;" // DIGIT NINE
// For all other scripts
"$ecp0=\ue070;"
"$ecp1=\ue071;"
"$ecp2=\ue072;"
"$ecp3=\ue073;"
"$ecp4=\ue074;"
"$ecp5=\ue075;"
"$ecp6=\ue076;"
"$ecp7=\ue077;"
"$ecp8=\ue078;"
"$ecp9=\ue079;"
"$ecpA=\ue07a;"
"$ecpB=\ue07b;"
"$ecpC=\ue07c;"
"$ecpD=\ue07d;"
"$ecpE=\ue07e;"
"$ecpF=\ue07f;"
// \u0970>; # UNMAPPED ABBREVIATION SIGN
// Character sets used as rule context.
// $depVowelAbove / $depVowelBelow are ranges of dependent vowel signs;
// they select which nasal sign a following '~' becomes.
"$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
"$depVowelBelow=[\ue041-\ue044];"
"$endThing=[$danda$doubleDanda];"
// $x was originally called '&'; $z was '%'
// $x is defined but not referenced by any rule in this block.
// $z is the set of Latin consonant letters.
"$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];"
"$z=[bcdfghjklmnpqrstvwxyz];"
//DEBUG: $consonants=[$ka-$ha $virama];
// ---- Signs ----
"\u0315 > $avagraha;"
"'-'\u0303>$chandrabindu$anusvara;"
"'-'m\u0310>$chandrabindu;"
"'-'h\u0323>$visarga;"
"mm>$anusvara;"
"x>$visarga;"
// ---- Stand-alone (independent) vowels ----
// Longer spellings come before their prefixes (aa before a, ii before i).
"aa>$waa;"
"a\u0304>$waa;"
"ai>$wai;"
"au>$wau;"
"ii>$wii;"
"i\u0304>$wii;"
"i>$wi;"
"uu>$wuu;"
"u\u0304>$wuu;"
"u>$wu;"
"rrh>$wrr;"
"r\u0325\u0304>$wrr;"
"rh>$wr;"
"r\u0325>$wr;"
"l\u0325\u0304>$wll;"
// Two rules share the next string: lh and l + U+0325 both give $wl.
"lh>$wl;l\u0325>$wl;"
"e\u0304>$we;"
"o\u0304>$wo;"
"a>$wa;"
"e\u0306>$wce;"
"o\u0306>$wco;"
"e>$wse;"
"o>$wso;"
// ---- Nasals before a consonant ----
// A nasal letter followed (lookahead only) by one of the listed
// consonants becomes $anusvara. These must precede the plain nasal
// consonant rules further down.
"n}na > $na|$virama;"
"n\u0307}[kg] > $anusvara;"
"n\u0307}n\u0303 > $anusvara;"
"n\u0304}[cj] > $anusvara;"
"n\u0304}n\u0307 > $anusvara;"
"n\u0323}[tdn]\u0323 > $anusvara;"
"n}[tdn] > $anusvara;"
"m}[pbm] > $anusvara;"
"n} [yrlvsh] > $anusvara;"
"'-'m\u0307 > $anusvara;"
// ---- Consonants ----
// Each consonant is emitted as <consonant> $virama with the cursor left
// before the $virama ("|"), so the "$virama <vowel>" rules further down
// can replace that $virama with a dependent vowel sign or, for a plain
// "a", delete it. Digraphs and diacritic forms precede their prefixes.
"y\u0307>$uya|$virama;"
"l\u0331>$ela|$virama;"
"n\u0331>$ena|$virama;"
"n\u0307>$nga|$virama;"
"n\u0303>$nya|$virama;"
"n\u0323>$nna|$virama;"
"t\u0323h>$ttha|$virama;"
"t\u0323>$tta|$virama;"
"r\u0323h>$udha|$virama;"
"r\u0323>$uddha|$virama;"
"d\u0323h>$ddha|$virama;"
"d\u0323>$dda|$virama;"
"kh>$kha|$virama;"
"k>$ka|$virama;"
"q>$ka|$virama;"
"gh>$gha|$virama;"
"g>$ga|$virama;"
"ch>$cha|$virama;"
"c>$ca|$virama;"
"jh>$jha|$virama;"
"j>$ja|$virama;"
"ny>$nya|$virama;"
"tth>$ttha|$virama;"
"ddh>$ddha|$virama;"
"th>$tha|$virama;"
"t>$ta|$virama;"
"dh>$dha|$virama;"
"d>$da|$virama;"
"n>$na|$virama;"
"ph>$pha|$virama;"
"p>$pa|$virama;"
"bh>$bha|$virama;"
"b>$ba|$virama;"
"m>$ma|$virama;"
"y>$ya|$virama;"
"r>$ra|$virama;"
// NOTE(review): unlike every other consonant rule, the next rule consumes
// the following "a" and emits no $virama, so l + U+0323 followed by any
// other vowel is not handled here. Confirm this is intended.
"l\u0323a>$lla;"
"l>$la|$virama;"
"v>$va|$virama;"
"f>$va|$virama;"
"w>$va|$virama;"
"sh>$sha|$virama;"
"ss>$ssa|$virama;"
"s\u0323>$ssa|$virama;"
"s\u0301>$sha|$virama;"
"s>$sa|$virama;"
"z>$sa|$virama;"
"h>$ha|$virama;"
// ---- Punctuation and nasalization marks ----
"'.'>$danda;"
// NOTE(review): this rule needs $danda inside the match itself, but the
// preceding rule already turns every '.' into $danda and moves past it.
// Confirm whether this rule can ever fire on plain "..".
"$danda'.'>$doubleDanda;"
"$depVowelAbove{'~'>$anusvara;"
"$depVowelBelow{'~'>$chandrabindu;"
// ---- Dependent vowels ----
// The $virama left pending by a consonant rule, followed by a vowel
// spelling, is replaced by the dependent vowel sign.
"$virama aa>$aa;"
"$virama a\u0304>$aa;"
"$virama ai>$ai;"
"$virama au>$au;"
"$virama ii>$ii;"
"$virama i\u0304>$ii;"
"$virama i>$i;"
"$virama uu>$uu;"
"$virama u\u0304>$uu;"
"$virama u>$u;"
"$virama rrh>$rrh;"
"$virama r\u0325\u0304>$rrh;"
"$virama rh>$rh;"
// NOTE(review): the next rule also consumes a trailing "a"; confirm.
"$virama r\u0325a>$rh;"
"$virama r\u0325>$rh;"
"$virama l\u0325\u0304>$llh;"
"$virama lh>$lh;"
"$virama l\u0325>$lh;"
"$virama e\u0304>$e;"
"$virama o\u0304>$o;"
// Plain "a" is the inherent vowel: the pending $virama is just removed.
"$virama a>;"
"$virama e\u0306>$ce;"
"$virama o\u0306>$co;"
"$virama e>$se;"
"$virama o>$so;"
// ---- Apostrophe-separated vowels ----
// A pending $virama, then a literal apostrophe, then a vowel spelling:
// the $virama and apostrophe are dropped and the stand-alone
// (independent) vowel is emitted instead of a dependent sign.
"$virama''aa>$waa;"
"$virama''a\u0304>$waa;"
"$virama''ai>$wai;"
"$virama''au>$wau;"
"$virama''ii>$wii;"
"$virama''i\u0304>$wii;"
"$virama''i>$wi;"
"$virama''uu>$wuu;"
"$virama''u\u0304>$wuu;"
"$virama''u>$wu;"
"$virama''rrh>$wrr;"
"$virama''r\u0325\u0304>$wrr;"
"$virama''rh>$wr;"
"$virama''r\u0325>$wr;"
"$virama''l\u0325\u0304>$wll;"
"$virama''lh>$wl;"
"$virama''l\u0325>$wl;"
"$virama''e\u0304>$we;"
"$virama''o\u0304>$wo;"
"$virama''a>$wa;"
"$virama''e\u0306>$wce;"
"$virama''o\u0306>$wco;"
"$virama''e>$wse;"
"$virama''o>$wso;"
// ---- Pending virama with no vowel ----
// Before another Latin consonant or a space the $virama is kept;
// before a danda / double danda it is deleted.
"$virama } [$z] > $virama;"
"$virama } ' ' > $virama ;"
"$virama}$endThing>;"
// ---- Digits ----
"0>$zero;"
"1>$one;"
"2>$two;"
"3>$three;"
"4>$four;"
"5>$five;"
"6>$six;"
"7>$seven;"
"8>$eight;"
"9>$nine;"
// Any apostrophe not consumed by an earlier rule is deleted.
"''>;"
//:: NFC (NFD) ;
}
}

View File

@ -0,0 +1,528 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Latin_Jamo.utf8.txt
// Date: Thu Oct 25 22:17:22 2001
//--------------------------------------------------------------------
// Latin_Jamo
translit_Latin_Jamo {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Latin-Jamo
// Transliteration from Latin characters to Korean script is done in
// two steps: Latin to Jamo, then Jamo to Hangul. The Jamo-Hangul
// transliteration is done algorithmically following Unicode 3.0
// section 3.11. This file implements the Latin to Jamo
// transliteration using rules.
// Jamo occupy the block 1100-11FF. Within this block there are three
// groups of characters: initial consonants or choseong (I), medial
// vowels or jungseong (M), and trailing consonants or jongseong (F).
// Standard Korean syllables are of the form I+M+F*.
// Section 3.11 describes the use of 'filler' jamo to convert
// nonstandard syllables to standard form: the choseong filler 115F and
// the jungseong filler 1160. In this transliterator, we will not use
// 115F or 1160.
// We will, however, insert two 'null' jamo to make foreign words
// conform to Korean syllable structure. These are the null initial
// consonant 110B (IEUNG) and the null vowel 1173 (EU). In Latin text,
// we will use the hyphen in order to disambiguate strings,
// e.g. "kan-ggan" (initial GG) vs. "kanggan" (final NG + initial G).
// We will not use all of the characters in the jamo block. We will
// only use the 19 initials, 21 medials, and 27 finals possessing a
// jamo short name as defined in section 4.4 of the Unicode book.
// Rules of thumb. These guidelines provide the basic framework
// for the rules. They are phrased in terms of Latin-Jamo transliteration.
// The Jamo-Latin rules derive from these, since the Jamo-Latin rules are
// just context-free transliteration of jamo to corresponding short names,
// with the addition of hyphens to maintain round-trip integrity
// in the context of the Latin-Jamo rules.
// A sequence of vowels:
// - Take the longest sequence you can. If there are too many, or you don't
// have a starting consonant, introduce a 110B as necessary.
// A sequence of consonants.
// - First join the double consonants: G + G -> GG
// - In the remaining list,
// -- If there is no preceding vowel, take the first consonant, and insert EU
// after it. Continue with the rest of the consonants.
// -- If there is one consonant, attach to the following vowel
// -- If there are two consonants and a following vowel, attach one to the
// preceding vowel, and one to the following vowel.
// -- If there are more than two consonants, join the first two together if you
// can: L + G => LG
// -- If you still end up with more than 2 consonants, insert EU after the
// first one, and continue with the rest of the consonants.
//----------------------------------------------------------------------
// Variables
// Some latin consonants or consonant pairs only occur as initials, and
// some only as finals, but some occur as both. This makes some jamo
// consonants ambiguous when transliterated into latin.
// Initial only: IEUNG BB DD JJ R
// Final only: BS GS L LB LG LH LM LP LS LT NG NH NJ
// Initial and Final: B C D G GG H J K M N P S SS T
"$Gi = \u1100;"
"$GGi = \u1101;"
"$Ni = \u1102;"
"$Di = \u1103;"
"$DD = \u1104;"
"$R = \u1105;"
"$Mi = \u1106;"
"$Bi = \u1107;"
"$BB = \u1108;"
"$Si = \u1109;"
"$SSi = \u110A;"
"$IEUNG = \u110B;" // null initial, inserted during Latin-Jamo
"$Ji = \u110C;"
"$JJ = \u110D;"
"$Ci = \u110E;"
"$Ki = \u110F;"
"$Ti = \u1110;"
"$Pi = \u1111;"
"$Hi = \u1112;"
"$A = \u1161;"
"$AE = \u1162;"
"$YA = \u1163;"
"$YAE = \u1164;"
"$EO = \u1165;"
"$E = \u1166;"
"$YEO = \u1167;"
"$YE = \u1168;"
"$O = \u1169;"
"$WA = \u116A;"
"$WAE = \u116B;"
"$OE = \u116C;"
"$YO = \u116D;"
"$U = \u116E;"
"$WEO = \u116F;"
"$WE = \u1170;"
"$WI = \u1171;"
"$YU = \u1172;"
"$EU = \u1173;" // null medial, inserted during Latin-Jamo
"$YI = \u1174;"
"$I = \u1175;"
"$Gf = \u11A8;"
"$GGf = \u11A9;"
"$GS = \u11AA;"
"$Nf = \u11AB;"
"$NJ = \u11AC;"
"$NH = \u11AD;"
"$Df = \u11AE;"
"$L = \u11AF;"
"$LG = \u11B0;"
"$LM = \u11B1;"
"$LB = \u11B2;"
"$LS = \u11B3;"
"$LT = \u11B4;"
"$LP = \u11B5;"
"$LH = \u11B6;"
"$Mf = \u11B7;"
"$Bf = \u11B8;"
"$BS = \u11B9;"
"$Sf = \u11BA;"
"$SSf = \u11BB;"
"$NG = \u11BC;"
"$Jf = \u11BD;"
"$Cf = \u11BE;"
"$Kf = \u11BF;"
"$Tf = \u11C0;"
"$Pf = \u11C1;"
"$Hf = \u11C2;"
"$jamoInitial = [\u1100-\u1112];"
"$jamoMedial = [\u1161-\u1175];"
"$latinInitial = [bcdghjkmnprst];"
// Any character in the latin transliteration of a medial
"$latinMedial = [aeiouwy];"
// The last character of the latin transliteration of a medial
"$latinMedialEnd = [aeiou];"
//----------------------------------------------------------------------
// Jamo-Latin
// Jamo to latin is relatively simple, since it is the latin that is
// ambiguous. Most rules are straightforward, and we encode them below
// as simple add-on back rule, e.g.:
// $jamoMedial {bs} > $BS;
// becomes
// $jamoMedial {bs} <> $BS;
// Furthermore, we don't care about the ordering for Jamo-Latin because
// we are going from single characters, so we can very easily piggyback
// on the Latin-Jamo.
// The main issue with Jamo-Latin is when to insert hyphens.
// Hyphens are inserted to obtain correct round trip behavior. For
// example, the sequence Ki A Gf Gi E, if transliterated to "kagge",
// would then round trip to Ki A GGi E. To prevent this, we insert a
// hyphen: "kag-ge". IMPORTANT: The need for hyphens depends
// very specifically on the behavior of the Latin-Jamo rules. A change
// in the Latin-Jamo behavior can completely change the way the
// hyphen insertion must be done.
// First try to preserve actual hyphens in the jamo text by doubling
// them. This fixes problems like:
// (Di)(A)(Ji)(U)(NG)-(IEUNG)(YEO)(Nf)(Gi)(YEO)(L) => dajung-yeongyeol
// => (Di)(A)(Ji)(U)(NG)(IEUNG)(YEO)(Nf)(Gi)(YEO)(L). This is optional
// -- if we don't care about losing hyphens in the jamo, we can delete
// this rule.
"'--' <> '-';"
// Triple consonants. For three consonants "axxx" we insert a
// hyphen between the first and second "x" if XXf, Xf, and Xi all
// exist, and we have A Xf XXi. This prevents the reverse
// transliteration to A XXf Xi.
"'-' < $latinMedialEnd g {} $GGi;"
"'-' < $latinMedialEnd s {} $SSi;"
// For vowels the rule is similar. If there is a vowel "ae" such that
// "a" by itself and "e" by itself are vowels, then we want to map A E
// to "a-e" so as not to round trip to AE. However, in the text Ki EO
// IEUNG E we don't need to map to "keo-e". "keoe" suffices. For
// vowels of the form "aei", both "ae" + "i" and "a" + "ei" must be
// tested. NOTE: These rules used to have a left context of
// $latinInitial instead of [^$latinMedial]. The problem with this is
// sequences where an initial IEUNG is transliterated away:
// (IEUNG)(A)(IEUNG)(EO) => aeo => (IEUNG)(AE)(IEUNG)(O)
"'-' < [^$latinMedial] [y w] e {} [$O $OE];"
"'-' < [^$latinMedial] e {} [$O $OE $U];"
"'-' < [^$latinMedial] [o a] {} [$E $EO $EU];"
"'-' < [^$latinMedial] [w y] a {} [$E $EO $EU];"
// Similar to the above, but with an intervening $IEUNG.
"'-' < [^$latinMedial] [y w] e {} $IEUNG [$O $OE];"
"'-' < [^$latinMedial] e {} $IEUNG [$O $OE $U];"
"'-' < [^$latinMedial] [o a] {} $IEUNG [$E $EO $EU];"
"'-' < [^$latinMedial] [w y] a {} $IEUNG [$E $EO $EU];"
// Single finals followed by IEUNG. The jamo sequence A Xf IEUNG E,
// where Xi also exists, must be transliterated as "ax-e" to prevent
// the round trip conversion to A Xi E.
"'-' < $latinMedialEnd b {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd c {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd d {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd g {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd h {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd j {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd k {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd m {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd n {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd p {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd s {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd t {} $IEUNG $jamoMedial;"
// Double finals followed by IEUNG. Similar to the single finals
// followed by IEUNG. Any latin consonant pair X Y, between medials,
// that we would split by Latin-Jamo, we must handle when it occurs as
// part of A XYf IEUNG E, to prevent round trip conversion to A Xf Yi
// E.
"'-' < $latinMedialEnd b s {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd g g {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd g s {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd l b {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd l g {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd l h {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd l m {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd l p {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd l s {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd l t {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd n g {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd n h {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd n j {} $IEUNG $jamoMedial;"
"'-' < $latinMedialEnd s s {} $IEUNG $jamoMedial;"
// Split doubles. Text of the form A Xf Xi E, where XXi also occurs,
// we transliterate as "ax-xe" to prevent round trip transliteration as
// A XXi E.
"'-' < $latinMedialEnd b {} $Bi $jamoMedial;"
"'-' < $latinMedialEnd d {} $Di $jamoMedial;"
"'-' < $latinMedialEnd j {} $Ji $jamoMedial;"
"'-' < $latinMedialEnd g {} $Gi $jamoMedial;"
"'-' < $latinMedialEnd s {} $Si $jamoMedial;"
// XYY. This corresponds to the XYY rule in Latin-Jamo. By default
// Latin-Jamo maps "xyy" to Xf YYi, to keep YY together. As a result,
// "xyy" forms that correspond to XYf Yi must be transliterated as
// "xy-y".
"'-' < $latinMedialEnd b s {} [$Si $SSi];"
"'-' < $latinMedialEnd g s {} [$Si $SSi];"
"'-' < $latinMedialEnd l b {} [$Bi $BB];"
"'-' < $latinMedialEnd l g {} [$Gi $GGi];"
"'-' < $latinMedialEnd l s {} [$Si $SSi];"
"'-' < $latinMedialEnd n g {} [$Gi $GGi];"
"'-' < $latinMedialEnd n j {} [$Ji $JJ];"
// Deletion of IEUNG is handled below.
//----------------------------------------------------------------------
// Latin-Jamo
// [Basic, context-free Jamo-Latin rules are embedded here too. See
// above.]
// Split digraphs: Text of the form 'axye', where 'xy' is a final
// digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and
// 'e' are medials, we want to transliterate this as A Xf Yi E rather
// than A XYf IEUNG E. We do NOT include text of the form "axxe",
// since that is handled differently below. These rules are generated
// programmatically from the jamo data.
"$jamoMedial {b s} $latinMedial > $Bf $Si;"
"$jamoMedial {g s} $latinMedial > $Gf $Si;"
"$jamoMedial {l b} $latinMedial > $L $Bi;"
"$jamoMedial {l g} $latinMedial > $L $Gi;"
"$jamoMedial {l h} $latinMedial > $L $Hi;"
"$jamoMedial {l m} $latinMedial > $L $Mi;"
"$jamoMedial {l p} $latinMedial > $L $Pi;"
"$jamoMedial {l s} $latinMedial > $L $Si;"
"$jamoMedial {l t} $latinMedial > $L $Ti;"
"$jamoMedial {n g} $latinMedial > $Nf $Gi;"
"$jamoMedial {n h} $latinMedial > $Nf $Hi;"
"$jamoMedial {n j} $latinMedial > $Nf $Ji;"
// Single consonants are initials: Text of the form 'axe', where 'x'
// can be an initial or a final, and 'a' and 'e' are medials, we want
// to transliterate as A Xi E rather than A Xf IEUNG E.
"$jamoMedial {b} $latinMedial > $Bi;"
"$jamoMedial {c} $latinMedial > $Ci;"
"$jamoMedial {d} $latinMedial > $Di;"
"$jamoMedial {g} $latinMedial > $Gi;"
"$jamoMedial {h} $latinMedial > $Hi;"
"$jamoMedial {j} $latinMedial > $Ji;"
"$jamoMedial {k} $latinMedial > $Ki;"
"$jamoMedial {m} $latinMedial > $Mi;"
"$jamoMedial {n} $latinMedial > $Ni;"
"$jamoMedial {p} $latinMedial > $Pi;"
"$jamoMedial {s} $latinMedial > $Si;"
"$jamoMedial {t} $latinMedial > $Ti;"
// Doubled initials. The sequence "axxe", where XX exists as an initial
// (XXi), and also Xi and Xf exist (true of all digraphs XX), we want
// to transliterate as A XXi E, rather than split to A Xf Xi E.
"$jamoMedial {b b} $latinMedial > $BB;"
"$jamoMedial {d d} $latinMedial > $DD;"
"$jamoMedial {j j} $latinMedial > $JJ;"
"$jamoMedial {g g} $latinMedial > $GGi;"
"$jamoMedial {s s} $latinMedial > $SSi;"
// XYY. Because doubled consonants bind more strongly than XY
// consonants, we must handle the sequence "axyy" specially. Here XYf
// and YYi must exist. In these cases, we map to Xf YYi rather than
// XYf.
"$jamoMedial {b} s s > $Bf;"
"$jamoMedial {g} s s > $Gf;"
"$jamoMedial {l} b b > $L;"
"$jamoMedial {l} g g > $L;"
"$jamoMedial {l} s s > $L;"
"$jamoMedial {n} g g > $Nf;"
"$jamoMedial {n} j j > $Nf;"
// Finals: Attach a consonant that follows a medial to that medial.
// Do this BEFORE mapping consonants to initials. Longer keys must
// precede shorter keys that they start with, e.g., the rule for 'bs'
// must precede 'b'.
// [BASIC Jamo-Latin FINALS handled here. Order irrelevant within this
// block for Jamo-Latin.]
// Each rule is kept in its own string literal, one per line.
"$jamoMedial {bs} <> $BS;"
"$jamoMedial {b} <> $Bf;"
"$jamoMedial {c} <> $Cf;"
"$jamoMedial {d} <> $Df;"
"$jamoMedial {gg} <> $GGf;"
"$jamoMedial {gs} <> $GS;"
"$jamoMedial {g} <> $Gf;"
"$jamoMedial {h} <> $Hf;"
"$jamoMedial {j} <> $Jf;"
"$jamoMedial {k} <> $Kf;"
// All 'l' digraphs come before the bare 'l' rule (longer keys first).
"$jamoMedial {lb} <> $LB;"
"$jamoMedial {lg} <> $LG;"
"$jamoMedial {lh} <> $LH;"
"$jamoMedial {lm} <> $LM;"
"$jamoMedial {lp} <> $LP;"
"$jamoMedial {ls} <> $LS;"
"$jamoMedial {lt} <> $LT;"
"$jamoMedial {l} <> $L;"
"$jamoMedial {m} <> $Mf;"
"$jamoMedial {ng} <> $NG;"
"$jamoMedial {nh} <> $NH;"
"$jamoMedial {nj} <> $NJ;"
"$jamoMedial {n} <> $Nf;"
"$jamoMedial {p} <> $Pf;"
"$jamoMedial {ss} <> $SSf;"
"$jamoMedial {s} <> $Sf;"
"$jamoMedial {t} <> $Tf;"
// Initials: Attach single consonant to following medial. Do this
// AFTER mapping finals. Longer keys must precede shorter keys that
// they start with, e.g., the rule for 'gg' must precede 'g'.
// [BASIC Jamo-Latin INITIALS handled here. Order irrelevant within
// this block for Jamo-Latin.]
"{gg} $latinMedial <> $GGi;"
"{g} $latinMedial <> $Gi;"
"{n} $latinMedial <> $Ni;"
"{dd} $latinMedial <> $DD;"
"{d} $latinMedial <> $Di;"
"{r} $latinMedial <> $R;"
"{m} $latinMedial <> $Mi;"
"{bb} $latinMedial <> $BB;"
"{b} $latinMedial <> $Bi;"
"{ss} $latinMedial <> $SSi;"
"{s} $latinMedial <> $Si;"
"{jj} $latinMedial <> $JJ;"
"{j} $latinMedial <> $Ji;"
"{c} $latinMedial <> $Ci;"
"{k} $latinMedial <> $Ki;"
"{t} $latinMedial <> $Ti;"
"{p} $latinMedial <> $Pi;"
"{h} $latinMedial <> $Hi;"
// 'r' in final position. Because of the equivalency of the 'l' and
// 'r' jamo (the glyphs are the same), we try to provide the same
// equivalency in Latin-Jamo. The 'l' to 'r' conversion is handled
// below. If we see an 'r' in an apparent final position, treat it
// like 'l'. For example, "karka" => Ki A R EU Ki A without this rule.
// Instead, we want Ki A L Ki A.
"$jamoMedial {r} $latinInitial > | l;"
// Initial + Final: If we match the next rule, we have initial then
// final consonant with no intervening medial. We insert the null
// vowel BEFORE it to create a well-formed syllable. (In the next rule
// we insert a null vowel AFTER an anomalous initial.)
"$jamoInitial {} [bcdghjklmnpst] > $EU;"
// Initial + X: This block matches an initial consonant not followed by
// a medial. We insert the null vowel after it. We handle double
// initials explicitly here; for single initial consonants we insert EU
// (as Latin) after them and let standard rules do the rest.
// BREAKS ROUND TRIP INTEGRITY
"gg > $GGi $EU;"
"dd > $DD $EU;"
"bb > $BB $EU;"
"ss > $SSi $EU;"
"jj > $JJ $EU;"
"([bcdghjkmnprst]) > | $1 eu;"
// X + Final: Finally we have to deal with a consonant that can only be
// interpreted as a final (not an initial) and which is preceded
// neither by an initial nor a medial. It is the start of the
// syllable, but cannot be. Most of these will already be handled by
// the above rules. 'bs' splits into Bi EU Sf. Similar for 'gs' 'ng'
// 'nh' 'nj'. The only problem is 'l' and digraphs starting with 'l'.
// For this isolated case, we could add a null initial and medial,
// which would give "la" => IEUNG EU L IEUNG A, for example. A more
// economical solution is to transliterate isolated "l" (that is,
// initial "l") to "r". (Other similar conversions of consonants that
// occur neither as initials nor as finals are handled below.)
"l > | r;"
// Medials. If a medial is preceded by an initial, then we proceed
// normally. As usual, longer keys must precede shorter ones.
// [BASIC Jamo-Latin MEDIALS handled here. Order irrelevant within
// this block for Jamo-Latin.]
"$jamoInitial {ae} <> $AE;"
"$jamoInitial {a} <> $A;"
"$jamoInitial {eo} <> $EO;"
"$jamoInitial {eu} <> $EU;"
"$jamoInitial {e} <> $E;"
"$jamoInitial {i} <> $I;"
"$jamoInitial {oe} <> $OE;"
"$jamoInitial {o} <> $O;"
"$jamoInitial {u} <> $U;"
"$jamoInitial {wae} <> $WAE;"
"$jamoInitial {wa} <> $WA;"
"$jamoInitial {weo} <> $WEO;"
"$jamoInitial {we} <> $WE;"
"$jamoInitial {wi} <> $WI;"
"$jamoInitial {yae} <> $YAE;"
"$jamoInitial {ya} <> $YA;"
"$jamoInitial {yeo} <> $YEO;"
"$jamoInitial {ye} <> $YE;"
"$jamoInitial {yi} <> $YI;"
"$jamoInitial {yo} <> $YO;"
"$jamoInitial {yu} <> $YU;"
// We may see an anomalous isolated 'w' or 'y'. In that case, we
// interpret it as 'wi' and 'yu', respectively.
// BREAKS ROUND TRIP INTEGRITY
"$jamoInitial {w} > | wi;"
"$jamoInitial {y} > | yu;"
// Otherwise, insert a null consonant IEUNG before the medial (which is
// still an untransliterated latin vowel).
"($latinMedial) > $IEUNG | $1;"
// Convert non-jamo latin consonants to equivalents. These occur as
// neither initials nor finals in jamo. 'l' occurs as a final, but not
// an initial; it is handled above. The following letters (left hand
// side) will never be output by Jamo-Latin.
"f > | p;"
"q > | k;"
"v > | b;"
"x > | ks;"
"z > | s;"
// Delete hyphens (Latin-Jamo).
"'-' > ;"
// Delete null consonants (Jamo-Latin). Do NOT delete null EU vowels,
// since these may also occur in text.
"< $IEUNG;"
// eof
}
}

View File

@ -0,0 +1,470 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Latin_Katakana.txt
// Date: Thu Oct 25 22:17:22 2001
//--------------------------------------------------------------------
// Latin_Katakana
translit_Latin_Katakana {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// $Source: /xsrl/Nsvn/icu/icu/source/data/translit/Attic/t_Latn_Kana.txt,v $
// $Date: 2001/10/26 05:41:16 $
// $Revision: 1.1 $
//--------------------------------------------------------------------
"::NFD (NFC) ;"
":: [:Latin:] Lower ();"
// Uses modified Hepburn. Small changes to make unambiguous.
// | Kunrei-shiki: Hepburn/MHepburn
// | ------------------------------
// | si: shi
// | si ~ya: sha
// | si ~yu: shu
// | si ~yo: sho
// | zi: ji
// | zi ~ya: ja
// | zi ~yu: ju
// | zi ~yo: jo
// | ti: chi
// | ti ~ya: cha
// | ti ~yu: chu
// | ti ~yo: cho
// | tu: tsu
// | di: ji/dji
// | du: zu/dzu
// | hu: fu
// | For foreign words:
// | -----------------
// | se ~i si
// | si ~e she
// |
// | ze ~i zi
// | zi ~e je
// |
// | te ~i ti
// | ti ~e che
// | te ~u tu
// |
// | de ~i di
// | de ~u du
// | de ~i di
// |
// | he ~u: hu
// | hu ~a fa
// | hu ~i fi
// | hu ~e fe
// | hu ~o fo
// Most small forms are generated, but if necessary
// explicit small forms are given with ~a, ~ya, etc.
//------------------------------------------------------
// Variables
"$vowel = [aeiou] ;"
"$macron = \u0304 ;"
// Variables used for doubled-consonants with tsu
"$kana = [\u3041-\u3094] ;"
"$voice = [\u3099\u309B];"
"$semivoice = [\u309A\u309C];"
"$k_start = [カキクケコかきくけこ] ;"
"$s_start = [サシスセソさしすせそ] ;"
"$j_start = [シし] $voice ;"
"$t_start = [タチツテトたちつてと] ;"
"$n_start = [ナニヌネノンなにぬねの] ;"
"$h_start = [ハヒヘホはひへほ] ;"
"$f_start = [フふ] ;"
"$m_start = [マミムメモまみむめも] ;"
"$y_start = [ヤユヨやゆよ] ;"
"$r_start = [ラリルレロらりるれろ] ;"
"$w_start = [ワヰヱヲわゐゑを] ;"
"$v_start = [ワヰヱヲ]゙ ;"
// if ン is followed by $n_quoter, then it needs an
// apostrophe after its romaji form to disambiguate it.
// e.g., ン ア != ナ, so represent as "n'a", not "na".
// The set holds the bare vowels, the whole n-row (ナ ニ ヌ ネ ノ), the
// y-row and ン itself. For the n-row the quote keeps the doubled-consonant
// rule "n } n" from reading the first 'n' back as ッ.
"$n_quoter = [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ;"
"$small_y = [ャィュェョ] ;"
"$iteration = \u309D ;"
//------------------------------------------------------
// katakana rules
// Punctuation
"'.' <> 。;"
"',' <> 、;"
// ' ' } [a-z] > ; # delete spaces before latin
// ' ' < [^' '\u30A0-\u30ff] {} ['\u30A0-\u30ff] ; #insert spaces before hiragana
// Iteration Mark
// Copy previous letter & marks
// TODO
// | $1 $1 < ($kana [[:M:]$voice$semivoice]?) $iteration
// Specials for katakana -- not shared with hiragana
"va <> ヷ ;"
"vi <> ヸ ;"
"ve <> ヹ ;"
"vo <> ヺ ;"
"'~ka' <> ヵ ;"
"'~ke' <> ヶ ;"
// ~~~ begin shared rules ~~~
//special
"ya < '~'ャ;"
"yi < '~'ィ ;"
"yu < '~'ュ;"
"ye < '~'ェ;"
"yo < '~'ョ;"
//normal
"a <> ア ;"
"b | '~' < ヒ ゙} $small_y ;"
"by } $vowel > ビ | '~y' ;"
"ba <> バ ;"
"bi <> ビ ;"
"bu <> ブ ;"
"be <> ベ ;"
"bo <> ボ ;"
"c } i > | s ;"
"c } e > | s ;"
"da <> ダ ;"
"di <> ディ ;"
"du <> デゥ ;"
"de <> デ ;"
"do <> ド ;"
"dzu <> ヅ ;"
"dja < ヂャ ;"
"dji'~i' < ヂィ ;" // liu
"dju < ヂュ ;"
"dje < ヂェ ;"
"djo < ヂョ ;"
"dji <> ヂ ;"
"dj } $vowel > ヂ | '~y' ;"
// TODO: QUESTION: use ĵĴżŻ instead of dj, dz
"cha < チャ ;"
"chi'~i' < チィ ;" // liu
"chu < チュ ;"
"che < チェ ;"
"cho < チョ ;"
"chi <> チ ;"
"ch } $vowel > チ | '~y' ;"
"e <> エ ;"
"g | '~' < ギ} $small_y ;"
"gy } $vowel > ギ | '~y' ;"
"ga <> ガ ;"
"gi <> ギ ;"
"gu <> グ ;"
"ge <> ゲ ;"
"go <> ゴ ;"
"i <> イ ;"
// j } $vowel > ジ | '~y' ;
"ja <> ジャ ;"
"ji'~i' < ジィ ;" // liu
"ju <> ジュ ;"
"je <> ジェ ;"
"jo <> ジョ ;"
"ji <> ジ ;"
"k | '~' < キ} $small_y ;"
"ky } $vowel > キ | '~y' ;"
"ka <> カ ;"
"ki <> キ ;"
"ku <> ク ;"
"ke <> ケ ;"
"ko <> コ ;"
"m | '~' < ミ} $small_y ;"
"my } $vowel > ミ | '~y' ;"
"ma <> マ ;"
"mi <> ミ ;"
"mu <> ム ;"
"me <> メ ;"
"mo <> モ ;"
"m } [pbfv] > ン ;"
// n-row. The first two rules handle ニ + small y-kana (nya, nyu, nyo ...)
// by passing a '~' marker to the small-form rules; the remaining five
// are the plain syllables na ni nu ne no. ノ is also a member of
// $n_start, which the doubled-consonant rule "n } n" relies on.
"n | '~' < ニ } $small_y ;"
"ny } $vowel > ニ | '~y' ;"
"na <> ナ ;"
"ni <> ニ ;"
"nu <> ヌ ;"
"ne <> ネ ;"
"no <> ノ ;"
"o <> オ ;"
"p | '~' < ピ } $small_y ;"
"py } $vowel > ピ | '~y' ;"
"pa <> パ ;"
"pi <> ピ ;"
"pu <> プ ;"
"pe <> ペ ;"
"po <> ポ ;"
"h | '~' < ヒ } $small_y ;"
"hy } $vowel > ヒ | '~y' ;"
"ha <> ハ ;"
"hi <> ヒ ;"
"hu <> ヘゥ ;"
"he <> ヘ ;"
"ho <> ホ ;"
// f | '~' < フ } $small_y ;
// f } $vowel > フ | '~' ;
"fa <> ファ ;"
"fi <> フィ ;"
"fe <> フェ ;"
"fo <> フォ ;"
"fu <> フ ;"
"r | '~' < リ } $small_y ;"
"ry } $vowel > リ | '~y' ;"
"ra <> ラ ;"
"ri <> リ ;"
"ru <> ル ;"
"re <> レ ;"
"ro <> ロ ;"
"za <> ザ ;"
"zi <> ゼィ ;"
"zu <> ズ ;"
"ze <> ゼ ;"
"zo <> ゾ ;"
"sa <> サ ;"
"si <> セィ ;"
"su <> ス ;"
"se <> セ ;"
"so <> ソ ;"
"sha < シャ ;"
"shi'~i' < シィ ;" // liu
"shu < シュ ;"
"she < シェ ;"
"sho < ショ ;"
"shi <> シ ;"
"sh } $vowel > シ | '~y' ;"
"ta <> タ ;"
"ti <> ティ ;"
"tu <> テゥ ;"
"te <> テ ;"
"to <> ト ;"
"tsu <> ツ ;"
// v } $vowel > ヴ | '~' ;
//'v~a' < ヴァ ; # liu
//'v~i' < ヴィ ; # liu
//'v~e' < ヴェ ; # liu
//'v~o' < ヴォ ; # liu
"vu <> ヴ ;"
"u <> ウ ;"
// w } $vowel > ウ | '~' ;
"wa <> ワ ;"
"wi <> ヰ ;"
"wu > ウ ;"
"we <> ヱ ;"
"wo <> ヲ ;"
"ya <> ヤ ;"
"yi > イ ;"
"yu <> ユ ;"
"ye > エ ;"
"yo <> ヨ ;"
// double consonants
//specials
"s } sh > ッ ;"
"t } ch > ッ ;"
//voiced
"j } j <> ッ } $j_start ;"
"b } b <> ッ } [$h_start$f_start] $voice;"
"d } d <> ッ } $t_start $voice;"
"g } g <> ッ } $k_start $voice;"
"p } p <> ッ } [$h_start$f_start] $semivoice;"
// v } v <> ッ } [ワヰウヱヲう] $voice ;
"z } z <> ッ } $s_start $voice;"
"v } v <> ッ } $v_start;"
// normal
"k } k <> ッ } $k_start ;"
"m } m <> ッ } $m_start ;"
"n } n <> ッ } $n_start ;"
"h } h <> ッ } $h_start ;"
"f } f <> ッ } $f_start ;"
"r } r <> ッ } $r_start ;"
"t } t <> ッ } $t_start ;"
"s } s <> ッ } $s_start ;"
"w } w <> ッ } $w_start;"
"y } y <> ッ } $y_start;"
// completeness
"x } x > ッ ;"
"c } k > ッ ;"
"c } c > ッ ;"
"c } q > ッ ;"
"l } l > ッ ;"
"q } q > ッ ;"
// y } y > ッ ;
// w } w > ッ ;
// prolonged vowel mark. this indicates a doubling of
// the preceding vowel sound
//a < a { ー ; # liu
//e < e { ー ; # liu
//i < i { ー ; # liu
//o < o { ー ; # liu
//u < u { ー ; # liu
"$macron <> ー ;"
// small forms
"'~a' <> ァ ;"
"'~i' <> ィ ;"
"'~u' <> ゥ ;"
"'~e' <> ェ ;"
"'~o' <> ォ ;"
"'~tsu' <> ッ ;"
"'~wa' <> ヮ ;"
"'~ya' <> ャ ;"
"'~yi' > ィ ;"
"'~yu' <> ュ ;"
"'~ye' > ェ ;"
"'~yo' <> ョ ;"
// h- rule: lengthens vowel if not followed by a vowel
"[aeiou] } h > ー ;"
// one-way latin- > kana rules. these do not occur in
// well-formed romaji representing actual japanese text.
// their purpose is to make all romaji map to kana of
// some sort.
// the following are not really necessary, but produce
// slightly more natural results.
"cy > セィ ;"
"dy > ディ ;"
"hy > ヒ ;"
"sy > セィ ;"
"ty > ティ ;"
"zy > ゼィ ;"
"h > ヘ ;"
// isolated consonants listed here so as not to mask
// longer rules above.
"ch > チ;"
"sh > シ ;"
"dz > ヅ ;"
"dj > ヂ;"
"b > ブ ;"
"d > デ ;"
"g > グ ;"
"k > ク ;"
"m > ム ;"
"n'' < ン } $n_quoter ;"
"n <> ン ;"
"p > プ ;"
"r > ル ;"
"s > ス ;"
"t > テ ;"
"y > イ ;"
"z > ズ ;"
"v > ヴ ;"
"f > フ;"
"j > ジ;"
"w > ウ;"
// simple substitutions using backup
"c > | k ;"
"l > | r ;"
"q > | k ;"
"x > | ks ;"
// ~~~ END shared rules ~~~
//------------------------------------------------------
// Final cleanup
"'~' > ;" // delete stray tildes between letters
"[:Katakana:] { '' } [:Latin:] > ;" // delete stray quotes between letters
":: NFC (NFD) ;"
// eof
}
}

View File

@ -0,0 +1,115 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Malayalam_InterIndic.txt
// Date: Thu Oct 25 22:17:22 2001
//--------------------------------------------------------------------
// Malayalam_InterIndic
translit_Malayalam_InterIndic {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Malayalam_InterIndic.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// Malayalam_InterIndic
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:06 2001
//--------------------------------------------------------------------
// Malayalam-InterIndic
//:: NFD (NFC) ;
"\u0d02>\ue002;" // SIGN ANUSVARA
"\u0d03>\ue003;" // SIGN VISARGA
"\u0d05>\ue005;" // LETTER A
"\u0d06>\ue006;" // LETTER AA
"\u0d07>\ue007;" // LETTER I
"\u0d08>\ue008;" // LETTER II
"\u0d09>\ue009;" // LETTER U
"\u0d0a>\ue00a;" // LETTER UU
"\u0d0b>\ue00b;" // LETTER VOCALIC R
"\u0d0c>\ue00c;" // LETTER VOCALIC L
"\u0d0e>\ue00e;" // LETTER E
"\u0d0f>\ue00f;" // LETTER EE
"\u0d10>\ue010;" // LETTER AI
"\u0d12>\ue012;" // LETTER O
"\u0d13>\ue013;" // LETTER OO
"\u0d14>\ue014;" // LETTER AU
"\u0d15>\ue015;" // LETTER KA
"\u0d16>\ue016;" // LETTER KHA
"\u0d17>\ue017;" // LETTER GA
"\u0d18>\ue018;" // LETTER GHA
"\u0d19>\ue019;" // LETTER NGA
"\u0d1a>\ue01a;" // LETTER CA
"\u0d1b>\ue01b;" // LETTER CHA
"\u0d1c>\ue01c;" // LETTER JA
"\u0d1d>\ue01d;" // LETTER JHA
"\u0d1e>\ue01e;" // LETTER NYA
"\u0d1f>\ue01f;" // LETTER TTA
"\u0d20>\ue020;" // LETTER TTHA
"\u0d21>\ue021;" // LETTER DDA
"\u0d22>\ue022;" // LETTER DDHA
"\u0d23>\ue023;" // LETTER NNA
"\u0d24>\ue024;" // LETTER TA
"\u0d25>\ue025;" // LETTER THA
"\u0d26>\ue026;" // LETTER DA
"\u0d27>\ue027;" // LETTER DHA
"\u0d28>\ue028;" // LETTER NA
"\u0d2a>\ue02a;" // LETTER PA
"\u0d2b>\ue02b;" // LETTER PHA
"\u0d2c>\ue02c;" // LETTER BA
"\u0d2d>\ue02d;" // LETTER BHA
"\u0d2e>\ue02e;" // LETTER MA
"\u0d2f>\ue02f;" // LETTER YA
"\u0d30>\ue030;" // LETTER RA
"\u0d31>\ue031;" // LETTER RRA
"\u0d32>\ue032;" // LETTER LA
"\u0d33>\ue033;" // LETTER LLA
"\u0d34>\ue034;" // LETTER LLLA
"\u0d35>\ue035;" // LETTER VA
"\u0d36>\ue036;" // LETTER SHA
"\u0d37>\ue037;" // LETTER SSA
"\u0d38>\ue038;" // LETTER SA
"\u0d39>\ue039;" // LETTER HA
"\u0d3e>\ue03e;" // VOWEL SIGN AA
"\u0d3f>\ue03f;" // VOWEL SIGN I
"\u0d40>\ue040;" // VOWEL SIGN II
"\u0d41>\ue041;" // VOWEL SIGN U
"\u0d42>\ue042;" // VOWEL SIGN UU
"\u0d43>\ue043;" // VOWEL SIGN VOCALIC R
"\u0d46>\ue046;" // VOWEL SIGN E
"\u0d47>\ue047;" // VOWEL SIGN EE
"\u0d48>\ue048;" // VOWEL SIGN AI
"\u0d4a>\ue04a;" // VOWEL SIGN O
"\u0d4b>\ue04b;" // VOWEL SIGN OO
"\u0d4c>\ue04c;" // VOWEL SIGN AU
"\u0d4d>\ue04d;" // SIGN VIRAMA
"\u0d57>\ue057;" // AU LENGTH MARK
"\u0d60>\ue060;" // LETTER VOCALIC RR
"\u0d61>\ue061;" // LETTER VOCALIC LL
"\u0d66>\ue066;" // DIGIT ZERO
"\u0d67>\ue067;" // DIGIT ONE
"\u0d68>\ue068;" // DIGIT TWO
"\u0d69>\ue069;" // DIGIT THREE
"\u0d6a>\ue06a;" // DIGIT FOUR
"\u0d6b>\ue06b;" // DIGIT FIVE
"\u0d6c>\ue06c;" // DIGIT SIX
"\u0d6d>\ue06d;" // DIGIT SEVEN
"\u0d6e>\ue06e;" // DIGIT EIGHT
"\u0d6f>\ue06f;" // DIGIT NINE
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,116 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Oriya_InterIndic.txt
// Date: Thu Oct 25 22:17:22 2001
//--------------------------------------------------------------------
// Oriya_InterIndic
translit_Oriya_InterIndic {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Oriya_InterIndic.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// Oriya_InterIndic
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:07 2001
//--------------------------------------------------------------------
// Oriya-InterIndic
//:: NFD (NFC) ;
"\u0b01>\ue001;" // SIGN CANDRABINDU
"\u0b02>\ue002;" // SIGN ANUSVARA
"\u0b03>\ue003;" // SIGN VISARGA
"\u0b05>\ue005;" // LETTER A
"\u0b06>\ue006;" // LETTER AA
"\u0b07>\ue007;" // LETTER I
"\u0b08>\ue008;" // LETTER II
"\u0b09>\ue009;" // LETTER U
"\u0b0a>\ue00a;" // LETTER UU
"\u0b0b>\ue00b;" // LETTER VOCALIC R
"\u0b0c>\ue00c;" // LETTER VOCALIC L
"\u0b0f>\ue00f;" // LETTER E
"\u0b10>\ue010;" // LETTER AI
"\u0b13>\ue013;" // LETTER O
"\u0b14>\ue014;" // LETTER AU
"\u0b15>\ue015;" // LETTER KA
"\u0b16>\ue016;" // LETTER KHA
"\u0b17>\ue017;" // LETTER GA
"\u0b18>\ue018;" // LETTER GHA
"\u0b19>\ue019;" // LETTER NGA
"\u0b1a>\ue01a;" // LETTER CA
"\u0b1b>\ue01b;" // LETTER CHA
"\u0b1c>\ue01c;" // LETTER JA
"\u0b1d>\ue01d;" // LETTER JHA
"\u0b1e>\ue01e;" // LETTER NYA
"\u0b1f>\ue01f;" // LETTER TTA
"\u0b20>\ue020;" // LETTER TTHA
"\u0b21>\ue021;" // LETTER DDA
"\u0b22>\ue022;" // LETTER DDHA
"\u0b23>\ue023;" // LETTER NNA
"\u0b24>\ue024;" // LETTER TA
"\u0b25>\ue025;" // LETTER THA
"\u0b26>\ue026;" // LETTER DA
"\u0b27>\ue027;" // LETTER DHA
"\u0b28>\ue028;" // LETTER NA
"\u0b2a>\ue02a;" // LETTER PA
"\u0b2b>\ue02b;" // LETTER PHA
"\u0b2c>\ue02c;" // LETTER BA
"\u0b2d>\ue02d;" // LETTER BHA
"\u0b2e>\ue02e;" // LETTER MA
"\u0b2f>\ue02f;" // LETTER YA
"\u0b30>\ue030;" // LETTER RA
"\u0b32>\ue032;" // LETTER LA
"\u0b33>\ue033;" // LETTER LLA
"\u0b36>\ue036;" // LETTER SHA
"\u0b37>\ue037;" // LETTER SSA
"\u0b38>\ue038;" // LETTER SA
"\u0b39>\ue039;" // LETTER HA
"\u0b3c>\ue03c;" // SIGN NUKTA
"\u0b3d>\ue03d;" // SIGN AVAGRAHA
"\u0b3e>\ue03e;" // VOWEL SIGN AA
"\u0b3f>\ue03f;" // VOWEL SIGN I
"\u0b40>\ue040;" // VOWEL SIGN II
"\u0b41>\ue041;" // VOWEL SIGN U
"\u0b42>\ue042;" // VOWEL SIGN UU
"\u0b43>\ue043;" // VOWEL SIGN VOCALIC R
"\u0b47>\ue047;" // VOWEL SIGN E
"\u0b48>\ue048;" // VOWEL SIGN AI
"\u0b4b>\ue04b;" // VOWEL SIGN O
"\u0b4c>\ue04c;" // VOWEL SIGN AU
"\u0b4d>\ue04d;" // SIGN VIRAMA
"\u0b56>\ue056;" // AI LENGTH MARK
"\u0b57>\ue057;" // AU LENGTH MARK
"\u0b5c>\ue05c;" // LETTER RRA
"\u0b5d>\ue05d;" // LETTER RHA
"\u0b5f>\ue05f;" // LETTER YYA
"\u0b60>\ue060;" // LETTER VOCALIC RR
"\u0b61>\ue061;" // LETTER VOCALIC LL
"\u0b66>\ue066;" // DIGIT ZERO
"\u0b67>\ue067;" // DIGIT ONE
"\u0b68>\ue068;" // DIGIT TWO
"\u0b69>\ue069;" // DIGIT THREE
"\u0b6a>\ue06a;" // DIGIT FOUR
"\u0b6b>\ue06b;" // DIGIT FIVE
"\u0b6c>\ue06c;" // DIGIT SIX
"\u0b6d>\ue06d;" // DIGIT SEVEN
"\u0b6e>\ue06e;" // DIGIT EIGHT
"\u0b6f>\ue06f;" // DIGIT NINE
"\u0b70>\ue070;" // ISSHAR
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,98 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Tamil_InterIndic.txt
// Date: Thu Oct 25 22:17:22 2001
//--------------------------------------------------------------------
// Tamil_InterIndic
translit_Tamil_InterIndic {
Rule {
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Tamil_InterIndic.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// Tamil_InterIndic
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:07 2001
//--------------------------------------------------------------------
// Tamil-InterIndic
//:: NFD (NFC) ;
"\u0b82>\ue002;" // SIGN ANUSVARA
"\u0b83>\ue003;" // SIGN VISARGA
"\u0b85>\ue005;" // LETTER A
"\u0b86>\ue006;" // LETTER AA
"\u0b87>\ue007;" // LETTER I
"\u0b88>\ue008;" // LETTER II
"\u0b89>\ue009;" // LETTER U
"\u0b8a>\ue00a;" // LETTER UU
"\u0b8e>\ue00e;" // LETTER E
"\u0b8f>\ue00f;" // LETTER EE
"\u0b90>\ue010;" // LETTER AI
"\u0b92>\ue012;" // LETTER O
"\u0b93>\ue013;" // LETTER OO
"\u0b94>\ue014;" // LETTER AU
"\u0b95>\ue015;" // LETTER KA
"\u0b99>\ue019;" // LETTER NGA
"\u0b9a>\ue01a;" // LETTER CA
"\u0b9c>\ue01c;" // LETTER JA
"\u0b9e>\ue01e;" // LETTER NYA
"\u0b9f>\ue01f;" // LETTER TTA
"\u0ba3>\ue023;" // LETTER NNA
"\u0ba4>\ue024;" // LETTER TA
"\u0ba8>\ue028;" // LETTER NA
"\u0ba9>\ue029;" // LETTER NNNA
"\u0baa>\ue02a;" // LETTER PA
"\u0bae>\ue02e;" // LETTER MA
"\u0baf>\ue02f;" // LETTER YA
"\u0bb0>\ue030;" // LETTER RA
"\u0bb1>\ue031;" // LETTER RRA
"\u0bb2>\ue032;" // LETTER LA
"\u0bb3>\ue033;" // LETTER LLA
"\u0bb4>\ue034;" // LETTER LLLA
"\u0bb5>\ue035;" // LETTER VA
"\u0bb7>\ue037;" // LETTER SSA
"\u0bb8>\ue038;" // LETTER SA
"\u0bb9>\ue039;" // LETTER HA
"\u0bbe>\ue03e;" // VOWEL SIGN AA
"\u0bbf>\ue03f;" // VOWEL SIGN I
"\u0bc0>\ue040;" // VOWEL SIGN II
"\u0bc1>\ue041;" // VOWEL SIGN U
"\u0bc2>\ue042;" // VOWEL SIGN UU
"\u0bc6>\ue046;" // VOWEL SIGN E
"\u0bc7>\ue047;" // VOWEL SIGN EE
"\u0bc8>\ue048;" // VOWEL SIGN AI
"\u0bca>\ue04a;" // VOWEL SIGN O
"\u0bcb>\ue04b;" // VOWEL SIGN OO
"\u0bcc>\ue04c;" // VOWEL SIGN AU
"\u0bcd>\ue04d;" // SIGN VIRAMA
"\u0bd7>\ue057;" // AU LENGTH MARK
"\u0be7>\ue067;" // DIGIT ONE
"\u0be8>\ue068;" // DIGIT TWO
"\u0be9>\ue069;" // DIGIT THREE
"\u0bea>\ue06a;" // DIGIT FOUR
"\u0beb>\ue06b;" // DIGIT FIVE
"\u0bec>\ue06c;" // DIGIT SIX
"\u0bed>\ue06d;" // DIGIT SEVEN
"\u0bee>\ue06e;" // DIGIT EIGHT
"\u0bef>\ue06f;" // DIGIT NINE
// \u0bf0>; # UNMAPPED Tamil-InterIndic: NUMBER TEN
// \u0bf1>; # UNMAPPED Tamil-InterIndic: NUMBER ONE HUNDRED
// \u0bf2>; # UNMAPPED Tamil-InterIndic: NUMBER ONE THOUSAND
// :: NFC (NFD) ;
// eof
}
}

View File

@ -0,0 +1,117 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../text/resources/Transliterator_Telugu_InterIndic.txt
// Date: Thu Oct 25 22:17:22 2001
//--------------------------------------------------------------------
// Telugu_InterIndic
// Resource table holding the Telugu -> InterIndic rule set.
// Each rule below uses the one-way '>' operator and rewrites a single
// Telugu code point U+0Cxx to the private-use code point U+E0xx that has
// the same low byte (e.g. U+0C15 -> U+E015). Code points in the U+0C00
// range that are not listed have no rule in this table.
translit_Telugu_InterIndic {
Rule {
// The two header blocks that follow are provenance carried over from the
// successive generation steps of this file; they are comments only.
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Telugu_InterIndic.utf8.txt
// Date: Thu Mar 1 20:03:55 2001
//--------------------------------------------------------------------
// Telugu_InterIndic
//--------------------------------------------------------------------
// Copyright (c) 1999-2001, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// Date: Tue Jan 23 12:42:07 2001
//--------------------------------------------------------------------
// Telugu-InterIndic
// The ':: NFD (NFC)' directive here and the matching ':: NFC (NFD)' at the
// end of the rules are commented out in this generated file, so this table
// itself applies no normalization.
//:: NFD (NFC) ;
// --- Signs ---
"\u0c01>\ue001;" // SIGN CANDRABINDU
"\u0c02>\ue002;" // SIGN ANUSVARA
"\u0c03>\ue003;" // SIGN VISARGA
// --- Letters ---
"\u0c05>\ue005;" // LETTER A
"\u0c06>\ue006;" // LETTER AA
"\u0c07>\ue007;" // LETTER I
"\u0c08>\ue008;" // LETTER II
"\u0c09>\ue009;" // LETTER U
"\u0c0a>\ue00a;" // LETTER UU
"\u0c0b>\ue00b;" // LETTER VOCALIC R
"\u0c0c>\ue00c;" // LETTER VOCALIC L
"\u0c0e>\ue00e;" // LETTER E
"\u0c0f>\ue00f;" // LETTER EE
"\u0c10>\ue010;" // LETTER AI
"\u0c12>\ue012;" // LETTER O
"\u0c13>\ue013;" // LETTER OO
"\u0c14>\ue014;" // LETTER AU
"\u0c15>\ue015;" // LETTER KA
"\u0c16>\ue016;" // LETTER KHA
"\u0c17>\ue017;" // LETTER GA
"\u0c18>\ue018;" // LETTER GHA
"\u0c19>\ue019;" // LETTER NGA
"\u0c1a>\ue01a;" // LETTER CA
"\u0c1b>\ue01b;" // LETTER CHA
"\u0c1c>\ue01c;" // LETTER JA
"\u0c1d>\ue01d;" // LETTER JHA
"\u0c1e>\ue01e;" // LETTER NYA
"\u0c1f>\ue01f;" // LETTER TTA
"\u0c20>\ue020;" // LETTER TTHA
"\u0c21>\ue021;" // LETTER DDA
"\u0c22>\ue022;" // LETTER DDHA
"\u0c23>\ue023;" // LETTER NNA
"\u0c24>\ue024;" // LETTER TA
"\u0c25>\ue025;" // LETTER THA
"\u0c26>\ue026;" // LETTER DA
"\u0c27>\ue027;" // LETTER DHA
"\u0c28>\ue028;" // LETTER NA
"\u0c2a>\ue02a;" // LETTER PA
"\u0c2b>\ue02b;" // LETTER PHA
"\u0c2c>\ue02c;" // LETTER BA
"\u0c2d>\ue02d;" // LETTER BHA
"\u0c2e>\ue02e;" // LETTER MA
"\u0c2f>\ue02f;" // LETTER YA
"\u0c30>\ue030;" // LETTER RA
"\u0c31>\ue031;" // LETTER RRA
"\u0c32>\ue032;" // LETTER LA
"\u0c33>\ue033;" // LETTER LLA
"\u0c35>\ue035;" // LETTER VA
"\u0c36>\ue036;" // LETTER SHA
"\u0c37>\ue037;" // LETTER SSA
"\u0c38>\ue038;" // LETTER SA
"\u0c39>\ue039;" // LETTER HA
// --- Vowel signs and virama ---
"\u0c3e>\ue03e;" // VOWEL SIGN AA
"\u0c3f>\ue03f;" // VOWEL SIGN I
"\u0c40>\ue040;" // VOWEL SIGN II
"\u0c41>\ue041;" // VOWEL SIGN U
"\u0c42>\ue042;" // VOWEL SIGN UU
"\u0c43>\ue043;" // VOWEL SIGN VOCALIC R
"\u0c44>\ue044;" // VOWEL SIGN VOCALIC RR
"\u0c46>\ue046;" // VOWEL SIGN E
"\u0c47>\ue047;" // VOWEL SIGN EE
"\u0c48>\ue048;" // VOWEL SIGN AI
"\u0c4a>\ue04a;" // VOWEL SIGN O
"\u0c4b>\ue04b;" // VOWEL SIGN OO
"\u0c4c>\ue04c;" // VOWEL SIGN AU
"\u0c4d>\ue04d;" // SIGN VIRAMA
// --- Length marks ---
"\u0c55>\ue055;" // LENGTH MARK
"\u0c56>\ue056;" // AI LENGTH MARK
// --- Additional vocalic letters ---
"\u0c60>\ue060;" // LETTER VOCALIC RR
"\u0c61>\ue061;" // LETTER VOCALIC LL
// --- Digits ---
"\u0c66>\ue066;" // DIGIT ZERO
"\u0c67>\ue067;" // DIGIT ONE
"\u0c68>\ue068;" // DIGIT TWO
"\u0c69>\ue069;" // DIGIT THREE
"\u0c6a>\ue06a;" // DIGIT FOUR
"\u0c6b>\ue06b;" // DIGIT FIVE
"\u0c6c>\ue06c;" // DIGIT SIX
"\u0c6d>\ue06d;" // DIGIT SEVEN
"\u0c6e>\ue06e;" // DIGIT EIGHT
"\u0c6f>\ue06f;" // DIGIT NINE
// :: NFC (NFD) ;
// eof
}
}

View File

@ -5,7 +5,7 @@
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: Transliterator_index.txt
// Date: Fri Mar 2 12:50:49 2001
// Date: Thu Oct 25 22:17:22 2001
//--------------------------------------------------------------------
//--------------------------------------------------------------------
@ -61,77 +61,70 @@ translit_index {
// Bidirectional rule files
{ "Fullwidth-Halfwidth", "file", "fullhalf", "FORWARD" },
{ "Halfwidth-Fullwidth", "file", "fullhalf", "REVERSE" },
{ "Fullwidth-Halfwidth", "file", "translit_Fullwidth_Halfwidth", "FORWARD" },
{ "Halfwidth-Fullwidth", "file", "translit_Fullwidth_Halfwidth", "REVERSE" },
{ "Latin-Arabic", "file", "larabic", "FORWARD" },
{ "Arabic-Latin", "file", "larabic", "REVERSE" },
{ "Latin-Cyrillic", "file", "translit_Cyrillic_Latin", "REVERSE" },
{ "Cyrillic-Latin", "file", "translit_Cyrillic_Latin", "FORWARD" },
{ "Latin-Cyrillic", "file", "lcyril", "FORWARD" },
{ "Cyrillic-Latin", "file", "lcyril", "REVERSE" },
{ "Latin-Greek", "file", "translit_Greek_Latin", "REVERSE" },
{ "Greek-Latin", "file", "translit_Greek_Latin", "FORWARD" },
//{ "Latin-Devanagari", "file", "ldevan", "FORWARD" },
//{ "Devanagari-Latin", "file", "ldevan", "REVERSE" },
{ "LowerLatin-Jamo", "internal", "translit_Latin_Jamo", "FORWARD" },
{ "Latin-Jamo", "alias", "Any-Lower;LowerLatin-Jamo", "" },
{ "Jamo-Latin", "file", "translit_Latin_Jamo", "REVERSE" },
{ "Latin-Greek", "file", "lgreek", "FORWARD" },
{ "Greek-Latin", "file", "lgreek", "REVERSE" },
{ "Latin-Katakana", "file", "translit_Latin_Katakana", "FORWARD" },
{ "Katakana-Latin", "file", "translit_Latin_Katakana", "REVERSE" },
{ "Latin-Hebrew", "file", "lhebrew", "FORWARD" },
{ "Hebrew-Latin", "file", "lhebrew", "REVERSE" },
{ "Latin-Hiragana", "file", "translit_Hiragana_Latin", "REVERSE" },
{ "Hiragana-Latin", "file", "translit_Hiragana_Latin", "FORWARD" },
{ "Latin-Jamo", "file", "ljamo", "FORWARD" },
{ "Jamo-Latin", "file", "ljamo", "REVERSE" },
{ "Hiragana-Katakana", "file", "translit_Hiragana_Katakana", "FORWARD" },
{ "Katakana-Hiragana", "file", "translit_Hiragana_Katakana", "REVERSE" },
{ "Latin-Kana", "file", "lkana", "FORWARD" },
{ "Kana-Latin", "file", "lkana", "REVERSE" },
{ "Any-Accents", "file", "translit_Any_Accents", "FORWARD" },
{ "Accents-Any", "file", "translit_Any_Accents", "REVERSE" },
{ "Hiragana-Katakana", "file", "kana", "FORWARD" },
{ "Katakana-Hiragana", "file", "kana", "REVERSE" },
{ "StraightQuotes-CurlyQuotes", "file", "quotes", "FORWARD" },
{ "CurlyQuotes-StraightQuotes", "file", "quotes", "REVERSE" },
{ "Any-Publishing", "file", "translit_Any_Publishing", "FORWARD" },
{ "Publishing-Any", "file", "translit_Any_Publishing", "REVERSE" },
// One way rules (forward only)
// Java only: { "Han-Pinyin", "file", "-", "FORWARD" },
// Java only: { "Kanji-English", "file", "-", "FORWARD" },
// Java only: { "Kanji-OnRomaji", "file", "-", "FORWARD" },
{ "KeyboardEscape-Latin1", "file", "kbdescl1", "FORWARD" },
// Replaced by algorithmic transliterator:
// { "UnicodeName-UnicodeChar", "file", "ucname", "FORWARD" },
// Compound rules
/// TODO
{ "Latin-Hangul", "alias", "[:Latin:];Latin-Jamo;[\u1100-\u11FF]NFC", "" },
{ "Latin-Hangul", "alias", "[\p{Latin}];Latin-Jamo;[\u1100-\u11FF]NFC", "" },
{ "Hangul-Latin", "alias", "[\uAC00-\uD7AF];NFD;Jamo-Latin", "" },
// Inter-Indic composed rules
{ "Latin-InterIndic", "internal", "Latin_InterIndic", "FORWARD" },
{ "Devanagari-InterIndic", "internal", "Devanagari_InterIndic", "FORWARD" },
{ "Bengali-InterIndic", "internal", "Bengali_InterIndic", "FORWARD" },
{ "Gurmukhi-InterIndic", "internal", "Gurmukhi_InterIndic", "FORWARD" },
{ "Gujarati-InterIndic", "internal", "Gujarati_InterIndic", "FORWARD" },
{ "Oriya-InterIndic", "internal", "Oriya_InterIndic", "FORWARD" },
{ "Tamil-InterIndic", "internal", "Tamil_InterIndic", "FORWARD" },
{ "Telugu-InterIndic", "internal", "Telugu_InterIndic", "FORWARD" },
{ "Kannada-InterIndic", "internal", "Kannada_InterIndic", "FORWARD" },
{ "Malayalam-InterIndic", "internal", "Malayalam_InterIndic", "FORWARD" },
{ "Latin-InterIndic", "internal", "translit_Latin_InterIndic", "FORWARD" },
{ "Devanagari-InterIndic", "internal", "translit_Devanagari_InterIndic", "FORWARD" },
{ "Bengali-InterIndic", "internal", "translit_Bengali_InterIndic", "FORWARD" },
{ "Gurmukhi-InterIndic", "internal", "translit_Gurmukhi_InterIndic", "FORWARD" },
{ "Gujarati-InterIndic", "internal", "translit_Gujarati_InterIndic", "FORWARD" },
{ "Oriya-InterIndic", "internal", "translit_Oriya_InterIndic", "FORWARD" },
{ "Tamil-InterIndic", "internal", "translit_Tamil_InterIndic", "FORWARD" },
{ "Telugu-InterIndic", "internal", "translit_Telugu_InterIndic", "FORWARD" },
{ "Kannada-InterIndic", "internal", "translit_Kannada_InterIndic", "FORWARD" },
{ "Malayalam-InterIndic", "internal", "translit_Malayalam_InterIndic", "FORWARD" },
{ "InterIndic-Latin", "internal", "InterIndic_Latin", "FORWARD" },
{ "InterIndic-Devanagari", "internal", "InterIndic_Devanagari", "FORWARD" },
{ "InterIndic-Bengali", "internal", "InterIndic_Bengali", "FORWARD" },
{ "InterIndic-Gurmukhi", "internal", "InterIndic_Gurmukhi", "FORWARD" },
{ "InterIndic-Gujarati", "internal", "InterIndic_Gujarati", "FORWARD" },
{ "InterIndic-Oriya", "internal", "InterIndic_Oriya", "FORWARD" },
{ "InterIndic-Tamil", "internal", "InterIndic_Tamil", "FORWARD" },
{ "InterIndic-Telugu", "internal", "InterIndic_Telugu", "FORWARD" },
{ "InterIndic-Kannada", "internal", "InterIndic_Kannada", "FORWARD" },
{ "InterIndic-Malayalam", "internal", "InterIndic_Malayalam", "FORWARD" },
{ "InterIndic-Latin", "internal", "translit_InterIndic_Latin", "FORWARD" },
{ "InterIndic-Devanagari", "internal", "translit_InterIndic_Devanagari", "FORWARD" },
{ "InterIndic-Bengali", "internal", "translit_InterIndic_Bengali", "FORWARD" },
{ "InterIndic-Gurmukhi", "internal", "translit_InterIndic_Gurmukhi", "FORWARD" },
{ "InterIndic-Gujarati", "internal", "translit_InterIndic_Gujarati", "FORWARD" },
{ "InterIndic-Oriya", "internal", "translit_InterIndic_Oriya", "FORWARD" },
{ "InterIndic-Tamil", "internal", "translit_InterIndic_Tamil", "FORWARD" },
{ "InterIndic-Telugu", "internal", "translit_InterIndic_Telugu", "FORWARD" },
{ "InterIndic-Kannada", "internal", "translit_InterIndic_Kannada", "FORWARD" },
{ "InterIndic-Malayalam", "internal", "translit_InterIndic_Malayalam", "FORWARD" },
//Latin-X transliterators
{ "Latin-Devanagari", "alias", "NFD;Latin-InterIndic;InterIndic-Devanagari;NFC", "" },
//Latin-Indic transliterators
{ "Latin-Devanagari", "alias", "NFD;Latin-InterIndic;InterIndic-Devanagari;NFC", "" },
{ "Latin-Bengali", "alias", "NFD;Latin-InterIndic;InterIndic-Bengali;NFC", "" },
{ "Latin-Gurmukhi", "alias", "NFD;Latin-InterIndic;InterIndic-Gurmukhi;NFC", "" },
{ "Latin-Gujarati", "alias", "NFD;Latin-InterIndic;InterIndic-Gujarati;NFC", "" },
@ -141,8 +134,8 @@ translit_index {
{ "Latin-Kannada", "alias", "NFD;Latin-InterIndic;InterIndic-Kannada;NFC", "" },
{ "Latin-Malayalam", "alias", "NFD;Latin-InterIndic;InterIndic-Malayalam;NFC", "" },
//X-Latin transliterators
{ "Devanagari-Latin","alias", "NFD;Devanagari-InterIndic;InterIndic-Latin;NFC", "" },
//Indic-Latin transliterators
{ "Devanagari-Latin", "alias", "NFD;Devanagari-InterIndic;InterIndic-Latin;NFC", "" },
{ "Bengali-Latin", "alias", "NFD;Bengali-InterIndic;InterIndic-Latin;NFC", "" },
// Gurmukhi-Latin must route through Gurmukhi-InterIndic, like every other
// <Script>-Latin alias in this table; it previously named Bengali-InterIndic
// (copy-paste from the Bengali-Latin entry).
{ "Gurmukhi-Latin", "alias", "NFD;Gurmukhi-InterIndic;InterIndic-Latin;NFC", "" },
{ "Gujarati-Latin", "alias", "NFD;Gujarati-InterIndic;InterIndic-Latin;NFC", "" },
@ -152,7 +145,6 @@ translit_index {
{ "Kannada-Latin", "alias", "NFD;Kannada-InterIndic;InterIndic-Latin;NFC", "" },
{ "Malayalam-Latin", "alias", "NFD;Malayalam-InterIndic;InterIndic-Latin;NFC", "" },
{ "Devanagari-Bengali", "alias", "NFD;Devanagari-InterIndic;InterIndic-Bengali;NFC", "" },
{ "Devanagari-Gurmukhi", "alias", "NFD;Devanagari-InterIndic;InterIndic-Gurmukhi;NFC", "" },
{ "Devanagari-Gujarati", "alias", "NFD;Devanagari-InterIndic;InterIndic-Gujarati;NFC", "" },
@ -177,7 +169,7 @@ translit_index {
{ "Gurmukhi-Telugu", "alias", "NFD;Gurmukhi-InterIndic;InterIndic-Telugu;NFC", "" },
{ "Gurmukhi-Kannada", "alias", "NFD;Gurmukhi-InterIndic;InterIndic-Kannada;NFC", "" },
{ "Gurmukhi-Malayalam", "alias", "NFD;Gurmukhi-InterIndic;InterIndic-Malayalam;NFC", "" },
{ "Gujarati-Devanagari", "alias", "NFD;Gujarati-InterIndic;InterIndic-Devanagari;NFC", "" },
{ "Gujarati-Devanagari", "alias", "Gujarati-InterIndic;InterIndic-Devanagari;NFC", "" },
{ "Gujarati-Bengali", "alias", "NFD;Gujarati-InterIndic;InterIndic-Bengali;NFC", "" },
{ "Gujarati-Gurmukhi", "alias", "NFD;Gujarati-InterIndic;InterIndic-Gurmukhi;NFC", "" },
{ "Gujarati-Oriya", "alias", "NFD;Gujarati-InterIndic;InterIndic-Oriya;NFC", "" },
@ -227,5 +219,6 @@ translit_index {
{ "Malayalam-Kannada", "alias", "NFD;Malayalam-InterIndic;InterIndic-Kannada;NFC", "" },
// eof
}
}