ICU-1259 check in new 2.0 rules mechanically generated from icu4j masters
X-SVN-Rev: 6453
This commit is contained in:
parent
4853938bd8
commit
3f29a7e290
@ -103,28 +103,34 @@ uk.txt uk_UA.txt\
|
||||
vi.txt vi_VN.txt\
|
||||
zh.txt zh__PINYIN.txt zh_CN.txt zh_HK.txt zh_SG.txt zh_TW.txt zh_TW_STROKE.txt
|
||||
|
||||
TRANSLIT_SOURCE=fullhalf.txt translit_index.txt kana.txt kbdescl1.txt\
|
||||
larabic.txt lcyril.txt\
|
||||
lgreek.txt lhebrew.txt ljamo.txt\
|
||||
lkana.txt quotes.txt\
|
||||
Bengali_InterIndic.txt\
|
||||
Devanagari_InterIndic.txt\
|
||||
Gujarati_InterIndic.txt\
|
||||
Gurmukhi_InterIndic.txt\
|
||||
Kannada_InterIndic.txt\
|
||||
Malayalam_InterIndic.txt\
|
||||
Oriya_InterIndic.txt\
|
||||
Tamil_InterIndic.txt\
|
||||
Telugu_InterIndic.txt\
|
||||
InterIndic_Bengali.txt\
|
||||
InterIndic_Devanagari.txt\
|
||||
InterIndic_Gujarati.txt\
|
||||
InterIndic_Gurmukhi.txt\
|
||||
InterIndic_Kannada.txt\
|
||||
InterIndic_Malayalam.txt\
|
||||
InterIndic_Oriya.txt\
|
||||
InterIndic_Tamil.txt\
|
||||
InterIndic_Telugu.txt\
|
||||
Latin_InterIndic.txt\
|
||||
InterIndic_Latin.txt
|
||||
TRANSLIT_SOURCE=translit_Any_Accents.txt\
|
||||
translit_Any_Publishing.txt\
|
||||
translit_Bengali_InterIndic.txt\
|
||||
translit_Cyrillic_Latin.txt\
|
||||
translit_Devanagari_InterIndic.txt\
|
||||
translit_Fullwidth_Halfwidth.txt\
|
||||
translit_Greek_Latin.txt\
|
||||
translit_Gujarati_InterIndic.txt\
|
||||
translit_Gurmukhi_InterIndic.txt\
|
||||
translit_Hiragana_Katakana.txt\
|
||||
translit_Hiragana_Latin.txt\
|
||||
translit_InterIndic_Bengali.txt\
|
||||
translit_InterIndic_Devanagari.txt\
|
||||
translit_InterIndic_Gujarati.txt\
|
||||
translit_InterIndic_Gurmukhi.txt\
|
||||
translit_InterIndic_Kannada.txt\
|
||||
translit_InterIndic_Latin.txt\
|
||||
translit_InterIndic_Malayalam.txt\
|
||||
translit_InterIndic_Oriya.txt\
|
||||
translit_InterIndic_Tamil.txt\
|
||||
translit_InterIndic_Telugu.txt\
|
||||
translit_Kannada_InterIndic.txt\
|
||||
translit_Latin_InterIndic.txt\
|
||||
translit_Latin_Jamo.txt\
|
||||
translit_Latin_Katakana.txt\
|
||||
translit_Malayalam_InterIndic.txt\
|
||||
translit_Oriya_InterIndic.txt\
|
||||
translit_Tamil_InterIndic.txt\
|
||||
translit_Telugu_InterIndic.txt\
|
||||
translit_index.txt
|
||||
|
||||
|
311
icu4c/data/translit_Any_Accents.txt
Normal file
311
icu4c/data/translit_Any_Accents.txt
Normal file
@ -0,0 +1,311 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Any_Accents.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Any_Accents
|
||||
|
||||
translit_Any_Accents {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// $Source: /xsrl/Nsvn/icu/icu/data/Attic/translit_Any_Accents.txt,v $
|
||||
// $Date: 2001/10/26 05:41:15 $
|
||||
// $Revision: 1.1 $
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
":: NFD (NFC) ;"
|
||||
|
||||
// to do: make reversible
|
||||
|
||||
// define special conversion characters.
|
||||
// Variants of this could use different characters, or set one or the other to null.
|
||||
|
||||
"$pre = \< ;"
|
||||
"$post = \> ;"
|
||||
|
||||
// Provide keyboard equivalents for common diacritics used in transliteration
|
||||
|
||||
"$pre \` $post <> \u0300 ;" // COMBINING GRAVE ACCENT
|
||||
"$pre \' $post <> \u0301 ;" // COMBINING ACUTE ACCENT
|
||||
"$pre \^ $post <> \u0302 ;" // COMBINING CIRCUMFLEX ACCENT
|
||||
"$pre \~ $post <> \u0303 ;" // COMBINING TILDE
|
||||
"$pre \- $post <> \u0304 ;" // COMBINING MACRON
|
||||
"$pre \" $post <> \u0308 ;" // COMBINING DIAERESIS
|
||||
"$pre \* $post <> \u030A ;" // COMBINING RING ABOVE
|
||||
"$pre \, $post <> \u0327 ;" // COMBINING CEDILLA
|
||||
"$pre '/' $post <> \u0338 ;" // COMBINING LONG SOLIDUS OVERLAY
|
||||
"$pre \. $post <> \u0323 ;" // COMBINING DOT BELOW
|
||||
|
||||
// Combine common characters
|
||||
|
||||
"$pre AE $post <> \u00C6 ;" // LATIN CAPITAL LETTER AE
|
||||
"$pre ae $post <> \u00E6 ;" // LATIN SMALL LETTER AE
|
||||
"$pre D $post <> \u00D0 ;" // LATIN CAPITAL LETTER ETH
|
||||
"$pre d $post <> \u00F0 ;" // LATIN SMALL LETTER ETH
|
||||
"$pre O'/' $post <> \u00D8 ;" // LATIN CAPITAL LETTER O WITH STROKE
|
||||
"$pre o'/' $post <> \u00F8 ;" // LATIN SMALL LETTER O WITH STROKE
|
||||
"$pre TH $post <> \u00DE ;" // LATIN CAPITAL LETTER THORN
|
||||
"$pre th $post <> \u00FE ;" // LATIN SMALL LETTER THORN
|
||||
"$pre OE $post <> \u0152 ;" // LATIN CAPITAL LIGATURE OE
|
||||
"$pre oe $post <> \u0153 ;" // LATIN SMALL LIGATURE OE
|
||||
|
||||
"$pre ss $post <> \u00DF ;" // LATIN SMALL LETTER SHARP S
|
||||
|
||||
"$pre NG $post <> \u014A ;" // LATIN CAPITAL LETTER ENG
|
||||
"$pre ng $post <> \u014B ;" // LATIN SMALL LETTER ENG
|
||||
|
||||
"$pre T $post <> \u0398 ;" // GREEK CAPITAL LETTER THETA
|
||||
"$pre t $post <> \u03B8 ;" // GREEK SMALL LETTER THETA
|
||||
"$pre SH $post <> \u01A9 ;" // LATIN CAPITAL LETTER ESH
|
||||
"$pre sh $post <> \u0283 ;" // LATIN SMALL LETTER ESH
|
||||
"$pre ZH $post <> \u01B7 ;" // LATIN CAPITAL LETTER EZH
|
||||
"$pre zh $post <> \u0292 ;" // LATIN SMALL LETTER EZH
|
||||
|
||||
"$pre U $post <> \u01B1 ;" // LATIN CAPITAL LETTER UPSILON
|
||||
"$pre u $post <> \u028A ;" // LATIN SMALL LETTER UPSILON
|
||||
"$pre A $post <> \u018F ;" // LATIN CAPITAL LETTER SCHWA
|
||||
"$pre a $post <> \u0259 ;" // LATIN SMALL LETTER SCHWA
|
||||
"$pre O $post <> \u0186 ;" // LATIN CAPITAL LETTER OPEN O
|
||||
"$pre o $post <> \u0254 ;" // LATIN SMALL LETTER OPEN O
|
||||
"$pre E $post <> \u0190 ;" // LATIN CAPITAL LETTER OPEN E
|
||||
"$pre e $post <> \u025B ;" // LATIN SMALL LETTER OPEN E
|
||||
|
||||
// Three letters that have no uppercase forms
|
||||
|
||||
"$pre '?' $post <> \u0294 ;" // LATIN LETTER GLOTTAL STOP
|
||||
"$pre i $post <> \u026A ;" // LATIN LETTER SMALL CAPITAL I
|
||||
"$pre v $post <> \u028C ;" // LATIN SMALL LETTER TURNED V
|
||||
|
||||
// Additional Characters that may be added in the future
|
||||
|
||||
// $pre XXX $post <> \u0306 ; # COMBINING BREVE
|
||||
// $pre XXX $post <> \u0307 ; # COMBINING DOT ABOVE
|
||||
// $pre XXX $post <> \u0309 ; # COMBINING HOOK ABOVE
|
||||
// $pre XXX $post <> \u030B ; # COMBINING DOUBLE ACUTE ACCENT
|
||||
// $pre XXX $post <> \u030C ; # COMBINING CARON
|
||||
// $pre XXX $post <> \u030F ; # COMBINING DOUBLE GRAVE ACCENT
|
||||
// $pre XXX $post <> \u0311 ; # COMBINING INVERTED BREVE
|
||||
// $pre XXX $post <> \u0313 ; # COMBINING COMMA ABOVE
|
||||
// $pre XXX $post <> \u0314 ; # COMBINING REVERSED COMMA ABOVE
|
||||
// $pre XXX $post <> \u031B ; # COMBINING HORN
|
||||
// $pre XXX $post <> \u0324 ; # COMBINING DIAERESIS BELOW
|
||||
// $pre XXX $post <> \u0325 ; # COMBINING RING BELOW
|
||||
// $pre XXX $post <> \u0326 ; # COMBINING COMMA BELOW
|
||||
// $pre XXX $post <> \u0328 ; # COMBINING OGONEK
|
||||
// $pre XXX $post <> \u032D ; # COMBINING CIRCUMFLEX ACCENT BELOW
|
||||
// $pre XXX $post <> \u032E ; # COMBINING BREVE BELOW
|
||||
// $pre XXX $post <> \u0330 ; # COMBINING TILDE BELOW
|
||||
// $pre XXX $post <> \u0331 ; # COMBINING MACRON BELOW
|
||||
|
||||
// $pre YYY $post <> \u00AA ; # FEMININE ORDINAL INDICATOR
|
||||
// $pre YYY $post <> \u00BA ; # MASCULINE ORDINAL INDICATOR
|
||||
// $pre YYY $post <> \u0110 ; # LATIN CAPITAL LETTER D WITH STROKE
|
||||
// $pre YYY $post <> \u0111 ; # LATIN SMALL LETTER D WITH STROKE
|
||||
// $pre YYY $post <> \u0126 ; # LATIN CAPITAL LETTER H WITH STROKE
|
||||
// $pre YYY $post <> \u0127 ; # LATIN SMALL LETTER H WITH STROKE
|
||||
// $pre YYY $post <> \u0131 ; # LATIN SMALL LETTER DOTLESS I
|
||||
// $pre YYY $post <> \u0138 ; # LATIN SMALL LETTER KRA
|
||||
// $pre YYY $post <> \u013F ; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
|
||||
// $pre YYY $post <> \u0140 ; # LATIN SMALL LETTER L WITH MIDDLE DOT
|
||||
// $pre YYY $post <> \u0141 ; # LATIN CAPITAL LETTER L WITH STROKE
|
||||
// $pre YYY $post <> \u0142 ; # LATIN SMALL LETTER L WITH STROKE
|
||||
// $pre YYY $post <> \u0149 ; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
|
||||
// $pre YYY $post <> \u0166 ; # LATIN CAPITAL LETTER T WITH STROKE
|
||||
// $pre YYY $post <> \u0167 ; # LATIN SMALL LETTER T WITH STROKE
|
||||
// $pre YYY $post <> \u017F ; # LATIN SMALL LETTER LONG S
|
||||
// $pre YYY $post <> \u0180 ; # LATIN SMALL LETTER B WITH STROKE
|
||||
// $pre YYY $post <> \u0181 ; # LATIN CAPITAL LETTER B WITH HOOK
|
||||
// $pre YYY $post <> \u0182 ; # LATIN CAPITAL LETTER B WITH TOPBAR
|
||||
// $pre YYY $post <> \u0183 ; # LATIN SMALL LETTER B WITH TOPBAR
|
||||
// $pre YYY $post <> \u0184 ; # LATIN CAPITAL LETTER TONE SIX
|
||||
// $pre YYY $post <> \u0185 ; # LATIN SMALL LETTER TONE SIX
|
||||
// $pre YYY $post <> \u0187 ; # LATIN CAPITAL LETTER C WITH HOOK
|
||||
// $pre YYY $post <> \u0188 ; # LATIN SMALL LETTER C WITH HOOK
|
||||
// $pre YYY $post <> \u0189 ; # LATIN CAPITAL LETTER AFRICAN D
|
||||
// $pre YYY $post <> \u018A ; # LATIN CAPITAL LETTER D WITH HOOK
|
||||
// $pre YYY $post <> \u018B ; # LATIN CAPITAL LETTER D WITH TOPBAR
|
||||
// $pre YYY $post <> \u018C ; # LATIN SMALL LETTER D WITH TOPBAR
|
||||
// $pre YYY $post <> \u018D ; # LATIN SMALL LETTER TURNED DELTA
|
||||
// $pre YYY $post <> \u018E ; # LATIN CAPITAL LETTER REVERSED E
|
||||
// $pre YYY $post <> \u0191 ; # LATIN CAPITAL LETTER F WITH HOOK
|
||||
// $pre YYY $post <> \u0192 ; # LATIN SMALL LETTER F WITH HOOK
|
||||
// $pre YYY $post <> \u0193 ; # LATIN CAPITAL LETTER G WITH HOOK
|
||||
// $pre YYY $post <> \u0194 ; # LATIN CAPITAL LETTER GAMMA
|
||||
// $pre YYY $post <> \u0195 ; # LATIN SMALL LETTER HV
|
||||
// $pre YYY $post <> \u0196 ; # LATIN CAPITAL LETTER IOTA
|
||||
// $pre YYY $post <> \u0197 ; # LATIN CAPITAL LETTER I WITH STROKE
|
||||
// $pre YYY $post <> \u0198 ; # LATIN CAPITAL LETTER K WITH HOOK
|
||||
// $pre YYY $post <> \u0199 ; # LATIN SMALL LETTER K WITH HOOK
|
||||
// $pre YYY $post <> \u019A ; # LATIN SMALL LETTER L WITH BAR
|
||||
// $pre YYY $post <> \u019B ; # LATIN SMALL LETTER LAMBDA WITH STROKE
|
||||
// $pre YYY $post <> \u019C ; # LATIN CAPITAL LETTER TURNED M
|
||||
// $pre YYY $post <> \u019D ; # LATIN CAPITAL LETTER N WITH LEFT HOOK
|
||||
// $pre YYY $post <> \u019E ; # LATIN SMALL LETTER N WITH LONG RIGHT LEG
|
||||
// $pre YYY $post <> \u019F ; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
|
||||
// $pre YYY $post <> \u01A2 ; # LATIN CAPITAL LETTER OI
|
||||
// $pre YYY $post <> \u01A3 ; # LATIN SMALL LETTER OI
|
||||
// $pre YYY $post <> \u01A4 ; # LATIN CAPITAL LETTER P WITH HOOK
|
||||
// $pre YYY $post <> \u01A5 ; # LATIN SMALL LETTER P WITH HOOK
|
||||
// $pre YYY $post <> \u01A6 ; # LATIN LETTER YR
|
||||
// $pre YYY $post <> \u01A7 ; # LATIN CAPITAL LETTER TONE TWO
|
||||
// $pre YYY $post <> \u01A8 ; # LATIN SMALL LETTER TONE TWO
|
||||
// $pre YYY $post <> \u01AA ; # LATIN LETTER REVERSED ESH LOOP
|
||||
// $pre YYY $post <> \u01AB ; # LATIN SMALL LETTER T WITH PALATAL HOOK
|
||||
// $pre YYY $post <> \u01AC ; # LATIN CAPITAL LETTER T WITH HOOK
|
||||
// $pre YYY $post <> \u01AD ; # LATIN SMALL LETTER T WITH HOOK
|
||||
// $pre YYY $post <> \u01AE ; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
|
||||
// $pre YYY $post <> \u01B2 ; # LATIN CAPITAL LETTER V WITH HOOK
|
||||
// $pre YYY $post <> \u01B3 ; # LATIN CAPITAL LETTER Y WITH HOOK
|
||||
// $pre YYY $post <> \u01B4 ; # LATIN SMALL LETTER Y WITH HOOK
|
||||
// $pre YYY $post <> \u01B5 ; # LATIN CAPITAL LETTER Z WITH STROKE
|
||||
// $pre YYY $post <> \u01B6 ; # LATIN SMALL LETTER Z WITH STROKE
|
||||
// $pre YYY $post <> \u01B8 ; # LATIN CAPITAL LETTER EZH REVERSED
|
||||
// $pre YYY $post <> \u01B9 ; # LATIN SMALL LETTER EZH REVERSED
|
||||
// $pre YYY $post <> \u01BA ; # LATIN SMALL LETTER EZH WITH TAIL
|
||||
// $pre YYY $post <> \u01BB ; # LATIN LETTER TWO WITH STROKE
|
||||
// $pre YYY $post <> \u01BC ; # LATIN CAPITAL LETTER TONE FIVE
|
||||
// $pre YYY $post <> \u01BD ; # LATIN SMALL LETTER TONE FIVE
|
||||
// $pre YYY $post <> \u01BE ; # LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE
|
||||
// $pre YYY $post <> \u01BF ; # LATIN LETTER WYNN
|
||||
// $pre YYY $post <> \u01C0 ; # LATIN LETTER DENTAL CLICK
|
||||
// $pre YYY $post <> \u01C1 ; # LATIN LETTER LATERAL CLICK
|
||||
// $pre YYY $post <> \u01C2 ; # LATIN LETTER ALVEOLAR CLICK
|
||||
// $pre YYY $post <> \u01C3 ; # LATIN LETTER RETROFLEX CLICK
|
||||
// $pre YYY $post <> \u01C4 ; # LATIN CAPITAL LETTER DZ WITH CARON
|
||||
// $pre YYY $post <> \u01C5 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
|
||||
// $pre YYY $post <> \u01C6 ; # LATIN SMALL LETTER DZ WITH CARON
|
||||
// $pre YYY $post <> \u01C7 ; # LATIN CAPITAL LETTER LJ
|
||||
// $pre YYY $post <> \u01C8 ; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
|
||||
// $pre YYY $post <> \u01C9 ; # LATIN SMALL LETTER LJ
|
||||
// $pre YYY $post <> \u01CA ; # LATIN CAPITAL LETTER NJ
|
||||
// $pre YYY $post <> \u01CB ; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
|
||||
// $pre YYY $post <> \u01CC ; # LATIN SMALL LETTER NJ
|
||||
// $pre YYY $post <> \u01DD ; # LATIN SMALL LETTER TURNED E
|
||||
// $pre YYY $post <> \u01E4 ; # LATIN CAPITAL LETTER G WITH STROKE
|
||||
// $pre YYY $post <> \u01E5 ; # LATIN SMALL LETTER G WITH STROKE
|
||||
// $pre YYY $post <> \u01F1 ; # LATIN CAPITAL LETTER DZ
|
||||
// $pre YYY $post <> \u01F2 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
|
||||
// $pre YYY $post <> \u01F3 ; # LATIN SMALL LETTER DZ
|
||||
// $pre YYY $post <> \u01F6 ; # LATIN CAPITAL LETTER HWAIR
|
||||
// $pre YYY $post <> \u01F7 ; # LATIN CAPITAL LETTER WYNN
|
||||
// $pre YYY $post <> \u021C ; # LATIN CAPITAL LETTER YOGH
|
||||
// $pre YYY $post <> \u021D ; # LATIN SMALL LETTER YOGH
|
||||
// $pre YYY $post <> \u0222 ; # LATIN CAPITAL LETTER OU
|
||||
// $pre YYY $post <> \u0223 ; # LATIN SMALL LETTER OU
|
||||
// $pre YYY $post <> \u0224 ; # LATIN CAPITAL LETTER Z WITH HOOK
|
||||
// $pre YYY $post <> \u0225 ; # LATIN SMALL LETTER Z WITH HOOK
|
||||
// $pre YYY $post <> \u0250 ; # LATIN SMALL LETTER TURNED A
|
||||
// $pre YYY $post <> \u0251 ; # LATIN SMALL LETTER ALPHA
|
||||
// $pre YYY $post <> \u0252 ; # LATIN SMALL LETTER TURNED ALPHA
|
||||
// $pre YYY $post <> \u0253 ; # LATIN SMALL LETTER B WITH HOOK
|
||||
// $pre YYY $post <> \u0255 ; # LATIN SMALL LETTER C WITH CURL
|
||||
// $pre YYY $post <> \u0256 ; # LATIN SMALL LETTER D WITH TAIL
|
||||
// $pre YYY $post <> \u0257 ; # LATIN SMALL LETTER D WITH HOOK
|
||||
// $pre YYY $post <> \u0258 ; # LATIN SMALL LETTER REVERSED E
|
||||
// $pre YYY $post <> \u025A ; # LATIN SMALL LETTER SCHWA WITH HOOK
|
||||
// $pre YYY $post <> \u025C ; # LATIN SMALL LETTER REVERSED OPEN E
|
||||
// $pre YYY $post <> \u025D ; # LATIN SMALL LETTER REVERSED OPEN E WITH HOOK
|
||||
// $pre YYY $post <> \u025E ; # LATIN SMALL LETTER CLOSED REVERSED OPEN E
|
||||
// $pre YYY $post <> \u025F ; # LATIN SMALL LETTER DOTLESS J WITH STROKE
|
||||
// $pre YYY $post <> \u0260 ; # LATIN SMALL LETTER G WITH HOOK
|
||||
// $pre YYY $post <> \u0261 ; # LATIN SMALL LETTER SCRIPT G
|
||||
// $pre YYY $post <> \u0262 ; # LATIN LETTER SMALL CAPITAL G
|
||||
// $pre YYY $post <> \u0263 ; # LATIN SMALL LETTER GAMMA
|
||||
// $pre YYY $post <> \u0264 ; # LATIN SMALL LETTER RAMS HORN
|
||||
// $pre YYY $post <> \u0265 ; # LATIN SMALL LETTER TURNED H
|
||||
// $pre YYY $post <> \u0266 ; # LATIN SMALL LETTER H WITH HOOK
|
||||
// $pre YYY $post <> \u0267 ; # LATIN SMALL LETTER HENG WITH HOOK
|
||||
// $pre YYY $post <> \u0268 ; # LATIN SMALL LETTER I WITH STROKE
|
||||
// $pre YYY $post <> \u0269 ; # LATIN SMALL LETTER IOTA
|
||||
// $pre YYY $post <> \u026B ; # LATIN SMALL LETTER L WITH MIDDLE TILDE
|
||||
// $pre YYY $post <> \u026C ; # LATIN SMALL LETTER L WITH BELT
|
||||
// $pre YYY $post <> \u026D ; # LATIN SMALL LETTER L WITH RETROFLEX HOOK
|
||||
// $pre YYY $post <> \u026E ; # LATIN SMALL LETTER LEZH
|
||||
// $pre YYY $post <> \u026F ; # LATIN SMALL LETTER TURNED M
|
||||
// $pre YYY $post <> \u0270 ; # LATIN SMALL LETTER TURNED M WITH LONG LEG
|
||||
// $pre YYY $post <> \u0271 ; # LATIN SMALL LETTER M WITH HOOK
|
||||
// $pre YYY $post <> \u0272 ; # LATIN SMALL LETTER N WITH LEFT HOOK
|
||||
// $pre YYY $post <> \u0273 ; # LATIN SMALL LETTER N WITH RETROFLEX HOOK
|
||||
// $pre YYY $post <> \u0274 ; # LATIN LETTER SMALL CAPITAL N
|
||||
// $pre YYY $post <> \u0275 ; # LATIN SMALL LETTER BARRED O
|
||||
// $pre YYY $post <> \u0276 ; # LATIN LETTER SMALL CAPITAL OE
|
||||
// $pre YYY $post <> \u0277 ; # LATIN SMALL LETTER CLOSED OMEGA
|
||||
// $pre YYY $post <> \u0278 ; # LATIN SMALL LETTER PHI
|
||||
// $pre YYY $post <> \u0279 ; # LATIN SMALL LETTER TURNED R
|
||||
// $pre YYY $post <> \u027A ; # LATIN SMALL LETTER TURNED R WITH LONG LEG
|
||||
// $pre YYY $post <> \u027B ; # LATIN SMALL LETTER TURNED R WITH HOOK
|
||||
// $pre YYY $post <> \u027C ; # LATIN SMALL LETTER R WITH LONG LEG
|
||||
// $pre YYY $post <> \u027D ; # LATIN SMALL LETTER R WITH TAIL
|
||||
// $pre YYY $post <> \u027E ; # LATIN SMALL LETTER R WITH FISHHOOK
|
||||
// $pre YYY $post <> \u027F ; # LATIN SMALL LETTER REVERSED R WITH FISHHOOK
|
||||
// $pre YYY $post <> \u0280 ; # LATIN LETTER SMALL CAPITAL R
|
||||
// $pre YYY $post <> \u0281 ; # LATIN LETTER SMALL CAPITAL INVERTED R
|
||||
// $pre YYY $post <> \u0282 ; # LATIN SMALL LETTER S WITH HOOK
|
||||
// $pre YYY $post <> \u0284 ; # LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
|
||||
// $pre YYY $post <> \u0285 ; # LATIN SMALL LETTER SQUAT REVERSED ESH
|
||||
// $pre YYY $post <> \u0286 ; # LATIN SMALL LETTER ESH WITH CURL
|
||||
// $pre YYY $post <> \u0287 ; # LATIN SMALL LETTER TURNED T
|
||||
// $pre YYY $post <> \u0288 ; # LATIN SMALL LETTER T WITH RETROFLEX HOOK
|
||||
// $pre YYY $post <> \u0289 ; # LATIN SMALL LETTER U BAR
|
||||
// $pre YYY $post <> \u028B ; # LATIN SMALL LETTER V WITH HOOK
|
||||
// $pre YYY $post <> \u028D ; # LATIN SMALL LETTER TURNED W
|
||||
// $pre YYY $post <> \u028E ; # LATIN SMALL LETTER TURNED Y
|
||||
// $pre YYY $post <> \u028F ; # LATIN LETTER SMALL CAPITAL Y
|
||||
// $pre YYY $post <> \u0290 ; # LATIN SMALL LETTER Z WITH RETROFLEX HOOK
|
||||
// $pre YYY $post <> \u0291 ; # LATIN SMALL LETTER Z WITH CURL
|
||||
// $pre YYY $post <> \u0293 ; # LATIN SMALL LETTER EZH WITH CURL
|
||||
// $pre YYY $post <> \u0294 ; # LATIN LETTER GLOTTAL STOP
|
||||
// $pre YYY $post <> \u0295 ; # LATIN LETTER PHARYNGEAL VOICED FRICATIVE
|
||||
// $pre YYY $post <> \u0296 ; # LATIN LETTER INVERTED GLOTTAL STOP
|
||||
// $pre YYY $post <> \u0297 ; # LATIN LETTER STRETCHED C
|
||||
// $pre YYY $post <> \u0298 ; # LATIN LETTER BILABIAL CLICK
|
||||
// $pre YYY $post <> \u0299 ; # LATIN LETTER SMALL CAPITAL B
|
||||
// $pre YYY $post <> \u029A ; # LATIN SMALL LETTER CLOSED OPEN E
|
||||
// $pre YYY $post <> \u029B ; # LATIN LETTER SMALL CAPITAL G WITH HOOK
|
||||
// $pre YYY $post <> \u029C ; # LATIN LETTER SMALL CAPITAL H
|
||||
// $pre YYY $post <> \u029D ; # LATIN SMALL LETTER J WITH CROSSED-TAIL
|
||||
// $pre YYY $post <> \u029E ; # LATIN SMALL LETTER TURNED K
|
||||
// $pre YYY $post <> \u029F ; # LATIN LETTER SMALL CAPITAL L
|
||||
// $pre YYY $post <> \u02A0 ; # LATIN SMALL LETTER Q WITH HOOK
|
||||
// $pre YYY $post <> \u02A1 ; # LATIN LETTER GLOTTAL STOP WITH STROKE
|
||||
// $pre YYY $post <> \u02A2 ; # LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE
|
||||
// $pre YYY $post <> \u02A3 ; # LATIN SMALL LETTER DZ DIGRAPH
|
||||
// $pre YYY $post <> \u02A4 ; # LATIN SMALL LETTER DEZH DIGRAPH
|
||||
// $pre YYY $post <> \u02A5 ; # LATIN SMALL LETTER DZ DIGRAPH WITH CURL
|
||||
// $pre YYY $post <> \u02A6 ; # LATIN SMALL LETTER TS DIGRAPH
|
||||
// $pre YYY $post <> \u02A7 ; # LATIN SMALL LETTER TESH DIGRAPH
|
||||
// $pre YYY $post <> \u02A8 ; # LATIN SMALL LETTER TC DIGRAPH WITH CURL
|
||||
// $pre YYY $post <> \u02A9 ; # LATIN SMALL LETTER FENG DIGRAPH
|
||||
// $pre YYY $post <> \u02AA ; # LATIN SMALL LETTER LS DIGRAPH
|
||||
// $pre YYY $post <> \u02AB ; # LATIN SMALL LETTER LZ DIGRAPH
|
||||
// $pre YYY $post <> \u02AC ; # LATIN LETTER BILABIAL PERCUSSIVE
|
||||
// $pre YYY $post <> \u02AD ; # LATIN LETTER BIDENTAL PERCUSSIVE
|
||||
// $pre YYY $post <> \u02B0 ; # MODIFIER LETTER SMALL H
|
||||
// $pre YYY $post <> \u02B1 ; # MODIFIER LETTER SMALL H WITH HOOK
|
||||
// $pre YYY $post <> \u02B2 ; # MODIFIER LETTER SMALL J
|
||||
// $pre YYY $post <> \u02B3 ; # MODIFIER LETTER SMALL R
|
||||
// $pre YYY $post <> \u02B4 ; # MODIFIER LETTER SMALL TURNED R
|
||||
// $pre YYY $post <> \u02B5 ; # MODIFIER LETTER SMALL TURNED R WITH HOOK
|
||||
// $pre YYY $post <> \u02B6 ; # MODIFIER LETTER SMALL CAPITAL INVERTED R
|
||||
// $pre YYY $post <> \u02B7 ; # MODIFIER LETTER SMALL W
|
||||
// $pre YYY $post <> \u02B8 ; # MODIFIER LETTER SMALL Y
|
||||
// $pre YYY $post <> \u02E0 ; # MODIFIER LETTER SMALL GAMMA
|
||||
// $pre YYY $post <> \u02E1 ; # MODIFIER LETTER SMALL L
|
||||
// $pre YYY $post <> \u02E2 ; # MODIFIER LETTER SMALL S
|
||||
// $pre YYY $post <> \u02E3 ; # MODIFIER LETTER SMALL X
|
||||
// $pre YYY $post <> \u02E4 ; # MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
|
||||
// $pre YYY $post <> \u1E9A ; # LATIN SMALL LETTER A WITH RIGHT HALF RING
|
||||
// $pre YYY $post <> \u207F ; # SUPERSCRIPT LATIN SMALL LETTER N
|
||||
|
||||
":: NFC (NFD) ;"
|
||||
}
|
||||
}
|
55
icu4c/data/translit_Any_Publishing.txt
Normal file
55
icu4c/data/translit_Any_Publishing.txt
Normal file
@ -0,0 +1,55 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Any_Publishing.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Any_Publishing
|
||||
|
||||
translit_Any_Publishing {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// $Source: /xsrl/Nsvn/icu/icu/data/Attic/translit_Any_Publishing.txt,v $
|
||||
// $Date: 2001/10/26 05:41:16 $
|
||||
// $Revision: 1.1 $
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Test case
|
||||
// "The" "(quick)" ('brown') `fox' ` jumped -- "over?"
|
||||
|
||||
// Variables
|
||||
|
||||
"$single = \' ;"
|
||||
"$space = ' ' ;"
|
||||
"$double = \" ;"
|
||||
"$back = \` ;"
|
||||
"$tab = '\u0008' ;"
|
||||
"$makeRight = [[:Z:][:Ps:][:Pi:]$] ;"
|
||||
|
||||
// fix UNIX quotes
|
||||
|
||||
"$back $back > “ ;"
|
||||
"$back > ‘ ;"
|
||||
|
||||
// fix typewriter quotes, by context
|
||||
|
||||
"$makeRight {$double} <> “ ;"
|
||||
"$double <> ” ;"
|
||||
|
||||
"$makeRight {$single} <> ‘ ;"
|
||||
"$single <> ’;"
|
||||
|
||||
// fix multiple spaces and hyphens
|
||||
|
||||
"$space {$space} > ;"
|
||||
"'--' <> — ;"
|
||||
}
|
||||
}
|
121
icu4c/data/translit_Bengali_InterIndic.txt
Normal file
121
icu4c/data/translit_Bengali_InterIndic.txt
Normal file
@ -0,0 +1,121 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Bengali_InterIndic.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Bengali_InterIndic
|
||||
|
||||
translit_Bengali_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Bengali_InterIndic
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:41:57 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Bengali-InterIndic
|
||||
//:: NFD (NFC) ;
|
||||
"\u0981>\ue001;" // SIGN CANDRABINDU
|
||||
"\u0982>\ue002;" // SIGN ANUSVARA
|
||||
"\u0983>\ue003;" // SIGN VISARGA
|
||||
"\u0985>\ue005;" // LETTER A
|
||||
"\u0986>\ue006;" // LETTER AA
|
||||
"\u0987>\ue007;" // LETTER I
|
||||
"\u0988>\ue008;" // LETTER II
|
||||
"\u0989>\ue009;" // LETTER U
|
||||
"\u098a>\ue00a;" // LETTER UU
|
||||
"\u098b>\ue00b;" // LETTER VOCALIC R
|
||||
"\u098c>\ue00c;" // LETTER VOCALIC L
|
||||
"\u098f>\ue00f;" // LETTER E
|
||||
"\u0990>\ue010;" // LETTER AI
|
||||
"\u0993>\ue013;" // LETTER O
|
||||
"\u0994>\ue014;" // LETTER AU
|
||||
"\u0995>\ue015;" // LETTER KA
|
||||
"\u0996>\ue016;" // LETTER KHA
|
||||
"\u0997>\ue017;" // LETTER GA
|
||||
"\u0998>\ue018;" // LETTER GHA
|
||||
"\u0999>\ue019;" // LETTER NGA
|
||||
"\u099a>\ue01a;" // LETTER CA
|
||||
"\u099b>\ue01b;" // LETTER CHA
|
||||
"\u099c>\ue01c;" // LETTER JA
|
||||
"\u099d>\ue01d;" // LETTER JHA
|
||||
"\u099e>\ue01e;" // LETTER NYA
|
||||
"\u099f>\ue01f;" // LETTER TTA
|
||||
"\u09a0>\ue020;" // LETTER TTHA
|
||||
"\u09a1>\ue021;" // LETTER DDA
|
||||
"\u09a2>\ue022;" // LETTER DDHA
|
||||
"\u09a3>\ue023;" // LETTER NNA
|
||||
"\u09a4>\ue024;" // LETTER TA
|
||||
"\u09a5>\ue025;" // LETTER THA
|
||||
"\u09a6>\ue026;" // LETTER DA
|
||||
"\u09a7>\ue027;" // LETTER DHA
|
||||
"\u09a8>\ue028;" // LETTER NA
|
||||
"\u09aa>\ue02a;" // LETTER PA
|
||||
"\u09ab>\ue02b;" // LETTER PHA
|
||||
"\u09ac>\ue02c;" // LETTER BA
|
||||
"\u09ad>\ue02d;" // LETTER BHA
|
||||
"\u09ae>\ue02e;" // LETTER MA
|
||||
"\u09af>\ue02f;" // LETTER YA
|
||||
"\u09b0>\ue030;" // LETTER RA
|
||||
"\u09b2>\ue032;" // LETTER LA
|
||||
"\u09b6>\ue036;" // LETTER SHA
|
||||
"\u09b7>\ue037;" // LETTER SSA
|
||||
"\u09b8>\ue038;" // LETTER SA
|
||||
"\u09b9>\ue039;" // LETTER HA
|
||||
"\u09bc>\ue03c;" // SIGN NUKTA
|
||||
"\u09be>\ue03e;" // VOWEL SIGN AA
|
||||
"\u09bf>\ue03f;" // VOWEL SIGN I
|
||||
"\u09c0>\ue040;" // VOWEL SIGN II
|
||||
"\u09c1>\ue041;" // VOWEL SIGN U
|
||||
"\u09c2>\ue042;" // VOWEL SIGN UU
|
||||
"\u09c3>\ue043;" // VOWEL SIGN VOCALIC R
|
||||
"\u09c4>\ue044;" // VOWEL SIGN VOCALIC RR
|
||||
"\u09c7>\ue047;" // VOWEL SIGN E
|
||||
"\u09c8>\ue048;" // VOWEL SIGN AI
|
||||
"\u09cb>\ue04b;" // VOWEL SIGN O
|
||||
"\u09cc>\ue04c;" // VOWEL SIGN AU
|
||||
"\u09cd>\ue04d;" // SIGN VIRAMA
|
||||
"\u09d7>\ue057;" // AU LENGTH MARK
|
||||
"\u09dc>\ue053;" // LETTER RRA
|
||||
"\u09dd>\ue05d;" // LETTER RHA
|
||||
"\u09df>\ue05f;" // LETTER YYA
|
||||
"\u09e0>\ue060;" // LETTER VOCALIC RR
|
||||
"\u09e1>\ue061;" // LETTER VOCALIC LL
|
||||
"\u09e2>\ue062;" // VOWEL SIGN VOCALIC L
|
||||
"\u09e3>\ue063;" // VOWEL SIGN VOCALIC LL
|
||||
"\u09e6>\ue066;" // DIGIT ZERO
|
||||
"\u09e7>\ue067;" // DIGIT ONE
|
||||
"\u09e8>\ue068;" // DIGIT TWO
|
||||
"\u09e9>\ue069;" // DIGIT THREE
|
||||
"\u09ea>\ue06a;" // DIGIT FOUR
|
||||
"\u09eb>\ue06b;" // DIGIT FIVE
|
||||
"\u09ec>\ue06c;" // DIGIT SIX
|
||||
"\u09ed>\ue06d;" // DIGIT SEVEN
|
||||
"\u09ee>\ue06e;" // DIGIT EIGHT
|
||||
"\u09ef>\ue06f;" // DIGIT NINE
|
||||
// \u09f0>; # UNMAPPED Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL
|
||||
// \u09f1>; # UNMAPPED Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL
|
||||
// \u09f2>; # UNMAPPED Bengali-InterIndic: RUPEE MARK
|
||||
// \u09f3>; # UNMAPPED Bengali-InterIndic: RUPEE SIGN
|
||||
// \u09f4>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR ONE
|
||||
// \u09f5>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR TWO
|
||||
// \u09f6>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR THREE
|
||||
// \u09f7>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR FOUR
|
||||
// \u09f8>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
// \u09f9>; # UNMAPPED Bengali-InterIndic: CURRENCY DENOMINATOR SIXTEEN
|
||||
"\u09fa>\ue070;" // ISSHAR
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
316
icu4c/data/translit_Cyrillic_Latin.txt
Normal file
316
icu4c/data/translit_Cyrillic_Latin.txt
Normal file
@ -0,0 +1,316 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Cyrillic_Latin.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Cyrillic_Latin
|
||||
|
||||
translit_Cyrillic_Latin {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// $Source: /xsrl/Nsvn/icu/icu/data/Attic/translit_Cyrillic_Latin.txt,v $
|
||||
// $Date: 2001/10/26 05:41:16 $
|
||||
// $Revision: 1.1 $
|
||||
//--------------------------------------------------------------------
|
||||
// TODO: add remaining characters
|
||||
// Should add variants for Russian-English, Russian-German
|
||||
// Those can use this as a base, and then remap cases
|
||||
// like a $hat to ya or ja.
|
||||
|
||||
":: NFD (NFC) ;"
|
||||
|
||||
"$modprime = \u02B9;"
|
||||
"$modprime2 = \u02BA;"
|
||||
|
||||
"$grave = \u0300;"
|
||||
"$acute = \u0301;"
|
||||
"$hat = \u0302;"
|
||||
"$breve = \u0306 ;"
|
||||
"$dot = \u0307 ;"
|
||||
"$caron = \u030C ;"
|
||||
"$comma = \u0326 ;"
|
||||
|
||||
// move up so not masked
|
||||
|
||||
"я <> a $hat ;" // CYRILLIC SMALL LETTER YA
|
||||
"Я <> A $hat ;" // CYRILLIC CAPITAL LETTER YA
|
||||
|
||||
"ч <> c $caron ;" // CYRILLIC SMALL LETTER CHE
|
||||
"Ч <> C $caron;" // CYRILLIC CAPITAL LETTER CHE
|
||||
// ҷ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER
|
||||
// Ҷ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
|
||||
// ӌ <> XXX ; # CYRILLIC SMALL LETTER KHAKASSIAN CHE
|
||||
// Ӌ <> XXX ; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
|
||||
// ҹ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
|
||||
// Ҹ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
|
||||
|
||||
"э <> e $acute;" // CYRILLIC SMALL LETTER E
|
||||
"Э <> E $acute;" // CYRILLIC CAPITAL LETTER E
|
||||
"є <> e $hat;" // CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
"Є <> E $hat;" // CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
|
||||
"ш <> s $caron ;" // CYRILLIC SMALL LETTER SHA
|
||||
"Ш <> S $caron ;" // CYRILLIC CAPITAL LETTER SHA
|
||||
"щ <> s $hat ;" // CYRILLIC SMALL LETTER SHCHA
|
||||
"Щ <> S $hat;" // CYRILLIC CAPITAL LETTER SHCHA
|
||||
|
||||
"ѕ <> z $hat ;" // CYRILLIC SMALL LETTER DZE
|
||||
"Ѕ <> Z $hat;" // CYRILLIC CAPITAL LETTER DZE
|
||||
// ӡ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN DZE
|
||||
// Ӡ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
|
||||
|
||||
"ю <> u $hat ;" // CYRILLIC SMALL LETTER YU
|
||||
"Ю <> U $hat ;" // CYRILLIC CAPITAL LETTER YU
|
||||
|
||||
"і <> i $acute;" // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
"І <> I $acute;" // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
"ј <> j $caron;" // CYRILLIC SMALL LETTER JE
|
||||
"Ј <> J $caron;" // CYRILLIC CAPITAL LETTER JE
|
||||
|
||||
"љ <> l $hat ;" // CYRILLIC SMALL LETTER LJE
|
||||
"Љ <> L $hat ;" // CYRILLIC CAPITAL LETTER LJE
|
||||
"њ <> n $hat ;" // CYRILLIC SMALL LETTER NJE
|
||||
"Њ <> N $hat ;" // CYRILLIC CAPITAL LETTER NJE
|
||||
|
||||
"ћ <> c $acute ;" // CYRILLIC SMALL LETTER TSHE
|
||||
"Ћ <> C $acute ;" // CYRILLIC CAPITAL LETTER TSHE
|
||||
|
||||
"џ <> d $hat ;" // CYRILLIC SMALL LETTER DZHE
|
||||
"Џ <> D $hat ;" // CYRILLIC CAPITAL LETTER DZHE
|
||||
|
||||
// Normal order
|
||||
|
||||
"а <> a ;" // CYRILLIC SMALL LETTER A
|
||||
"А <> A ;" // CYRILLIC CAPITAL LETTER A
|
||||
"ә <> \u0259 ;" // CYRILLIC SMALL LETTER SCHWA
|
||||
"Ә <> \u018F ;" // CYRILLIC CAPITAL LETTER SCHWA
|
||||
"ӕ <> \u00E6 ;" // CYRILLIC SMALL LIGATURE A IE
|
||||
"Ӕ <> \u00C6 ;" // CYRILLIC CAPITAL LIGATURE A IE
|
||||
"б <> b ;" // CYRILLIC SMALL LETTER BE
|
||||
"Б <> B ;" // CYRILLIC CAPITAL LETTER BE
|
||||
"в <> v ;" // CYRILLIC SMALL LETTER VE
|
||||
"В <> V ;" // CYRILLIC CAPITAL LETTER VE
|
||||
|
||||
"ґ <> g $grave ;" // CYRILLIC SMALL LETTER GHE WITH UPTURN
|
||||
"Ґ <> G $grave ;" // CYRILLIC CAPITAL LETTER GHE WITH UPTURN
|
||||
"ғ <> g $dot ;" // CYRILLIC SMALL LETTER GHE WITH STROKE
|
||||
"Ғ <> G $dot;" // CYRILLIC CAPITAL LETTER GHE WITH STROKE
|
||||
"ҕ <> g $breve;" // CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK
|
||||
"Ҕ <> G $breve;" // CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
|
||||
"г <> g ;" // CYRILLIC SMALL LETTER GHE
|
||||
"Г <> G ;" // CYRILLIC CAPITAL LETTER GHE
|
||||
|
||||
"д <> d;" // CYRILLIC SMALL LETTER DE
|
||||
"Д <> D;" // CYRILLIC CAPITAL LETTER DE
|
||||
"ђ <> đ ;" // CYRILLIC SMALL LETTER DJE
|
||||
"Ђ <> Đ ;" // CYRILLIC CAPITAL LETTER DJE
|
||||
"ҙ <> z $comma ;" // CYRILLIC SMALL LETTER ZE WITH DESCENDER
|
||||
"Ҙ <> Z $comma ;" // CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
|
||||
"е <> e ;" // CYRILLIC SMALL LETTER IE
|
||||
"Е <> E;" // CYRILLIC CAPITAL LETTER IE
|
||||
|
||||
"ж <> z $caron;" // CYRILLIC SMALL LETTER ZHE
|
||||
"Ж <> Z $caron;" // CYRILLIC CAPITAL LETTER ZHE
|
||||
|
||||
// җ <> XXX ; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER
|
||||
// Җ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
|
||||
|
||||
"з <> z ;" // CYRILLIC SMALL LETTER ZE
|
||||
"З <> Z;" // CYRILLIC CAPITAL LETTER ZE
|
||||
|
||||
"й <> j ;" // CYRILLIC SMALL LETTER SHORT I
|
||||
"Й <> J ;" // CYRILLIC CAPITAL LETTER SHORT I
|
||||
"и <> i ;" // CYRILLIC SMALL LETTER I
|
||||
"И <> I ;" // CYRILLIC CAPITAL LETTER I
|
||||
|
||||
"к <> k ;" // CYRILLIC SMALL LETTER KA
|
||||
"К <> K;" // CYRILLIC CAPITAL LETTER KA
|
||||
|
||||
// қ <> XXX ; # CYRILLIC SMALL LETTER KA WITH DESCENDER
|
||||
// Қ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
|
||||
// ӄ <> XXX ; # CYRILLIC SMALL LETTER KA WITH HOOK
|
||||
// Ӄ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH HOOK
|
||||
// ҡ <> XXX ; # CYRILLIC SMALL LETTER BASHKIR KA
|
||||
// Ҡ <> XXX ; # CYRILLIC CAPITAL LETTER BASHKIR KA
|
||||
// ҟ <> XXX ; # CYRILLIC SMALL LETTER KA WITH STROKE
|
||||
// Ҟ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH STROKE
|
||||
// ҝ <> XXX ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
|
||||
// Ҝ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
|
||||
"л <> l ;" // CYRILLIC SMALL LETTER EL
|
||||
"Л <> L;" // CYRILLIC CAPITAL LETTER EL
|
||||
|
||||
"м <> m ;" // CYRILLIC SMALL LETTER EM
|
||||
"М <> M ;" // CYRILLIC CAPITAL LETTER EM
|
||||
"н <> n ;" // CYRILLIC SMALL LETTER EN
|
||||
"Н <> N;" // CYRILLIC CAPITAL LETTER EN
|
||||
// ң <> XXX ; # CYRILLIC SMALL LETTER EN WITH DESCENDER
|
||||
// Ң <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
|
||||
// ӈ <> XXX ; # CYRILLIC SMALL LETTER EN WITH HOOK
|
||||
// Ӈ <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH HOOK
|
||||
// ҥ <> XXX ; # CYRILLIC SMALL LIGATURE EN GHE
|
||||
// Ҥ <> XXX ; # CYRILLIC CAPITAL LIGATURE EN GHE
|
||||
|
||||
"о <> o ;" // CYRILLIC SMALL LETTER O
|
||||
"О <> O ;" // CYRILLIC CAPITAL LETTER O
|
||||
// ө <> XXX ; # CYRILLIC SMALL LETTER BARRED O
|
||||
// Ө <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
|
||||
"п <> p ;" // CYRILLIC SMALL LETTER PE
|
||||
"П <> P ;" // CYRILLIC CAPITAL LETTER PE
|
||||
// ҧ <> XXX ; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK
|
||||
// Ҧ <> XXX ; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
|
||||
// ҁ <> XXX ; # CYRILLIC SMALL LETTER KOPPA
|
||||
// Ҁ <> XXX ; # CYRILLIC CAPITAL LETTER KOPPA
|
||||
"р <> r ;" // CYRILLIC SMALL LETTER ER
|
||||
"Р <> R ;" // CYRILLIC CAPITAL LETTER ER
|
||||
// ҏ <> XXX ; # CYRILLIC SMALL LETTER ER WITH TICK
|
||||
// Ҏ <> XXX ; # CYRILLIC CAPITAL LETTER ER WITH TICK
|
||||
"с <> s ;" // CYRILLIC SMALL LETTER ES
|
||||
"С <> S ;" // CYRILLIC CAPITAL LETTER ES
|
||||
// ҫ <> XXX ; # CYRILLIC SMALL LETTER ES WITH DESCENDER
|
||||
// Ҫ <> XXX ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
|
||||
"т <> t ;" // CYRILLIC SMALL LETTER TE
|
||||
"Т <> T ;" // CYRILLIC CAPITAL LETTER TE
|
||||
// ҭ <> XXX ; # CYRILLIC SMALL LETTER TE WITH DESCENDER
|
||||
// Ҭ <> XXX ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
|
||||
|
||||
"у <> u ;" // CYRILLIC SMALL LETTER U
|
||||
"У <> U ;" // CYRILLIC CAPITAL LETTER U
|
||||
// ү <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U
|
||||
// Ү <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U
|
||||
// ұ <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
|
||||
// Ұ <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
|
||||
// ѹ <> XXX ; # CYRILLIC SMALL LETTER UK
|
||||
// Ѹ <> XXX ; # CYRILLIC CAPITAL LETTER UK
|
||||
"ф <> f ;" // CYRILLIC SMALL LETTER EF
|
||||
"Ф <> F ;" // CYRILLIC CAPITAL LETTER EF
|
||||
"х <> h ;" // CYRILLIC SMALL LETTER HA
|
||||
"Х <> H;" // CYRILLIC CAPITAL LETTER HA
|
||||
// ҳ <> XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER
|
||||
// Ҳ <> XXX ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
|
||||
// һ <> XXX ; # CYRILLIC SMALL LETTER SHHA
|
||||
// Һ <> XXX ; # CYRILLIC CAPITAL LETTER SHHA
|
||||
// ѡ <> XXX ; # CYRILLIC SMALL LETTER OMEGA
|
||||
// Ѡ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA
|
||||
// ѿ <> XXX ; # CYRILLIC SMALL LETTER OT
|
||||
// Ѿ <> XXX ; # CYRILLIC CAPITAL LETTER OT
|
||||
// ѽ <> XXX ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO
|
||||
// Ѽ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
|
||||
// ѻ <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA
|
||||
// Ѻ <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA
|
||||
"ц <> c ;" // CYRILLIC SMALL LETTER TSE
|
||||
"Ц <> C;" // CYRILLIC CAPITAL LETTER TSE
|
||||
// ҵ <> XXX ; # CYRILLIC SMALL LIGATURE TE TSE
|
||||
// Ҵ <> XXX ; # CYRILLIC CAPITAL LIGATURE TE TSE
|
||||
|
||||
// ҽ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE
|
||||
// Ҽ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
|
||||
// ҿ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER
|
||||
// Ҿ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
|
||||
|
||||
|
||||
"ъ <> [:Ll:] { $modprime2 ;" // CYRILLIC SMALL LETTER HARD SIGN
|
||||
"Ъ <> $modprime2 ;" // CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
"ы <> y ;" // CYRILLIC SMALL LETTER YERU
|
||||
"Ы <> Y ;" // CYRILLIC CAPITAL LETTER YERU
|
||||
"ь <> [:Ll:] { $modprime ;" // CYRILLIC SMALL LETTER SOFT SIGN
|
||||
"Ь <> $modprime ;" // CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||
// ҍ <> XXX ; # CYRILLIC SMALL LETTER SEMISOFT SIGN
|
||||
// Ҍ <> XXX ; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
|
||||
// ѣ <> XXX ; # CYRILLIC SMALL LETTER YAT
|
||||
// Ѣ <> XXX ; # CYRILLIC CAPITAL LETTER YAT
|
||||
|
||||
// ѥ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED E
|
||||
// Ѥ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED E
|
||||
// ѧ <> XXX ; # CYRILLIC SMALL LETTER LITTLE YUS
|
||||
// Ѧ <> XXX ; # CYRILLIC CAPITAL LETTER LITTLE YUS
|
||||
// ѫ <> XXX ; # CYRILLIC SMALL LETTER BIG YUS
|
||||
// Ѫ <> XXX ; # CYRILLIC CAPITAL LETTER BIG YUS
|
||||
// ѩ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS
|
||||
// Ѩ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
|
||||
// ѭ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED BIG YUS
|
||||
// Ѭ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
|
||||
// ѯ <> XXX ; # CYRILLIC SMALL LETTER KSI
|
||||
// Ѯ <> XXX ; # CYRILLIC CAPITAL LETTER KSI
|
||||
// ѱ <> XXX ; # CYRILLIC SMALL LETTER PSI
|
||||
// Ѱ <> XXX ; # CYRILLIC CAPITAL LETTER PSI
|
||||
// ѳ <> XXX ; # CYRILLIC SMALL LETTER FITA
|
||||
// Ѳ <> XXX ; # CYRILLIC CAPITAL LETTER FITA
|
||||
// ѵ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
|
||||
// Ѵ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
|
||||
// ҩ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN HA
|
||||
// Ҩ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
|
||||
// Ӏ <> XXX ; # CYRILLIC LETTER PALOCHKA
|
||||
//## ӑ <> XXX ; # CYRILLIC SMALL LETTER A
|
||||
//## Ӑ <> XXX ; # CYRILLIC CAPITAL LETTER A
|
||||
//## ӓ <> XXX ; # CYRILLIC SMALL LETTER A
|
||||
//## Ӓ <> XXX ; # CYRILLIC CAPITAL LETTER A
|
||||
//## ӛ <> XXX ; # CYRILLIC SMALL LETTER SCHWA
|
||||
//## Ӛ <> XXX ; # CYRILLIC CAPITAL LETTER SCHWA
|
||||
//## ѓ <> XXX ; # CYRILLIC SMALL LETTER GHE
|
||||
//## Ѓ <> XXX ; # CYRILLIC CAPITAL LETTER GHE
|
||||
//## ѐ <> XXX ; # CYRILLIC SMALL LETTER IE
|
||||
//## Ѐ <> XXX ; # CYRILLIC CAPITAL LETTER IE
|
||||
//## ё <> XXX ; # CYRILLIC SMALL LETTER IE
|
||||
//## Ё <> XXX ; # CYRILLIC CAPITAL LETTER IE
|
||||
//## ӗ <> XXX ; # CYRILLIC SMALL LETTER IE
|
||||
//## Ӗ <> XXX ; # CYRILLIC CAPITAL LETTER IE
|
||||
//## ӂ <> XXX ; # CYRILLIC SMALL LETTER ZHE
|
||||
//## Ӂ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
|
||||
//## ӝ <> XXX ; # CYRILLIC SMALL LETTER ZHE
|
||||
//## Ӝ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
|
||||
//## ӟ <> XXX ; # CYRILLIC SMALL LETTER ZE
|
||||
//## Ӟ <> XXX ; # CYRILLIC CAPITAL LETTER ZE
|
||||
//## ѝ <> XXX ; # CYRILLIC SMALL LETTER I
|
||||
//## Ѝ <> XXX ; # CYRILLIC CAPITAL LETTER I
|
||||
//## ӣ <> XXX ; # CYRILLIC SMALL LETTER I
|
||||
//## Ӣ <> XXX ; # CYRILLIC CAPITAL LETTER I
|
||||
//## ӥ <> XXX ; # CYRILLIC SMALL LETTER I
|
||||
//## Ӥ <> XXX ; # CYRILLIC CAPITAL LETTER I
|
||||
//## ї <> XXX ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
//## Ї <> XXX ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
//## ӧ <> XXX ; # CYRILLIC SMALL LETTER O
|
||||
//## Ӧ <> XXX ; # CYRILLIC CAPITAL LETTER O
|
||||
//## ӫ <> XXX ; # CYRILLIC SMALL LETTER BARRED O
|
||||
//## Ӫ <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
|
||||
//## ќ <> XXX ; # CYRILLIC SMALL LETTER KA
|
||||
//## Ќ <> XXX ; # CYRILLIC CAPITAL LETTER KA
|
||||
//## ӯ <> XXX ; # CYRILLIC SMALL LETTER U
|
||||
//## Ӯ <> XXX ; # CYRILLIC CAPITAL LETTER U
|
||||
//## ў <> XXX ; # CYRILLIC SMALL LETTER U
|
||||
//## Ў <> XXX ; # CYRILLIC CAPITAL LETTER U
|
||||
//## ӱ <> XXX ; # CYRILLIC SMALL LETTER U
|
||||
//## Ӱ <> XXX ; # CYRILLIC CAPITAL LETTER U
|
||||
//## ӳ <> XXX ; # CYRILLIC SMALL LETTER U
|
||||
//## Ӳ <> XXX ; # CYRILLIC CAPITAL LETTER U
|
||||
//## ӵ <> XXX ; # CYRILLIC SMALL LETTER CHE
|
||||
//## Ӵ <> XXX ; # CYRILLIC CAPITAL LETTER CHE
|
||||
//## ӹ <> XXX ; # CYRILLIC SMALL LETTER YERU
|
||||
//## Ӹ <> XXX ; # CYRILLIC CAPITAL LETTER YERU
|
||||
//## ӭ <> XXX ; # CYRILLIC SMALL LETTER E
|
||||
//## Ӭ <> XXX ; # CYRILLIC CAPITAL LETTER E
|
||||
//## ѷ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
|
||||
//## Ѷ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
|
||||
|
||||
// Completeness
|
||||
"$ignore = [[:Mark:]''] * ;"
|
||||
"| k < q ;"
|
||||
"| K < Q ;"
|
||||
"| u < w ;"
|
||||
"| U < W ;"
|
||||
"| KS < X } $ignore [:UppercaseLetter:] ;"
|
||||
"| KS < [:UppercaseLetter:] $ignore { X ;"
|
||||
"| Ks < X ;"
|
||||
"| ks < x ;"
|
||||
|
||||
":: NFC (NFD) ;"
|
||||
}
|
||||
}
|
147
icu4c/data/translit_Devanagari_InterIndic.txt
Normal file
147
icu4c/data/translit_Devanagari_InterIndic.txt
Normal file
@ -0,0 +1,147 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Devanagari_InterIndic.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Devanagari_InterIndic
|
||||
|
||||
translit_Devanagari_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Devanagari_InterIndic
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:41:57 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Devanagari-InterIndic
|
||||
// :: NFD;
|
||||
//Rules for Decomposed characters
|
||||
"\u0928\u093c > \ue029;" //\u0929
|
||||
"\u0930\u093c > \ue031;" //\u0931
|
||||
"\u0933\u093c > \ue034;" //\u0934
|
||||
"\u0915\u093c > \ue058;" //\u0958 LETTER QA (For Urdu)
|
||||
"\u0916\u093c > \ue059;" //\u0959 LETTER KHHA (For Urdu)
|
||||
"\u0917\u093c > \ue05a;" //\u095a LETTER GHHA (For Urdu)
|
||||
"\u091c\u093c > \ue05b;" //\u095b LETTER ZA (For Urdu)
|
||||
"\u0921\u093c > \ue05c;" //\u095c LETTER DDDHA (pronounced RRA)
|
||||
"\u0922\u093c > \ue05d;" //\u095d LETTER RHA (pronounced RRHA)
|
||||
"\u092b\u093c > \ue05e;" //\u095e LETTER FA
|
||||
"\u092f\u093c > \ue05f;" //\u095f LETTER YYA
|
||||
"\u0901>\ue001;" // SIGN CANDRABINDU
|
||||
"\u0902>\ue002;" // SIGN ANUSVARA
|
||||
"\u0903>\ue003;" // SIGN VISARGA
|
||||
"\u0905>\ue005;" // LETTER A
|
||||
"\u0906>\ue006;" // LETTER AA
|
||||
"\u0907>\ue007;" // LETTER I
|
||||
"\u0908>\ue008;" // LETTER II
|
||||
"\u0909>\ue009;" // LETTER U
|
||||
"\u090a>\ue00a;" // LETTER UU
|
||||
"\u090b>\ue00b;" // LETTER VOCALIC R
|
||||
"\u090c>\ue00c;" // LETTER VOCALIC L
|
||||
"\u090d>\ue00d;" // LETTER CANDRA E (For representing English sounds)
|
||||
//\u090e>\ue00e; # UNMAPPED LETTER SHORT E (For Southern Scripts)
|
||||
"\u090f>\ue00f;" // LETTER E
|
||||
"\u0910>\ue010;" // LETTER AI
|
||||
"\u0911>\ue011;" // LETTER CANDRA O (For representing English sounds)
|
||||
//\u0912>\ue012; # UNMAPPED LETTER SHORT O (For Southern Scripts)
|
||||
"\u0913>\ue013;" // LETTER O
|
||||
"\u0914>\ue014;" // LETTER AU
|
||||
"\u0915>\ue015;" // LETTER KA
|
||||
"\u0916>\ue016;" // LETTER KHA
|
||||
"\u0917>\ue017;" // LETTER GA
|
||||
"\u0918>\ue018;" // LETTER GHA
|
||||
"\u0919>\ue019;" // LETTER NGA
|
||||
"\u091a>\ue01a;" // LETTER CA
|
||||
"\u091b>\ue01b;" // LETTER CHA
|
||||
"\u091c>\ue01c;" // LETTER JA
|
||||
"\u091d>\ue01d;" // LETTER JHA
|
||||
"\u091e>\ue01e;" // LETTER NYA
|
||||
"\u091f>\ue01f;" // LETTER TTA
|
||||
"\u0920>\ue020;" // LETTER TTHA
|
||||
"\u0921>\ue021;" // LETTER DDA
|
||||
"\u0922>\ue022;" // LETTER DDHA
|
||||
"\u0923>\ue023;" // LETTER NNA
|
||||
"\u0924>\ue024;" // LETTER TA
|
||||
"\u0925>\ue025;" // LETTER THA
|
||||
"\u0926>\ue026;" // LETTER DA
|
||||
"\u0927>\ue027;" // LETTER DHA
|
||||
"\u0928>\ue028;" // LETTER NA
|
||||
"\u0929>\ue029;" // LETTER NNNA
|
||||
"\u092a>\ue02a;" // LETTER PA
|
||||
"\u092b>\ue02b;" // LETTER PHA
|
||||
"\u092c>\ue02c;" // LETTER BA
|
||||
"\u092d>\ue02d;" // LETTER BHA
|
||||
"\u092e>\ue02e;" // LETTER MA
|
||||
"\u092f>\ue02f;" // LETTER YA
|
||||
"\u0930>\ue030;" // LETTER RA
|
||||
//\u0931>\ue031; # UNMAPPED LETTER RRA (Eyelash RA for Southern scripts)
|
||||
"\u0932>\ue032;" // LETTER LA
|
||||
"\u0933>\ue033;" // LETTER LLA
|
||||
//\u0934>\ue034; # UNMAPPED LETTER LLLA (LLLA for Southern scripts)
|
||||
"\u0935>\ue035;" // LETTER VA
|
||||
"\u0936>\ue036;" // LETTER SHA
|
||||
"\u0937>\ue037;" // LETTER SSA
|
||||
"\u0938>\ue038;" // LETTER SA
|
||||
"\u0939>\ue039;" // LETTER HA
|
||||
"\u093c>\ue03c;" // SIGN NUKTA
|
||||
"\u093d>\ue03d;" // SIGN AVAGRAHA
|
||||
"\u093e>\ue03e;" // VOWEL SIGN AA
|
||||
"\u093f>\ue03f;" // VOWEL SIGN I
|
||||
"\u0940>\ue040;" // VOWEL SIGN II
|
||||
"\u0941>\ue041;" // VOWEL SIGN U
|
||||
"\u0942>\ue042;" // VOWEL SIGN UU
|
||||
"\u0943>\ue043;" // VOWEL SIGN VOCALIC R
|
||||
"\u0944>\ue044;" // VOWEL SIGN VOCALIC RR
|
||||
"\u0945>\ue045;" // VOWEL SIGN CANDRA E
|
||||
//\u0946>\ue046; # UNMAPPED VOWEL SIGN SHORT E
|
||||
"\u0947>\ue047;" // VOWEL SIGN E
|
||||
"\u0948>\ue048;" // VOWEL SIGN AI
|
||||
"\u0949>\ue049;" // VOWEL SIGN CANDRA O
|
||||
//\u094a>\ue04a; # UNMAPPED VOWEL SIGN SHORT O
|
||||
"\u094b>\ue04b;" // VOWEL SIGN O
|
||||
"\u094c>\ue04c;" // VOWEL SIGN AU
|
||||
"\u094d>\ue04d;" // SIGN VIRAMA
|
||||
"\u0950>\ue050;" // OM
|
||||
// \u0951>; # UNMAPPED STRESS SIGN UDATTA
|
||||
// \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
|
||||
// \u0953>; # UNMAPPED GRAVE ACCENT
|
||||
// \u0954>; # UNMAPPED ACUTE ACCENT
|
||||
"\u0958>\ue058;" // LETTER QA (For Urdu)
|
||||
"\u0959>\ue059;" // LETTER KHHA (For Urdu)
|
||||
"\u095a>\ue05a;" // LETTER GHHA (For Urdu)
|
||||
"\u095b>\ue05b;" // LETTER ZA (For Urdu)
|
||||
"\u095c>\ue05c;" // LETTER DDDHA (pronounced RRA)
|
||||
"\u095d>\ue05d;" // LETTER RHA (pronounced RRHA)
|
||||
"\u095e>\ue05e;" // LETTER FA
|
||||
"\u095f>\ue05f;" // LETTER YYA
|
||||
"\u0960>\ue060;" // LETTER VOCALIC RR
|
||||
"\u0961>\ue061;" // LETTER VOCALIC LL
|
||||
"\u0962>\ue062;" // VOWEL SIGN VOCALIC L
|
||||
"\u0963>\ue063;" // VOWEL SIGN VOCALIC LL
|
||||
// \u0964>; # UNMAPPED Devanagari-InterIndic: DANDA
|
||||
// \u0965>; # UNMAPPED Devanagari-InterIndic: DOUBLE DANDA
|
||||
"\u0966>\ue066;" // DIGIT ZERO
|
||||
"\u0967>\ue067;" // DIGIT ONE
|
||||
"\u0968>\ue068;" // DIGIT TWO
|
||||
"\u0969>\ue069;" // DIGIT THREE
|
||||
"\u096a>\ue06a;" // DIGIT FOUR
|
||||
"\u096b>\ue06b;" // DIGIT FIVE
|
||||
"\u096c>\ue06c;" // DIGIT SIX
|
||||
"\u096d>\ue06d;" // DIGIT SEVEN
|
||||
"\u096e>\ue06e;" // DIGIT EIGHT
|
||||
"\u096f>\ue06f;" // DIGIT NINE
|
||||
// \u0970>; # UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN
|
||||
// :: NFC (NFD) ;
|
||||
}
|
||||
}
|
287
icu4c/data/translit_Fullwidth_Halfwidth.txt
Normal file
287
icu4c/data/translit_Fullwidth_Halfwidth.txt
Normal file
@ -0,0 +1,287 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Fullwidth_Halfwidth.utf8.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Fullwidth_Halfwidth
|
||||
|
||||
translit_Fullwidth_Halfwidth {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:41:57 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Fullwidth-Halfwidth
|
||||
|
||||
// Mechanically generated from Unicode Character Database
|
||||
|
||||
// multicharacter
|
||||
|
||||
"ガ<>ガ;" // to KATAKANA LETTER GA
|
||||
"ギ<>ギ;" // to KATAKANA LETTER GI
|
||||
"グ<>グ;" // to KATAKANA LETTER GU
|
||||
"ゲ<>ゲ;" // to KATAKANA LETTER GE
|
||||
"ゴ<>ゴ;" // to KATAKANA LETTER GO
|
||||
"ザ<>ザ;" // to KATAKANA LETTER ZA
|
||||
"ジ<>ジ;" // to KATAKANA LETTER ZI
|
||||
"ズ<>ズ;" // to KATAKANA LETTER ZU
|
||||
"ゼ<>ゼ;" // to KATAKANA LETTER ZE
|
||||
"ゾ<>ゾ;" // to KATAKANA LETTER ZO
|
||||
"ダ<>ダ;" // to KATAKANA LETTER DA
|
||||
"ヂ<>ヂ;" // to KATAKANA LETTER DI
|
||||
"ヅ<>ヅ;" // to KATAKANA LETTER DU
|
||||
"デ<>デ;" // to KATAKANA LETTER DE
|
||||
"ド<>ド;" // to KATAKANA LETTER DO
|
||||
"バ<>バ;" // to KATAKANA LETTER BA
|
||||
"パ<>パ;" // to KATAKANA LETTER PA
|
||||
"ビ<>ビ;" // to KATAKANA LETTER BI
|
||||
"ピ<>ピ;" // to KATAKANA LETTER PI
|
||||
"ブ<>ブ;" // to KATAKANA LETTER BU
|
||||
"プ<>プ;" // to KATAKANA LETTER PU
|
||||
"ベ<>ベ;" // to KATAKANA LETTER BE
|
||||
"ペ<>ペ;" // to KATAKANA LETTER PE
|
||||
"ボ<>ボ;" // to KATAKANA LETTER BO
|
||||
"ポ<>ポ;" // to KATAKANA LETTER PO
|
||||
"ヴ<>ヴ;" // to KATAKANA LETTER VU
|
||||
"ヷ<>ヷ;" // to KATAKANA LETTER VA
|
||||
"ヺ<>ヺ;" // to KATAKANA LETTER VO
|
||||
|
||||
// single character
|
||||
|
||||
"!<>'!';" // from FULLWIDTH EXCLAMATION MARK
|
||||
""<>'\"';" // from FULLWIDTH QUOTATION MARK
|
||||
"#<>'#';" // from FULLWIDTH NUMBER SIGN
|
||||
"$<>'$';" // from FULLWIDTH DOLLAR SIGN
|
||||
"%<>'%';" // from FULLWIDTH PERCENT SIGN
|
||||
"&<>'&';" // from FULLWIDTH AMPERSAND
|
||||
"'<>'';" // from FULLWIDTH APOSTROPHE
|
||||
"(<>'(';" // from FULLWIDTH LEFT PARENTHESIS
|
||||
")<>')';" // from FULLWIDTH RIGHT PARENTHESIS
|
||||
"*<>'*';" // from FULLWIDTH ASTERISK
|
||||
"+<>'+';" // from FULLWIDTH PLUS SIGN
|
||||
",<>',';" // from FULLWIDTH COMMA
|
||||
"-<>'-';" // from FULLWIDTH HYPHEN-MINUS
|
||||
".<>'.';" // from FULLWIDTH FULL STOP
|
||||
"/<>'/';" // from FULLWIDTH SOLIDUS
|
||||
"0<>'0';" // from FULLWIDTH DIGIT ZERO
|
||||
"1<>'1';" // from FULLWIDTH DIGIT ONE
|
||||
"2<>'2';" // from FULLWIDTH DIGIT TWO
|
||||
"3<>'3';" // from FULLWIDTH DIGIT THREE
|
||||
"4<>'4';" // from FULLWIDTH DIGIT FOUR
|
||||
"5<>'5';" // from FULLWIDTH DIGIT FIVE
|
||||
"6<>'6';" // from FULLWIDTH DIGIT SIX
|
||||
"7<>'7';" // from FULLWIDTH DIGIT SEVEN
|
||||
"8<>'8';" // from FULLWIDTH DIGIT EIGHT
|
||||
"9<>'9';" // from FULLWIDTH DIGIT NINE
|
||||
":<>':';" // from FULLWIDTH COLON
|
||||
";<>';';" // from FULLWIDTH SEMICOLON
|
||||
"<<>'<';" // from FULLWIDTH LESS-THAN SIGN
|
||||
"=<>'=';" // from FULLWIDTH EQUALS SIGN
|
||||
"><>'>';" // from FULLWIDTH GREATER-THAN SIGN
|
||||
"?<>'?';" // from FULLWIDTH QUESTION MARK
|
||||
"@<>'@';" // from FULLWIDTH COMMERCIAL AT
|
||||
"A<>A;" // from FULLWIDTH LATIN CAPITAL LETTER A
|
||||
"B<>B;" // from FULLWIDTH LATIN CAPITAL LETTER B
|
||||
"C<>C;" // from FULLWIDTH LATIN CAPITAL LETTER C
|
||||
"D<>D;" // from FULLWIDTH LATIN CAPITAL LETTER D
|
||||
"E<>E;" // from FULLWIDTH LATIN CAPITAL LETTER E
|
||||
"F<>F;" // from FULLWIDTH LATIN CAPITAL LETTER F
|
||||
"G<>G;" // from FULLWIDTH LATIN CAPITAL LETTER G
|
||||
"H<>H;" // from FULLWIDTH LATIN CAPITAL LETTER H
|
||||
"I<>I;" // from FULLWIDTH LATIN CAPITAL LETTER I
|
||||
"J<>J;" // from FULLWIDTH LATIN CAPITAL LETTER J
|
||||
"K<>K;" // from FULLWIDTH LATIN CAPITAL LETTER K
|
||||
"L<>L;" // from FULLWIDTH LATIN CAPITAL LETTER L
|
||||
"M<>M;" // from FULLWIDTH LATIN CAPITAL LETTER M
|
||||
"N<>N;" // from FULLWIDTH LATIN CAPITAL LETTER N
|
||||
"O<>O;" // from FULLWIDTH LATIN CAPITAL LETTER O
|
||||
"P<>P;" // from FULLWIDTH LATIN CAPITAL LETTER P
|
||||
"Q<>Q;" // from FULLWIDTH LATIN CAPITAL LETTER Q
|
||||
"R<>R;" // from FULLWIDTH LATIN CAPITAL LETTER R
|
||||
"S<>S;" // from FULLWIDTH LATIN CAPITAL LETTER S
|
||||
"T<>T;" // from FULLWIDTH LATIN CAPITAL LETTER T
|
||||
"U<>U;" // from FULLWIDTH LATIN CAPITAL LETTER U
|
||||
"V<>V;" // from FULLWIDTH LATIN CAPITAL LETTER V
|
||||
"W<>W;" // from FULLWIDTH LATIN CAPITAL LETTER W
|
||||
"X<>X;" // from FULLWIDTH LATIN CAPITAL LETTER X
|
||||
"Y<>Y;" // from FULLWIDTH LATIN CAPITAL LETTER Y
|
||||
"Z<>Z;" // from FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
"[<>'[';" // from FULLWIDTH LEFT SQUARE BRACKET
|
||||
"\<>'\\';" // from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
|
||||
"]<>']';" // from FULLWIDTH RIGHT SQUARE BRACKET
|
||||
"^<>'^';" // from FULLWIDTH CIRCUMFLEX ACCENT
|
||||
"_<>'_';" // from FULLWIDTH LOW LINE
|
||||
"`<>'`';" // from FULLWIDTH GRAVE ACCENT
|
||||
"a<>a;" // from FULLWIDTH LATIN SMALL LETTER A
|
||||
"b<>b;" // from FULLWIDTH LATIN SMALL LETTER B
|
||||
"c<>c;" // from FULLWIDTH LATIN SMALL LETTER C
|
||||
"d<>d;" // from FULLWIDTH LATIN SMALL LETTER D
|
||||
"e<>e;" // from FULLWIDTH LATIN SMALL LETTER E
|
||||
"f<>f;" // from FULLWIDTH LATIN SMALL LETTER F
|
||||
"g<>g;" // from FULLWIDTH LATIN SMALL LETTER G
|
||||
"h<>h;" // from FULLWIDTH LATIN SMALL LETTER H
|
||||
"i<>i;" // from FULLWIDTH LATIN SMALL LETTER I
|
||||
"j<>j;" // from FULLWIDTH LATIN SMALL LETTER J
|
||||
"k<>k;" // from FULLWIDTH LATIN SMALL LETTER K
|
||||
"l<>l;" // from FULLWIDTH LATIN SMALL LETTER L
|
||||
"m<>m;" // from FULLWIDTH LATIN SMALL LETTER M
|
||||
"n<>n;" // from FULLWIDTH LATIN SMALL LETTER N
|
||||
"o<>o;" // from FULLWIDTH LATIN SMALL LETTER O
|
||||
"p<>p;" // from FULLWIDTH LATIN SMALL LETTER P
|
||||
"q<>q;" // from FULLWIDTH LATIN SMALL LETTER Q
|
||||
"r<>r;" // from FULLWIDTH LATIN SMALL LETTER R
|
||||
"s<>s;" // from FULLWIDTH LATIN SMALL LETTER S
|
||||
"t<>t;" // from FULLWIDTH LATIN SMALL LETTER T
|
||||
"u<>u;" // from FULLWIDTH LATIN SMALL LETTER U
|
||||
"v<>v;" // from FULLWIDTH LATIN SMALL LETTER V
|
||||
"w<>w;" // from FULLWIDTH LATIN SMALL LETTER W
|
||||
"x<>x;" // from FULLWIDTH LATIN SMALL LETTER X
|
||||
"y<>y;" // from FULLWIDTH LATIN SMALL LETTER Y
|
||||
"z<>z;" // from FULLWIDTH LATIN SMALL LETTER Z
|
||||
"{<>'{';" // from FULLWIDTH LEFT CURLY BRACKET
|
||||
"|<>'|';" // from FULLWIDTH VERTICAL LINE
|
||||
"}<>'}';" // from FULLWIDTH RIGHT CURLY BRACKET
|
||||
"~<>'~';" // from FULLWIDTH TILDE
|
||||
"。<>。;" // to HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
"「<>「;" // to HALFWIDTH LEFT CORNER BRACKET
|
||||
"」<>」;" // to HALFWIDTH RIGHT CORNER BRACKET
|
||||
"、<>、;" // to HALFWIDTH IDEOGRAPHIC COMMA
|
||||
"・<>・;" // to HALFWIDTH KATAKANA MIDDLE DOT
|
||||
"ヲ<>ヲ;" // to HALFWIDTH KATAKANA LETTER WO
|
||||
"ァ<>ァ;" // to HALFWIDTH KATAKANA LETTER SMALL A
|
||||
"ィ<>ィ;" // to HALFWIDTH KATAKANA LETTER SMALL I
|
||||
"ゥ<>ゥ;" // to HALFWIDTH KATAKANA LETTER SMALL U
|
||||
"ェ<>ェ;" // to HALFWIDTH KATAKANA LETTER SMALL E
|
||||
"ォ<>ォ;" // to HALFWIDTH KATAKANA LETTER SMALL O
|
||||
"ャ<>ャ;" // to HALFWIDTH KATAKANA LETTER SMALL YA
|
||||
"ュ<>ュ;" // to HALFWIDTH KATAKANA LETTER SMALL YU
|
||||
"ョ<>ョ;" // to HALFWIDTH KATAKANA LETTER SMALL YO
|
||||
"ッ<>ッ;" // to HALFWIDTH KATAKANA LETTER SMALL TU
|
||||
"ー<>ー;" // to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
"ア<>ア;" // to HALFWIDTH KATAKANA LETTER A
|
||||
"イ<>イ;" // to HALFWIDTH KATAKANA LETTER I
|
||||
"ウ<>ウ;" // to HALFWIDTH KATAKANA LETTER U
|
||||
"エ<>エ;" // to HALFWIDTH KATAKANA LETTER E
|
||||
"オ<>オ;" // to HALFWIDTH KATAKANA LETTER O
|
||||
"カ<>カ;" // to HALFWIDTH KATAKANA LETTER KA
|
||||
"キ<>キ;" // to HALFWIDTH KATAKANA LETTER KI
|
||||
"ク<>ク;" // to HALFWIDTH KATAKANA LETTER KU
|
||||
"ケ<>ケ;" // to HALFWIDTH KATAKANA LETTER KE
|
||||
"コ<>コ;" // to HALFWIDTH KATAKANA LETTER KO
|
||||
"サ<>サ;" // to HALFWIDTH KATAKANA LETTER SA
|
||||
"シ<>シ;" // to HALFWIDTH KATAKANA LETTER SI
|
||||
"ス<>ス;" // to HALFWIDTH KATAKANA LETTER SU
|
||||
"セ<>セ;" // to HALFWIDTH KATAKANA LETTER SE
|
||||
"ソ<>ソ;" // to HALFWIDTH KATAKANA LETTER SO
|
||||
"タ<>タ;" // to HALFWIDTH KATAKANA LETTER TA
|
||||
"チ<>チ;" // to HALFWIDTH KATAKANA LETTER TI
|
||||
"ツ<>ツ;" // to HALFWIDTH KATAKANA LETTER TU
|
||||
"テ<>テ;" // to HALFWIDTH KATAKANA LETTER TE
|
||||
"ト<>ト;" // to HALFWIDTH KATAKANA LETTER TO
|
||||
"ナ<>ナ;" // to HALFWIDTH KATAKANA LETTER NA
|
||||
"ニ<>ニ;" // to HALFWIDTH KATAKANA LETTER NI
|
||||
"ヌ<>ヌ;" // to HALFWIDTH KATAKANA LETTER NU
|
||||
"ネ<>ネ;" // to HALFWIDTH KATAKANA LETTER NE
|
||||
"ノ<>ノ;" // to HALFWIDTH KATAKANA LETTER NO
|
||||
"ハ<>ハ;" // to HALFWIDTH KATAKANA LETTER HA
|
||||
"ヒ<>ヒ;" // to HALFWIDTH KATAKANA LETTER HI
|
||||
"フ<>フ;" // to HALFWIDTH KATAKANA LETTER HU
|
||||
"ヘ<>ヘ;" // to HALFWIDTH KATAKANA LETTER HE
|
||||
"ホ<>ホ;" // to HALFWIDTH KATAKANA LETTER HO
|
||||
"マ<>マ;" // to HALFWIDTH KATAKANA LETTER MA
|
||||
"ミ<>ミ;" // to HALFWIDTH KATAKANA LETTER MI
|
||||
"ム<>ム;" // to HALFWIDTH KATAKANA LETTER MU
|
||||
"メ<>メ;" // to HALFWIDTH KATAKANA LETTER ME
|
||||
"モ<>モ;" // to HALFWIDTH KATAKANA LETTER MO
|
||||
"ヤ<>ヤ;" // to HALFWIDTH KATAKANA LETTER YA
|
||||
"ユ<>ユ;" // to HALFWIDTH KATAKANA LETTER YU
|
||||
"ヨ<>ヨ;" // to HALFWIDTH KATAKANA LETTER YO
|
||||
"ラ<>ラ;" // to HALFWIDTH KATAKANA LETTER RA
|
||||
"リ<>リ;" // to HALFWIDTH KATAKANA LETTER RI
|
||||
"ル<>ル;" // to HALFWIDTH KATAKANA LETTER RU
|
||||
"レ<>レ;" // to HALFWIDTH KATAKANA LETTER RE
|
||||
"ロ<>ロ;" // to HALFWIDTH KATAKANA LETTER RO
|
||||
"ワ<>ワ;" // to HALFWIDTH KATAKANA LETTER WA
|
||||
"ン<>ン;" // to HALFWIDTH KATAKANA LETTER N
|
||||
"゙<>゙;" // to HALFWIDTH KATAKANA VOICED SOUND MARK
|
||||
"゚<>゚;" // to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
"ᅠ<>ᅠ;" // to HALFWIDTH HANGUL FILLER
|
||||
"ᄀ<>ᄀ;" // to HALFWIDTH HANGUL LETTER KIYEOK
|
||||
"ᄁ<>ᄁ;" // to HALFWIDTH HANGUL LETTER SSANGKIYEOK
|
||||
"ᆪ<>ᆪ;" // to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
|
||||
"ᄂ<>ᄂ;" // to HALFWIDTH HANGUL LETTER NIEUN
|
||||
"ᆬ<>ᆬ;" // to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
|
||||
"ᆭ<>ᆭ;" // to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
|
||||
"ᄃ<>ᄃ;" // to HALFWIDTH HANGUL LETTER TIKEUT
|
||||
"ᄄ<>ᄄ;" // to HALFWIDTH HANGUL LETTER SSANGTIKEUT
|
||||
"ᄅ<>ᄅ;" // to HALFWIDTH HANGUL LETTER RIEUL
|
||||
"ᆰ<>ᆰ;" // to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
|
||||
"ᆱ<>ᆱ;" // to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
|
||||
"ᆲ<>ᆲ;" // to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
|
||||
"ᆳ<>ᆳ;" // to HALFWIDTH HANGUL LETTER RIEUL-SIOS
|
||||
"ᆴ<>ᆴ;" // to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
|
||||
"ᆵ<>ᆵ;" // to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
|
||||
"ᄚ<>ᄚ;" // to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
|
||||
"ᄆ<>ᄆ;" // to HALFWIDTH HANGUL LETTER MIEUM
|
||||
"ᄇ<>ᄇ;" // to HALFWIDTH HANGUL LETTER PIEUP
|
||||
"ᄈ<>ᄈ;" // to HALFWIDTH HANGUL LETTER SSANGPIEUP
|
||||
"ᄡ<>ᄡ;" // to HALFWIDTH HANGUL LETTER PIEUP-SIOS
|
||||
"ᄉ<>ᄉ;" // to HALFWIDTH HANGUL LETTER SIOS
|
||||
"ᄊ<>ᄊ;" // to HALFWIDTH HANGUL LETTER SSANGSIOS
|
||||
"ᄋ<>ᄋ;" // to HALFWIDTH HANGUL LETTER IEUNG
|
||||
"ᄌ<>ᄌ;" // to HALFWIDTH HANGUL LETTER CIEUC
|
||||
"ᄍ<>ᄍ;" // to HALFWIDTH HANGUL LETTER SSANGCIEUC
|
||||
"ᄎ<>ᄎ;" // to HALFWIDTH HANGUL LETTER CHIEUCH
|
||||
"ᄏ<>ᄏ;" // to HALFWIDTH HANGUL LETTER KHIEUKH
|
||||
"ᄐ<>ᄐ;" // to HALFWIDTH HANGUL LETTER THIEUTH
|
||||
"ᄑ<>ᄑ;" // to HALFWIDTH HANGUL LETTER PHIEUPH
|
||||
"ᄒ<>ᄒ;" // to HALFWIDTH HANGUL LETTER HIEUH
|
||||
"ᅡ<>ᅡ;" // to HALFWIDTH HANGUL LETTER A
|
||||
"ᅢ<>ᅢ;" // to HALFWIDTH HANGUL LETTER AE
|
||||
"ᅣ<>ᅣ;" // to HALFWIDTH HANGUL LETTER YA
|
||||
"ᅤ<>ᅤ;" // to HALFWIDTH HANGUL LETTER YAE
|
||||
"ᅥ<>ᅥ;" // to HALFWIDTH HANGUL LETTER EO
|
||||
"ᅦ<>ᅦ;" // to HALFWIDTH HANGUL LETTER E
|
||||
"ᅧ<>ᅧ;" // to HALFWIDTH HANGUL LETTER YEO
|
||||
"ᅨ<>ᅨ;" // to HALFWIDTH HANGUL LETTER YE
|
||||
"ᅩ<>ᅩ;" // to HALFWIDTH HANGUL LETTER O
|
||||
"ᅪ<>ᅪ;" // to HALFWIDTH HANGUL LETTER WA
|
||||
"ᅫ<>ᅫ;" // to HALFWIDTH HANGUL LETTER WAE
|
||||
"ᅬ<>ᅬ;" // to HALFWIDTH HANGUL LETTER OE
|
||||
"ᅭ<>ᅭ;" // to HALFWIDTH HANGUL LETTER YO
|
||||
"ᅮ<>ᅮ;" // to HALFWIDTH HANGUL LETTER U
|
||||
"ᅯ<>ᅯ;" // to HALFWIDTH HANGUL LETTER WEO
|
||||
"ᅰ<>ᅰ;" // to HALFWIDTH HANGUL LETTER WE
|
||||
"ᅱ<>ᅱ;" // to HALFWIDTH HANGUL LETTER WI
|
||||
"ᅲ<>ᅲ;" // to HALFWIDTH HANGUL LETTER YU
|
||||
"ᅳ<>ᅳ;" // to HALFWIDTH HANGUL LETTER EU
|
||||
"ᅴ<>ᅴ;" // to HALFWIDTH HANGUL LETTER YI
|
||||
"ᅵ<>ᅵ;" // to HALFWIDTH HANGUL LETTER I
|
||||
"¢<>'¢';" // from FULLWIDTH CENT SIGN
|
||||
"£<>'£';" // from FULLWIDTH POUND SIGN
|
||||
"¬<>'¬';" // from FULLWIDTH NOT SIGN
|
||||
" ̄<>' '̄;" // from FULLWIDTH MACRON
|
||||
"' '<>' ';" // ideographic space (place this after MACRON)
|
||||
"¦<>'¦';" // from FULLWIDTH BROKEN BAR
|
||||
"¥<>'¥';" // from FULLWIDTH YEN SIGN
|
||||
"₩<>₩;" // from FULLWIDTH WON SIGN
|
||||
"│<>│;" // to HALFWIDTH FORMS LIGHT VERTICAL
|
||||
"←<>←;" // to HALFWIDTH LEFTWARDS ARROW
|
||||
"↑<>↑;" // to HALFWIDTH UPWARDS ARROW
|
||||
"→<>→;" // to HALFWIDTH RIGHTWARDS ARROW
|
||||
"↓<>↓;" // to HALFWIDTH DOWNWARDS ARROW
|
||||
"■<>■;" // to HALFWIDTH BLACK SQUARE
|
||||
"○<>○;" // to HALFWIDTH WHITE CIRCLE
|
||||
|
||||
// eof
|
||||
}
|
||||
}
|
283
icu4c/data/translit_Greek_Latin.txt
Normal file
283
icu4c/data/translit_Greek_Latin.txt
Normal file
@ -0,0 +1,283 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Greek_Latin.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Greek_Latin
|
||||
|
||||
translit_Greek_Latin {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// $Source: /xsrl/Nsvn/icu/icu/data/Attic/translit_Greek_Latin.txt,v $
|
||||
// $Date: 2001/10/26 05:41:16 $
|
||||
// $Revision: 1.1 $
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Rules are predicated on running NFD first, and NFC afterwards
|
||||
"::NFD (NFC) ;"
|
||||
|
||||
// TEST CASES
|
||||
|
||||
// Ὀλίγοι ἔμφρονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
|
||||
// ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
|
||||
// ᾳ ῃ ῳ ὃ ὄ
|
||||
// ὠς ὡς ὢς ὣς
|
||||
// Ὠς Ὡς Ὢς Ὣς
|
||||
// ὨΣ ὩΣ ὪΣ ὫΣ
|
||||
// Ạ, ạ, Ẹ, ẹ, Ọ, ọ
|
||||
|
||||
// Useful variables
|
||||
|
||||
"$lower = [:Ll:] ;"
|
||||
"$upper = [:Lu:] ;"
|
||||
"$accent = [:M:] ;"
|
||||
|
||||
"$macron = \u0304 ;"
|
||||
"$ddot = \u0308 ;"
|
||||
|
||||
"$lcgvowel = [αεηιουω] ;"
|
||||
"$ucgvowel = [ΑΕΗΙΟΥΩ] ;"
|
||||
"$gvowel = [$lcgvowel $ucgvowel] ;"
|
||||
"$lcgvowelC = [$lcgvowel $accent] ;"
|
||||
|
||||
"$vowel = [ AEIOUaeiou $gvowel] ;"
|
||||
|
||||
"$beforeLower = $accent * $lower ;"
|
||||
|
||||
"$gammaLike = [ΓΚΞΧγκξχ] ;"
|
||||
"$smooth = ̓ ;"
|
||||
"$rough = ̔ ;"
|
||||
"$iotasub = ͅ ;"
|
||||
|
||||
// Fix punctuation
|
||||
|
||||
"\; <> \? ;"
|
||||
"· <> \: ;"
|
||||
|
||||
// CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
|
||||
|
||||
"\u0342 <> \u0302 ;"
|
||||
|
||||
// IOTA: convert iota subscript to iota
|
||||
// first make previous alpha long!
|
||||
|
||||
"Α } $accent * $iotasub > A $macron ;"
|
||||
"α } $accent * $iotasub > a $macron ;"
|
||||
|
||||
// now convert to uppercase if after uppercase, otherwise to lowercase
|
||||
|
||||
"$upper $accent * { $iotasub > I ;"
|
||||
"$iotasub > i ;"
|
||||
|
||||
"| $1 $iotasub < ([:L:] $macron [:M:]*) i ;"
|
||||
|
||||
// BREATHING
|
||||
|
||||
// Convert rough breathing to h, and move before letters.
|
||||
|
||||
// Make A ` x = > H a x
|
||||
|
||||
"Α $rough } $beforeLower > H | α ;"
|
||||
"Ε $rough } $beforeLower > H | ε;"
|
||||
"Η $rough } $beforeLower > H | η ;"
|
||||
"Ι ($ddot?) $rough } $beforeLower > H | ι $1;"
|
||||
"Ο $rough } $beforeLower > H | ο ;"
|
||||
"Υ $rough } $beforeLower > H | υ ;"
|
||||
"Ω ($ddot?) $rough } $beforeLower > H | ω $1;"
|
||||
|
||||
// Make A x ` = > H a x
|
||||
|
||||
"Α ($lower) $rough > H | α $1 ;"
|
||||
"Ε ($lower) $rough > H | ε $1 ;"
|
||||
"Η ($lower) $rough > H | η $1 ;"
|
||||
"Ι ($lower $ddot?) $rough > H | ι $1 ;"
|
||||
"Ο ($lower) $rough > H | ο $1 ;"
|
||||
"Υ ($lower) $rough > H | υ $1 ;"
|
||||
"Ω ($lower $ddot?) $rough > H | ω $1 ;"
|
||||
|
||||
//Otherwise, make x ` into h x and X ` into H X
|
||||
|
||||
"($lcgvowel + $ddot? ) $rough > h | $1 ;"
|
||||
"($gvowel + $ddot? ) $rough > H | $1 ;"
|
||||
|
||||
// Go backwards with H
|
||||
|
||||
"| $1 $rough < h ([aeiouyAEIOUY] $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
|
||||
"| $1 $rough < h ([aeiouyAEIOUY] $macron? $ddot?) ;"
|
||||
|
||||
"| $1 $rough < H ([AEIOUY] $macron? $ddot?[aeiouyAEIOUY] $macron?) ;"
|
||||
"| $1 $rough < H ([AEIOUY] $macron? $ddot?) ;"
|
||||
|
||||
// titlecase, have to fix individually
|
||||
"| $1 $rough < H (a $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
|
||||
"| $1 $rough < H (e $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
|
||||
"| $1 $rough < H (i $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
|
||||
"| $1 $rough < H (o $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
|
||||
"| $1 $rough < H (u $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
|
||||
"| $1 $rough < H (y $macron? [aeiouyAEIOUY] $macron?) ;"
|
||||
"| $1 $rough < H (a $macron? $ddot? ) ;"
|
||||
"| $1 $rough < H (e $macron? $ddot? ) ;"
|
||||
"| $1 $rough < H (i $macron? $ddot? ) ;"
|
||||
"| $1 $rough < H (o $macron? $ddot? ) ;"
|
||||
"| $1 $rough < H (u $macron? $ddot? ) ;"
|
||||
"| $1 $rough < H (y $macron? $ddot? ) ;"
|
||||
|
||||
// Now do smooth
|
||||
|
||||
//delete smooth breathing for Latin
|
||||
"$smooth > ;"
|
||||
|
||||
// insert in Greek
|
||||
"| $1 $smooth < [:^L:] { ([aeiouyAEIOUY] $macron? [aeiouyAEIOUY] $macron?) } [^[$smooth]] ;"
|
||||
"| $1 $smooth < [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] ;"
|
||||
|
||||
// TODO: preserve smooth/rough breathing if not
|
||||
// on initial vowel sequence
|
||||
|
||||
// need to have these up here so the rules don't mask
|
||||
|
||||
"η <> e $macron ;"
|
||||
"Η <> E $macron ;"
|
||||
|
||||
"φ <> ph ;"
|
||||
"Ψ } $beforeLower <> Ps ;"
|
||||
"Ψ <> PS ;"
|
||||
|
||||
"Φ } $beforeLower <> Ph ;"
|
||||
"Φ <> PH ;"
|
||||
"ψ <> ps ;"
|
||||
|
||||
"ω <> o $macron ;"
|
||||
"Ω <> O $macron;"
|
||||
|
||||
// NORMAL
|
||||
|
||||
"α <> a ;"
|
||||
"Α <> A ;"
|
||||
|
||||
"β <> b ;"
|
||||
"Β <> B ;"
|
||||
|
||||
"γ } $gammaLike <> n } [gkc] ;"
|
||||
"γ <> g ;"
|
||||
"Γ } $gammaLike <> N } [gkc] ;"
|
||||
"Γ <> G ;"
|
||||
|
||||
"δ <> d ;"
|
||||
"Δ <> D ;"
|
||||
|
||||
"ε <> e ;"
|
||||
"Ε <> E ;"
|
||||
|
||||
"ζ <> z ;"
|
||||
"Ζ <> Z ;"
|
||||
|
||||
"θ <> th ;"
|
||||
"Θ } $beforeLower <> Th ;"
|
||||
"Θ <> TH ;"
|
||||
|
||||
"ι <> i ;"
|
||||
"Ι <> I ;"
|
||||
|
||||
"κ <> k ;"
|
||||
"Κ <> K ;"
|
||||
|
||||
"λ <> l ;"
|
||||
"Λ <> L ;"
|
||||
|
||||
"μ <> m ;"
|
||||
"Μ <> M ;"
|
||||
|
||||
"ν } $gammaLike > n\' ;"
|
||||
"ν <> n ;"
|
||||
"Ν } $gammaLike <> N\' ;"
|
||||
"Ν <> N ;"
|
||||
|
||||
"ξ <> x ;"
|
||||
"Ξ <> X ;"
|
||||
|
||||
"ο <> o ;"
|
||||
"Ο <> O ;"
|
||||
|
||||
"π <> p ;"
|
||||
"Π <> P ;"
|
||||
|
||||
"ρ $rough <> rh;"
|
||||
"Ρ $rough } $beforeLower <> Rh ;"
|
||||
"Ρ $rough <> RH ;"
|
||||
"ρ <> r ;"
|
||||
"Ρ <> R ;"
|
||||
|
||||
"[Pp] {ς > \'s ;"
|
||||
"[Pp] {σ > \'s ;"
|
||||
"σ < [:^L:] [:M:]* { s } [:^L:] ;"
|
||||
"ς <> s } [:^L:] ;"
|
||||
"σ <> s ;"
|
||||
"[Pp] { Σ <> \'S ;"
|
||||
"Σ <> S ;"
|
||||
|
||||
"τ <> t ;"
|
||||
"Τ <> T ;"
|
||||
|
||||
"$vowel {υ } <> u ;"
|
||||
"υ <> y ;"
|
||||
"$vowel { Υ <> U ;"
|
||||
"Υ <> Y ;"
|
||||
|
||||
"χ <> ch ;"
|
||||
"Χ } $beforeLower <> Ch ;"
|
||||
"Χ <> CH ;"
|
||||
|
||||
// Completeness for ASCII
|
||||
|
||||
"$ignore = [[:Mark:]''] * ;"
|
||||
|
||||
"| k < c ;"
|
||||
"| ph < f ;"
|
||||
"| i < j ;"
|
||||
"| k < q ;"
|
||||
"| u < v ;"
|
||||
"| u < w ;"
|
||||
"| K < C ;"
|
||||
"| PH < F } $ignore [:UppercaseLetter:] ;"
|
||||
"| PH < [:UppercaseLetter:] $ignore { F ;"
|
||||
"| PH < F ;"
|
||||
"| I < J ;"
|
||||
"| K < Q ;"
|
||||
"| U < V ;"
|
||||
"| U < W ;"
|
||||
|
||||
"$rough } $ignore [:UppercaseLetter:] > H ;"
|
||||
"$ignore [:UppercaseLetter:] { $rough > H ;"
|
||||
"$rough < H ;"
|
||||
"$rough <> h ;"
|
||||
|
||||
// Completeness for Greek
|
||||
|
||||
"ϐ > | β ;"
|
||||
"ϑ > | θ ;"
|
||||
"ϒ > | Υ ;"
|
||||
"ϕ > | φ ;"
|
||||
"ϖ > | π ;"
|
||||
|
||||
"ϰ > | κ ;"
|
||||
"ϱ > | ρ ;"
|
||||
"ϲ > | σ ;"
|
||||
"ϳ > j ;"
|
||||
"ϴ > | Θ ;"
|
||||
"ϵ > | ε ;"
|
||||
|
||||
"ͺ > i;"
|
||||
|
||||
"::NFC (NFD) ;"
|
||||
}
|
||||
}
|
115
icu4c/data/translit_Gujarati_InterIndic.txt
Normal file
115
icu4c/data/translit_Gujarati_InterIndic.txt
Normal file
@ -0,0 +1,115 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Gujarati_InterIndic.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Gujarati_InterIndic
|
||||
|
||||
translit_Gujarati_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Gujarati_InterIndic.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:54 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Gujarati_InterIndic
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:41:58 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Gujarati-InterIndic
|
||||
//:: NFD (NFC) ;
|
||||
"\u0a81>\ue001;" // SIGN CANDRABINDU
|
||||
"\u0a82>\ue002;" // SIGN ANUSVARA
|
||||
"\u0a83>\ue003;" // SIGN VISARGA
|
||||
"\u0a85>\ue005;" // LETTER A
|
||||
"\u0a86>\ue006;" // LETTER AA
|
||||
"\u0a87>\ue007;" // LETTER I
|
||||
"\u0a88>\ue008;" // LETTER II
|
||||
"\u0a89>\ue009;" // LETTER U
|
||||
"\u0a8a>\ue00a;" // LETTER UU
|
||||
"\u0a8b>\ue00b;" // LETTER VOCALIC R
|
||||
// \u0a8d>; # UNMAPPED Gujarati-InterIndic: VOWEL CANDRA E
|
||||
"\u0a8f>\ue00f;" // LETTER E
|
||||
"\u0a90>\ue010;" // LETTER AI
|
||||
// \u0a91>; # UNMAPPED Gujarati-InterIndic: VOWEL CANDRA O
|
||||
"\u0a93>\ue013;" // LETTER O
|
||||
"\u0a94>\ue014;" // LETTER AU
|
||||
"\u0a95>\ue015;" // LETTER KA
|
||||
"\u0a96>\ue016;" // LETTER KHA
|
||||
"\u0a97>\ue017;" // LETTER GA
|
||||
"\u0a98>\ue018;" // LETTER GHA
|
||||
"\u0a99>\ue019;" // LETTER NGA
|
||||
"\u0a9a>\ue01a;" // LETTER CA
|
||||
"\u0a9b>\ue01b;" // LETTER CHA
|
||||
"\u0a9c>\ue01c;" // LETTER JA
|
||||
"\u0a9d>\ue01d;" // LETTER JHA
|
||||
"\u0a9e>\ue01e;" // LETTER NYA
|
||||
"\u0a9f>\ue01f;" // LETTER TTA
|
||||
"\u0aa0>\ue020;" // LETTER TTHA
|
||||
"\u0aa1>\ue021;" // LETTER DDA
|
||||
"\u0aa2>\ue022;" // LETTER DDHA
|
||||
"\u0aa3>\ue023;" // LETTER NNA
|
||||
"\u0aa4>\ue024;" // LETTER TA
|
||||
"\u0aa5>\ue025;" // LETTER THA
|
||||
"\u0aa6>\ue026;" // LETTER DA
|
||||
"\u0aa7>\ue027;" // LETTER DHA
|
||||
"\u0aa8>\ue028;" // LETTER NA
|
||||
"\u0aaa>\ue02a;" // LETTER PA
|
||||
"\u0aab>\ue02b;" // LETTER PHA
|
||||
"\u0aac>\ue02c;" // LETTER BA
|
||||
"\u0aad>\ue02d;" // LETTER BHA
|
||||
"\u0aae>\ue02e;" // LETTER MA
|
||||
"\u0aaf>\ue02f;" // LETTER YA
|
||||
"\u0ab0>\ue030;" // LETTER RA
|
||||
"\u0ab2>\ue032;" // LETTER LA
|
||||
"\u0ab3>\ue033;" // LETTER LLA
|
||||
"\u0ab5>\ue035;" // LETTER VA
|
||||
"\u0ab6>\ue036;" // LETTER SHA
|
||||
"\u0ab7>\ue037;" // LETTER SSA
|
||||
"\u0ab8>\ue038;" // LETTER SA
|
||||
"\u0ab9>\ue039;" // LETTER HA
|
||||
"\u0abc>\ue03c;" // SIGN NUKTA
|
||||
"\u0abd>\ue03d;" // SIGN AVAGRAHA
|
||||
"\u0abe>\ue03e;" // VOWEL SIGN AA
|
||||
"\u0abf>\ue03f;" // VOWEL SIGN I
|
||||
"\u0ac0>\ue040;" // VOWEL SIGN II
|
||||
"\u0ac1>\ue041;" // VOWEL SIGN U
|
||||
"\u0ac2>\ue042;" // VOWEL SIGN UU
|
||||
"\u0ac3>\ue043;" // VOWEL SIGN VOCALIC R
|
||||
"\u0ac4>\ue044;" // VOWEL SIGN VOCALIC RR
|
||||
"\u0ac5>\ue045;" // VOWEL SIGN CANDRA E
|
||||
"\u0ac7>\ue047;" // VOWEL SIGN E
|
||||
"\u0ac8>\ue048;" // VOWEL SIGN AI
|
||||
"\u0ac9>\ue049;" // VOWEL SIGN CANDRA O
|
||||
"\u0acb>\ue04b;" // VOWEL SIGN O
|
||||
"\u0acc>\ue04c;" // VOWEL SIGN AU
|
||||
"\u0acd>\ue04d;" // SIGN VIRAMA
|
||||
"\u0ad0>\ue050;" // OM
|
||||
"\u0ae0>\ue060;" // LETTER VOCALIC RR
|
||||
"\u0ae6>\ue066;" // DIGIT ZERO
|
||||
"\u0ae7>\ue067;" // DIGIT ONE
|
||||
"\u0ae8>\ue068;" // DIGIT TWO
|
||||
"\u0ae9>\ue069;" // DIGIT THREE
|
||||
"\u0aea>\ue06a;" // DIGIT FOUR
|
||||
"\u0aeb>\ue06b;" // DIGIT FIVE
|
||||
"\u0aec>\ue06c;" // DIGIT SIX
|
||||
"\u0aed>\ue06d;" // DIGIT SEVEN
|
||||
"\u0aee>\ue06e;" // DIGIT EIGHT
|
||||
"\u0aef>\ue06f;" // DIGIT NINE
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
112
icu4c/data/translit_Gurmukhi_InterIndic.txt
Normal file
112
icu4c/data/translit_Gurmukhi_InterIndic.txt
Normal file
@ -0,0 +1,112 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Gurmukhi_InterIndic.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Gurmukhi_InterIndic
|
||||
|
||||
translit_Gurmukhi_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Gurmukhi_InterIndic.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:54 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Gurmukhi_InterIndic
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:41:58 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Gurmukhi-InterIndic
|
||||
//:: NFD (NFC) ;
|
||||
"\u0a02>\ue001;" // REMAP (indicExceptions.txt): \u0a01>\u0a02 = SIGN CANDRABINDU>SIGN BINDI
|
||||
"\u0a05>\ue005;" // LETTER A
|
||||
"\u0a06>\ue006;" // LETTER AA
|
||||
"\u0a07>\ue007;" // LETTER I
|
||||
"\u0a08>\ue008;" // LETTER II
|
||||
"\u0a09>\ue009;" // LETTER U
|
||||
"\u0a0a>\ue00a;" // LETTER UU
|
||||
"\u0a0f>\ue00f;" // LETTER EE
|
||||
"\u0a10>\ue010;" // LETTER AI
|
||||
"\u0a13>\ue013;" // LETTER OO
|
||||
"\u0a14>\ue014;" // LETTER AU
|
||||
"\u0a15>\ue015;" // LETTER KA
|
||||
"\u0a16>\ue016;" // LETTER KHA
|
||||
"\u0a17>\ue017;" // LETTER GA
|
||||
"\u0a18>\ue018;" // LETTER GHA
|
||||
"\u0a19>\ue019;" // LETTER NGA
|
||||
"\u0a1a>\ue01a;" // LETTER CA
|
||||
"\u0a1b>\ue01b;" // LETTER CHA
|
||||
"\u0a1c>\ue01c;" // LETTER JA
|
||||
"\u0a1d>\ue01d;" // LETTER JHA
|
||||
"\u0a1e>\ue01e;" // LETTER NYA
|
||||
"\u0a1f>\ue01f;" // LETTER TTA
|
||||
"\u0a20>\ue020;" // LETTER TTHA
|
||||
"\u0a21>\ue021;" // LETTER DDA
|
||||
"\u0a22>\ue022;" // LETTER DDHA
|
||||
"\u0a23>\ue023;" // LETTER NNA
|
||||
"\u0a24>\ue024;" // LETTER TA
|
||||
"\u0a25>\ue025;" // LETTER THA
|
||||
"\u0a26>\ue026;" // LETTER DA
|
||||
"\u0a27>\ue027;" // LETTER DHA
|
||||
"\u0a28>\ue028;" // LETTER NA
|
||||
"\u0a2a>\ue02a;" // LETTER PA
|
||||
"\u0a2b>\ue02b;" // LETTER PHA
|
||||
"\u0a2c>\ue02c;" // LETTER BA
|
||||
"\u0a2d>\ue02d;" // LETTER BHA
|
||||
"\u0a2e>\ue02e;" // LETTER MA
|
||||
"\u0a2f>\ue02f;" // LETTER YA
|
||||
"\u0a30>\ue030;" // LETTER RA
|
||||
"\u0a32>\ue032;" // LETTER LA
|
||||
"\u0a33>\ue033;" // LETTER LLA
|
||||
"\u0a35>\ue035;" // LETTER VA
|
||||
"\u0a36>\ue036;" // LETTER SHA
|
||||
"\u0a38>\ue038;" // LETTER SA
|
||||
"\u0a39>\ue039;" // LETTER HA
|
||||
"\u0a3c>\ue03c;" // SIGN NUKTA
|
||||
"\u0a3e>\ue03e;" // VOWEL SIGN AA
|
||||
"\u0a3f>\ue03f;" // VOWEL SIGN I
|
||||
"\u0a40>\ue040;" // VOWEL SIGN II
|
||||
"\u0a41>\ue041;" // VOWEL SIGN U
|
||||
"\u0a42>\ue042;" // VOWEL SIGN UU
|
||||
"\u0a47>\ue047;" // VOWEL SIGN EE
|
||||
"\u0a48>\ue048;" // VOWEL SIGN AI
|
||||
"\u0a4b>\ue04b;" // VOWEL SIGN OO
|
||||
"\u0a4c>\ue04c;" // VOWEL SIGN AU
|
||||
"\u0a4d>\ue04d;" // SIGN VIRAMA
|
||||
"\u0a59>\ue059;" // LETTER KHHA
|
||||
"\u0a5a>\ue05a;" // LETTER GHHA
|
||||
"\u0a5b>\ue05b;" // LETTER ZA
|
||||
"\u0a5c>\ue05c;" // LETTER RRA
|
||||
"\u0a5e>\ue05e;" // LETTER FA
|
||||
"\u0a66>\ue066;" // DIGIT ZERO
|
||||
"\u0a67>\ue067;" // DIGIT ONE
|
||||
"\u0a68>\ue068;" // DIGIT TWO
|
||||
"\u0a69>\ue069;" // DIGIT THREE
|
||||
"\u0a6a>\ue06a;" // DIGIT FOUR
|
||||
"\u0a6b>\ue06b;" // DIGIT FIVE
|
||||
"\u0a6c>\ue06c;" // DIGIT SIX
|
||||
"\u0a6d>\ue06d;" // DIGIT SEVEN
|
||||
"\u0a6e>\ue06e;" // DIGIT EIGHT
|
||||
"\u0a6f>\ue06f;" // DIGIT NINE
|
||||
// \u0a70>; # UNMAPPED Gurmukhi-InterIndic: TIPPI
|
||||
// \u0a71>; # UNMAPPED Gurmukhi-InterIndic: ADDAK
|
||||
// \u0a72>; # UNMAPPED Gurmukhi-InterIndic: IRI
|
||||
// \u0a73>; # UNMAPPED Gurmukhi-InterIndic: URA
|
||||
// \u0a74>; # UNMAPPED Gurmukhi-InterIndic: EK ONKAR
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
217
icu4c/data/translit_Hiragana_Katakana.txt
Normal file
217
icu4c/data/translit_Hiragana_Katakana.txt
Normal file
@ -0,0 +1,217 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Hiragana_Katakana.utf8.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Hiragana_Katakana
|
||||
|
||||
translit_Hiragana_Katakana {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Hiragana-Katakana
|
||||
|
||||
// This is largely a one-to-one mapping, but it has a
|
||||
// few kinks:
|
||||
|
||||
// 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
|
||||
// Hiragana equivalents. We use Hiragana wa/wi/we/wo
|
||||
// (308F-3092) with a voicing mark (3099), which is
|
||||
// semantically equivalent. However, this is a non-
|
||||
// roundtripping transformation.
|
||||
|
||||
// 2. The Katakana small ka/ke (30F5,30F6) have no
|
||||
// Hiragana equivalents. We convert them to normal
|
||||
// Hiragana ka/ke (304B,3051). This is a one-way
|
||||
// information-losing transformation and precludes
|
||||
// round-tripping of 30F5 and 30F6.
|
||||
|
||||
// 3. The combining marks 3099-309C are in the Hiragana
|
||||
// block, but they apply to Katakana as well, so we
|
||||
// leave them untouched.
|
||||
|
||||
// 4. The Katakana prolonged sound mark 30FC doubles the
|
||||
// preceding vowel. This is a one-way information-
|
||||
// losing transformation from Katakana to Hiragana.
|
||||
|
||||
// 5. The Katakana middle dot separates words in foreign
|
||||
// expressions; we leave this unmodified.
|
||||
|
||||
// The above points preclude successful round-trip
|
||||
// transformations of arbitrary input text. However,
|
||||
// they provide naturalistic results that should conform
|
||||
// to user expectations.
|
||||
|
||||
|
||||
// Combining equivalents va/vi/ve/vo
|
||||
"わ゙ <> ヷ;"
|
||||
"ゐ゙ <> ヸ;"
|
||||
"ゑ゙ <> ヹ;"
|
||||
"を゙ <> ヺ;"
|
||||
|
||||
// One-to-one mappings, main block
|
||||
// 3041:3094 <> 30A1:30F4
|
||||
// 309D,E <> 30FD,E
|
||||
"ぁ <> ァ;"
|
||||
"あ <> ア;"
|
||||
"ぃ <> ィ;"
|
||||
"い <> イ;"
|
||||
"ぅ <> ゥ;"
|
||||
"う <> ウ;"
|
||||
"ぇ <> ェ;"
|
||||
"え <> エ;"
|
||||
"ぉ <> ォ;"
|
||||
"お <> オ;"
|
||||
"か <> カ;"
|
||||
"が <> ガ;"
|
||||
"き <> キ;"
|
||||
"ぎ <> ギ;"
|
||||
"く <> ク;"
|
||||
"ぐ <> グ;"
|
||||
"け <> ケ;"
|
||||
"げ <> ゲ;"
|
||||
"こ <> コ;"
|
||||
"ご <> ゴ;"
|
||||
"さ <> サ;"
|
||||
"ざ <> ザ;"
|
||||
"し <> シ;"
|
||||
"じ <> ジ;"
|
||||
"す <> ス;"
|
||||
"ず <> ズ;"
|
||||
"せ <> セ;"
|
||||
"ぜ <> ゼ;"
|
||||
"そ <> ソ;"
|
||||
"ぞ <> ゾ;"
|
||||
"た <> タ;"
|
||||
"だ <> ダ;"
|
||||
"ち <> チ;"
|
||||
"ぢ <> ヂ;"
|
||||
"っ <> ッ;"
|
||||
"つ <> ツ;"
|
||||
"づ <> ヅ;"
|
||||
"て <> テ;"
|
||||
"で <> デ;"
|
||||
"と <> ト;"
|
||||
"ど <> ド;"
|
||||
"な <> ナ;"
|
||||
"に <> ニ;"
|
||||
"ぬ <> ヌ;"
|
||||
"ね <> ネ;"
|
||||
"の <> ノ;"
|
||||
"は <> ハ;"
|
||||
"ば <> バ;"
|
||||
"ぱ <> パ;"
|
||||
"ひ <> ヒ;"
|
||||
"び <> ビ;"
|
||||
"ぴ <> ピ;"
|
||||
"ふ <> フ;"
|
||||
"ぶ <> ブ;"
|
||||
"ぷ <> プ;"
|
||||
"へ <> ヘ;"
|
||||
"べ <> ベ;"
|
||||
"ぺ <> ペ;"
|
||||
"ほ <> ホ;"
|
||||
"ぼ <> ボ;"
|
||||
"ぽ <> ポ;"
|
||||
"ま <> マ;"
|
||||
"み <> ミ;"
|
||||
"む <> ム;"
|
||||
"め <> メ;"
|
||||
"も <> モ;"
|
||||
"ゃ <> ャ;"
|
||||
"や <> ヤ;"
|
||||
"ゅ <> ュ;"
|
||||
"ゆ <> ユ;"
|
||||
"ょ <> ョ;"
|
||||
"よ <> ヨ;"
|
||||
"ら <> ラ;"
|
||||
"り <> リ;"
|
||||
"る <> ル;"
|
||||
"れ <> レ;"
|
||||
"ろ <> ロ;"
|
||||
"ゎ <> ヮ;"
|
||||
"わ <> ワ;"
|
||||
"ゐ <> ヰ;"
|
||||
"ゑ <> ヱ;"
|
||||
"を <> ヲ;"
|
||||
"ん <> ン;"
|
||||
"ゔ <> ヴ;"
|
||||
"ゝ <> ヽ;"
|
||||
"ゞ <> ヾ;"
|
||||
|
||||
// One-way Katakana-Hiragana xform of small K ka/ke to
|
||||
// normal H ka/ke.
|
||||
"か < ヵ;"
|
||||
"け < ヶ;"
|
||||
|
||||
// Katakana followed by a prolonged sound mark 30FC has
|
||||
// its final vowel doubled. This is a Katakana-Hiragana
|
||||
// one-way information-losing transformation. We
|
||||
// include the small Katakana (e.g., small A 3041) and
|
||||
// do not distinguish them from their large
|
||||
// counterparts. It doesn't make sense to double a
|
||||
// small counterpart vowel as a small Hiragana vowel, so
|
||||
// we don't do so. In natural text this should never
|
||||
// occur anyway. If a 30FC is seen without a preceding
|
||||
// vowel sound (e.g., after n 30F3) we do not change it.
|
||||
|
||||
//## $long = ー;
|
||||
|
||||
// The following categories are Hiragana, not Katakana
|
||||
// as might be expected, since by the time we get to the
|
||||
// 30FC, the preceding character will have already been
|
||||
// transformed to Hiragana.
|
||||
|
||||
// {The following mechanically generated from the
|
||||
// Unicode 3.0 data:}
|
||||
|
||||
"$xa = ["
|
||||
"ぁ あ か が さ ざ"
|
||||
"た だ な は ば ぱ"
|
||||
"ま ゃ や ら ゎ わ"
|
||||
"];"
|
||||
|
||||
"$xi = ["
|
||||
"ぃ い き ぎ し じ"
|
||||
"ち ぢ に ひ び ぴ"
|
||||
"み り ゐ"
|
||||
"];"
|
||||
|
||||
"$xu = ["
|
||||
"ぅ う く ぐ す ず"
|
||||
"っ つ づ ぬ ふ ぶ"
|
||||
"ぷ む ゅ ゆ る ゔ"
|
||||
"];"
|
||||
|
||||
"$xe = ["
|
||||
"ぇ え け げ せ ぜ"
|
||||
"て で ね へ べ ぺ"
|
||||
"め れ ゑ"
|
||||
"];"
|
||||
|
||||
"$xo = ["
|
||||
"ぉ お こ ご そ ぞ"
|
||||
"と ど の ほ ぼ ぽ"
|
||||
"も ょ よ ろ を"
|
||||
"];"
|
||||
|
||||
"あ < $xa {ー};"
|
||||
"い < $xi {ー};"
|
||||
"う < $xu {ー};"
|
||||
"え < $xe {ー};"
|
||||
"お < $xo {ー};"
|
||||
|
||||
// eof
|
||||
}
|
||||
}
|
32
icu4c/data/translit_Hiragana_Latin.txt
Normal file
32
icu4c/data/translit_Hiragana_Latin.txt
Normal file
@ -0,0 +1,32 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Hiragana_Latin.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Hiragana_Latin
|
||||
|
||||
translit_Hiragana_Latin {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// $Source: /xsrl/Nsvn/icu/icu/data/Attic/translit_Hiragana_Latin.txt,v $
|
||||
// $Date: 2001/10/26 05:41:16 $
|
||||
// $Revision: 1.1 $
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
":: [:^Katakana:] ;" // don't touch any katakana that was in the text!
|
||||
|
||||
":: Hiragana-Katakana;"
|
||||
":: Katakana-Latin;"
|
||||
|
||||
":: ([:^Katakana:]) ;" // don't touch any katakana that was in the text!
|
||||
}
|
||||
}
|
134
icu4c/data/translit_InterIndic_Bengali.txt
Normal file
134
icu4c/data/translit_InterIndic_Bengali.txt
Normal file
@ -0,0 +1,134 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_InterIndic_Bengali.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Bengali
|
||||
|
||||
translit_InterIndic_Bengali {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Bengali.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic_Bengali
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:41:59 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic-Bengali
|
||||
//:: NFD (NFC) ;
|
||||
"\ue001>\u0981;" // SIGN CANDRABINDU
|
||||
"\ue002>\u0982;" // SIGN ANUSVARA
|
||||
"\ue003>\u0983;" // SIGN VISARGA
|
||||
"\ue005>\u0985;" // LETTER A
|
||||
"\ue006>\u0986;" // LETTER AA
|
||||
"\ue007>\u0987;" // LETTER I
|
||||
"\ue008>\u0988;" // LETTER II
|
||||
"\ue009>\u0989;" // LETTER U
|
||||
"\ue00a>\u098a;" // LETTER UU
|
||||
"\ue00b>\u098b;" // LETTER VOCALIC R
|
||||
"\ue00c>\u098c;" // LETTER VOCALIC L
|
||||
// \ue00f>; # UNMAPPED InterIndic-Bengali: LETTER EE (\u098f = LETTER E)
|
||||
"\ue010>\u0990;" // LETTER AI
|
||||
// \ue013>; # UNMAPPED InterIndic-Bengali: LETTER OO (\u0993 = LETTER O)
|
||||
"\ue014>\u0994;" // LETTER AU
|
||||
"\ue015>\u0995;" // LETTER KA
|
||||
"\ue016>\u0996;" // LETTER KHA
|
||||
"\ue017>\u0997;" // LETTER GA
|
||||
"\ue018>\u0998;" // LETTER GHA
|
||||
"\ue019>\u0999;" // LETTER NGA
|
||||
"\ue01a>\u099a;" // LETTER CA
|
||||
"\ue01b>\u099b;" // LETTER CHA
|
||||
"\ue01c>\u099c;" // LETTER JA
|
||||
"\ue01d>\u099d;" // LETTER JHA
|
||||
"\ue01e>\u099e;" // LETTER NYA
|
||||
"\ue01f>\u099f;" // LETTER TTA
|
||||
"\ue020>\u09a0;" // LETTER TTHA
|
||||
"\ue021>\u09a1;" // LETTER DDA
|
||||
"\ue022>\u09a2;" // LETTER DDHA
|
||||
"\ue023>\u09a3;" // LETTER NNA
|
||||
"\ue024>\u09a4;" // LETTER TA
|
||||
"\ue025>\u09a5;" // LETTER THA
|
||||
"\ue026>\u09a6;" // LETTER DA
|
||||
"\ue027>\u09a7;" // LETTER DHA
|
||||
"\ue028>\u09a8;" // LETTER NA
|
||||
"\ue029>\u09a8;" // REMAP (indicExceptions.txt): \u09a9>\u09a8 = LETTER NNNA>LETTER NA
|
||||
"\ue02a>\u09aa;" // LETTER PA
|
||||
"\ue02b>\u09ab;" // LETTER PHA
|
||||
"\ue02c>\u09ac;" // LETTER BA
|
||||
"\ue02d>\u09ad;" // LETTER BHA
|
||||
"\ue02e>\u09ae;" // LETTER MA
|
||||
"\ue02f>\u09af;" // LETTER YA
|
||||
"\ue030>\u09b0;" // LETTER RA
|
||||
"\ue032>\u09b2;" // LETTER LA
|
||||
"\ue033>\u09b2;" // REMAP (indicExceptions.txt): \u09b3>\u09b2 = LETTER LLA>LETTER LA
|
||||
"\ue034>\u09b2;" // REMAP (indicExceptions.txt): \u09b4>\u09b2 = LETTER LLLA>LETTER LA
|
||||
"\ue035>\u09ac;" // REMAP (indicExceptions.txt): \u09b5>\u09ac = LETTER VA>LETTER BA
|
||||
"\ue036>\u09b6;" // LETTER SHA
|
||||
"\ue037>\u09b7;" // LETTER SSA
|
||||
"\ue038>\u09b8;" // LETTER SA
|
||||
"\ue039>\u09b9;" // LETTER HA
|
||||
"\ue03c>\u09bc;" // SIGN NUKTA
|
||||
// \ue03d>; # UNMAPPED InterIndic-Bengali: SIGN AVAGRAHA
|
||||
"\ue03e>\u09be;" // VOWEL SIGN AA
|
||||
"\ue03f>\u09bf;" // VOWEL SIGN I
|
||||
"\ue040>\u09c0;" // VOWEL SIGN II
|
||||
"\ue041>\u09c1;" // VOWEL SIGN U
|
||||
"\ue042>\u09c2;" // VOWEL SIGN UU
|
||||
"\ue043>\u09c3;" // VOWEL SIGN VOCALIC R
|
||||
"\ue044>\u09c4;" // VOWEL SIGN VOCALIC RR
|
||||
"\ue045>\u09c7;" // REMAP (indicExceptions.txt): \u09c5>\u09c7 = VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
// \ue047>; # UNMAPPED InterIndic-Bengali: VOWEL SIGN EE (\u09c7 = VOWEL SIGN E)
|
||||
"\ue048>\u09c8;" // VOWEL SIGN AI
|
||||
"\ue049>\u09cb;" // REMAP (indicExceptions.txt): \u09c9>\u09cb = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
// \ue04b>; # UNMAPPED InterIndic-Bengali: VOWEL SIGN OO (\u09cb = VOWEL SIGN O)
|
||||
"\ue04c>\u09cc;" // VOWEL SIGN AU
|
||||
"\ue04d>\u09cd;" // SIGN VIRAMA
|
||||
// \ue050>; # UNMAPPED InterIndic-Bengali: OM
|
||||
// \ue055>; # UNMAPPED InterIndic-Bengali: LENGTH MARK
|
||||
"\ue056>\u09c8;" // REMAP (indicExceptions.txt): \u09d6>\u09c8 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
"\ue057>\u09d7;" // AU LENGTH MARK
|
||||
"\ue059>\u0996;" // REMAP (indicExceptions.txt): \u09d9>\u0996 = LETTER KHHA>LETTER KHA
|
||||
"\ue05a>\u0997;" // REMAP (indicExceptions.txt): \u09da>\u0997 = LETTER GHHA>LETTER GA
|
||||
"\ue05b>\u099c;" // REMAP (indicExceptions.txt): \u09db>\u099c = LETTER ZA>LETTER JA
|
||||
"\ue05d>\u09dd;" // LETTER RHA
|
||||
"\ue05e>\u09ab;" // REMAP (indicExceptions.txt): \u09de>\u09ab = LETTER FA>LETTER PHA
|
||||
"\ue05f>\u09df;" // LETTER YYA
|
||||
"\ue060>\u09e0;" // LETTER VOCALIC RR
|
||||
"\ue061>\u09e1;" // LETTER VOCALIC LL
|
||||
"\ue062>\u09e2;" // VOWEL SIGN VOCALIC L
|
||||
"\ue063>\u09e3;" // VOWEL SIGN VOCALIC LL
|
||||
"\ue066>\u09e6;" // DIGIT ZERO
|
||||
"\ue067>\u09e7;" // DIGIT ONE
|
||||
"\ue068>\u09e8;" // DIGIT TWO
|
||||
"\ue069>\u09e9;" // DIGIT THREE
|
||||
"\ue06a>\u09ea;" // DIGIT FOUR
|
||||
"\ue06b>\u09eb;" // DIGIT FIVE
|
||||
"\ue06c>\u09ec;" // DIGIT SIX
|
||||
"\ue06d>\u09ed;" // DIGIT SEVEN
|
||||
"\ue06e>\u09ee;" // DIGIT EIGHT
|
||||
"\ue06f>\u09ef;" // DIGIT NINE
|
||||
"\ue0fa>\u09fa;" // ISSHAR
|
||||
"\ue00f>\u098f;" // LETTER E
|
||||
"\ue013>\u0993;" // LETTER O
|
||||
"\ue031>\u09dc;" // LETTER RRA
|
||||
"\ue047>\u09c7;" // VOWEL SIGN E
|
||||
"\ue04b>\u09cb;" // VOWEL SIGN O
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
159
icu4c/data/translit_InterIndic_Devanagari.txt
Normal file
159
icu4c/data/translit_InterIndic_Devanagari.txt
Normal file
@ -0,0 +1,159 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_InterIndic_Devanagari.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Devanagari
|
||||
|
||||
translit_InterIndic_Devanagari {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Devanagari.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic_Devanagari
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:41:59 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic-Devanagari
|
||||
//:: NFD (NFC) ;
|
||||
//Rules for Decomposed characters
|
||||
"\ue028\ue03c > \u0929;" //\ue029
|
||||
"\ue030\ue03c > \u0931;" //\ue031
|
||||
"\ue033\ue03c > \u0934;" //\ue034
|
||||
"\ue015\ue03c > \u0958;" //\ue058 LETTER QA (For Urdu)
|
||||
"\ue016\ue03c > \u0959;" //\ue059 LETTER KHHA (For Urdu)
|
||||
"\ue017\ue03c > \u095a;" //\ue05a LETTER GHHA (For Urdu)
|
||||
"\ue01c\ue03c > \u095b;" //\ue05b LETTER ZA (For Urdu)
|
||||
"\ue021\ue03c > \u095c;" //\ue05c LETTER DDDHA (pronounced RRA)
|
||||
"\ue022\ue03c > \u095d;" //\ue05d LETTER RHA (pronounced RRHA)
|
||||
"\ue02b\ue03c > \u095e;" //\ue05e LETTER FA
|
||||
"\ue02f\ue03c > \u095f;" //\ue05f LETTER YYA
|
||||
"\ue001 > \u0901;" // SIGN CANDRABINDU
|
||||
"\ue002 > \u0902;" // SIGN ANUSVARA
|
||||
"\ue003 > \u0903;" // SIGN VISARGA
|
||||
"\ue005 > \u0905;" // LETTER A
|
||||
"\ue006 > \u0906;" // LETTER AA
|
||||
"\ue007 > \u0907;" // LETTER I
|
||||
"\ue008 > \u0908;" // LETTER II
|
||||
"\ue009 > \u0909;" // LETTER U
|
||||
"\ue00a > \u090a;" // LETTER UU
|
||||
"\ue00b > \u090b;" // LETTER VOCALIC R
|
||||
"\ue00c > \u090c;" // LETTER VOCALIC L
|
||||
"\ue00d > \u090d;" // LETTER CANDRA E (For representing English sounds)
|
||||
//\ue00e > \u090e; # UNMAPPED LETTER SHORT E (For Southern Scripts)
|
||||
"\ue00e > \u090f;"
|
||||
"\ue00f > \u090f;" // LETTER E
|
||||
"\ue010 > \u0910;" // LETTER AI
|
||||
"\ue011 > \u0911;" // LETTER CANDRA O (For representing English sounds)
|
||||
//\ue012 > \u0912; # UNMAPPED LETTER SHORT O (For Southern Scripts)
|
||||
"\ue012 > \u0913;"
|
||||
"\ue013 > \u0913;" // LETTER O
|
||||
"\ue014 > \u0914;" // LETTER AU
|
||||
"\ue015 > \u0915;" // LETTER KA
|
||||
"\ue016 > \u0916;" // LETTER KHA
|
||||
"\ue017 > \u0917;" // LETTER GA
|
||||
"\ue018 > \u0918;" // LETTER GHA
|
||||
"\ue019 > \u0919;" // LETTER NGA
|
||||
"\ue01a > \u091a;" // LETTER CA
|
||||
"\ue01b > \u091b;" // LETTER CHA
|
||||
"\ue01c > \u091c;" // LETTER JA
|
||||
"\ue01d > \u091d;" // LETTER JHA
|
||||
"\ue01e > \u091e;" // LETTER NYA
|
||||
"\ue01f > \u091f;" // LETTER TTA
|
||||
"\ue020 > \u0920;" // LETTER TTHA
|
||||
"\ue021 > \u0921;" // LETTER DDA
|
||||
"\ue022 > \u0922;" // LETTER DDHA
|
||||
"\ue023 > \u0923;" // LETTER NNA
|
||||
"\ue024 > \u0924;" // LETTER TA
|
||||
"\ue025 > \u0925;" // LETTER THA
|
||||
"\ue026 > \u0926;" // LETTER DA
|
||||
"\ue027 > \u0927;" // LETTER DHA
|
||||
"\ue028 > \u0928;" // LETTER NA
|
||||
"\ue029 > \u0929;" // LETTER NNNA
|
||||
"\ue02a > \u092a;" // LETTER PA
|
||||
"\ue02b > \u092b;" // LETTER PHA
|
||||
"\ue02c > \u092c;" // LETTER BA
|
||||
"\ue02d > \u092d;" // LETTER BHA
|
||||
"\ue02e > \u092e;" // LETTER MA
|
||||
"\ue02f > \u092f;" // LETTER YA
|
||||
"\ue030 > \u0930;" // LETTER RA
|
||||
//\ue031 > \u0931; # LETTER RRA (Eyelash RA for Southern scripts)
|
||||
"\ue031 > \u0930;"
|
||||
"\ue032 > \u0932;" // LETTER LA
|
||||
"\ue033 > \u0933;" // LETTER LLA
|
||||
//\ue034 > \u0934; # LETTER LLLA (LLLA for Southern scripts)
|
||||
"\ue034 > \u0933;"
|
||||
"\ue035 > \u0935;" // LETTER VA
|
||||
"\ue036 > \u0936;" // LETTER SHA
|
||||
"\ue037 > \u0937;" // LETTER SSA
|
||||
"\ue038 > \u0938;" // LETTER SA
|
||||
"\ue039 > \u0939;" // LETTER HA
|
||||
"\ue03c > \u093c;" // SIGN NUKTA
|
||||
"\ue03d > \u093d;" // SIGN AVAGRAHA
|
||||
"\ue03e > \u093e;" // VOWEL SIGN AA
|
||||
"\ue03f > \u093f;" // VOWEL SIGN I
|
||||
"\ue040 > \u0940;" // VOWEL SIGN II
|
||||
"\ue041 > \u0941;" // VOWEL SIGN U
|
||||
"\ue042 > \u0942;" // VOWEL SIGN UU
|
||||
"\ue043 > \u0943;" // VOWEL SIGN VOCALIC R
|
||||
"\ue044 > \u0944;" // VOWEL SIGN VOCALIC RR
|
||||
"\ue045 > \u0945;" // VOWEL SIGN CANDRA E
|
||||
//\ue046 > \u0946; # UNMAPPED VOWEL SIGN SHORT E
|
||||
"\ue046 > \u0947;"
|
||||
"\ue047 > \u0947;" // VOWEL SIGN E
|
||||
"\ue048 > \u0948;" // VOWEL SIGN AI
|
||||
"\ue049 > \u0949;" // VOWEL SIGN CANDRA O
|
||||
//\ue04a > \u094a; # UNMAPPED VOWEL SIGN SHORT O
|
||||
"\ue04a > \u094b;"
|
||||
"\ue04b > \u094b;" // VOWEL SIGN O
|
||||
"\ue04c > \u094c;" // VOWEL SIGN AU
|
||||
"\ue04d > \u094d;" // SIGN VIRAMA
|
||||
"\ue050 > \u0950;" // OM
|
||||
// \u0951 # UNMAPPED STRESS SIGN UDATTA
|
||||
// \u0952 # UNMAPPED STRESS SIGN ANUDATTA
|
||||
// \u0953 # UNMAPPED GRAVE ACCENT
|
||||
// \u0954 # UNMAPPED ACUTE ACCENT
|
||||
"\ue058 > \u0958;" // LETTER QA (For Urdu)
|
||||
"\ue059 > \u0959;" // LETTER KHHA (For Urdu)
|
||||
"\ue05a > \u095a;" // LETTER GHHA (For Urdu)
|
||||
"\ue05b > \u095b;" // LETTER ZA (For Urdu)
|
||||
"\ue05c > \u095c;" // LETTER DDDHA (pronounced RRA)
|
||||
"\ue05d > \u095d;" // LETTER RHA (pronounced RRHA)
|
||||
"\ue05e > \u095e;" // LETTER FA
|
||||
"\ue05f > \u095f;" // LETTER YYA
|
||||
"\ue060 > \u0960;" // LETTER VOCALIC RR
|
||||
"\ue061 > \u0961;" // LETTER VOCALIC LL
|
||||
"\ue062 > \u0962;" // VOWEL SIGN VOCALIC L
|
||||
"\ue063 > \u0963;" // VOWEL SIGN VOCALIC LL
|
||||
// > ; \u0964 # UNMAPPED Devanagari-InterIndic: DANDA
|
||||
// > ; \u0965 # UNMAPPED Devanagari-InterIndic: DOUBLE DANDA
|
||||
"\ue066 > \u0966;" // DIGIT ZERO
|
||||
"\ue067 > \u0967;" // DIGIT ONE
|
||||
"\ue068 > \u0968;" // DIGIT TWO
|
||||
"\ue069 > \u0969;" // DIGIT THREE
|
||||
"\ue06a > \u096a;" // DIGIT FOUR
|
||||
"\ue06b > \u096b;" // DIGIT FIVE
|
||||
"\ue06c > \u096c;" // DIGIT SIX
|
||||
"\ue06d > \u096d;" // DIGIT SEVEN
|
||||
"\ue06e > \u096e;" // DIGIT EIGHT
|
||||
"\ue06f > \u096f;" // DIGIT NINE
|
||||
// \u0970 # UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN
|
||||
// :: NFC;
|
||||
// eof
|
||||
}
|
||||
}
|
134
icu4c/data/translit_InterIndic_Gujarati.txt
Normal file
134
icu4c/data/translit_InterIndic_Gujarati.txt
Normal file
@ -0,0 +1,134 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_InterIndic_Gujarati.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Gujarati
|
||||
|
||||
translit_InterIndic_Gujarati {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Gujarati.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic_Gujarati
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:41:59 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic-Gujarati
|
||||
//:: NFD (NFC) ;
|
||||
"\ue001>\u0a81;" // SIGN CANDRABINDU
|
||||
"\ue002>\u0a82;" // SIGN ANUSVARA
|
||||
"\ue003>\u0a83;" // SIGN VISARGA
|
||||
"\ue005>\u0a85;" // LETTER A
|
||||
"\ue006>\u0a86;" // LETTER AA
|
||||
"\ue007>\u0a87;" // LETTER I
|
||||
"\ue008>\u0a88;" // LETTER II
|
||||
"\ue009>\u0a89;" // LETTER U
|
||||
"\ue00a>\u0a8a;" // LETTER UU
|
||||
"\ue00b>\u0a8b;" // LETTER VOCALIC R
|
||||
"\ue00c>\u0ab2\u0ac3;" // REMAP (indicExceptions.txt): \u0a8c>\u0ab2\u0ac3 = LETTER VOCALIC L>LETTER LA.VOWEL SIGN VOCALIC R
|
||||
// \ue00f>; # UNMAPPED InterIndic-Gujarati: LETTER EE (\u0a8f = LETTER E)
|
||||
"\ue010>\u0a90;" // LETTER AI
|
||||
// \ue013>; # UNMAPPED InterIndic-Gujarati: LETTER OO (\u0a93 = LETTER O)
|
||||
"\ue014>\u0a94;" // LETTER AU
|
||||
"\ue015>\u0a95;" // LETTER KA
|
||||
"\ue016>\u0a96;" // LETTER KHA
|
||||
"\ue017>\u0a97;" // LETTER GA
|
||||
"\ue018>\u0a98;" // LETTER GHA
|
||||
"\ue019>\u0a99;" // LETTER NGA
|
||||
"\ue01a>\u0a9a;" // LETTER CA
|
||||
"\ue01b>\u0a9b;" // LETTER CHA
|
||||
"\ue01c>\u0a9c;" // LETTER JA
|
||||
"\ue01d>\u0a9d;" // LETTER JHA
|
||||
"\ue01e>\u0a9e;" // LETTER NYA
|
||||
"\ue01f>\u0a9f;" // LETTER TTA
|
||||
"\ue020>\u0aa0;" // LETTER TTHA
|
||||
"\ue021>\u0aa1;" // LETTER DDA
|
||||
"\ue022>\u0aa2;" // LETTER DDHA
|
||||
"\ue023>\u0aa3;" // LETTER NNA
|
||||
"\ue024>\u0aa4;" // LETTER TA
|
||||
"\ue025>\u0aa5;" // LETTER THA
|
||||
"\ue026>\u0aa6;" // LETTER DA
|
||||
"\ue027>\u0aa7;" // LETTER DHA
|
||||
"\ue028>\u0aa8;" // LETTER NA
|
||||
"\ue029>\u0aa8;" // REMAP (indicExceptions.txt): \u0aa9>\u0aa8 = LETTER NNNA>LETTER NA
|
||||
"\ue02a>\u0aaa;" // LETTER PA
|
||||
"\ue02b>\u0aab;" // LETTER PHA
|
||||
"\ue02c>\u0aac;" // LETTER BA
|
||||
"\ue02d>\u0aad;" // LETTER BHA
|
||||
"\ue02e>\u0aae;" // LETTER MA
|
||||
"\ue02f>\u0aaf;" // LETTER YA
|
||||
"\ue030>\u0ab0;" // LETTER RA
|
||||
"\ue032>\u0ab2;" // LETTER LA
|
||||
"\ue033>\u0ab3;" // LETTER LLA
|
||||
"\ue034>\u0ab3;" // REMAP (indicExceptions.txt): \u0ab4>\u0ab3 = LETTER LLLA>LETTER LLA
|
||||
"\ue035>\u0ab5;" // LETTER VA
|
||||
"\ue036>\u0ab6;" // LETTER SHA
|
||||
"\ue037>\u0ab7;" // LETTER SSA
|
||||
"\ue038>\u0ab8;" // LETTER SA
|
||||
"\ue039>\u0ab9;" // LETTER HA
|
||||
"\ue03c>\u0abc;" // SIGN NUKTA
|
||||
"\ue03d>\u0abd;" // SIGN AVAGRAHA
|
||||
"\ue03e>\u0abe;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0abf;" // VOWEL SIGN I
|
||||
"\ue040>\u0ac0;" // VOWEL SIGN II
|
||||
"\ue041>\u0ac1;" // VOWEL SIGN U
|
||||
"\ue042>\u0ac2;" // VOWEL SIGN UU
|
||||
"\ue043>\u0ac3;" // VOWEL SIGN VOCALIC R
|
||||
"\ue044>\u0ac4;" // VOWEL SIGN VOCALIC RR
|
||||
"\ue045>\u0ac5;" // VOWEL SIGN CANDRA E
|
||||
// \ue047>; # UNMAPPED InterIndic-Gujarati: VOWEL SIGN EE (\u0ac7 = VOWEL SIGN E)
|
||||
"\ue048>\u0ac8;" // VOWEL SIGN AI
|
||||
"\ue049>\u0ac9;" // VOWEL SIGN CANDRA O
|
||||
// \ue04b>; # UNMAPPED InterIndic-Gujarati: VOWEL SIGN OO (\u0acb = VOWEL SIGN O)
|
||||
"\ue04c>\u0acc;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0acd;" // SIGN VIRAMA
|
||||
"\ue050>\u0ad0;" // OM
|
||||
// \ue055>; # UNMAPPED InterIndic-Gujarati: LENGTH MARK
|
||||
"\ue056>\u0ac8;" // REMAP (indicExceptions.txt): \u0ad6>\u0ac8 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
"\ue057>\u0acc;" // REMAP (indicExceptions.txt): \u0ad7>\u0acc = AU LENGTH MARK>VOWEL SIGN AU
|
||||
"\ue059>\u0a96\u0abc;" // REMAP (indicExceptions.txt): \u0ad9>\u0a96\u0abc = LETTER KHHA>LETTER KHA.SIGN NUKTA
|
||||
"\ue05a>\u0a97\u0abc;" // REMAP (indicExceptions.txt): \u0ada>\u0a97\u0abc = LETTER GHHA>LETTER GA.SIGN NUKTA
|
||||
"\ue05b>\u0a9c\u0abc;" // REMAP (indicExceptions.txt): \u0adb>\u0a9c\u0abc = LETTER ZA>LETTER JA.SIGN NUKTA
|
||||
"\ue05d>\u0aa2\u0abc;" // REMAP (indicExceptions.txt): \u0add>\u0aa2\u0abc = LETTER RHA>LETTER DDHA.SIGN NUKTA
|
||||
"\ue05e>\u0aab\u0abc;" // REMAP (indicExceptions.txt): \u0ade>\u0aab\u0abc = LETTER FA>LETTER PHA.SIGN NUKTA
|
||||
"\ue05f>\u0aaf\u0abc;" // REMAP (indicExceptions.txt): \u0adf>\u0aaf\u0abc = LETTER YYA>LETTER YA.SIGN NUKTA
|
||||
"\ue060>\u0ae0;" // LETTER VOCALIC RR
|
||||
"\ue061>\u0ab2\u0ac3;" // REMAP (indicExceptions.txt): \u0ae1>\u0ab2\u0ac3 = LETTER VOCALIC LL>LETTER LA.VOWEL SIGN VOCALIC R
|
||||
"\ue062>\u0abf\u0abc;" // REMAP (indicExceptions.txt): \u0ae2>\u0abf\u0abc = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
|
||||
"\ue063>\u0ac0\u0abc;" // REMAP (indicExceptions.txt): \u0ae3>\u0ac0\u0abc = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
|
||||
"\ue066>\u0ae6;" // DIGIT ZERO
|
||||
"\ue067>\u0ae7;" // DIGIT ONE
|
||||
"\ue068>\u0ae8;" // DIGIT TWO
|
||||
"\ue069>\u0ae9;" // DIGIT THREE
|
||||
"\ue06a>\u0aea;" // DIGIT FOUR
|
||||
"\ue06b>\u0aeb;" // DIGIT FIVE
|
||||
"\ue06c>\u0aec;" // DIGIT SIX
|
||||
"\ue06d>\u0aed;" // DIGIT SEVEN
|
||||
"\ue06e>\u0aee;" // DIGIT EIGHT
|
||||
"\ue06f>\u0aef;" // DIGIT NINE
|
||||
// \ue080>; # UNMAPPED InterIndic-Gujarati: ISSHAR
|
||||
"\ue00f>\u0a8f;" // LETTER E
|
||||
"\ue013>\u0a93;" // LETTER O
|
||||
// \ue083>; # UNMAPPED InterIndic-Gujarati: LETTER RRA (\u0a83 = SIGN VISARGA)
|
||||
"\ue047>\u0ac7;" // VOWEL SIGN E
|
||||
"\ue04b>\u0acb;" // VOWEL SIGN O
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
134
icu4c/data/translit_InterIndic_Gurmukhi.txt
Normal file
134
icu4c/data/translit_InterIndic_Gurmukhi.txt
Normal file
@ -0,0 +1,134 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_InterIndic_Gurmukhi.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Gurmukhi
|
||||
|
||||
translit_InterIndic_Gurmukhi {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Gurmukhi.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic_Gurmukhi
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:00 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic-Gurmukhi
|
||||
//:: NFD (NFC) ;
|
||||
"\ue001>\u0a02;" // REMAP (indicExceptions.txt): \u0a01>\u0a02 = SIGN CANDRABINDU>SIGN BINDI
|
||||
// \ue002>; # UNMAPPED InterIndic-Gurmukhi: SIGN ANUSVARA (\u0a02 = SIGN BINDI)
|
||||
// \ue003>; # UNMAPPED InterIndic-Gurmukhi: SIGN VISARGA
|
||||
"\ue005>\u0a05;" // LETTER A
|
||||
"\ue006>\u0a06;" // LETTER AA
|
||||
"\ue007>\u0a07;" // LETTER I
|
||||
"\ue008>\u0a08;" // LETTER II
|
||||
"\ue009>\u0a09;" // LETTER U
|
||||
"\ue00a>\u0a0a;" // LETTER UU
|
||||
"\ue00b>\u0a30\u0a3f;" // REMAP (indicExceptions.txt): \u0a0b>\u0a30\u0a3f = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
|
||||
"\ue00c>\u0a07;" // REMAP (indicExceptions.txt): \u0a0c>\u0a07 = LETTER VOCALIC L>LETTER I
|
||||
"\ue00f>\u0a0f;" // LETTER EE
|
||||
"\ue010>\u0a10;" // LETTER AI
|
||||
"\ue013>\u0a13;" // LETTER OO
|
||||
"\ue014>\u0a14;" // LETTER AU
|
||||
"\ue015>\u0a15;" // LETTER KA
|
||||
"\ue016>\u0a16;" // LETTER KHA
|
||||
"\ue017>\u0a17;" // LETTER GA
|
||||
"\ue018>\u0a18;" // LETTER GHA
|
||||
"\ue019>\u0a19;" // LETTER NGA
|
||||
"\ue01a>\u0a1a;" // LETTER CA
|
||||
"\ue01b>\u0a1b;" // LETTER CHA
|
||||
"\ue01c>\u0a1c;" // LETTER JA
|
||||
"\ue01d>\u0a1d;" // LETTER JHA
|
||||
"\ue01e>\u0a1e;" // LETTER NYA
|
||||
"\ue01f>\u0a1f;" // LETTER TTA
|
||||
"\ue020>\u0a20;" // LETTER TTHA
|
||||
"\ue021>\u0a21;" // LETTER DDA
|
||||
"\ue022>\u0a22;" // LETTER DDHA
|
||||
"\ue023>\u0a23;" // LETTER NNA
|
||||
"\ue024>\u0a24;" // LETTER TA
|
||||
"\ue025>\u0a25;" // LETTER THA
|
||||
"\ue026>\u0a26;" // LETTER DA
|
||||
"\ue027>\u0a27;" // LETTER DHA
|
||||
"\ue028>\u0a28;" // LETTER NA
|
||||
"\ue029>\u0a28;" // REMAP (indicExceptions.txt): \u0a29>\u0a28 = LETTER NNNA>LETTER NA
|
||||
"\ue02a>\u0a2a;" // LETTER PA
|
||||
"\ue02b>\u0a2b;" // LETTER PHA
|
||||
"\ue02c>\u0a2c;" // LETTER BA
|
||||
"\ue02d>\u0a2d;" // LETTER BHA
|
||||
"\ue02e>\u0a2e;" // LETTER MA
|
||||
"\ue02f>\u0a2f;" // LETTER YA
|
||||
"\ue030>\u0a30;" // LETTER RA
|
||||
"\ue032>\u0a32;" // LETTER LA
|
||||
"\ue033>\u0a33;" // LETTER LLA
|
||||
"\ue034>\u0a33;" // REMAP (indicExceptions.txt): \u0a34>\u0a33 = LETTER LLLA>LETTER LLA
|
||||
"\ue035>\u0a35;" // LETTER VA
|
||||
"\ue036>\u0a36;" // LETTER SHA
|
||||
"\ue037>\u0a36;" // REMAP (indicExceptions.txt): \u0a37>\u0a36 = LETTER SSA>LETTER SHA
|
||||
"\ue038>\u0a38;" // LETTER SA
|
||||
"\ue039>\u0a39;" // LETTER HA
|
||||
"\ue03c>\u0a3c;" // SIGN NUKTA
|
||||
// \ue03d>; # UNMAPPED InterIndic-Gurmukhi: SIGN AVAGRAHA
|
||||
"\ue03e>\u0a3e;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0a3f;" // VOWEL SIGN I
|
||||
"\ue040>\u0a40;" // VOWEL SIGN II
|
||||
"\ue041>\u0a41;" // VOWEL SIGN U
|
||||
"\ue042>\u0a42;" // VOWEL SIGN UU
|
||||
// \ue043>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN VOCALIC R
|
||||
// \ue044>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN VOCALIC RR
|
||||
"\ue045>\u0a48;" // REMAP (indicExceptions.txt): \u0a45>\u0a48 = VOWEL SIGN CANDRA E>VOWEL SIGN AI
|
||||
"\ue047>\u0a47;" // VOWEL SIGN EE
|
||||
"\ue048>\u0a48;" // VOWEL SIGN AI
|
||||
"\ue049>\u0a4c;" // REMAP (indicExceptions.txt): \u0a49>\u0a4c = VOWEL SIGN CANDRA O>VOWEL SIGN AU
|
||||
"\ue04b>\u0a4b;" // VOWEL SIGN OO
|
||||
"\ue04c>\u0a4c;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0a4d;" // SIGN VIRAMA
|
||||
// \ue050>; # UNMAPPED InterIndic-Gurmukhi: OM
|
||||
// \ue055>; # UNMAPPED InterIndic-Gurmukhi: LENGTH MARK
|
||||
"\ue056>\u0a48;" // REMAP (indicExceptions.txt): \u0a56>\u0a48 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
"\ue057>\u0a4c;" // REMAP (indicExceptions.txt): \u0a57>\u0a4c = AU LENGTH MARK>VOWEL SIGN AU
|
||||
"\ue059>\u0a59;" // LETTER KHHA
|
||||
"\ue05a>\u0a5a;" // LETTER GHHA
|
||||
"\ue05b>\u0a5b;" // LETTER ZA
|
||||
"\ue05d>\u0a22\u0a3c;" // REMAP (indicExceptions.txt): \u0a5d>\u0a22\u0a3c = LETTER RHA>LETTER DDHA.SIGN NUKTA
|
||||
"\ue05e>\u0a5e;" // LETTER FA
|
||||
"\ue05f>\u0a2f;" // REMAP (indicExceptions.txt): \u0a5f>\u0a2f = LETTER YYA>LETTER YA
|
||||
"\ue060>\u0a30\u0a3f;" // REMAP (indicExceptions.txt): \u0a60>\u0a30\u0a3f = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
|
||||
"\ue061>\u0a08\u0a3c;" // REMAP (indicExceptions.txt): \u0a61>\u0a08\u0a3c = LETTER VOCALIC LL>LETTER II.SIGN NUKTA
|
||||
"\ue062>\u0a3f\u0a3c;" // REMAP (indicExceptions.txt): \u0a62>\u0a3f\u0a3c = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
|
||||
"\ue063>\u0a40\u0a3c;" // REMAP (indicExceptions.txt): \u0a63>\u0a40\u0a3c = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
|
||||
"\ue066>\u0a66;" // DIGIT ZERO
|
||||
"\ue067>\u0a67;" // DIGIT ONE
|
||||
"\ue068>\u0a68;" // DIGIT TWO
|
||||
"\ue069>\u0a69;" // DIGIT THREE
|
||||
"\ue06a>\u0a6a;" // DIGIT FOUR
|
||||
"\ue06b>\u0a6b;" // DIGIT FIVE
|
||||
"\ue06c>\u0a6c;" // DIGIT SIX
|
||||
"\ue06d>\u0a6d;" // DIGIT SEVEN
|
||||
"\ue06e>\u0a6e;" // DIGIT EIGHT
|
||||
"\ue06f>\u0a6f;" // DIGIT NINE
|
||||
// \ue080>; # UNMAPPED InterIndic-Gurmukhi: ISSHAR
|
||||
// \ue081>; # UNMAPPED InterIndic-Gurmukhi: LETTER E
|
||||
// \ue082>; # UNMAPPED InterIndic-Gurmukhi: LETTER O (\u0a02 = SIGN BINDI)
|
||||
"\ue05c>\u0a5c;" // LETTER RRA
|
||||
// \ue084>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN E
|
||||
// \ue085>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN O (\u0a05 = LETTER A)
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
134
icu4c/data/translit_InterIndic_Kannada.txt
Normal file
134
icu4c/data/translit_InterIndic_Kannada.txt
Normal file
@ -0,0 +1,134 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_InterIndic_Kannada.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Kannada
|
||||
|
||||
translit_InterIndic_Kannada {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Kannada.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic_Kannada
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:00 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic-Kannada
|
||||
//:: NFD (NFC) ;
|
||||
"\ue001>\u0c82;" // REMAP (indicExceptions.txt): \u0c81>\u0c82 = SIGN CANDRABINDU>SIGN ANUSVARA
|
||||
"\ue002>\u0c82;" // SIGN ANUSVARA
|
||||
"\ue003>\u0c83;" // SIGN VISARGA
|
||||
"\ue005>\u0c85;" // LETTER A
|
||||
"\ue006>\u0c86;" // LETTER AA
|
||||
"\ue007>\u0c87;" // LETTER I
|
||||
"\ue008>\u0c88;" // LETTER II
|
||||
"\ue009>\u0c89;" // LETTER U
|
||||
"\ue00a>\u0c8a;" // LETTER UU
|
||||
"\ue00b>\u0c8b;" // LETTER VOCALIC R
|
||||
"\ue00c>\u0c8c;" // LETTER VOCALIC L
|
||||
"\ue00f>\u0c8f;" // LETTER EE
|
||||
"\ue010>\u0c90;" // LETTER AI
|
||||
"\ue013>\u0c93;" // LETTER OO
|
||||
"\ue014>\u0c94;" // LETTER AU
|
||||
"\ue015>\u0c95;" // LETTER KA
|
||||
"\ue016>\u0c96;" // LETTER KHA
|
||||
"\ue017>\u0c97;" // LETTER GA
|
||||
"\ue018>\u0c98;" // LETTER GHA
|
||||
"\ue019>\u0c99;" // LETTER NGA
|
||||
"\ue01a>\u0c9a;" // LETTER CA
|
||||
"\ue01b>\u0c9b;" // LETTER CHA
|
||||
"\ue01c>\u0c9c;" // LETTER JA
|
||||
"\ue01d>\u0c9d;" // LETTER JHA
|
||||
"\ue01e>\u0c9e;" // LETTER NYA
|
||||
"\ue01f>\u0c9f;" // LETTER TTA
|
||||
"\ue020>\u0ca0;" // LETTER TTHA
|
||||
"\ue021>\u0ca1;" // LETTER DDA
|
||||
"\ue022>\u0ca2;" // LETTER DDHA
|
||||
"\ue023>\u0ca3;" // LETTER NNA
|
||||
"\ue024>\u0ca4;" // LETTER TA
|
||||
"\ue025>\u0ca5;" // LETTER THA
|
||||
"\ue026>\u0ca6;" // LETTER DA
|
||||
"\ue027>\u0ca7;" // LETTER DHA
|
||||
"\ue028>\u0ca8;" // LETTER NA
|
||||
"\ue029>\u0ca8;" // REMAP (indicExceptions.txt): \u0ca9>\u0ca8 = LETTER NNNA>LETTER NA
|
||||
"\ue02a>\u0caa;" // LETTER PA
|
||||
"\ue02b>\u0cab;" // LETTER PHA
|
||||
"\ue02c>\u0cac;" // LETTER BA
|
||||
"\ue02d>\u0cad;" // LETTER BHA
|
||||
"\ue02e>\u0cae;" // LETTER MA
|
||||
"\ue02f>\u0caf;" // LETTER YA
|
||||
"\ue030>\u0cb0;" // LETTER RA
|
||||
"\ue032>\u0cb2;" // LETTER LA
|
||||
"\ue033>\u0cb3;" // LETTER LLA
|
||||
"\ue034>\u0cb3;" // REMAP (indicExceptions.txt): \u0cb4>\u0cb3 = LETTER LLLA>LETTER LLA
|
||||
"\ue035>\u0cb5;" // LETTER VA
|
||||
"\ue036>\u0cb6;" // LETTER SHA
|
||||
"\ue037>\u0cb7;" // LETTER SSA
|
||||
"\ue038>\u0cb8;" // LETTER SA
|
||||
"\ue039>\u0cb9;" // LETTER HA
|
||||
// \ue03c>; # UNMAPPED InterIndic-Kannada: SIGN NUKTA
|
||||
// \ue03d>; # UNMAPPED InterIndic-Kannada: SIGN AVAGRAHA
|
||||
"\ue03e>\u0cbe;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0cbf;" // VOWEL SIGN I
|
||||
"\ue040>\u0cc0;" // VOWEL SIGN II
|
||||
"\ue041>\u0cc1;" // VOWEL SIGN U
|
||||
"\ue042>\u0cc2;" // VOWEL SIGN UU
|
||||
"\ue043>\u0cc3;" // VOWEL SIGN VOCALIC R
|
||||
"\ue044>\u0cc4;" // VOWEL SIGN VOCALIC RR
|
||||
"\ue045>\u0cc6;" // REMAP (indicExceptions.txt): \u0cc5>\u0cc6 = VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
"\ue047>\u0cc7;" // VOWEL SIGN EE
|
||||
"\ue048>\u0cc8;" // VOWEL SIGN AI
|
||||
"\ue049>\u0cca;" // REMAP (indicExceptions.txt): \u0cc9>\u0cca = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
"\ue04b>\u0ccb;" // VOWEL SIGN OO
|
||||
"\ue04c>\u0ccc;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0ccd;" // SIGN VIRAMA
|
||||
"\ue050>\u0c93\u0c82;" // REMAP (indicExceptions.txt): \u0cd0>\u0c93\u0c82 = OM>LETTER OO.SIGN ANUSVARA
|
||||
"\ue055>\u0cd5;" // LENGTH MARK
|
||||
"\ue056>\u0cd6;" // AI LENGTH MARK
|
||||
"\ue057>\u0ccc;" // REMAP (indicExceptions.txt): \u0cd7>\u0ccc = AU LENGTH MARK>VOWEL SIGN AU
|
||||
"\ue059>\u0c96;" // REMAP (indicExceptions.txt): \u0cd9>\u0c96 = LETTER KHHA>LETTER KHA
|
||||
"\ue05a>\u0c97;" // REMAP (indicExceptions.txt): \u0cda>\u0c97 = LETTER GHHA>LETTER GA
|
||||
"\ue05b>\u0c9c;" // REMAP (indicExceptions.txt): \u0cdb>\u0c9c = LETTER ZA>LETTER JA
|
||||
"\ue05d>\u0ca2;" // REMAP (indicExceptions.txt): \u0cdd>\u0ca2 = LETTER RHA>LETTER DDHA
|
||||
"\ue05e>\u0cde;" // LETTER FA
|
||||
"\ue05f>\u0caf;" // REMAP (indicExceptions.txt): \u0cdf>\u0caf = LETTER YYA>LETTER YA
|
||||
"\ue060>\u0ce0;" // LETTER VOCALIC RR
|
||||
"\ue061>\u0ce1;" // LETTER VOCALIC LL
|
||||
"\ue062>\u0cbf;" // REMAP (indicExceptions.txt): \u0ce2>\u0cbf = VOWEL SIGN VOCALIC L>VOWEL SIGN I
|
||||
"\ue063>\u0cc0;" // REMAP (indicExceptions.txt): \u0ce3>\u0cc0 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
|
||||
"\ue066>\u0ce6;" // DIGIT ZERO
|
||||
"\ue067>\u0ce7;" // DIGIT ONE
|
||||
"\ue068>\u0ce8;" // DIGIT TWO
|
||||
"\ue069>\u0ce9;" // DIGIT THREE
|
||||
"\ue06a>\u0cea;" // DIGIT FOUR
|
||||
"\ue06b>\u0ceb;" // DIGIT FIVE
|
||||
"\ue06c>\u0cec;" // DIGIT SIX
|
||||
"\ue06d>\u0ced;" // DIGIT SEVEN
|
||||
"\ue06e>\u0cee;" // DIGIT EIGHT
|
||||
"\ue06f>\u0cef;" // DIGIT NINE
|
||||
// \ue080>; # UNMAPPED InterIndic-Kannada: ISSHAR
|
||||
"\ue00e>\u0c8e;" // LETTER E
|
||||
"\ue012>\u0c92;" // LETTER O
|
||||
"\ue031>\u0cb1;" // LETTER RRA
|
||||
"\ue046>\u0cc6;" // VOWEL SIGN E
|
||||
"\ue04a>\u0cca;" // VOWEL SIGN O
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
385
icu4c/data/translit_InterIndic_Latin.txt
Normal file
385
icu4c/data/translit_InterIndic_Latin.txt
Normal file
@ -0,0 +1,385 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_InterIndic_Latin.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Latin
|
||||
|
||||
translit_InterIndic_Latin {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 2001-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic-Latin
|
||||
// :: NFD (NFC) ;
|
||||
//\ue000 reserved
|
||||
//consonants
|
||||
"$chandrabindu=\ue001;"
|
||||
"$anusvara=\ue002;"
|
||||
"$visarga=\ue003;"
|
||||
//\ue004 reserved
|
||||
// w<vowel> represents the stand-alone form
|
||||
"$wa=\ue005;"
|
||||
"$waa=\ue006;"
|
||||
"$wi=\ue007;"
|
||||
"$wii=\ue008;"
|
||||
"$wu=\ue009;"
|
||||
"$wuu=\ue00a;"
|
||||
"$wr=\ue00b;"
|
||||
"$wl=\ue00c;"
|
||||
"$wce=\ue00d;" // LETTER CANDRA E
|
||||
"$wse=\ue00e;" // LETTER SHORT E
|
||||
"$we=\ue00f;" // \u090f LETTER E
|
||||
"$wai=\ue010;"
|
||||
"$wco=\ue011;" // LETTER CANDRA O
|
||||
"$wso=\ue012;" // LETTER SHORT O
|
||||
"$wo=\ue013;" // \u0913 LETTER O
|
||||
"$wau=\ue014;"
|
||||
"$ka=\ue015;"
|
||||
"$kha=\ue016;"
|
||||
"$ga=\ue017;"
|
||||
"$gha=\ue018;"
|
||||
"$nga=\ue019;"
|
||||
"$ca=\ue01a;"
|
||||
"$cha=\ue01b;"
|
||||
"$ja=\ue01c;"
|
||||
"$jha=\ue01d;"
|
||||
"$nya=\ue01e;"
|
||||
"$tta=\ue01f;"
|
||||
"$ttha=\ue020;"
|
||||
"$dda=\ue021;"
|
||||
"$ddha=\ue022;"
|
||||
"$nna=\ue023;"
|
||||
"$ta=\ue024;"
|
||||
"$tha=\ue025;"
|
||||
"$da=\ue026;"
|
||||
"$dha=\ue027;"
|
||||
"$na=\ue028;"
|
||||
"$ena=\ue029;" //compatibility
|
||||
"$pa=\ue02a;"
|
||||
"$pha=\ue02b;"
|
||||
"$ba=\ue02c;"
|
||||
"$bha=\ue02d;"
|
||||
"$ma=\ue02e;"
|
||||
"$ya=\ue02f;"
|
||||
"$ra=\ue030;"
|
||||
"$rra=\ue031;"
|
||||
"$la=\ue032;"
|
||||
"$lla=\ue033;"
|
||||
"$ela=\ue034;" //compatibility
|
||||
"$va=\ue035;"
|
||||
"$sha=\ue036;"
|
||||
"$ssa=\ue037;"
|
||||
"$sa=\ue038;"
|
||||
"$ha=\ue039;"
|
||||
//\u093a Reserved
|
||||
//\u093b Reserved
|
||||
"$nukta=\ue03c;"
|
||||
"$avagraha=\ue03d;" // SIGN AVAGRAHA
|
||||
// <vowel> represents the dependent form
|
||||
"$aa=\ue03e;"
|
||||
"$i=\ue03f;"
|
||||
"$ii=\ue040;"
|
||||
"$u=\ue041;"
|
||||
"$uu=\ue042;"
|
||||
"$rh=\ue043;"
|
||||
"$lh=\ue044;"
|
||||
"$ce=\ue045;" //VOWEL SIGN CANDRA E
|
||||
"$se=\ue046;" //VOWEL SIGN SHORT E
|
||||
"$e=\ue047;"
|
||||
"$ai=\ue048;"
|
||||
"$co=\ue049;" // VOWEL SIGN CANDRA O
|
||||
"$so=\ue04a;" // VOWEL SIGN SHORT O
|
||||
"$o=\ue04b;" // \u094b
|
||||
"$au=\ue04c;"
|
||||
"$virama=\ue04d;"
|
||||
// \u094e Reserved
|
||||
// \u094f Reserved
|
||||
//\u0950>\ue050; # OM
|
||||
// \u0951>; # UNMAPPED STRESS SIGN UDATTA
|
||||
// \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
|
||||
// \u0953>; # UNMAPPED GRAVE ACCENT
|
||||
// \u0954>; # UNMAPPED ACUTE ACCENT
|
||||
"$lm = \ue055;"// Telugu Length Mark
|
||||
"$ailm=\ue056;"// AI Length Mark
|
||||
"$aulm=\ue057;"// AU Length Mark
|
||||
//Urdu compatibility forms
|
||||
"$uka=\ue058;"
|
||||
"$ukha=\ue059;"
|
||||
"$ugha=\ue05a;"
|
||||
"$ujha=\ue05b;"
|
||||
"$uddha=\ue05c;"
|
||||
"$udha=\ue05d;"
|
||||
"$ufa=\ue05e;"
|
||||
"$uya=\ue05f;"
|
||||
"$wrr=\ue060;"
|
||||
"$wll=\ue061;"
|
||||
"$rrh=\ue062;"
|
||||
"$llh=\ue063;"
|
||||
"$danda=\ue064;"
|
||||
"$doubleDanda=\ue065;"
|
||||
"$zero=\ue066;" // DIGIT ZERO
|
||||
"$one=\ue067;" // DIGIT ONE
|
||||
"$two=\ue068;" // DIGIT TWO
|
||||
"$three=\ue069;" // DIGIT THREE
|
||||
"$four=\ue06a;" // DIGIT FOUR
|
||||
"$five=\ue06b;" // DIGIT FIVE
|
||||
"$six=\ue06c;" // DIGIT SIX
|
||||
"$seven=\ue06d;" // DIGIT SEVEN
|
||||
"$eight=\ue06e;" // DIGIT EIGHT
|
||||
"$nine=\ue06f;" // DIGIT NINE
|
||||
// For all other scripts
|
||||
"$ecp0=\ue070;"
|
||||
"$ecp1=\ue071;"
|
||||
"$ecp2=\ue072;"
|
||||
"$ecp3=\ue073;"
|
||||
"$ecp4=\ue074;"
|
||||
"$ecp5=\ue075;"
|
||||
"$ecp6=\ue076;"
|
||||
"$ecp7=\ue077;"
|
||||
"$ecp8=\ue078;"
|
||||
"$ecp9=\ue079;"
|
||||
"$ecpA=\ue07a;"
|
||||
"$ecpB=\ue07b;"
|
||||
"$ecpC=\ue07c;"
|
||||
"$ecpD=\ue07d;"
|
||||
"$ecpE=\ue07e;"
|
||||
"$ecpF=\ue07f;"
|
||||
// \u0970>; # UNMAPPED ABBREVIATION SIGN
|
||||
"$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
|
||||
"$depVowelBelow=[\ue041-\ue044];"
|
||||
"$endThing=[$danda$doubleDanda \u005c\u005cu0000-\udfff\ue080-\ufffd];"
|
||||
// $x was originally called '&'; $z was '%'
|
||||
"$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];"
|
||||
"$z=[bcdfghjklmnpqrstvwxyz];"
|
||||
"$consonants=[$ka-$ha $virama];"
|
||||
//#####################################################################
|
||||
// convert from Native letters to Latin letters
|
||||
//#####################################################################
|
||||
//transliterations for anusvara
|
||||
"$anusvara} [$ka$kha$ga$gha$nga] > n\u0307;"
|
||||
"$anusvara} [$ca$cha$ja$jha$nya] > n\u0304;"
|
||||
"$anusvara} [$tta$ttha$dda$ddha$nna] > n\u0323;"
|
||||
"$anusvara} [$ta$tha$da$dha$na] > n ;"
|
||||
"$anusvara} [$pa$pha$ba$bha$ma] > m ;"
|
||||
"$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] > n ;"
|
||||
"$anusvara>'-'m\u0307;"
|
||||
// normal consonants
|
||||
"$cha}$x>ch;"
|
||||
"$cha>cha;"
|
||||
"$ca$virama}$ha>c'';"
|
||||
"$ca}$x>c;"
|
||||
"$ca>ca;"
|
||||
"$jha}$x>jh;"
|
||||
"$jha>jha;"
|
||||
"$ja$virama}$ha>j'';"
|
||||
"$ja}$x>j;"
|
||||
"$ja>ja;"
|
||||
//$nya}$x>ny;
|
||||
//$nya>nya;
|
||||
"$nya }$x>n\u0303 ;"
|
||||
"$nya > n\u0303a ;"
|
||||
"$ttha}$x>t\u0323h;"
|
||||
"$tta$virama}$ha>t\u0323'';"
|
||||
"$tta}$x>t\u0323;"
|
||||
"$ddha}$x>d\u0323h;"
|
||||
"$dda}$x$ha>d\u0323'';"
|
||||
"$dda}$x>d\u0323;"
|
||||
"$dha}$x>dh;"
|
||||
"$da$virama}$ha>d'';"
|
||||
"$da$virama}$ddha>d'';"
|
||||
"$da$virama}$dda>d'';"
|
||||
"$da$virama}$dha>d'';"
|
||||
//$da$virama}$da>dda;
|
||||
"$da}$x>d;"
|
||||
"$tha}$x>th;"
|
||||
"$ta$virama}$ha>t'';"
|
||||
"$ta$virama}$ttha>t'';"
|
||||
"$ta$virama}$tta>t'';"
|
||||
"$ta$virama}$tha>t'';"
|
||||
"$tta>t\u0323a;"
|
||||
"$ttha>t\u0323ha;"
|
||||
//$ta$virama}$ta>tta;
|
||||
"$ta}$x>t;"
|
||||
"$tha>tha;"
|
||||
"$ta>ta;"
|
||||
"$dda>d\u0323a;"
|
||||
"$dha>dha;"
|
||||
"$ddha>d\u0323ha;"
|
||||
"$da>da;"
|
||||
"$nna}$x>n\u0323 ;"
|
||||
"$nna>n\u0323a ;"
|
||||
"$na$virama}$ga>n'';"
|
||||
"$na$virama}$ya>n'';"
|
||||
"$na}$x>n;"
|
||||
"$na>na;"
|
||||
"$kha}$x>kh;"
|
||||
"$kha>kha;"
|
||||
"$ka$virama}$ha>k'';"
|
||||
"$ka}$x>k;"
|
||||
"$ka>ka;"
|
||||
"$gha}$x>gh;"
|
||||
"$gha>gha;"
|
||||
"$ga$virama}$ha>g'';"
|
||||
"$ga}$x>g;"
|
||||
"$ga>ga;"
|
||||
//ng<$nga}$x;
|
||||
//nga<$nga;
|
||||
"$nga}$x>n\u0307;"
|
||||
"$nga>n\u0307a ;"
|
||||
"$pha}$x>ph;"
|
||||
"$pha>pha;"
|
||||
"$pa$virama}$ha>p'';"
|
||||
"$pa}$x>p;"
|
||||
"$pa>pa;"
|
||||
"$bha}$x>bh;"
|
||||
"$bha>bha;"
|
||||
"$ba$virama}$ha>b'';"
|
||||
"$ba}$x>b;"
|
||||
"$ba>ba;"
|
||||
"$ma$virama}$ma>m'';"
|
||||
//$ma$virama}$anusvara>m'';
|
||||
"$ma}$x>m;"
|
||||
"$ma>ma;"
|
||||
"$ya}$x>y;"
|
||||
"$ya>ya;"
|
||||
"$ra$virama}$ha>r'';"
|
||||
"$ra}$x>r;"
|
||||
"$ra>ra;"
|
||||
"$la$virama}$ha>l'';"
|
||||
"$la}$x>l;"
|
||||
"$la>la;"
|
||||
"$lla$virama}$ha>l\u0323'';"
|
||||
"$lla}$x>l\u0323;"
|
||||
"$lla>l\u0323a;"
|
||||
"$va}$x>v;"
|
||||
"$va>va;"
|
||||
"$sha}$x>s\u0301;"
|
||||
"$ssa}$x>s\u0323;"
|
||||
"$sa$virama}$ha>s'';"
|
||||
"$sa$virama}$sha>s'';"
|
||||
"$sa$virama}$ssa>s'';"
|
||||
"$sa$virama}$sa>s'';"
|
||||
"$sa}$x>s;"
|
||||
"$sha>s\u0301a;"
|
||||
"$ssa>s\u0323a;"
|
||||
"$sa>sa;"
|
||||
"$ha}$x>h;"
|
||||
"$ha>ha;"
|
||||
// Urdu compatibility
|
||||
"$uya}$x > y\u0307 ;"
|
||||
"$uya > y\u0307a ;"
|
||||
"$ela}$x > l\u0331 ;"
|
||||
"$ela > l\u0331a ;"
|
||||
"$ena}$x > n\u0331 ;"
|
||||
"$ena > n\u0331a ;"
|
||||
"$uka}$x > q ;"
|
||||
"$uka > qa ;"
|
||||
"$ukha}$x > k\u0323 ;"
|
||||
"$ukha > k\u0323a ;"
|
||||
"$ugha}$x > g\u0307 ;"
|
||||
"$ugha > g\u0307a ;"
|
||||
"$ujha}$x > z ;"
|
||||
"$ujha > za ;"
|
||||
"$udha}$x > r\u0323h ;"
|
||||
"$udha > r\u0323ha;"
|
||||
"$uddha}$x> r\u0323 ;"
|
||||
"$uddha > r\u0323a ;"
|
||||
"$ufa}$x > f\u0323 ;"
|
||||
"$ufa > f\u0323a ;"
|
||||
// dependent vowels (should never occur except following consonants)
|
||||
"$aa > a\u0304 ;"
|
||||
"$ai > ai ;"
|
||||
"$au > au ;"
|
||||
"$ii > i\u0304 ;"
|
||||
"$i > i ;"
|
||||
"$uu > u\u0304 ;"
|
||||
"$u > u ;"
|
||||
"$rrh > r\u0325\u0304 ;"
|
||||
"$rh}$consonants>r\u0325;"
|
||||
"$rh > r\u0325a ;"
|
||||
"$llh > l\u0325\u0304 ;"
|
||||
"$lh > l\u0325 ;"
|
||||
"$e > e\u0304 ;"
|
||||
"$o > o\u0304 ;"
|
||||
//extra vowels
|
||||
"$ce > e\u0306 ;"
|
||||
"$co > o\u0306 ;"
|
||||
"$se > e ;"
|
||||
"$so > o ;"
|
||||
// independent vowels (when following consonants)
|
||||
"a}$waa > ''a\u0304 ;"
|
||||
"$z}$waa > ''a\u0304 ;"
|
||||
"a}$wai > ''ai ;"
|
||||
"$z}$wai > ''ai ;"
|
||||
"a}$wau > ''au ;"
|
||||
"$z}$wau > ''au ;"
|
||||
"a}$wii > ''i\u0304 ;"
|
||||
"$z}$wii > ''i\u0304 ;"
|
||||
"a}$wi > ''i ;"
|
||||
"$z}$wi > ''i ;"
|
||||
"a}$wuu > ''u\u0304 ;"
|
||||
"$z}$wuu > ''u\u0304 ;"
|
||||
"a}$wu > ''u ;"
|
||||
"$z}$wu > ''u ;"
|
||||
"$z}$wrr > ''r\u0325\u0304 ;"
|
||||
"$z}$wr > ''r\u0325 ;"
|
||||
"$z}$wll > ''l\u0325\u0304 ;"
|
||||
"$z}$wl > ''l\u0325 ;"
|
||||
"$z}$we > ''e\u0304 ;"
|
||||
"$z}$wo > ''o\u0304 ;"
|
||||
"a}$wa > ''a ;"
|
||||
"$z}$wa > ''a ;"
|
||||
//extra vowels
|
||||
"$z}$wce > ''e\u0306 ;"
|
||||
"$z}$wco > ''o\u0306 ;"
|
||||
"$z}$wse > ''e ;"
|
||||
"$z}$wso > ''o ;"
|
||||
// independent vowels (otherwise)
|
||||
"$waa > a\u0304 ;"
|
||||
"$wai > ai ;"
|
||||
"$wau > au ;"
|
||||
"$wii > i\u0304 ;"
|
||||
"$wi > i ;"
|
||||
"$wuu > u\u0304 ;"
|
||||
"$wu > u ;"
|
||||
"$wrr > r\u0325\u0304 ;"
|
||||
"$wr > r\u0325 ;"
|
||||
"$wll > l\u0325\u0304 ;"
|
||||
"$wl > l\u0325 ;"
|
||||
"$we > e\u0304 ;"
|
||||
"$wo > o\u0304 ;"
|
||||
"$wa > a ;"
|
||||
//extra vowels
|
||||
"$wce > e\u0306 ;"
|
||||
"$wco > o\u0306 ;"
|
||||
"$wse > e ;"
|
||||
"$wso > o ;"
|
||||
//stress marks
|
||||
"$avagraha > \u0315;"
|
||||
"$chandrabindu$anusvara>'-'\u0303;"
|
||||
"$chandrabindu > '-'m\u0310;"
|
||||
"$visarga>'-'h\u0323;"
|
||||
//numbers
|
||||
"$zero > 0;"
|
||||
"$one > 1;"
|
||||
"$two > 2;"
|
||||
"$three > 3;"
|
||||
"$four > 4;"
|
||||
"$five > 5;"
|
||||
"$six > 6;"
|
||||
"$seven > 7;"
|
||||
"$eight > 8;"
|
||||
"$nine > 9;"
|
||||
// blow away any remaining viramas
|
||||
"$virama>;"
|
||||
// :: NFC;
|
||||
}
|
||||
}
|
134
icu4c/data/translit_InterIndic_Malayalam.txt
Normal file
134
icu4c/data/translit_InterIndic_Malayalam.txt
Normal file
@ -0,0 +1,134 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_InterIndic_Malayalam.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Malayalam
|
||||
|
||||
translit_InterIndic_Malayalam {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Malayalam.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic_Malayalam
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:00 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic-Malayalam
|
||||
//:: NFD (NFC) ;
|
||||
"\ue001>\u0d02;" // REMAP (indicExceptions.txt): \u0d01>\u0d02 = SIGN CANDRABINDU>SIGN ANUSVARA
|
||||
"\ue002>\u0d02;" // SIGN ANUSVARA
|
||||
"\ue003>\u0d03;" // SIGN VISARGA
|
||||
"\ue005>\u0d05;" // LETTER A
|
||||
"\ue006>\u0d06;" // LETTER AA
|
||||
"\ue007>\u0d07;" // LETTER I
|
||||
"\ue008>\u0d08;" // LETTER II
|
||||
"\ue009>\u0d09;" // LETTER U
|
||||
"\ue00a>\u0d0a;" // LETTER UU
|
||||
"\ue00b>\u0d0b;" // LETTER VOCALIC R
|
||||
"\ue00c>\u0d0c;" // LETTER VOCALIC L
|
||||
"\ue00f>\u0d0f;" // LETTER EE
|
||||
"\ue010>\u0d10;" // LETTER AI
|
||||
"\ue013>\u0d13;" // LETTER OO
|
||||
"\ue014>\u0d14;" // LETTER AU
|
||||
"\ue015>\u0d15;" // LETTER KA
|
||||
"\ue016>\u0d16;" // LETTER KHA
|
||||
"\ue017>\u0d17;" // LETTER GA
|
||||
"\ue018>\u0d18;" // LETTER GHA
|
||||
"\ue019>\u0d19;" // LETTER NGA
|
||||
"\ue01a>\u0d1a;" // LETTER CA
|
||||
"\ue01b>\u0d1b;" // LETTER CHA
|
||||
"\ue01c>\u0d1c;" // LETTER JA
|
||||
"\ue01d>\u0d1d;" // LETTER JHA
|
||||
"\ue01e>\u0d1e;" // LETTER NYA
|
||||
"\ue01f>\u0d1f;" // LETTER TTA
|
||||
"\ue020>\u0d20;" // LETTER TTHA
|
||||
"\ue021>\u0d21;" // LETTER DDA
|
||||
"\ue022>\u0d22;" // LETTER DDHA
|
||||
"\ue023>\u0d23;" // LETTER NNA
|
||||
"\ue024>\u0d24;" // LETTER TA
|
||||
"\ue025>\u0d25;" // LETTER THA
|
||||
"\ue026>\u0d26;" // LETTER DA
|
||||
"\ue027>\u0d27;" // LETTER DHA
|
||||
"\ue028>\u0d28;" // LETTER NA
|
||||
"\ue029>\u0d28;" // REMAP (indicExceptions.txt): \u0d29>\u0d28 = LETTER NNNA>LETTER NA
|
||||
"\ue02a>\u0d2a;" // LETTER PA
|
||||
"\ue02b>\u0d2b;" // LETTER PHA
|
||||
"\ue02c>\u0d2c;" // LETTER BA
|
||||
"\ue02d>\u0d2d;" // LETTER BHA
|
||||
"\ue02e>\u0d2e;" // LETTER MA
|
||||
"\ue02f>\u0d2f;" // LETTER YA
|
||||
"\ue030>\u0d30;" // LETTER RA
|
||||
"\ue032>\u0d32;" // LETTER LA
|
||||
"\ue033>\u0d33;" // LETTER LLA
|
||||
"\ue034>\u0d34;" // LETTER LLLA
|
||||
"\ue035>\u0d35;" // LETTER VA
|
||||
"\ue036>\u0d36;" // LETTER SHA
|
||||
"\ue037>\u0d37;" // LETTER SSA
|
||||
"\ue038>\u0d38;" // LETTER SA
|
||||
"\ue039>\u0d39;" // LETTER HA
|
||||
// \ue03c>; # UNMAPPED InterIndic-Malayalam: SIGN NUKTA
|
||||
// \ue03d>; # UNMAPPED InterIndic-Malayalam: SIGN AVAGRAHA
|
||||
"\ue03e>\u0d3e;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0d3f;" // VOWEL SIGN I
|
||||
"\ue040>\u0d40;" // VOWEL SIGN II
|
||||
"\ue041>\u0d41;" // VOWEL SIGN U
|
||||
"\ue042>\u0d42;" // VOWEL SIGN UU
|
||||
"\ue043>\u0d43;" // VOWEL SIGN VOCALIC R
|
||||
// \ue044>; # UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC RR
|
||||
"\ue045>\u0d3e;" // REMAP (indicExceptions.txt): \u0d45>\u0d3e = VOWEL SIGN CANDRA E>VOWEL SIGN AA
|
||||
"\ue047>\u0d47;" // VOWEL SIGN EE
|
||||
"\ue048>\u0d48;" // VOWEL SIGN AI
|
||||
"\ue049>\u0d4b;" // REMAP (indicExceptions.txt): \u0d49>\u0d4b = VOWEL SIGN CANDRA O>VOWEL SIGN OO
|
||||
"\ue04b>\u0d4b;" // VOWEL SIGN OO
|
||||
"\ue04c>\u0d4c;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0d4d;" // SIGN VIRAMA
|
||||
// \ue050>; # UNMAPPED InterIndic-Malayalam: OM
|
||||
// \ue055>; # UNMAPPED InterIndic-Malayalam: LENGTH MARK
|
||||
"\ue056>\u0d48;" // REMAP (indicExceptions.txt): \u0d56>\u0d48 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
"\ue057>\u0d57;" // AU LENGTH MARK
|
||||
"\ue059>\u0d16;" // REMAP (indicExceptions.txt): \u0d59>\u0d16 = LETTER KHHA>LETTER KHA
|
||||
"\ue05a>\u0d17;" // REMAP (indicExceptions.txt): \u0d5a>\u0d17 = LETTER GHHA>LETTER GA
|
||||
"\ue05b>\u0d1c;" // REMAP (indicExceptions.txt): \u0d5b>\u0d1c = LETTER ZA>LETTER JA
|
||||
"\ue05d>\u0d22;" // REMAP (indicExceptions.txt): \u0d5d>\u0d22 = LETTER RHA>LETTER DDHA
|
||||
"\ue05e>\u0d2b;" // REMAP (indicExceptions.txt): \u0d5e>\u0d2b = LETTER FA>LETTER PHA
|
||||
"\ue05f>\u0d2f;" // REMAP (indicExceptions.txt): \u0d5f>\u0d2f = LETTER YYA>LETTER YA
|
||||
"\ue060>\u0d60;" // LETTER VOCALIC RR
|
||||
"\ue061>\u0d61;" // LETTER VOCALIC LL
|
||||
// \ue062>; # UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC L
|
||||
// \ue063>; # UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC LL
|
||||
"\ue066>\u0d66;" // DIGIT ZERO
|
||||
"\ue067>\u0d67;" // DIGIT ONE
|
||||
"\ue068>\u0d68;" // DIGIT TWO
|
||||
"\ue069>\u0d69;" // DIGIT THREE
|
||||
"\ue06a>\u0d6a;" // DIGIT FOUR
|
||||
"\ue06b>\u0d6b;" // DIGIT FIVE
|
||||
"\ue06c>\u0d6c;" // DIGIT SIX
|
||||
"\ue06d>\u0d6d;" // DIGIT SEVEN
|
||||
"\ue06e>\u0d6e;" // DIGIT EIGHT
|
||||
"\ue06f>\u0d6f;" // DIGIT NINE
|
||||
// \ue080>; # UNMAPPED InterIndic-Malayalam: ISSHAR
|
||||
"\ue00e>\u0d0e;" // LETTER E
|
||||
"\ue012>\u0d12;" // LETTER O
|
||||
"\ue031>\u0d31;" // LETTER RRA
|
||||
"\ue046>\u0d46;" // VOWEL SIGN E
|
||||
"\ue04a>\u0d4a;" // VOWEL SIGN O
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
134
icu4c/data/translit_InterIndic_Oriya.txt
Normal file
134
icu4c/data/translit_InterIndic_Oriya.txt
Normal file
@ -0,0 +1,134 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_InterIndic_Oriya.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Oriya
|
||||
|
||||
translit_InterIndic_Oriya {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Oriya.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic_Oriya
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:01 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic-Oriya
|
||||
//:: NFD (NFC) ;
|
||||
"\ue001>\u0b01;" // SIGN CANDRABINDU
|
||||
"\ue002>\u0b02;" // SIGN ANUSVARA
|
||||
"\ue003>\u0b03;" // SIGN VISARGA
|
||||
"\ue005>\u0b05;" // LETTER A
|
||||
"\ue006>\u0b06;" // LETTER AA
|
||||
"\ue007>\u0b07;" // LETTER I
|
||||
"\ue008>\u0b08;" // LETTER II
|
||||
"\ue009>\u0b09;" // LETTER U
|
||||
"\ue00a>\u0b0a;" // LETTER UU
|
||||
"\ue00b>\u0b0b;" // LETTER VOCALIC R
|
||||
"\ue00c>\u0b0c;" // LETTER VOCALIC L
|
||||
// \ue00f>; # UNMAPPED InterIndic-Oriya: LETTER EE (\u0b0f = LETTER E)
|
||||
"\ue010>\u0b10;" // LETTER AI
|
||||
// \ue013>; # UNMAPPED InterIndic-Oriya: LETTER OO (\u0b13 = LETTER O)
|
||||
"\ue014>\u0b14;" // LETTER AU
|
||||
"\ue015>\u0b15;" // LETTER KA
|
||||
"\ue016>\u0b16;" // LETTER KHA
|
||||
"\ue017>\u0b17;" // LETTER GA
|
||||
"\ue018>\u0b18;" // LETTER GHA
|
||||
"\ue019>\u0b19;" // LETTER NGA
|
||||
"\ue01a>\u0b1a;" // LETTER CA
|
||||
"\ue01b>\u0b1b;" // LETTER CHA
|
||||
"\ue01c>\u0b1c;" // LETTER JA
|
||||
"\ue01d>\u0b1d;" // LETTER JHA
|
||||
"\ue01e>\u0b1e;" // LETTER NYA
|
||||
"\ue01f>\u0b1f;" // LETTER TTA
|
||||
"\ue020>\u0b20;" // LETTER TTHA
|
||||
"\ue021>\u0b21;" // LETTER DDA
|
||||
"\ue022>\u0b22;" // LETTER DDHA
|
||||
"\ue023>\u0b23;" // LETTER NNA
|
||||
"\ue024>\u0b24;" // LETTER TA
|
||||
"\ue025>\u0b25;" // LETTER THA
|
||||
"\ue026>\u0b26;" // LETTER DA
|
||||
"\ue027>\u0b27;" // LETTER DHA
|
||||
"\ue028>\u0b28;" // LETTER NA
|
||||
"\ue029>\u0b28;" // REMAP (indicExceptions.txt): \u0b29>\u0b28 = LETTER NNNA>LETTER NA
|
||||
"\ue02a>\u0b2a;" // LETTER PA
|
||||
"\ue02b>\u0b2b;" // LETTER PHA
|
||||
"\ue02c>\u0b2c;" // LETTER BA
|
||||
"\ue02d>\u0b2d;" // LETTER BHA
|
||||
"\ue02e>\u0b2e;" // LETTER MA
|
||||
"\ue02f>\u0b2f;" // LETTER YA
|
||||
"\ue030>\u0b30;" // LETTER RA
|
||||
"\ue032>\u0b32;" // LETTER LA
|
||||
"\ue033>\u0b33;" // LETTER LLA
|
||||
"\ue034>\u0b33;" // REMAP (indicExceptions.txt): \u0b34>\u0b33 = LETTER LLLA>LETTER LLA
|
||||
"\ue035>\u0b2c;" // REMAP (indicExceptions.txt): \u0b35>\u0b2c = LETTER VA>LETTER BA
|
||||
"\ue036>\u0b36;" // LETTER SHA
|
||||
"\ue037>\u0b37;" // LETTER SSA
|
||||
"\ue038>\u0b38;" // LETTER SA
|
||||
"\ue039>\u0b39;" // LETTER HA
|
||||
"\ue03c>\u0b3c;" // SIGN NUKTA
|
||||
"\ue03d>\u0b3d;" // SIGN AVAGRAHA
|
||||
"\ue03e>\u0b3e;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0b3f;" // VOWEL SIGN I
|
||||
"\ue040>\u0b40;" // VOWEL SIGN II
|
||||
"\ue041>\u0b41;" // VOWEL SIGN U
|
||||
"\ue042>\u0b42;" // VOWEL SIGN UU
|
||||
"\ue043>\u0b43;" // VOWEL SIGN VOCALIC R
|
||||
"\ue044>\u0b43\u0b3c;" // REMAP (indicExceptions.txt): \u0b44>\u0b43\u0b3c = VOWEL SIGN VOCALIC RR>VOWEL SIGN VOCALIC R.SIGN NUKTA
|
||||
"\ue045>\u0b47;" // REMAP (indicExceptions.txt): \u0b45>\u0b47 = VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
// \ue047>; # UNMAPPED InterIndic-Oriya: VOWEL SIGN EE (\u0b47 = VOWEL SIGN E)
|
||||
"\ue048>\u0b48;" // VOWEL SIGN AI
|
||||
"\ue049>\u0b4b;" // REMAP (indicExceptions.txt): \u0b49>\u0b4b = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
// \ue04b>; # UNMAPPED InterIndic-Oriya: VOWEL SIGN OO (\u0b4b = VOWEL SIGN O)
|
||||
"\ue04c>\u0b4c;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0b4d;" // SIGN VIRAMA
|
||||
"\ue050>\u0b13\u0b01;" // REMAP (indicExceptions.txt): \u0b50>\u0b13\u0b01 = OM>LETTER O.SIGN CANDRABINDU
|
||||
// \ue055>; # UNMAPPED InterIndic-Oriya: LENGTH MARK
|
||||
"\ue056>\u0b56;" // AI LENGTH MARK
|
||||
"\ue057>\u0b57;" // AU LENGTH MARK
|
||||
"\ue059>\u0b16\u0b3c;" // REMAP (indicExceptions.txt): \u0b59>\u0b16\u0b3c = LETTER KHHA>LETTER KHA.SIGN NUKTA
|
||||
"\ue05a>\u0b17\u0b3c;" // REMAP (indicExceptions.txt): \u0b5a>\u0b17\u0b3c = LETTER GHHA>LETTER GA.SIGN NUKTA
|
||||
"\ue05b>\u0b1c\u0b3c;" // REMAP (indicExceptions.txt): \u0b5b>\u0b1c\u0b3c = LETTER ZA>LETTER JA.SIGN NUKTA
|
||||
"\ue05d>\u0b5d;" // LETTER RHA
|
||||
"\ue05e>\u0b2b\u0b3c;" // REMAP (indicExceptions.txt): \u0b5e>\u0b2b\u0b3c = LETTER FA>LETTER PHA.SIGN NUKTA
|
||||
"\ue05f>\u0b5f;" // LETTER YYA
|
||||
"\ue060>\u0b60;" // LETTER VOCALIC RR
|
||||
"\ue061>\u0b61;" // LETTER VOCALIC LL
|
||||
"\ue062>\u0b56\u0b3c;" // REMAP (indicExceptions.txt): \u0b62>\u0b56\u0b3c = VOWEL SIGN VOCALIC L>AI LENGTH MARK.SIGN NUKTA
|
||||
"\ue063>\u0b57\u0b3c;" // REMAP (indicExceptions.txt): \u0b63>\u0b57\u0b3c = VOWEL SIGN VOCALIC LL>AU LENGTH MARK.SIGN NUKTA
|
||||
"\ue066>\u0b66;" // DIGIT ZERO
|
||||
"\ue067>\u0b67;" // DIGIT ONE
|
||||
"\ue068>\u0b68;" // DIGIT TWO
|
||||
"\ue069>\u0b69;" // DIGIT THREE
|
||||
"\ue06a>\u0b6a;" // DIGIT FOUR
|
||||
"\ue06b>\u0b6b;" // DIGIT FIVE
|
||||
"\ue06c>\u0b6c;" // DIGIT SIX
|
||||
"\ue06d>\u0b6d;" // DIGIT SEVEN
|
||||
"\ue06e>\u0b6e;" // DIGIT EIGHT
|
||||
"\ue06f>\u0b6f;" // DIGIT NINE
|
||||
"\ue070>\u0b70;" // ISSHAR
|
||||
"\ue00e>\u0b0f;" // LETTER E
|
||||
"\ue013>\u0b13;" // LETTER O
|
||||
"\ue031>\u0b5c;" // LETTER RRA
|
||||
"\ue047>\u0b47;" // VOWEL SIGN E
|
||||
"\ue04b>\u0b4b;" // VOWEL SIGN O
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
134
icu4c/data/translit_InterIndic_Tamil.txt
Normal file
134
icu4c/data/translit_InterIndic_Tamil.txt
Normal file
@ -0,0 +1,134 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_InterIndic_Tamil.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Tamil
|
||||
|
||||
translit_InterIndic_Tamil {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Tamil.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic_Tamil
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:01 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic-Tamil
|
||||
//:: NFD (NFC) ;
|
||||
// \ue001>; # UNMAPPED InterIndic-Tamil: SIGN CANDRABINDU
|
||||
"\ue002>\u0b82;" // SIGN ANUSVARA
|
||||
"\ue003>\u0b83;" // SIGN VISARGA
|
||||
"\ue005>\u0b85;" // LETTER A
|
||||
"\ue006>\u0b86;" // LETTER AA
|
||||
"\ue007>\u0b87;" // LETTER I
|
||||
"\ue008>\u0b88;" // LETTER II
|
||||
"\ue009>\u0b89;" // LETTER U
|
||||
"\ue00a>\u0b8a;" // LETTER UU
|
||||
"\ue00b>\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0b8b>\u0bb0\u0bbf = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
|
||||
"\ue00c>\u0b87;" // REMAP (indicExceptions.txt): \u0b8c>\u0b87 = LETTER VOCALIC L>LETTER I
|
||||
"\ue00f>\u0b8f;" // LETTER EE
|
||||
"\ue010>\u0b90;" // LETTER AI
|
||||
"\ue013>\u0b93;" // LETTER OO
|
||||
"\ue014>\u0b94;" // LETTER AU
|
||||
"\ue015>\u0b95;" // LETTER KA
|
||||
"\ue016>\u0b95;" // REMAP (indicExceptions.txt): \u0b96>\u0b95 = LETTER KHA>LETTER KA
|
||||
"\ue017>\u0b95;" // REMAP (indicExceptions.txt): \u0b97>\u0b95 = LETTER GA>LETTER KA
|
||||
"\ue018>\u0b95;" // REMAP (indicExceptions.txt): \u0b98>\u0b95 = LETTER GHA>LETTER KA
|
||||
"\ue019>\u0b99;" // LETTER NGA
|
||||
"\ue01a>\u0b9a;" // LETTER CA
|
||||
"\ue01b>\u0b9a;" // REMAP (indicExceptions.txt): \u0b9b>\u0b9a = LETTER CHA>LETTER CA
|
||||
"\ue01c>\u0b9c;" // LETTER JA
|
||||
"\ue01d>\u0b9a;" // REMAP (indicExceptions.txt): \u0b9d>\u0b9a = LETTER JHA>LETTER CA
|
||||
"\ue01e>\u0b9e;" // LETTER NYA
|
||||
"\ue01f>\u0b9f;" // LETTER TTA
|
||||
"\ue020>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba0>\u0b9f = LETTER TTHA>LETTER TTA
|
||||
"\ue021>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba1>\u0b9f = LETTER DDA>LETTER TTA
|
||||
"\ue022>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba2>\u0b9f = LETTER DDHA>LETTER TTA
|
||||
"\ue023>\u0ba3;" // LETTER NNA
|
||||
"\ue024>\u0ba4;" // LETTER TA
|
||||
"\ue025>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba5>\u0ba4 = LETTER THA>LETTER TA
|
||||
"\ue026>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba6>\u0ba4 = LETTER DA>LETTER TA
|
||||
"\ue027>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba7>\u0ba4 = LETTER DHA>LETTER TA
|
||||
"\ue028>\u0ba8;" // LETTER NA
|
||||
"\ue029>\u0ba9;" // LETTER NNNA
|
||||
"\ue02a>\u0baa;" // LETTER PA
|
||||
"\ue02b>\u0baa;" // REMAP (indicExceptions.txt): \u0bab>\u0baa = LETTER PHA>LETTER PA
|
||||
"\ue02c>\u0baa;" // REMAP (indicExceptions.txt): \u0bac>\u0baa = LETTER BA>LETTER PA
|
||||
"\ue02d>\u0baa;" // REMAP (indicExceptions.txt): \u0bad>\u0baa = LETTER BHA>LETTER PA
|
||||
"\ue02e>\u0bae;" // LETTER MA
|
||||
"\ue02f>\u0baf;" // LETTER YA
|
||||
"\ue030>\u0bb0;" // LETTER RA
|
||||
"\ue032>\u0bb2;" // LETTER LA
|
||||
"\ue033>\u0bb3;" // LETTER LLA
|
||||
"\ue034>\u0bb4;" // LETTER LLLA
|
||||
"\ue035>\u0bb5;" // LETTER VA
|
||||
"\ue036>\u0bb7;" // REMAP (indicExceptions.txt): \u0bb6>\u0bb7 = LETTER SHA>LETTER SSA
|
||||
"\ue037>\u0bb7;" // LETTER SSA
|
||||
"\ue038>\u0bb8;" // LETTER SA
|
||||
"\ue039>\u0bb9;" // LETTER HA
|
||||
// \ue03c>; # UNMAPPED InterIndic-Tamil: SIGN NUKTA
|
||||
// \ue03d>; # UNMAPPED InterIndic-Tamil: SIGN AVAGRAHA
|
||||
"\ue03e>\u0bbe;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0bbf;" // VOWEL SIGN I
|
||||
"\ue040>\u0bc0;" // VOWEL SIGN II
|
||||
"\ue041>\u0bc1;" // VOWEL SIGN U
|
||||
"\ue042>\u0bc2;" // VOWEL SIGN UU
|
||||
"\ue043>\u0bcd\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0bc3>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC R>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
|
||||
"\ue044>\u0bcd\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0bc4>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC RR>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
|
||||
"\ue045>\u0bbe;" // REMAP (indicExceptions.txt): \u0bc5>\u0bbe = VOWEL SIGN CANDRA E>VOWEL SIGN AA
|
||||
"\ue047>\u0bc7;" // VOWEL SIGN EE
|
||||
"\ue048>\u0bc8;" // VOWEL SIGN AI
|
||||
"\ue049>\u0bbe;" // REMAP (indicExceptions.txt): \u0bc9>\u0bbe = VOWEL SIGN CANDRA O>VOWEL SIGN AA
|
||||
"\ue04b>\u0bcb;" // VOWEL SIGN OO
|
||||
"\ue04c>\u0bcc;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0bcd;" // SIGN VIRAMA
|
||||
"\ue050>\u0b93\u0bae\u0bcd;" // REMAP (indicExceptions.txt): \u0bd0>\u0b93\u0bae\u0bcd = OM>LETTER OO.LETTER MA.SIGN VIRAMA
|
||||
// \ue055>; # UNMAPPED InterIndic-Tamil: LENGTH MARK
|
||||
"\ue056>\u0bc8;" // REMAP (indicExceptions.txt): \u0bd6>\u0bc8 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
"\ue057>\u0bd7;" // AU LENGTH MARK
|
||||
"\ue059>\u0b95;" // REMAP (indicExceptions.txt): \u0bd9>\u0b95 = LETTER KHHA>LETTER KA
|
||||
"\ue05a>\u0b95;" // REMAP (indicExceptions.txt): \u0bda>\u0b95 = LETTER GHHA>LETTER KA
|
||||
"\ue05b>\u0b9c;" // REMAP (indicExceptions.txt): \u0bdb>\u0b9c = LETTER ZA>LETTER JA
|
||||
"\ue05d>\u0b9f;" // REMAP (indicExceptions.txt): \u0bdd>\u0b9f = LETTER RHA>LETTER TTA
|
||||
"\ue05e>\u0baa;" // REMAP (indicExceptions.txt): \u0bde>\u0baa = LETTER FA>LETTER PA
|
||||
"\ue05f>\u0baf;" // REMAP (indicExceptions.txt): \u0bdf>\u0baf = LETTER YYA>LETTER YA
|
||||
"\ue060>\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0be0>\u0bb0\u0bbf = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
|
||||
"\ue061>\u0b88;" // REMAP (indicExceptions.txt): \u0be1>\u0b88 = LETTER VOCALIC LL>LETTER II
|
||||
// \ue062>; # UNMAPPED InterIndic-Tamil: VOWEL SIGN VOCALIC L
|
||||
// \ue063>; # UNMAPPED InterIndic-Tamil: VOWEL SIGN VOCALIC LL
|
||||
// \ue066>; # UNMAPPED InterIndic-Tamil: DIGIT ZERO
|
||||
"\ue067>\u0be7;" // DIGIT ONE
|
||||
"\ue068>\u0be8;" // DIGIT TWO
|
||||
"\ue069>\u0be9;" // DIGIT THREE
|
||||
"\ue06a>\u0bea;" // DIGIT FOUR
|
||||
"\ue06b>\u0beb;" // DIGIT FIVE
|
||||
"\ue06c>\u0bec;" // DIGIT SIX
|
||||
"\ue06d>\u0bed;" // DIGIT SEVEN
|
||||
"\ue06e>\u0bee;" // DIGIT EIGHT
|
||||
"\ue06f>\u0bef;" // DIGIT NINE
|
||||
// \ue080>; # UNMAPPED InterIndic-Tamil: ISSHAR
|
||||
"\ue00e>\u0b8e;" // LETTER E
|
||||
"\ue012>\u0b92;" // LETTER O
|
||||
"\ue031>\u0bb1;" // LETTER RRA
|
||||
"\ue046>\u0bc6;" // VOWEL SIGN E
|
||||
"\ue04a>\u0bca;" // VOWEL SIGN O
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
134
icu4c/data/translit_InterIndic_Telugu.txt
Normal file
134
icu4c/data/translit_InterIndic_Telugu.txt
Normal file
@ -0,0 +1,134 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_InterIndic_Telugu.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Telugu
|
||||
|
||||
translit_InterIndic_Telugu {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Telugu.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic_Telugu
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:01 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic-Telugu
|
||||
//:: NFD (NFC) ;
|
||||
"\ue001>\u0c01;" // SIGN CANDRABINDU
|
||||
"\ue002>\u0c02;" // SIGN ANUSVARA
|
||||
"\ue003>\u0c03;" // SIGN VISARGA
|
||||
"\ue005>\u0c05;" // LETTER A
|
||||
"\ue006>\u0c06;" // LETTER AA
|
||||
"\ue007>\u0c07;" // LETTER I
|
||||
"\ue008>\u0c08;" // LETTER II
|
||||
"\ue009>\u0c09;" // LETTER U
|
||||
"\ue00a>\u0c0a;" // LETTER UU
|
||||
"\ue00b>\u0c0b;" // LETTER VOCALIC R
|
||||
"\ue00c>\u0c0c;" // LETTER VOCALIC L
|
||||
"\ue00f>\u0c0f;" // LETTER EE
|
||||
"\ue010>\u0c10;" // LETTER AI
|
||||
"\ue013>\u0c13;" // LETTER OO
|
||||
"\ue014>\u0c14;" // LETTER AU
|
||||
"\ue015>\u0c15;" // LETTER KA
|
||||
"\ue016>\u0c16;" // LETTER KHA
|
||||
"\ue017>\u0c17;" // LETTER GA
|
||||
"\ue018>\u0c18;" // LETTER GHA
|
||||
"\ue019>\u0c19;" // LETTER NGA
|
||||
"\ue01a>\u0c1a;" // LETTER CA
|
||||
"\ue01b>\u0c1b;" // LETTER CHA
|
||||
"\ue01c>\u0c1c;" // LETTER JA
|
||||
"\ue01d>\u0c1d;" // LETTER JHA
|
||||
"\ue01e>\u0c1e;" // LETTER NYA
|
||||
"\ue01f>\u0c1f;" // LETTER TTA
|
||||
"\ue020>\u0c20;" // LETTER TTHA
|
||||
"\ue021>\u0c21;" // LETTER DDA
|
||||
"\ue022>\u0c22;" // LETTER DDHA
|
||||
"\ue023>\u0c23;" // LETTER NNA
|
||||
"\ue024>\u0c24;" // LETTER TA
|
||||
"\ue025>\u0c25;" // LETTER THA
|
||||
"\ue026>\u0c26;" // LETTER DA
|
||||
"\ue027>\u0c27;" // LETTER DHA
|
||||
"\ue028>\u0c28;" // LETTER NA
|
||||
"\ue029>\u0c28;" // REMAP (indicExceptions.txt): \u0c29>\u0c28 = LETTER NNNA>LETTER NA
|
||||
"\ue02a>\u0c2a;" // LETTER PA
|
||||
"\ue02b>\u0c2b;" // LETTER PHA
|
||||
"\ue02c>\u0c2c;" // LETTER BA
|
||||
"\ue02d>\u0c2d;" // LETTER BHA
|
||||
"\ue02e>\u0c2e;" // LETTER MA
|
||||
"\ue02f>\u0c2f;" // LETTER YA
|
||||
"\ue030>\u0c30;" // LETTER RA
|
||||
"\ue032>\u0c32;" // LETTER LA
|
||||
"\ue033>\u0c33;" // LETTER LLA
|
||||
"\ue034>\u0c33;" // REMAP (indicExceptions.txt): \u0c34>\u0c33 = LETTER LLLA>LETTER LLA
|
||||
"\ue035>\u0c35;" // LETTER VA
|
||||
"\ue036>\u0c36;" // LETTER SHA
|
||||
"\ue037>\u0c37;" // LETTER SSA
|
||||
"\ue038>\u0c38;" // LETTER SA
|
||||
"\ue039>\u0c39;" // LETTER HA
|
||||
// \ue03c>; # UNMAPPED InterIndic-Telugu: SIGN NUKTA
|
||||
// \ue03d>; # UNMAPPED InterIndic-Telugu: SIGN AVAGRAHA
|
||||
"\ue03e>\u0c3e;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0c3f;" // VOWEL SIGN I
|
||||
"\ue040>\u0c40;" // VOWEL SIGN II
|
||||
"\ue041>\u0c41;" // VOWEL SIGN U
|
||||
"\ue042>\u0c42;" // VOWEL SIGN UU
|
||||
"\ue043>\u0c43;" // VOWEL SIGN VOCALIC R
|
||||
"\ue044>\u0c44;" // VOWEL SIGN VOCALIC RR
|
||||
"\ue045>\u0c46;" // VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
"\ue047>\u0c47;" // VOWEL SIGN EE
|
||||
"\ue048>\u0c48;" // VOWEL SIGN AI
|
||||
"\ue049>\u0c4a;" // REMAP (indicExceptions.txt): \u0c49>\u0c4a = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
"\ue04b>\u0c4b;" // VOWEL SIGN OO
|
||||
"\ue04c>\u0c4c;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0c4d;" // SIGN VIRAMA
|
||||
"\ue050>\u0c13\u0c02;" // REMAP (indicExceptions.txt): \u0c50>\u0c13\u0c02 = OM>LETTER OO.SIGN ANUSVARA
|
||||
"\ue055>\u0c55;" // LENGTH MARK
|
||||
"\ue056>\u0c56;" // AI LENGTH MARK
|
||||
"\ue057>\u0c4c;" // REMAP (indicExceptions.txt): \u0c57>\u0c4c = AU LENGTH MARK>VOWEL SIGN AU
|
||||
"\ue059>\u0c16;" // REMAP (indicExceptions.txt): \u0c59>\u0c16 = LETTER KHHA>LETTER KHA
|
||||
"\ue05a>\u0c17;" // REMAP (indicExceptions.txt): \u0c5a>\u0c17 = LETTER GHHA>LETTER GA
|
||||
"\ue05b>\u0c1c;" // REMAP (indicExceptions.txt): \u0c5b>\u0c1c = LETTER ZA>LETTER JA
|
||||
"\ue05d>\u0c22;" // REMAP (indicExceptions.txt): \u0c5d>\u0c22 = LETTER RHA>LETTER DDHA
|
||||
"\ue05e>\u0c2b;" // REMAP (indicExceptions.txt): \u0c5e>\u0c2b = LETTER FA>LETTER PHA
|
||||
"\ue05f>\u0c2f;" // REMAP (indicExceptions.txt): \u0c5f>\u0c2f = LETTER YYA>LETTER YA
|
||||
"\ue060>\u0c60;" // LETTER VOCALIC RR
|
||||
"\ue061>\u0c61;" // LETTER VOCALIC LL
|
||||
"\ue062>\u0c3f;" // REMAP (indicExceptions.txt): \u0c62>\u0c3f = VOWEL SIGN VOCALIC L>VOWEL SIGN I
|
||||
"\ue063>\u0c40;" // REMAP (indicExceptions.txt): \u0c63>\u0c40 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
|
||||
"\ue066>\u0c66;" // DIGIT ZERO
|
||||
"\ue067>\u0c67;" // DIGIT ONE
|
||||
"\ue068>\u0c68;" // DIGIT TWO
|
||||
"\ue069>\u0c69;" // DIGIT THREE
|
||||
"\ue06a>\u0c6a;" // DIGIT FOUR
|
||||
"\ue06b>\u0c6b;" // DIGIT FIVE
|
||||
"\ue06c>\u0c6c;" // DIGIT SIX
|
||||
"\ue06d>\u0c6d;" // DIGIT SEVEN
|
||||
"\ue06e>\u0c6e;" // DIGIT EIGHT
|
||||
"\ue06f>\u0c6f;" // DIGIT NINE
|
||||
// \ue080>; # UNMAPPED InterIndic-Telugu: ISSHAR
|
||||
"\ue00e>\u0c0e;" // LETTER E
|
||||
"\ue012>\u0c12;" // LETTER O
|
||||
"\ue031>\u0c31;" // LETTER RRA
|
||||
"\ue046>\u0c46;" // VOWEL SIGN E
|
||||
"\ue04a>\u0c4a;" // VOWEL SIGN O
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
117
icu4c/data/translit_Kannada_InterIndic.txt
Normal file
117
icu4c/data/translit_Kannada_InterIndic.txt
Normal file
@ -0,0 +1,117 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Kannada_InterIndic.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Kannada_InterIndic
|
||||
|
||||
translit_Kannada_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Kannada_InterIndic.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Kannada_InterIndic
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:05 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Kannada-InterIndic
|
||||
//:: NFD (NFC) ;
|
||||
"\u0c82>\ue002;" // SIGN ANUSVARA
|
||||
"\u0c83>\ue003;" // SIGN VISARGA
|
||||
"\u0c85>\ue005;" // LETTER A
|
||||
"\u0c86>\ue006;" // LETTER AA
|
||||
"\u0c87>\ue007;" // LETTER I
|
||||
"\u0c88>\ue008;" // LETTER II
|
||||
"\u0c89>\ue009;" // LETTER U
|
||||
"\u0c8a>\ue00a;" // LETTER UU
|
||||
"\u0c8b>\ue00b;" // LETTER VOCALIC R
|
||||
"\u0c8c>\ue00c;" // LETTER VOCALIC L
|
||||
"\u0c8e>\ue00e;" // LETTER E
|
||||
"\u0c8f>\ue00f;" // LETTER EE
|
||||
"\u0c90>\ue010;" // LETTER AI
|
||||
"\u0c92>\ue012;" // LETTER O
|
||||
"\u0c93>\ue013;" // LETTER OO
|
||||
"\u0c94>\ue014;" // LETTER AU
|
||||
"\u0c95>\ue015;" // LETTER KA
|
||||
"\u0c96>\ue016;" // LETTER KHA
|
||||
"\u0c97>\ue017;" // LETTER GA
|
||||
"\u0c98>\ue018;" // LETTER GHA
|
||||
"\u0c99>\ue019;" // LETTER NGA
|
||||
"\u0c9a>\ue01a;" // LETTER CA
|
||||
"\u0c9b>\ue01b;" // LETTER CHA
|
||||
"\u0c9c>\ue01c;" // LETTER JA
|
||||
"\u0c9d>\ue01d;" // LETTER JHA
|
||||
"\u0c9e>\ue01e;" // LETTER NYA
|
||||
"\u0c9f>\ue01f;" // LETTER TTA
|
||||
"\u0ca0>\ue020;" // LETTER TTHA
|
||||
"\u0ca1>\ue021;" // LETTER DDA
|
||||
"\u0ca2>\ue022;" // LETTER DDHA
|
||||
"\u0ca3>\ue023;" // LETTER NNA
|
||||
"\u0ca4>\ue024;" // LETTER TA
|
||||
"\u0ca5>\ue025;" // LETTER THA
|
||||
"\u0ca6>\ue026;" // LETTER DA
|
||||
"\u0ca7>\ue027;" // LETTER DHA
|
||||
"\u0ca8>\ue028;" // LETTER NA
|
||||
"\u0caa>\ue02a;" // LETTER PA
|
||||
"\u0cab>\ue02b;" // LETTER PHA
|
||||
"\u0cac>\ue02c;" // LETTER BA
|
||||
"\u0cad>\ue02d;" // LETTER BHA
|
||||
"\u0cae>\ue02e;" // LETTER MA
|
||||
"\u0caf>\ue02f;" // LETTER YA
|
||||
"\u0cb0>\ue030;" // LETTER RA
|
||||
"\u0cb1>\ue031;" // LETTER RRA
|
||||
"\u0cb2>\ue032;" // LETTER LA
|
||||
"\u0cb3>\ue033;" // LETTER LLA
|
||||
"\u0cb5>\ue035;" // LETTER VA
|
||||
"\u0cb6>\ue036;" // LETTER SHA
|
||||
"\u0cb7>\ue037;" // LETTER SSA
|
||||
"\u0cb8>\ue038;" // LETTER SA
|
||||
"\u0cb9>\ue039;" // LETTER HA
|
||||
"\u0cbe>\ue03e;" // VOWEL SIGN AA
|
||||
"\u0cbf>\ue03f;" // VOWEL SIGN I
|
||||
"\u0cc0>\ue040;" // VOWEL SIGN II
|
||||
"\u0cc1>\ue041;" // VOWEL SIGN U
|
||||
"\u0cc2>\ue042;" // VOWEL SIGN UU
|
||||
"\u0cc3>\ue043;" // VOWEL SIGN VOCALIC R
|
||||
"\u0cc4>\ue044;" // VOWEL SIGN VOCALIC RR
|
||||
"\u0cc6>\ue046;" // VOWEL SIGN E
|
||||
"\u0cc7>\ue047;" // VOWEL SIGN EE
|
||||
"\u0cc8>\ue048;" // VOWEL SIGN AI
|
||||
"\u0cca>\ue04a;" // VOWEL SIGN O
|
||||
"\u0ccb>\ue04b;" // VOWEL SIGN OO
|
||||
"\u0ccc>\ue04c;" // VOWEL SIGN AU
|
||||
"\u0ccd>\ue04d;" // SIGN VIRAMA
|
||||
"\u0cd5>\ue055;" // LENGTH MARK
|
||||
"\u0cd6>\ue056;" // AI LENGTH MARK
|
||||
"\u0cde>\ue05e;" // LETTER FA
|
||||
"\u0ce0>\ue060;" // LETTER VOCALIC RR
|
||||
"\u0ce1>\ue061;" // LETTER VOCALIC LL
|
||||
"\u0ce6>\ue066;" // DIGIT ZERO
|
||||
"\u0ce7>\ue067;" // DIGIT ONE
|
||||
"\u0ce8>\ue068;" // DIGIT TWO
|
||||
"\u0ce9>\ue069;" // DIGIT THREE
|
||||
"\u0cea>\ue06a;" // DIGIT FOUR
|
||||
"\u0ceb>\ue06b;" // DIGIT FIVE
|
||||
"\u0cec>\ue06c;" // DIGIT SIX
|
||||
"\u0ced>\ue06d;" // DIGIT SEVEN
|
||||
"\u0cee>\ue06e;" // DIGIT EIGHT
|
||||
"\u0cef>\ue06f;" // DIGIT NINE
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
319
icu4c/data/translit_Latin_InterIndic.txt
Normal file
319
icu4c/data/translit_Latin_InterIndic.txt
Normal file
@ -0,0 +1,319 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Latin_InterIndic.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Latin_InterIndic
|
||||
|
||||
translit_Latin_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 2001-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Latin-InterIndic
|
||||
//:: NFD;
|
||||
//\ue000 reserved
|
||||
//consonants
|
||||
"$chandrabindu=\ue001;"
|
||||
"$anusvara=\ue002;"
|
||||
"$visarga=\ue003;"
|
||||
//\ue004 reserved
|
||||
// w<vowel> represents the stand-alone form
|
||||
"$wa=\ue005;"
|
||||
"$waa=\ue006;"
|
||||
"$wi=\ue007;"
|
||||
"$wii=\ue008;"
|
||||
"$wu=\ue009;"
|
||||
"$wuu=\ue00a;"
|
||||
"$wr=\ue00b;"
|
||||
"$wl=\ue00c;"
|
||||
"$wce=\ue00d;" // LETTER CANDRA E
|
||||
"$wse=\ue00e;" // LETTER SHORT E
|
||||
"$we=\ue00f;" // \u090f LETTER E
|
||||
"$wai=\ue010;"
|
||||
"$wco=\ue011;" // LETTER CANDRA O
|
||||
"$wso=\ue012;" // LETTER SHORT O
|
||||
"$wo=\ue013;" // \u0913 LETTER O
|
||||
"$wau=\ue014;"
|
||||
"$ka=\ue015;"
|
||||
"$kha=\ue016;"
|
||||
"$ga=\ue017;"
|
||||
"$gha=\ue018;"
|
||||
"$nga=\ue019;"
|
||||
"$ca=\ue01a;"
|
||||
"$cha=\ue01b;"
|
||||
"$ja=\ue01c;"
|
||||
"$jha=\ue01d;"
|
||||
"$nya=\ue01e;"
|
||||
"$tta=\ue01f;"
|
||||
"$ttha=\ue020;"
|
||||
"$dda=\ue021;"
|
||||
"$ddha=\ue022;"
|
||||
"$nna=\ue023;"
|
||||
"$ta=\ue024;"
|
||||
"$tha=\ue025;"
|
||||
"$da=\ue026;"
|
||||
"$dha=\ue027;"
|
||||
"$na=\ue028;"
|
||||
"$ena=\ue029;" //compatibility
|
||||
"$pa=\ue02a;"
|
||||
"$pha=\ue02b;"
|
||||
"$ba=\ue02c;"
|
||||
"$bha=\ue02d;"
|
||||
"$ma=\ue02e;"
|
||||
"$ya=\ue02f;"
|
||||
"$ra=\ue030;"
|
||||
"$rra=\ue031;"
|
||||
"$la=\ue032;"
|
||||
"$lla=\ue033;"
|
||||
"$ela=\ue034;" //compatibility
|
||||
"$va=\ue035;"
|
||||
"$sha=\ue036;"
|
||||
"$ssa=\ue037;"
|
||||
"$sa=\ue038;"
|
||||
"$ha=\ue039;"
|
||||
//\u093a Reserved
|
||||
//\u093b Reserved
|
||||
"$nukta=\ue03c;"
|
||||
"$avagraha=\ue03d;" // SIGN AVAGRAHA
|
||||
// <vowel> represents the dependent form
|
||||
"$aa=\ue03e;"
|
||||
"$i=\ue03f;"
|
||||
"$ii=\ue040;"
|
||||
"$u=\ue041;"
|
||||
"$uu=\ue042;"
|
||||
"$rh=\ue043;"
|
||||
"$lh=\ue044;"
|
||||
"$ce=\ue045;" //VOWEL SIGN CANDRA E
|
||||
"$se=\ue046;" //VOWEL SIGN SHORT E
|
||||
"$e=\ue047;"
|
||||
"$ai=\ue048;"
|
||||
"$co=\ue049;" // VOWEL SIGN CANDRA O
|
||||
"$so=\ue04a;" // VOWEL SIGN SHORT O
|
||||
"$o=\ue04b;" // \u094b
|
||||
"$au=\ue04c;"
|
||||
"$virama=\ue04d;"
|
||||
// \u094e Reserved
|
||||
// \u094f Reserved
|
||||
//\u0950>\ue050; # OM
|
||||
// \u0951>; # UNMAPPED STRESS SIGN UDATTA
|
||||
// \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
|
||||
// \u0953>; # UNMAPPED GRAVE ACCENT
|
||||
// \u0954>; # UNMAPPED ACUTE ACCENT
|
||||
"$lm = \ue055;"// Telugu Length Mark
|
||||
"$ailm=\ue056;"// AI Length Mark
|
||||
"$aulm=\ue057;"// AU Length Mark
|
||||
//Urdu compatibility forms
|
||||
"$uka=\ue058;"
|
||||
"$ukha=\ue059;"
|
||||
"$ugha=\ue05a;"
|
||||
"$ujha=\ue05b;"
|
||||
"$uddha=\ue05c;"
|
||||
"$udha=\ue05d;"
|
||||
"$ufa=\ue05e;"
|
||||
"$uya=\ue05f;"
|
||||
"$wrr=\ue060;"
|
||||
"$wll=\ue061;"
|
||||
"$rrh=\ue062;"
|
||||
"$llh=\ue063;"
|
||||
"$danda=\ue064;"
|
||||
"$doubleDanda=\ue065;"
|
||||
"$zero=\ue066;" // DIGIT ZERO
|
||||
"$one=\ue067;" // DIGIT ONE
|
||||
"$two=\ue068;" // DIGIT TWO
|
||||
"$three=\ue069;" // DIGIT THREE
|
||||
"$four=\ue06a;" // DIGIT FOUR
|
||||
"$five=\ue06b;" // DIGIT FIVE
|
||||
"$six=\ue06c;" // DIGIT SIX
|
||||
"$seven=\ue06d;" // DIGIT SEVEN
|
||||
"$eight=\ue06e;" // DIGIT EIGHT
|
||||
"$nine=\ue06f;" // DIGIT NINE
|
||||
// For all other scripts
|
||||
"$ecp0=\ue070;"
|
||||
"$ecp1=\ue071;"
|
||||
"$ecp2=\ue072;"
|
||||
"$ecp3=\ue073;"
|
||||
"$ecp4=\ue074;"
|
||||
"$ecp5=\ue075;"
|
||||
"$ecp6=\ue076;"
|
||||
"$ecp7=\ue077;"
|
||||
"$ecp8=\ue078;"
|
||||
"$ecp9=\ue079;"
|
||||
"$ecpA=\ue07a;"
|
||||
"$ecpB=\ue07b;"
|
||||
"$ecpC=\ue07c;"
|
||||
"$ecpD=\ue07d;"
|
||||
"$ecpE=\ue07e;"
|
||||
"$ecpF=\ue07f;"
|
||||
// \u0970>; # UNMAPPED ABBREVIATION SIGN
|
||||
"$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
|
||||
"$depVowelBelow=[\ue041-\ue044];"
|
||||
"$endThing=[$danda$doubleDanda];"
|
||||
// $x was originally called '&'; $z was '%'
|
||||
"$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];"
|
||||
"$z=[bcdfghjklmnpqrstvwxyz];"
|
||||
//DEBUG: $consonants=[$ka-$ha $virama];
|
||||
"\u0315 > $avagraha;"
|
||||
"'-'\u0303>$chandrabindu$anusvara;"
|
||||
"'-'m\u0310>$chandrabindu;"
|
||||
"'-'h\u0323>$visarga;"
|
||||
"mm>$anusvara;"
|
||||
"x>$visarga;"
|
||||
"aa>$waa;"
|
||||
"a\u0304>$waa;"
|
||||
"ai>$wai;"
|
||||
"au>$wau;"
|
||||
"ii>$wii;"
|
||||
"i\u0304>$wii;"
|
||||
"i>$wi;"
|
||||
"uu>$wuu;"
|
||||
"u\u0304>$wuu;"
|
||||
"u>$wu;"
|
||||
"rrh>$wrr;"
|
||||
"r\u0325\u0304>$wrr;"
|
||||
"rh>$wr;"
|
||||
"r\u0325>$wr;"
|
||||
"l\u0325\u0304>$wll;"
|
||||
"lh>$wl;l\u0325>$wl;"
|
||||
"e\u0304>$we;"
|
||||
"o\u0304>$wo;"
|
||||
"a>$wa;"
|
||||
"e\u0306>$wce;"
|
||||
"o\u0306>$wco;"
|
||||
"e>$wse;"
|
||||
"o>$wso;"
|
||||
"n}na > $na|$virama;"
|
||||
"n\u0307}[kg] > $anusvara;"
|
||||
"n\u0307}n\u0303 > $anusvara;"
|
||||
"n\u0304}[cj] > $anusvara;"
|
||||
"n\u0304}n\u0307 > $anusvara;"
|
||||
"n\u0323}[tdn]\u0323 > $anusvara;"
|
||||
"n}[tdn] > $anusvara;"
|
||||
"m}[pbm] > $anusvara;"
|
||||
"n} [yrlvsh] > $anusvara;"
|
||||
"'-'m\u0307 > $anusvara;"
|
||||
"y\u0307>$uya|$virama;"
|
||||
"l\u0331>$ela|$virama;"
|
||||
"n\u0331>$ena|$virama;"
|
||||
"n\u0307>$nga|$virama;"
|
||||
"n\u0303>$nya|$virama;"
|
||||
"n\u0323>$nna|$virama;"
|
||||
"t\u0323h>$ttha|$virama;"
|
||||
"t\u0323>$tta|$virama;"
|
||||
"r\u0323h>$udha|$virama;"
|
||||
"r\u0323>$uddha|$virama;"
|
||||
"d\u0323h>$ddha|$virama;"
|
||||
"d\u0323>$dda|$virama;"
|
||||
"kh>$kha|$virama;"
|
||||
"k>$ka|$virama;"
|
||||
"q>$ka|$virama;"
|
||||
"gh>$gha|$virama;"
|
||||
"g>$ga|$virama;"
|
||||
"ch>$cha|$virama;"
|
||||
"c>$ca|$virama;"
|
||||
"jh>$jha|$virama;"
|
||||
"j>$ja|$virama;"
|
||||
"ny>$nya|$virama;"
|
||||
"tth>$ttha|$virama;"
|
||||
"ddh>$ddha|$virama;"
|
||||
"th>$tha|$virama;"
|
||||
"t>$ta|$virama;"
|
||||
"dh>$dha|$virama;"
|
||||
"d>$da|$virama;"
|
||||
"n>$na|$virama;"
|
||||
"ph>$pha|$virama;"
|
||||
"p>$pa|$virama;"
|
||||
"bh>$bha|$virama;"
|
||||
"b>$ba|$virama;"
|
||||
"m>$ma|$virama;"
|
||||
"y>$ya|$virama;"
|
||||
"r>$ra|$virama;"
|
||||
"l\u0323a>$lla;"
|
||||
"l>$la|$virama;"
|
||||
"v>$va|$virama;"
|
||||
"f>$va|$virama;"
|
||||
"w>$va|$virama;"
|
||||
"sh>$sha|$virama;"
|
||||
"ss>$ssa|$virama;"
|
||||
"s\u0323>$ssa|$virama;"
|
||||
"s\u0301>$sha|$virama;"
|
||||
"s>$sa|$virama;"
|
||||
"z>$sa|$virama;"
|
||||
"h>$ha|$virama;"
|
||||
"'.'>$danda;"
|
||||
"$danda'.'>$doubleDanda;"
|
||||
"$depVowelAbove{'~'>$anusvara;"
|
||||
"$depVowelBelow{'~'>$chandrabindu;"
|
||||
"$virama aa>$aa;"
|
||||
"$virama a\u0304>$aa;"
|
||||
"$virama ai>$ai;"
|
||||
"$virama au>$au;"
|
||||
"$virama ii>$ii;"
|
||||
"$virama i\u0304>$ii;"
|
||||
"$virama i>$i;"
|
||||
"$virama uu>$uu;"
|
||||
"$virama u\u0304>$uu;"
|
||||
"$virama u>$u;"
|
||||
"$virama rrh>$rrh;"
|
||||
"$virama r\u0325\u0304>$rrh;"
|
||||
"$virama rh>$rh;"
|
||||
"$virama r\u0325a>$rh;"
|
||||
"$virama r\u0325>$rh;"
|
||||
"$virama l\u0325\u0304>$llh;"
|
||||
"$virama lh>$lh;"
|
||||
"$virama l\u0325>$lh;"
|
||||
"$virama e\u0304>$e;"
|
||||
"$virama o\u0304>$o;"
|
||||
"$virama a>;"
|
||||
"$virama e\u0306>$ce;"
|
||||
"$virama o\u0306>$co;"
|
||||
"$virama e>$se;"
|
||||
"$virama o>$so;"
|
||||
"$virama''aa>$waa;"
|
||||
"$virama''a\u0304>$waa;"
|
||||
"$virama''ai>$wai;"
|
||||
"$virama''au>$wau;"
|
||||
"$virama''ii>$wii;"
|
||||
"$virama''i\u0304>$wii;"
|
||||
"$virama''i>$wi;"
|
||||
"$virama''uu>$wuu;"
|
||||
"$virama''u\u0304>$wuu;"
|
||||
"$virama''u>$wu;"
|
||||
"$virama''rrh>$wrr;"
|
||||
"$virama''r\u0325\u0304>$wrr;"
|
||||
"$virama''rh>$wr;"
|
||||
"$virama''r\u0325>$wr;"
|
||||
"$virama''l\u0325\u0304>$wll;"
|
||||
"$virama''lh>$wl;"
|
||||
"$virama''l\u0325>$wl;"
|
||||
"$virama''e\u0304>$we;"
|
||||
"$virama''o\u0304>$wo;"
|
||||
"$virama''a>$wa;"
|
||||
"$virama''e\u0306>$wce;"
|
||||
"$virama''o\u0306>$wco;"
|
||||
"$virama''e>$wse;"
|
||||
"$virama''o>$wso;"
|
||||
"$virama } [$z] > $virama;"
|
||||
"$virama } ' ' > $virama ;"
|
||||
"$virama}$endThing>;"
|
||||
"0>$zero;"
|
||||
"1>$one;"
|
||||
"2>$two;"
|
||||
"3>$three;"
|
||||
"4>$four;"
|
||||
"5>$five;"
|
||||
"6>$six;"
|
||||
"7>$seven;"
|
||||
"8>$eight;"
|
||||
"9>$nine;"
|
||||
"''>;"
|
||||
//:: NFC (NFD) ;
|
||||
}
|
||||
}
|
528
icu4c/data/translit_Latin_Jamo.txt
Normal file
528
icu4c/data/translit_Latin_Jamo.txt
Normal file
@ -0,0 +1,528 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Latin_Jamo.utf8.txt
|
||||
// Date: Thu Oct 25 22:17:22 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Latin_Jamo
|
||||
|
||||
translit_Latin_Jamo {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Latin-Jamo
|
||||
|
||||
// Transliteration from Latin characters to Korean script is done in
|
||||
// two steps: Latin to Jamo, then Jamo to Hangul. The Jamo-Hangul
|
||||
// transliteration is done algorithmically following Unicode 3.0
|
||||
// section 3.11. This file implements the Latin to Jamo
|
||||
// transliteration using rules.
|
||||
|
||||
// Jamo occupy the block 1100-11FF. Within this block there are three
|
||||
// groups of characters: initial consonants or choseong (I), medial
|
||||
// vowels or jungseong (M), and trailing consonants or jongseong (F).
|
||||
// Standard Korean syllables are of the form I+M+F*.
|
||||
|
||||
// Section 3.11 describes the use of 'filler' jamo to convert
|
||||
// nonstandard syllables to standard form: the choseong filler 115F and
|
||||
// the jungseong filler 1160. In this transliterator, we will not use
|
||||
// 115F or 1160.
|
||||
|
||||
// We will, however, insert two 'null' jamo to make foreign words
|
||||
// conform to Korean syllable structure. These are the null initial
|
||||
// consonant 110B (IEUNG) and the null vowel 1173 (EU). In Latin text,
|
||||
// we will use the hyphen in order to disambiguate strings,
|
||||
// e.g. "kan-ggan" (initial GG) vs. "kanggan" (final NG + initial G).
|
||||
|
||||
// We will not use all of the characters in the jamo block. We will
|
||||
// only use the 19 initials, 21 medials, and 27 finals possessing a
|
||||
// jamo short name as defined in section 4.4 of the Unicode book.
|
||||
|
||||
// Rules of thumb. These guidelines provide the basic framework
|
||||
// for the rules. They are phrased in terms of Latin-Jamo transliteration.
|
||||
// The Jamo-Latin rules derive from these, since the Jamo-Latin rules are
|
||||
// just context-free transliteration of jamo to corresponding short names,
|
||||
// with the addition of hyphens to maintain round-trip integrity
|
||||
// in the context of the Latin-Jamo rules.
|
||||
|
||||
// A sequence of vowels:
|
||||
// - Take the longest sequence you can. If there are too many, or you don't
|
||||
// have a starting consonant, introduce a 110B as necessary.
|
||||
|
||||
// A sequence of consonants.
|
||||
// - First join the double consonants: G + G -> GG
|
||||
// - In the remaining list,
|
||||
// -- If there is no preceding vowel, take the first consonant, and insert EU
|
||||
// after it. Continue with the rest of the consonants.
|
||||
// -- If there is one consonant, attach to the following vowel
|
||||
// -- If there are two consonants and a following vowel, attach one to the
|
||||
// preceding vowel, and one to the following vowel.
|
||||
// -- If there are more than two consonants, join the first two together if you
|
||||
// can: L + G => LG
|
||||
// -- If you still end up with more than 2 consonants, insert EU after the
|
||||
// first one, and continue with the rest of the consonants.
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Variables
|
||||
|
||||
// Some latin consonants or consonant pairs only occur as initials, and
|
||||
// some only as finals, but some occur as both. This makes some jamo
|
||||
// consonants ambiguous when transliterated into latin.
|
||||
// Initial only: IEUNG BB DD JJ R
|
||||
// Final only: BS GS L LB LG LH LM LP LS LT NG NH NJ
|
||||
// Initial and Final: B C D G GG H J K M N P S SS T
|
||||
|
||||
"$Gi = \u1100;"
|
||||
"$GGi = \u1101;"
|
||||
"$Ni = \u1102;"
|
||||
"$Di = \u1103;"
|
||||
"$DD = \u1104;"
|
||||
"$R = \u1105;"
|
||||
"$Mi = \u1106;"
|
||||
"$Bi = \u1107;"
|
||||
"$BB = \u1108;"
|
||||
"$Si = \u1109;"
|
||||
"$SSi = \u110A;"
|
||||
"$IEUNG = \u110B;" // null initial, inserted during Latin-Jamo
|
||||
"$Ji = \u110C;"
|
||||
"$JJ = \u110D;"
|
||||
"$Ci = \u110E;"
|
||||
"$Ki = \u110F;"
|
||||
"$Ti = \u1110;"
|
||||
"$Pi = \u1111;"
|
||||
"$Hi = \u1112;"
|
||||
|
||||
"$A = \u1161;"
|
||||
"$AE = \u1162;"
|
||||
"$YA = \u1163;"
|
||||
"$YAE = \u1164;"
|
||||
"$EO = \u1165;"
|
||||
"$E = \u1166;"
|
||||
"$YEO = \u1167;"
|
||||
"$YE = \u1168;"
|
||||
"$O = \u1169;"
|
||||
"$WA = \u116A;"
|
||||
"$WAE = \u116B;"
|
||||
"$OE = \u116C;"
|
||||
"$YO = \u116D;"
|
||||
"$U = \u116E;"
|
||||
"$WEO = \u116F;"
|
||||
"$WE = \u1170;"
|
||||
"$WI = \u1171;"
|
||||
"$YU = \u1172;"
|
||||
"$EU = \u1173;" // null medial, inserted during Latin-Jamo
|
||||
"$YI = \u1174;"
|
||||
"$I = \u1175;"
|
||||
|
||||
"$Gf = \u11A8;"
|
||||
"$GGf = \u11A9;"
|
||||
"$GS = \u11AA;"
|
||||
"$Nf = \u11AB;"
|
||||
"$NJ = \u11AC;"
|
||||
"$NH = \u11AD;"
|
||||
"$Df = \u11AE;"
|
||||
"$L = \u11AF;"
|
||||
"$LG = \u11B0;"
|
||||
"$LM = \u11B1;"
|
||||
"$LB = \u11B2;"
|
||||
"$LS = \u11B3;"
|
||||
"$LT = \u11B4;"
|
||||
"$LP = \u11B5;"
|
||||
"$LH = \u11B6;"
|
||||
"$Mf = \u11B7;"
|
||||
"$Bf = \u11B8;"
|
||||
"$BS = \u11B9;"
|
||||
"$Sf = \u11BA;"
|
||||
"$SSf = \u11BB;"
|
||||
"$NG = \u11BC;"
|
||||
"$Jf = \u11BD;"
|
||||
"$Cf = \u11BE;"
|
||||
"$Kf = \u11BF;"
|
||||
"$Tf = \u11C0;"
|
||||
"$Pf = \u11C1;"
|
||||
"$Hf = \u11C2;"
|
||||
|
||||
"$jamoInitial = [\u1100-\u1112];"
|
||||
|
||||
"$jamoMedial = [\u1161-\u1175];"
|
||||
|
||||
"$latinInitial = [bcdghjkmnprst];"
|
||||
|
||||
// Any character in the latin transliteration of a medial
|
||||
"$latinMedial = [aeiouwy];"
|
||||
|
||||
// The last character of the latin transliteration of a medial
|
||||
"$latinMedialEnd = [aeiou];"
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Jamo-Latin
|
||||
|
||||
// Jamo to latin is relatively simple, since it is the latin that is
|
||||
// ambiguous. Most rules are straightforward, and we encode them below
|
||||
// as simple add-on back rules, e.g.:
|
||||
|
||||
// $jamoMedial {bs} > $BS;
|
||||
|
||||
// becomes
|
||||
|
||||
// $jamoMedial {bs} <> $BS;
|
||||
|
||||
// Furthermore, we don't care about the ordering for Jamo-Latin because
|
||||
// we are going from single characters, so we can very easily piggyback
|
||||
// on the Latin-Jamo.
|
||||
|
||||
// The main issue with Jamo-Latin is when to insert hyphens.
|
||||
// Hyphens are inserted to obtain correct round trip behavior. For
|
||||
// example, the sequence Ki A Gf Gi E, if transliterated to "kagge",
|
||||
// would then round trip to Ki A GGi E. To prevent this, we insert a
|
||||
// hyphen: "kag-ge". IMPORTANT: The need for hyphens depends
|
||||
// very specifically on the behavior of the Latin-Jamo rules. A change
|
||||
// in the Latin-Jamo behavior can completely change the way the
|
||||
// hyphen insertion must be done.
|
||||
|
||||
// First try to preserve actual hyphens in the jamo text by doubling
|
||||
// them. This fixes problems like:
|
||||
// (Di)(A)(Ji)(U)(NG)-(IEUNG)(YEO)(Nf)(Gi)(YEO)(L) => dajung-yeongyeol
|
||||
// => (Di)(A)(Ji)(U)(NG)(IEUNG)(YEO)(Nf)(Gi)(YEO)(L). This is optional
|
||||
// -- if we don't care about losing hyphens in the jamo, we can delete
|
||||
// this rule.
|
||||
|
||||
"'--' <> '-';"
|
||||
|
||||
// Triple consonants. For three consonants "axxx" we insert a
|
||||
// hyphen between the first and second "x" if XXf, Xf, and Xi all
|
||||
// exist, and we have A Xf XXi. This prevents the reverse
|
||||
// transliteration to A XXf Xi.
|
||||
|
||||
"'-' < $latinMedialEnd g {} $GGi;"
|
||||
"'-' < $latinMedialEnd s {} $SSi;"
|
||||
|
||||
// For vowels the rule is similar. If there is a vowel "ae" such that
|
||||
// "a" by itself and "e" by itself are vowels, then we want to map A E
|
||||
// to "a-e" so as not to round trip to AE. However, in the text Ki EO
|
||||
// IEUNG E we don't need to map to "keo-e". "keoe" suffices. For
|
||||
// vowels of the form "aei", both "ae" + "i" and "a" + "ei" must be
|
||||
// tested. NOTE: These rules used to have a left context of
|
||||
// $latinInitial instead of [^$latinMedial]. The problem with this is
|
||||
// sequences where an initial IEUNG is transliterated away:
|
||||
// (IEUNG)(A)(IEUNG)(EO) => aeo => (IEUNG)(AE)(IEUNG)(O)
|
||||
|
||||
"'-' < [^$latinMedial] [y w] e {} [$O $OE];"
|
||||
"'-' < [^$latinMedial] e {} [$O $OE $U];"
|
||||
"'-' < [^$latinMedial] [o a] {} [$E $EO $EU];"
|
||||
"'-' < [^$latinMedial] [w y] a {} [$E $EO $EU];"
|
||||
|
||||
// Similar to the above, but with an intervening $IEUNG.
|
||||
|
||||
"'-' < [^$latinMedial] [y w] e {} $IEUNG [$O $OE];"
|
||||
"'-' < [^$latinMedial] e {} $IEUNG [$O $OE $U];"
|
||||
"'-' < [^$latinMedial] [o a] {} $IEUNG [$E $EO $EU];"
|
||||
"'-' < [^$latinMedial] [w y] a {} $IEUNG [$E $EO $EU];"
|
||||
|
||||
// Single finals followed by IEUNG. The jamo sequence A Xf IEUNG E,
|
||||
// where Xi also exists, must be transliterated as "ax-e" to prevent
|
||||
// the round trip conversion to A Xi E.
|
||||
|
||||
"'-' < $latinMedialEnd b {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd c {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd d {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd g {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd h {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd j {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd k {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd m {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd n {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd p {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd s {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd t {} $IEUNG $jamoMedial;"
|
||||
|
||||
// Double finals followed by IEUNG. Similar to the single finals
|
||||
// followed by IEUNG. Any latin consonant pair X Y, between medials,
|
||||
// that we would split by Latin-Jamo, we must handle when it occurs as
|
||||
// part of A XYf IEUNG E, to prevent round trip conversion to A Xf Yi
|
||||
// E.
|
||||
|
||||
"'-' < $latinMedialEnd b s {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd g g {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd g s {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd l b {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd l g {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd l h {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd l m {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd l p {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd l s {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd l t {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd n g {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd n h {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd n j {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd s s {} $IEUNG $jamoMedial;"
|
||||
|
||||
// Split doubles. Text of the form A Xf Xi E, where XXi also occurs,
|
||||
// we transliterate as "ax-xe" to prevent round trip transliteration as
|
||||
// A XXi E.
|
||||
|
||||
"'-' < $latinMedialEnd b {} $Bi $jamoMedial;"
|
||||
"'-' < $latinMedialEnd d {} $Di $jamoMedial;"
|
||||
"'-' < $latinMedialEnd j {} $Ji $jamoMedial;"
|
||||
"'-' < $latinMedialEnd g {} $Gi $jamoMedial;"
|
||||
"'-' < $latinMedialEnd s {} $Si $jamoMedial;"
|
||||
|
||||
// XYY. This corresponds to the XYY rule in Latin-Jamo. By default
|
||||
// Latin-Jamo maps "xyy" to Xf YYi, to keep YY together. As a result,
|
||||
// "xyy" forms that correspond to XYf Yi must be transliterated as
|
||||
// "xy-y".
|
||||
|
||||
"'-' < $latinMedialEnd b s {} [$Si $SSi];"
|
||||
"'-' < $latinMedialEnd g s {} [$Si $SSi];"
|
||||
"'-' < $latinMedialEnd l b {} [$Bi $BB];"
|
||||
"'-' < $latinMedialEnd l g {} [$Gi $GGi];"
|
||||
"'-' < $latinMedialEnd l s {} [$Si $SSi];"
|
||||
"'-' < $latinMedialEnd n g {} [$Gi $GGi];"
|
||||
"'-' < $latinMedialEnd n j {} [$Ji $JJ];"
|
||||
|
||||
// Deletion of IEUNG is handled below.
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Latin-Jamo
|
||||
|
||||
// [Basic, context-free Jamo-Latin rules are embedded here too. See
|
||||
// above.]
|
||||
|
||||
// Split digraphs: Text of the form 'axye', where 'xy' is a final
|
||||
// digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and
|
||||
// 'e' are medials, we want to transliterate this as A Xf Yi E rather
|
||||
// than A XYf IEUNG E. We do NOT include text of the form "axxe",
|
||||
// since that is handled differently below. These rules are generated
|
||||
// programmatically from the jamo data.
|
||||
|
||||
"$jamoMedial {b s} $latinMedial > $Bf $Si;"
|
||||
"$jamoMedial {g s} $latinMedial > $Gf $Si;"
|
||||
"$jamoMedial {l b} $latinMedial > $L $Bi;"
|
||||
"$jamoMedial {l g} $latinMedial > $L $Gi;"
|
||||
"$jamoMedial {l h} $latinMedial > $L $Hi;"
|
||||
"$jamoMedial {l m} $latinMedial > $L $Mi;"
|
||||
"$jamoMedial {l p} $latinMedial > $L $Pi;"
|
||||
"$jamoMedial {l s} $latinMedial > $L $Si;"
|
||||
"$jamoMedial {l t} $latinMedial > $L $Ti;"
|
||||
"$jamoMedial {n g} $latinMedial > $Nf $Gi;"
|
||||
"$jamoMedial {n h} $latinMedial > $Nf $Hi;"
|
||||
"$jamoMedial {n j} $latinMedial > $Nf $Ji;"
|
||||
|
||||
// Single consonants are initials: Text of the form 'axe', where 'x'
|
||||
// can be an initial or a final, and 'a' and 'e' are medials, we want
|
||||
// to transliterate as A Xi E rather than A Xf IEUNG E.
|
||||
|
||||
"$jamoMedial {b} $latinMedial > $Bi;"
|
||||
"$jamoMedial {c} $latinMedial > $Ci;"
|
||||
"$jamoMedial {d} $latinMedial > $Di;"
|
||||
"$jamoMedial {g} $latinMedial > $Gi;"
|
||||
"$jamoMedial {h} $latinMedial > $Hi;"
|
||||
"$jamoMedial {j} $latinMedial > $Ji;"
|
||||
"$jamoMedial {k} $latinMedial > $Ki;"
|
||||
"$jamoMedial {m} $latinMedial > $Mi;"
|
||||
"$jamoMedial {n} $latinMedial > $Ni;"
|
||||
"$jamoMedial {p} $latinMedial > $Pi;"
|
||||
"$jamoMedial {s} $latinMedial > $Si;"
|
||||
"$jamoMedial {t} $latinMedial > $Ti;"
|
||||
|
||||
// Doubled initials. The sequence "axxe", where XX exists as an initial
|
||||
// (XXi), and also Xi and Xf exist (true of all digraphs XX), we want
|
||||
// to transliterate as A XXi E, rather than split to A Xf Xi E.
|
||||
|
||||
"$jamoMedial {b b} $latinMedial > $BB;"
|
||||
"$jamoMedial {d d} $latinMedial > $DD;"
|
||||
"$jamoMedial {j j} $latinMedial > $JJ;"
|
||||
"$jamoMedial {g g} $latinMedial > $GGi;"
|
||||
"$jamoMedial {s s} $latinMedial > $SSi;"
|
||||
|
||||
// XYY. Because doubled consonants bind more strongly than XY
|
||||
// consonants, we must handle the sequence "axyy" specially. Here XYf
|
||||
// and YYi must exist. In these cases, we map to Xf YYi rather than
|
||||
// XYf.
|
||||
|
||||
"$jamoMedial {b} s s > $Bf;"
|
||||
"$jamoMedial {g} s s > $Gf;"
|
||||
"$jamoMedial {l} b b > $L;"
|
||||
"$jamoMedial {l} g g > $L;"
|
||||
"$jamoMedial {l} s s > $L;"
|
||||
"$jamoMedial {n} g g > $Nf;"
|
||||
"$jamoMedial {n} j j > $Nf;"
|
||||
|
||||
// Finals: Attach consonant with preceding medial to preceding medial.
|
||||
// Do this BEFORE mapping consonants to initials. Longer keys must
|
||||
// precede shorter keys that they start with, e.g., the rule for 'bs'
|
||||
// must precede 'b'.
|
||||
|
||||
// [BASIC Jamo-Latin FINALS handled here. Order irrelevant within this
|
||||
// block for Jamo-Latin.]
|
||||
|
||||
"$jamoMedial {bs} <> $BS;"
|
||||
"$jamoMedial {b} <> $Bf;"
|
||||
"$jamoMedial {c} <> $Cf;"
|
||||
"$jamoMedial {d} <> $Df;"
|
||||
"$jamoMedial {gg} <> $GGf;"
|
||||
"$jamoMedial {gs} <> $GS;"
|
||||
"$jamoMedial {g} <> $Gf;"
|
||||
"$jamoMedial {h} <> $Hf;"
|
||||
"$jamoMedial {j} <> $Jf;"
|
||||
"$jamoMedial {k} <> $Kf;"
|
||||
"$jamoMedial {lb} <> $LB; $jamoMedial {lg} <> $LG;"
|
||||
"$jamoMedial {lh} <> $LH;"
|
||||
"$jamoMedial {lm} <> $LM;"
|
||||
"$jamoMedial {lp} <> $LP;"
|
||||
"$jamoMedial {ls} <> $LS;"
|
||||
"$jamoMedial {lt} <> $LT;"
|
||||
"$jamoMedial {l} <> $L;"
|
||||
"$jamoMedial {m} <> $Mf;"
|
||||
"$jamoMedial {ng} <> $NG;"
|
||||
"$jamoMedial {nh} <> $NH;"
|
||||
"$jamoMedial {nj} <> $NJ;"
|
||||
"$jamoMedial {n} <> $Nf;"
|
||||
"$jamoMedial {p} <> $Pf;"
|
||||
"$jamoMedial {ss} <> $SSf;"
|
||||
"$jamoMedial {s} <> $Sf;"
|
||||
"$jamoMedial {t} <> $Tf;"
|
||||
|
||||
// Initials: Attach single consonant to following medial. Do this
|
||||
// AFTER mapping finals. Longer keys must precede shorter keys that
|
||||
// they start with, e.g., the rule for 'gg' must precede 'g'.
|
||||
|
||||
// [BASIC Jamo-Latin INITIALS handled here. Order irrelevant within
|
||||
// this block for Jamo-Latin.]
|
||||
|
||||
"{gg} $latinMedial <> $GGi;"
|
||||
"{g} $latinMedial <> $Gi;"
|
||||
"{n} $latinMedial <> $Ni;"
|
||||
"{dd} $latinMedial <> $DD;"
|
||||
"{d} $latinMedial <> $Di;"
|
||||
"{r} $latinMedial <> $R;"
|
||||
"{m} $latinMedial <> $Mi;"
|
||||
"{bb} $latinMedial <> $BB;"
|
||||
"{b} $latinMedial <> $Bi;"
|
||||
"{ss} $latinMedial <> $SSi;"
|
||||
"{s} $latinMedial <> $Si;"
|
||||
"{jj} $latinMedial <> $JJ;"
|
||||
"{j} $latinMedial <> $Ji;"
|
||||
"{c} $latinMedial <> $Ci;"
|
||||
"{k} $latinMedial <> $Ki;"
|
||||
"{t} $latinMedial <> $Ti;"
|
||||
"{p} $latinMedial <> $Pi;"
|
||||
"{h} $latinMedial <> $Hi;"
|
||||
|
||||
// 'r' in final position. Because of the equivalency of the 'l' and
|
||||
// 'r' jamo (the glyphs are the same), we try to provide the same
|
||||
// equivalency in Latin-Jamo. The 'l' to 'r' conversion is handled
|
||||
// below. If we see an 'r' in an apparent final position, treat it
|
||||
// like 'l'. For example, "karka" => Ki A R EU Ki A without this rule.
|
||||
// Instead, we want Ki A L Ki A.
|
||||
|
||||
"$jamoMedial {r} $latinInitial > | l;"
|
||||
|
||||
// Initial + Final: If we match the next rule, we have initial then
|
||||
// final consonant with no intervening medial. We insert the null
|
||||
// vowel BEFORE it to create a well-formed syllable. (In the next rule
|
||||
// we insert a null vowel AFTER an anomalous initial.)
|
||||
|
||||
"$jamoInitial {} [bcdghjklmnpst] > $EU;"
|
||||
|
||||
// Initial + X: This block matches an initial consonant not followed by
|
||||
// a medial. We insert the null vowel after it. We handle double
|
||||
// initials explicitly here; for single initial consonants we insert EU
|
||||
// (as Latin) after them and let standard rules do the rest.
|
||||
|
||||
// BREAKS ROUND TRIP INTEGRITY
|
||||
|
||||
"gg > $GGi $EU;"
|
||||
"dd > $DD $EU;"
|
||||
"bb > $BB $EU;"
|
||||
"ss > $SSi $EU;"
|
||||
"jj > $JJ $EU;"
|
||||
|
||||
"([bcdghjkmnprst]) > | $1 eu;"
|
||||
|
||||
// X + Final: Finally we have to deal with a consonant that can only be
|
||||
// interpreted as a final (not an initial) and which is preceded
|
||||
// neither by an initial nor a medial. It is the start of the
|
||||
// syllable, but cannot be. Most of these will already be handled by
|
||||
// the above rules. 'bs' splits into Bi EU Sf. Similar for 'gs' 'ng'
|
||||
// 'nh' 'nj'. The only problem is 'l' and digraphs starting with 'l'.
|
||||
// For this isolated case, we could add a null initial and medial,
|
||||
// which would give "la" => IEUNG EU L IEUNG A, for example. A more
|
||||
// economical solution is to transliterate isolated "l" (that is,
|
||||
// initial "l") to "r". (Other similar conversions of consonants that
|
||||
// occur neither as initials nor as finals are handled below.)
|
||||
|
||||
"l > | r;"
|
||||
|
||||
// Medials. If a medial is preceded by an initial, then we proceed
|
||||
// normally. As usual, longer keys must precede shorter ones.
|
||||
|
||||
// [BASIC Jamo-Latin MEDIALS handled here. Order irrelevant within
|
||||
// this block for Jamo-Latin.]
|
||||
|
||||
"$jamoInitial {ae} <> $AE;"
|
||||
"$jamoInitial {a} <> $A;"
|
||||
"$jamoInitial {eo} <> $EO;"
|
||||
"$jamoInitial {eu} <> $EU;"
|
||||
"$jamoInitial {e} <> $E;"
|
||||
"$jamoInitial {i} <> $I;"
|
||||
"$jamoInitial {oe} <> $OE;"
|
||||
"$jamoInitial {o} <> $O;"
|
||||
"$jamoInitial {u} <> $U;"
|
||||
"$jamoInitial {wae} <> $WAE;"
|
||||
"$jamoInitial {wa} <> $WA;"
|
||||
"$jamoInitial {weo} <> $WEO;"
|
||||
"$jamoInitial {we} <> $WE;"
|
||||
"$jamoInitial {wi} <> $WI;"
|
||||
"$jamoInitial {yae} <> $YAE;"
|
||||
"$jamoInitial {ya} <> $YA;"
|
||||
"$jamoInitial {yeo} <> $YEO;"
|
||||
"$jamoInitial {ye} <> $YE;"
|
||||
"$jamoInitial {yi} <> $YI;"
|
||||
"$jamoInitial {yo} <> $YO;"
|
||||
"$jamoInitial {yu} <> $YU;"
|
||||
|
||||
// We may see an anomalous isolated 'w' or 'y'. In that case, we
|
||||
// interpret it as 'wi' and 'yu', respectively.
|
||||
|
||||
// BREAKS ROUND TRIP INTEGRITY
|
||||
|
||||
"$jamoInitial {w} > | wi;"
|
||||
"$jamoInitial {y} > | yu;"
|
||||
|
||||
// Otherwise, insert a null consonant IEUNG before the medial (which is
|
||||
// still an untransliterated latin vowel).
|
||||
|
||||
"($latinMedial) > $IEUNG | $1;"
|
||||
|
||||
// Convert non-jamo latin consonants to equivalents. These occur as
|
||||
// neither initials nor finals in jamo. 'l' occurs as a final, but not
|
||||
// an initial; it is handled above. The following letters (left hand
|
||||
// side) will never be output by Jamo-Latin.
|
||||
|
||||
"f > | p;"
|
||||
"q > | k;"
|
||||
"v > | b;"
|
||||
"x > | ks;"
|
||||
"z > | s;"
|
||||
|
||||
// Delete hyphens (Latin-Jamo).
|
||||
|
||||
"'-' > ;"
|
||||
|
||||
// Delete null consonants (Jamo-Latin). Do NOT delete null EU vowels,
|
||||
// since these may also occur in text.
|
||||
|
||||
"< $IEUNG;"
|
||||
|
||||
// eof
|
||||
}
|
||||
}
|
470
icu4c/data/translit_Latin_Katakana.txt
Normal file
470
icu4c/data/translit_Latin_Katakana.txt
Normal file
@ -0,0 +1,470 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Latin_Katakana.txt
|
||||
// Date: Thu Oct 25 22:17:22 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Latin_Katakana
|
||||
|
||||
translit_Latin_Katakana {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// $Source: /xsrl/Nsvn/icu/icu/data/Attic/translit_Latin_Katakana.txt,v $
|
||||
// $Date: 2001/10/26 05:41:16 $
|
||||
// $Revision: 1.1 $
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
"::NFD (NFC) ;"
|
||||
":: [:Latin:] Lower ();"
|
||||
|
||||
// Uses modified Hepburn. Small changes to make unambiguous.
|
||||
|
||||
// | Kunrei-shiki: Hepburn/MHepburn
|
||||
// | ------------------------------
|
||||
// | si: shi
|
||||
// | si ~ya: sha
|
||||
// | si ~yu: shu
|
||||
// | si ~yo: sho
|
||||
// | zi: ji
|
||||
// | zi ~ya: ja
|
||||
// | zi ~yu: ju
|
||||
// | zi ~yo: jo
|
||||
// | ti: chi
|
||||
// | ti ~ya: cha
|
||||
// | ti ~yu: chu
|
||||
// | ti ~yo: cho
|
||||
// | tu: tsu
|
||||
// | di: ji/dji
|
||||
// | du: zu/dzu
|
||||
// | hu: fu
|
||||
|
||||
// | For foreign words:
|
||||
// | -----------------
|
||||
// | se ~i si
|
||||
// | si ~e she
|
||||
// |
|
||||
// | ze ~i zi
|
||||
// | zi ~e je
|
||||
// |
|
||||
// | te ~i ti
|
||||
// | ti ~e che
|
||||
// | te ~u tu
|
||||
// |
|
||||
// | de ~i di
|
||||
// | de ~u du
|
||||
// | de ~i di
|
||||
// |
|
||||
// | he ~u: hu
|
||||
// | hu ~a fa
|
||||
// | hu ~i fi
|
||||
// | hu ~e fe
|
||||
// | hu ~o fo
|
||||
|
||||
// Most small forms are generated, but if necessary
|
||||
// explicit small forms are given with ~a, ~ya, etc.
|
||||
|
||||
//------------------------------------------------------
|
||||
// Variables
|
||||
|
||||
"$vowel = [aeiou] ;"
|
||||
"$macron = \u0304 ;"
|
||||
|
||||
// Variables used for doubled-consonants with tsu
|
||||
|
||||
"$kana = [\u3041-\u3094] ;"
|
||||
|
||||
"$voice = [\u3099\u309B];"
|
||||
"$semivoice = [\u309A\u309C];"
|
||||
|
||||
"$k_start = [カキクケコかきくけこ] ;"
|
||||
|
||||
"$s_start = [サシスセソさしすせそ] ;"
|
||||
|
||||
"$j_start = [シし] $voice ;"
|
||||
|
||||
"$t_start = [タチツテトたちつてと] ;"
|
||||
|
||||
"$n_start = [ナニヌネノンなにぬねの] ;"
|
||||
|
||||
"$h_start = [ハヒヘホはひへほ] ;"
|
||||
"$f_start = [フふ] ;"
|
||||
|
||||
"$m_start = [マミムメモまみむめも] ;"
|
||||
|
||||
"$y_start = [ヤユヨやゆよ] ;"
|
||||
|
||||
"$r_start = [ラリルレロらりるれろ] ;"
|
||||
|
||||
"$w_start = [ワヰヱヲわゐゑを] ;"
|
||||
|
||||
"$v_start = [ワヰヱヲ]゙ ;"
|
||||
|
||||
// if ン is followed by $n_quoter, then it needs an
|
||||
// apostrophe after its romaji form to disambiguate it.
|
||||
// e.g., ン ア != ナ, so represent as "n'a", not "na".
|
||||
|
||||
"$n_quoter = [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ;"
|
||||
|
||||
"$small_y = [ャィュェョ] ;"
|
||||
|
||||
"$iteration = \u309D ;"
|
||||
|
||||
//------------------------------------------------------
|
||||
// katakana rules
|
||||
|
||||
// Punctuation
|
||||
|
||||
"'.' <> 。;"
|
||||
"',' <> 、;"
|
||||
// ' ' } [a-z] > ; # delete spaces before latin
|
||||
// ' ' < [^' '\u30A0-\u30ff] {} ['\u30A0-\u30ff] ; #insert spaces before katakana
|
||||
|
||||
// Iteration Mark
|
||||
// Copy previous letter & marks
|
||||
|
||||
// TODO
|
||||
// | $1 $1 < ($kana [[:M:]$voice$semivoice]?) $iteration
|
||||
|
||||
// Specials for katakana -- not shared with hiragana
|
||||
|
||||
"va <> ヷ ;"
|
||||
"vi <> ヸ ;"
|
||||
"ve <> ヹ ;"
|
||||
"vo <> ヺ ;"
|
||||
"'~ka' <> ヵ ;"
|
||||
"'~ke' <> ヶ ;"
|
||||
|
||||
// ~~~ begin shared rules ~~~
|
||||
|
||||
//special
|
||||
|
||||
"ya < '~'ャ;"
|
||||
"yi < '~'ィ ;"
|
||||
"yu < '~'ュ;"
|
||||
"ye < '~'ェ;"
|
||||
"yo < '~'ョ;"
|
||||
|
||||
//normal
|
||||
|
||||
"a <> ア ;"
|
||||
|
||||
"b | '~' < ヒ ゙} $small_y ;"
|
||||
"by } $vowel > ビ | '~y' ;"
|
||||
|
||||
"ba <> バ ;"
|
||||
"bi <> ビ ;"
|
||||
"bu <> ブ ;"
|
||||
"be <> ベ ;"
|
||||
"bo <> ボ ;"
|
||||
|
||||
"c } i > | s ;"
|
||||
"c } e > | s ;"
|
||||
|
||||
"da <> ダ ;"
|
||||
"di <> ディ ;"
|
||||
"du <> デゥ ;"
|
||||
"de <> デ ;"
|
||||
"do <> ド ;"
|
||||
"dzu <> ヅ ;"
|
||||
"dja < ヂャ ;"
|
||||
"dji'~i' < ヂィ ;" // liu
|
||||
"dju < ヂュ ;"
|
||||
"dje < ヂェ ;"
|
||||
"djo < ヂョ ;"
|
||||
"dji <> ヂ ;"
|
||||
"dj } $vowel > ヂ | '~y' ;"
|
||||
|
||||
// TODO: QUESTION: use ĵĴżŻ instead of dj, dz
|
||||
|
||||
"cha < チャ ;"
|
||||
"chi'~i' < チィ ;" // liu
|
||||
"chu < チュ ;"
|
||||
"che < チェ ;"
|
||||
"cho < チョ ;"
|
||||
"chi <> チ ;"
|
||||
"ch } $vowel > チ | '~y' ;"
|
||||
|
||||
"e <> エ ;"
|
||||
|
||||
"g | '~' < ギ} $small_y ;"
|
||||
"gy } $vowel > ギ | '~y' ;"
|
||||
|
||||
"ga <> ガ ;"
|
||||
"gi <> ギ ;"
|
||||
"gu <> グ ;"
|
||||
"ge <> ゲ ;"
|
||||
"go <> ゴ ;"
|
||||
|
||||
"i <> イ ;"
|
||||
|
||||
// j } $vowel > ジ | '~y' ;
|
||||
|
||||
"ja <> ジャ ;"
|
||||
"ji'~i' < ジィ ;" // liu
|
||||
"ju <> ジュ ;"
|
||||
"je <> ジェ ;"
|
||||
"jo <> ジョ ;"
|
||||
"ji <> ジ ;"
|
||||
|
||||
"k | '~' < キ} $small_y ;"
|
||||
"ky } $vowel > キ | '~y' ;"
|
||||
|
||||
"ka <> カ ;"
|
||||
"ki <> キ ;"
|
||||
"ku <> ク ;"
|
||||
"ke <> ケ ;"
|
||||
"ko <> コ ;"
|
||||
|
||||
"m | '~' < ミ} $small_y ;"
|
||||
"my } $vowel > ミ | '~y' ;"
|
||||
|
||||
"ma <> マ ;"
|
||||
"mi <> ミ ;"
|
||||
"mu <> ム ;"
|
||||
"me <> メ ;"
|
||||
"mo <> モ ;"
|
||||
|
||||
"m } [pbfv] > ン ;"
|
||||
|
||||
"n | '~' < ニ } $small_y ;"
|
||||
"ny } $vowel > ニ | '~y' ;"
|
||||
|
||||
"na <> ナ ;"
|
||||
"ni <> ニ ;"
|
||||
"nu <> ヌ ;"
|
||||
"ne <> ネ ;"
|
||||
"no <> ノ ;"
|
||||
|
||||
"o <> オ ;"
|
||||
|
||||
"p | '~' < ピ } $small_y ;"
|
||||
"py } $vowel > ピ | '~y' ;"
|
||||
|
||||
"pa <> パ ;"
|
||||
"pi <> ピ ;"
|
||||
"pu <> プ ;"
|
||||
"pe <> ペ ;"
|
||||
"po <> ポ ;"
|
||||
|
||||
"h | '~' < ヒ } $small_y ;"
|
||||
"hy } $vowel > ヒ | '~y' ;"
|
||||
|
||||
"ha <> ハ ;"
|
||||
"hi <> ヒ ;"
|
||||
"hu <> ヘゥ ;"
|
||||
"he <> ヘ ;"
|
||||
"ho <> ホ ;"
|
||||
|
||||
// f | '~' < フ } $small_y ;
|
||||
// f } $vowel > フ | '~' ;
|
||||
|
||||
"fa <> ファ ;"
|
||||
"fi <> フィ ;"
|
||||
"fe <> フェ ;"
|
||||
"fo <> フォ ;"
|
||||
"fu <> フ ;"
|
||||
|
||||
"r | '~' < リ } $small_y ;"
|
||||
"ry } $vowel > リ | '~y' ;"
|
||||
|
||||
"ra <> ラ ;"
|
||||
"ri <> リ ;"
|
||||
"ru <> ル ;"
|
||||
"re <> レ ;"
|
||||
"ro <> ロ ;"
|
||||
|
||||
"za <> ザ ;"
|
||||
"zi <> ゼィ ;"
|
||||
"zu <> ズ ;"
|
||||
"ze <> ゼ ;"
|
||||
"zo <> ゾ ;"
|
||||
|
||||
"sa <> サ ;"
|
||||
"si <> セィ ;"
|
||||
"su <> ス ;"
|
||||
"se <> セ ;"
|
||||
"so <> ソ ;"
|
||||
|
||||
"sha < シャ ;"
|
||||
"shi'~i' < シィ ;" // liu
|
||||
"shu < シュ ;"
|
||||
"she < シェ ;"
|
||||
"sho < ショ ;"
|
||||
"shi <> シ ;"
|
||||
"sh } $vowel > シ | '~y' ;"
|
||||
|
||||
"ta <> タ ;"
|
||||
"ti <> ティ ;"
|
||||
"tu <> テゥ ;"
|
||||
"te <> テ ;"
|
||||
"to <> ト ;"
|
||||
|
||||
"tsu <> ツ ;"
|
||||
|
||||
// v } $vowel > ヴ | '~' ;
|
||||
|
||||
//'v~a' < ヴァ ; # liu
|
||||
//'v~i' < ヴィ ; # liu
|
||||
//'v~e' < ヴェ ; # liu
|
||||
//'v~o' < ヴォ ; # liu
|
||||
"vu <> ヴ ;"
|
||||
|
||||
"u <> ウ ;"
|
||||
|
||||
// w } $vowel > ウ | '~' ;
|
||||
|
||||
"wa <> ワ ;"
|
||||
"wi <> ヰ ;"
|
||||
"wu > ウ ;"
|
||||
"we <> ヱ ;"
|
||||
"wo <> ヲ ;"
|
||||
|
||||
"ya <> ヤ ;"
|
||||
"yi > イ ;"
|
||||
"yu <> ユ ;"
|
||||
"ye > エ ;"
|
||||
"yo <> ヨ ;"
|
||||
|
||||
// double consonants
|
||||
|
||||
//specials
|
||||
"s } sh > ッ ;"
|
||||
"t } ch > ッ ;"
|
||||
|
||||
//voiced
|
||||
|
||||
"j } j <> ッ } $j_start ;"
|
||||
"b } b <> ッ } [$h_start$f_start] $voice;"
|
||||
"d } d <> ッ } $t_start $voice;"
|
||||
"g } g <> ッ } $k_start $voice;"
|
||||
"p } p <> ッ } [$h_start$f_start] $semivoice;"
|
||||
// v } v <> ッ } [ワヰウヱヲう] $voice ;
|
||||
"z } z <> ッ } $s_start $voice;"
|
||||
"v } v <> ッ } $v_start;"
|
||||
|
||||
// normal
|
||||
|
||||
"k } k <> ッ } $k_start ;"
|
||||
"m } m <> ッ } $m_start ;"
|
||||
"n } n <> ッ } $n_start ;"
|
||||
"h } h <> ッ } $h_start ;"
|
||||
"f } f <> ッ } $f_start ;"
|
||||
"r } r <> ッ } $r_start ;"
|
||||
"t } t <> ッ } $t_start ;"
|
||||
"s } s <> ッ } $s_start ;"
|
||||
|
||||
"w } w <> ッ } $w_start;"
|
||||
"y } y <> ッ } $y_start;"
|
||||
|
||||
// completeness
|
||||
"x } x > ッ ;"
|
||||
"c } k > ッ ;"
|
||||
"c } c > ッ ;"
|
||||
"c } q > ッ ;"
|
||||
"l } l > ッ ;"
|
||||
"q } q > ッ ;"
|
||||
// y } y > ッ ;
|
||||
// w } w > ッ ;
|
||||
|
||||
// prolonged vowel mark. this indicates a doubling of
|
||||
// the preceding vowel sound
|
||||
|
||||
//a < a { ー ; # liu
|
||||
//e < e { ー ; # liu
|
||||
//i < i { ー ; # liu
|
||||
//o < o { ー ; # liu
|
||||
//u < u { ー ; # liu
|
||||
|
||||
"$macron <> ー ;"
|
||||
|
||||
// small forms
|
||||
|
||||
"'~a' <> ァ ;"
|
||||
"'~i' <> ィ ;"
|
||||
"'~u' <> ゥ ;"
|
||||
"'~e' <> ェ ;"
|
||||
"'~o' <> ォ ;"
|
||||
"'~tsu' <> ッ ;"
|
||||
"'~wa' <> ヮ ;"
|
||||
"'~ya' <> ャ ;"
|
||||
"'~yi' > ィ ;"
|
||||
"'~yu' <> ュ ;"
|
||||
"'~ye' > ェ ;"
|
||||
"'~yo' <> ョ ;"
|
||||
|
||||
// h- rule: lengthens vowel if not followed by a vowel
|
||||
|
||||
"[aeiou] } h > ー ;"
|
||||
|
||||
// one-way latin->kana rules. these do not occur in
|
||||
// well-formed romaji representing actual japanese text.
|
||||
// their purpose is to make all romaji map to kana of
|
||||
// some sort.
|
||||
|
||||
// the following are not really necessary, but produce
|
||||
// slightly more natural results.
|
||||
|
||||
"cy > セィ ;"
|
||||
"dy > ディ ;"
|
||||
"hy > ヒ ;"
|
||||
"sy > セィ ;"
|
||||
"ty > ティ ;"
|
||||
"zy > ゼィ ;"
|
||||
|
||||
"h > ヘ ;"
|
||||
|
||||
// isolated consonants listed here so as not to mask
|
||||
// longer rules above.
|
||||
|
||||
"ch > チ;"
|
||||
"sh > シ ;"
|
||||
"dz > ヅ ;"
|
||||
"dj > ヂ;"
|
||||
|
||||
"b > ブ ;"
|
||||
"d > デ ;"
|
||||
"g > グ ;"
|
||||
"k > ク ;"
|
||||
"m > ム ;"
|
||||
"n'' < ン } $n_quoter ;"
|
||||
"n <> ン ;"
|
||||
"p > プ ;"
|
||||
"r > ル ;"
|
||||
"s > ス ;"
|
||||
"t > テ ;"
|
||||
"y > イ ;"
|
||||
"z > ズ ;"
|
||||
"v > ヴ ;"
|
||||
|
||||
"f > フ;"
|
||||
"j > ジ;"
|
||||
"w > ウ;"
|
||||
|
||||
// simple substitutions using backup
|
||||
|
||||
"c > | k ;"
|
||||
"l > | r ;"
|
||||
"q > | k ;"
|
||||
"x > | ks ;"
|
||||
|
||||
// ~~~ END shared rules ~~~
|
||||
|
||||
//------------------------------------------------------
|
||||
// Final cleanup
|
||||
|
||||
"'~' > ;" // delete stray tildes between letters
|
||||
"[:Katakana:] { '' } [:Latin:] > ;" // delete stray quotes between letters
|
||||
|
||||
":: NFC (NFD) ;"
|
||||
|
||||
// eof
|
||||
}
|
||||
}
|
115
icu4c/data/translit_Malayalam_InterIndic.txt
Normal file
115
icu4c/data/translit_Malayalam_InterIndic.txt
Normal file
@ -0,0 +1,115 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Malayalam_InterIndic.txt
|
||||
// Date: Thu Oct 25 22:17:22 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Malayalam_InterIndic
|
||||
|
||||
translit_Malayalam_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Malayalam_InterIndic.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Malayalam_InterIndic
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:06 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Malayalam-InterIndic
|
||||
//:: NFD (NFC) ;
|
||||
"\u0d02>\ue002;" // SIGN ANUSVARA
|
||||
"\u0d03>\ue003;" // SIGN VISARGA
|
||||
"\u0d05>\ue005;" // LETTER A
|
||||
"\u0d06>\ue006;" // LETTER AA
|
||||
"\u0d07>\ue007;" // LETTER I
|
||||
"\u0d08>\ue008;" // LETTER II
|
||||
"\u0d09>\ue009;" // LETTER U
|
||||
"\u0d0a>\ue00a;" // LETTER UU
|
||||
"\u0d0b>\ue00b;" // LETTER VOCALIC R
|
||||
"\u0d0c>\ue00c;" // LETTER VOCALIC L
|
||||
"\u0d0e>\ue00e;" // LETTER E
|
||||
"\u0d0f>\ue00f;" // LETTER EE
|
||||
"\u0d10>\ue010;" // LETTER AI
|
||||
"\u0d12>\ue012;" // LETTER O
|
||||
"\u0d13>\ue013;" // LETTER OO
|
||||
"\u0d14>\ue014;" // LETTER AU
|
||||
"\u0d15>\ue015;" // LETTER KA
|
||||
"\u0d16>\ue016;" // LETTER KHA
|
||||
"\u0d17>\ue017;" // LETTER GA
|
||||
"\u0d18>\ue018;" // LETTER GHA
|
||||
"\u0d19>\ue019;" // LETTER NGA
|
||||
"\u0d1a>\ue01a;" // LETTER CA
|
||||
"\u0d1b>\ue01b;" // LETTER CHA
|
||||
"\u0d1c>\ue01c;" // LETTER JA
|
||||
"\u0d1d>\ue01d;" // LETTER JHA
|
||||
"\u0d1e>\ue01e;" // LETTER NYA
|
||||
"\u0d1f>\ue01f;" // LETTER TTA
|
||||
"\u0d20>\ue020;" // LETTER TTHA
|
||||
"\u0d21>\ue021;" // LETTER DDA
|
||||
"\u0d22>\ue022;" // LETTER DDHA
|
||||
"\u0d23>\ue023;" // LETTER NNA
|
||||
"\u0d24>\ue024;" // LETTER TA
|
||||
"\u0d25>\ue025;" // LETTER THA
|
||||
"\u0d26>\ue026;" // LETTER DA
|
||||
"\u0d27>\ue027;" // LETTER DHA
|
||||
"\u0d28>\ue028;" // LETTER NA
|
||||
"\u0d2a>\ue02a;" // LETTER PA
|
||||
"\u0d2b>\ue02b;" // LETTER PHA
|
||||
"\u0d2c>\ue02c;" // LETTER BA
|
||||
"\u0d2d>\ue02d;" // LETTER BHA
|
||||
"\u0d2e>\ue02e;" // LETTER MA
|
||||
"\u0d2f>\ue02f;" // LETTER YA
|
||||
"\u0d30>\ue030;" // LETTER RA
|
||||
"\u0d31>\ue031;" // LETTER RRA
|
||||
"\u0d32>\ue032;" // LETTER LA
|
||||
"\u0d33>\ue033;" // LETTER LLA
|
||||
"\u0d34>\ue034;" // LETTER LLLA
|
||||
"\u0d35>\ue035;" // LETTER VA
|
||||
"\u0d36>\ue036;" // LETTER SHA
|
||||
"\u0d37>\ue037;" // LETTER SSA
|
||||
"\u0d38>\ue038;" // LETTER SA
|
||||
"\u0d39>\ue039;" // LETTER HA
|
||||
"\u0d3e>\ue03e;" // VOWEL SIGN AA
|
||||
"\u0d3f>\ue03f;" // VOWEL SIGN I
|
||||
"\u0d40>\ue040;" // VOWEL SIGN II
|
||||
"\u0d41>\ue041;" // VOWEL SIGN U
|
||||
"\u0d42>\ue042;" // VOWEL SIGN UU
|
||||
"\u0d43>\ue043;" // VOWEL SIGN VOCALIC R
|
||||
"\u0d46>\ue046;" // VOWEL SIGN E
|
||||
"\u0d47>\ue047;" // VOWEL SIGN EE
|
||||
"\u0d48>\ue048;" // VOWEL SIGN AI
|
||||
"\u0d4a>\ue04a;" // VOWEL SIGN O
|
||||
"\u0d4b>\ue04b;" // VOWEL SIGN OO
|
||||
"\u0d4c>\ue04c;" // VOWEL SIGN AU
|
||||
"\u0d4d>\ue04d;" // SIGN VIRAMA
|
||||
"\u0d57>\ue057;" // AU LENGTH MARK
|
||||
"\u0d60>\ue060;" // LETTER VOCALIC RR
|
||||
"\u0d61>\ue061;" // LETTER VOCALIC LL
|
||||
"\u0d66>\ue066;" // DIGIT ZERO
|
||||
"\u0d67>\ue067;" // DIGIT ONE
|
||||
"\u0d68>\ue068;" // DIGIT TWO
|
||||
"\u0d69>\ue069;" // DIGIT THREE
|
||||
"\u0d6a>\ue06a;" // DIGIT FOUR
|
||||
"\u0d6b>\ue06b;" // DIGIT FIVE
|
||||
"\u0d6c>\ue06c;" // DIGIT SIX
|
||||
"\u0d6d>\ue06d;" // DIGIT SEVEN
|
||||
"\u0d6e>\ue06e;" // DIGIT EIGHT
|
||||
"\u0d6f>\ue06f;" // DIGIT NINE
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
116
icu4c/data/translit_Oriya_InterIndic.txt
Normal file
116
icu4c/data/translit_Oriya_InterIndic.txt
Normal file
@ -0,0 +1,116 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Oriya_InterIndic.txt
|
||||
// Date: Thu Oct 25 22:17:22 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Oriya_InterIndic
|
||||
|
||||
translit_Oriya_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Oriya_InterIndic.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Oriya_InterIndic
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:07 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Oriya-InterIndic
|
||||
//:: NFD (NFC) ;
|
||||
"\u0b01>\ue001;" // SIGN CANDRABINDU
|
||||
"\u0b02>\ue002;" // SIGN ANUSVARA
|
||||
"\u0b03>\ue003;" // SIGN VISARGA
|
||||
"\u0b05>\ue005;" // LETTER A
|
||||
"\u0b06>\ue006;" // LETTER AA
|
||||
"\u0b07>\ue007;" // LETTER I
|
||||
"\u0b08>\ue008;" // LETTER II
|
||||
"\u0b09>\ue009;" // LETTER U
|
||||
"\u0b0a>\ue00a;" // LETTER UU
|
||||
"\u0b0b>\ue00b;" // LETTER VOCALIC R
|
||||
"\u0b0c>\ue00c;" // LETTER VOCALIC L
|
||||
"\u0b0f>\ue00f;" // LETTER E
|
||||
"\u0b10>\ue010;" // LETTER AI
|
||||
"\u0b13>\ue013;" // LETTER O
|
||||
"\u0b14>\ue014;" // LETTER AU
|
||||
"\u0b15>\ue015;" // LETTER KA
|
||||
"\u0b16>\ue016;" // LETTER KHA
|
||||
"\u0b17>\ue017;" // LETTER GA
|
||||
"\u0b18>\ue018;" // LETTER GHA
|
||||
"\u0b19>\ue019;" // LETTER NGA
|
||||
"\u0b1a>\ue01a;" // LETTER CA
|
||||
"\u0b1b>\ue01b;" // LETTER CHA
|
||||
"\u0b1c>\ue01c;" // LETTER JA
|
||||
"\u0b1d>\ue01d;" // LETTER JHA
|
||||
"\u0b1e>\ue01e;" // LETTER NYA
|
||||
"\u0b1f>\ue01f;" // LETTER TTA
|
||||
"\u0b20>\ue020;" // LETTER TTHA
|
||||
"\u0b21>\ue021;" // LETTER DDA
|
||||
"\u0b22>\ue022;" // LETTER DDHA
|
||||
"\u0b23>\ue023;" // LETTER NNA
|
||||
"\u0b24>\ue024;" // LETTER TA
|
||||
"\u0b25>\ue025;" // LETTER THA
|
||||
"\u0b26>\ue026;" // LETTER DA
|
||||
"\u0b27>\ue027;" // LETTER DHA
|
||||
"\u0b28>\ue028;" // LETTER NA
|
||||
"\u0b2a>\ue02a;" // LETTER PA
|
||||
"\u0b2b>\ue02b;" // LETTER PHA
|
||||
"\u0b2c>\ue02c;" // LETTER BA
|
||||
"\u0b2d>\ue02d;" // LETTER BHA
|
||||
"\u0b2e>\ue02e;" // LETTER MA
|
||||
"\u0b2f>\ue02f;" // LETTER YA
|
||||
"\u0b30>\ue030;" // LETTER RA
|
||||
"\u0b32>\ue032;" // LETTER LA
|
||||
"\u0b33>\ue033;" // LETTER LLA
|
||||
"\u0b36>\ue036;" // LETTER SHA
|
||||
"\u0b37>\ue037;" // LETTER SSA
|
||||
"\u0b38>\ue038;" // LETTER SA
|
||||
"\u0b39>\ue039;" // LETTER HA
|
||||
"\u0b3c>\ue03c;" // SIGN NUKTA
|
||||
"\u0b3d>\ue03d;" // SIGN AVAGRAHA
|
||||
"\u0b3e>\ue03e;" // VOWEL SIGN AA
|
||||
"\u0b3f>\ue03f;" // VOWEL SIGN I
|
||||
"\u0b40>\ue040;" // VOWEL SIGN II
|
||||
"\u0b41>\ue041;" // VOWEL SIGN U
|
||||
"\u0b42>\ue042;" // VOWEL SIGN UU
|
||||
"\u0b43>\ue043;" // VOWEL SIGN VOCALIC R
|
||||
"\u0b47>\ue047;" // VOWEL SIGN E
|
||||
"\u0b48>\ue048;" // VOWEL SIGN AI
|
||||
"\u0b4b>\ue04b;" // VOWEL SIGN O
|
||||
"\u0b4c>\ue04c;" // VOWEL SIGN AU
|
||||
"\u0b4d>\ue04d;" // SIGN VIRAMA
|
||||
"\u0b56>\ue056;" // AI LENGTH MARK
|
||||
"\u0b57>\ue057;" // AU LENGTH MARK
|
||||
"\u0b5c>\ue05c;" // LETTER RRA
|
||||
"\u0b5d>\ue05d;" // LETTER RHA
|
||||
"\u0b5f>\ue05f;" // LETTER YYA
|
||||
"\u0b60>\ue060;" // LETTER VOCALIC RR
|
||||
"\u0b61>\ue061;" // LETTER VOCALIC LL
|
||||
"\u0b66>\ue066;" // DIGIT ZERO
|
||||
"\u0b67>\ue067;" // DIGIT ONE
|
||||
"\u0b68>\ue068;" // DIGIT TWO
|
||||
"\u0b69>\ue069;" // DIGIT THREE
|
||||
"\u0b6a>\ue06a;" // DIGIT FOUR
|
||||
"\u0b6b>\ue06b;" // DIGIT FIVE
|
||||
"\u0b6c>\ue06c;" // DIGIT SIX
|
||||
"\u0b6d>\ue06d;" // DIGIT SEVEN
|
||||
"\u0b6e>\ue06e;" // DIGIT EIGHT
|
||||
"\u0b6f>\ue06f;" // DIGIT NINE
|
||||
"\u0b70>\ue070;" // ISSHAR
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
98
icu4c/data/translit_Tamil_InterIndic.txt
Normal file
98
icu4c/data/translit_Tamil_InterIndic.txt
Normal file
@ -0,0 +1,98 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Tamil_InterIndic.txt
|
||||
// Date: Thu Oct 25 22:17:22 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Tamil_InterIndic
|
||||
|
||||
translit_Tamil_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Tamil_InterIndic.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Tamil_InterIndic
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:07 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Tamil-InterIndic
|
||||
//:: NFD (NFC) ;
|
||||
"\u0b82>\ue002;" // SIGN ANUSVARA
|
||||
"\u0b83>\ue003;" // SIGN VISARGA
|
||||
"\u0b85>\ue005;" // LETTER A
|
||||
"\u0b86>\ue006;" // LETTER AA
|
||||
"\u0b87>\ue007;" // LETTER I
|
||||
"\u0b88>\ue008;" // LETTER II
|
||||
"\u0b89>\ue009;" // LETTER U
|
||||
"\u0b8a>\ue00a;" // LETTER UU
|
||||
"\u0b8e>\ue00e;" // LETTER E
|
||||
"\u0b8f>\ue00f;" // LETTER EE
|
||||
"\u0b90>\ue010;" // LETTER AI
|
||||
"\u0b92>\ue012;" // LETTER O
|
||||
"\u0b93>\ue013;" // LETTER OO
|
||||
"\u0b94>\ue014;" // LETTER AU
|
||||
"\u0b95>\ue015;" // LETTER KA
|
||||
"\u0b99>\ue019;" // LETTER NGA
|
||||
"\u0b9a>\ue01a;" // LETTER CA
|
||||
"\u0b9c>\ue01c;" // LETTER JA
|
||||
"\u0b9e>\ue01e;" // LETTER NYA
|
||||
"\u0b9f>\ue01f;" // LETTER TTA
|
||||
"\u0ba3>\ue023;" // LETTER NNA
|
||||
"\u0ba4>\ue024;" // LETTER TA
|
||||
"\u0ba8>\ue028;" // LETTER NA
|
||||
"\u0ba9>\ue029;" // LETTER NNNA
|
||||
"\u0baa>\ue02a;" // LETTER PA
|
||||
"\u0bae>\ue02e;" // LETTER MA
|
||||
"\u0baf>\ue02f;" // LETTER YA
|
||||
"\u0bb0>\ue030;" // LETTER RA
|
||||
"\u0bb1>\ue031;" // LETTER RRA
|
||||
"\u0bb2>\ue032;" // LETTER LA
|
||||
"\u0bb3>\ue033;" // LETTER LLA
|
||||
"\u0bb4>\ue034;" // LETTER LLLA
|
||||
"\u0bb5>\ue035;" // LETTER VA
|
||||
"\u0bb7>\ue037;" // LETTER SSA
|
||||
"\u0bb8>\ue038;" // LETTER SA
|
||||
"\u0bb9>\ue039;" // LETTER HA
|
||||
"\u0bbe>\ue03e;" // VOWEL SIGN AA
|
||||
"\u0bbf>\ue03f;" // VOWEL SIGN I
|
||||
"\u0bc0>\ue040;" // VOWEL SIGN II
|
||||
"\u0bc1>\ue041;" // VOWEL SIGN U
|
||||
"\u0bc2>\ue042;" // VOWEL SIGN UU
|
||||
"\u0bc6>\ue046;" // VOWEL SIGN E
|
||||
"\u0bc7>\ue047;" // VOWEL SIGN EE
|
||||
"\u0bc8>\ue048;" // VOWEL SIGN AI
|
||||
"\u0bca>\ue04a;" // VOWEL SIGN O
|
||||
"\u0bcb>\ue04b;" // VOWEL SIGN OO
|
||||
"\u0bcc>\ue04c;" // VOWEL SIGN AU
|
||||
"\u0bcd>\ue04d;" // SIGN VIRAMA
|
||||
"\u0bd7>\ue057;" // AU LENGTH MARK
|
||||
"\u0be7>\ue067;" // DIGIT ONE
|
||||
"\u0be8>\ue068;" // DIGIT TWO
|
||||
"\u0be9>\ue069;" // DIGIT THREE
|
||||
"\u0bea>\ue06a;" // DIGIT FOUR
|
||||
"\u0beb>\ue06b;" // DIGIT FIVE
|
||||
"\u0bec>\ue06c;" // DIGIT SIX
|
||||
"\u0bed>\ue06d;" // DIGIT SEVEN
|
||||
"\u0bee>\ue06e;" // DIGIT EIGHT
|
||||
"\u0bef>\ue06f;" // DIGIT NINE
|
||||
// \u0bf0>; # UNMAPPED Tamil-InterIndic: NUMBER TEN
|
||||
// \u0bf1>; # UNMAPPED Tamil-InterIndic: NUMBER ONE HUNDRED
|
||||
// \u0bf2>; # UNMAPPED Tamil-InterIndic: NUMBER ONE THOUSAND
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
117
icu4c/data/translit_Telugu_InterIndic.txt
Normal file
117
icu4c/data/translit_Telugu_InterIndic.txt
Normal file
@ -0,0 +1,117 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Telugu_InterIndic.txt
|
||||
// Date: Thu Oct 25 22:17:22 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Telugu_InterIndic
|
||||
|
||||
translit_Telugu_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Telugu_InterIndic.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Telugu_InterIndic
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:07 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Telugu-InterIndic
|
||||
//:: NFD (NFC) ;
|
||||
"\u0c01>\ue001;" // SIGN CANDRABINDU
|
||||
"\u0c02>\ue002;" // SIGN ANUSVARA
|
||||
"\u0c03>\ue003;" // SIGN VISARGA
|
||||
"\u0c05>\ue005;" // LETTER A
|
||||
"\u0c06>\ue006;" // LETTER AA
|
||||
"\u0c07>\ue007;" // LETTER I
|
||||
"\u0c08>\ue008;" // LETTER II
|
||||
"\u0c09>\ue009;" // LETTER U
|
||||
"\u0c0a>\ue00a;" // LETTER UU
|
||||
"\u0c0b>\ue00b;" // LETTER VOCALIC R
|
||||
"\u0c0c>\ue00c;" // LETTER VOCALIC L
|
||||
"\u0c0e>\ue00e;" // LETTER E
|
||||
"\u0c0f>\ue00f;" // LETTER EE
|
||||
"\u0c10>\ue010;" // LETTER AI
|
||||
"\u0c12>\ue012;" // LETTER O
|
||||
"\u0c13>\ue013;" // LETTER OO
|
||||
"\u0c14>\ue014;" // LETTER AU
|
||||
"\u0c15>\ue015;" // LETTER KA
|
||||
"\u0c16>\ue016;" // LETTER KHA
|
||||
"\u0c17>\ue017;" // LETTER GA
|
||||
"\u0c18>\ue018;" // LETTER GHA
|
||||
"\u0c19>\ue019;" // LETTER NGA
|
||||
"\u0c1a>\ue01a;" // LETTER CA
|
||||
"\u0c1b>\ue01b;" // LETTER CHA
|
||||
"\u0c1c>\ue01c;" // LETTER JA
|
||||
"\u0c1d>\ue01d;" // LETTER JHA
|
||||
"\u0c1e>\ue01e;" // LETTER NYA
|
||||
"\u0c1f>\ue01f;" // LETTER TTA
|
||||
"\u0c20>\ue020;" // LETTER TTHA
|
||||
"\u0c21>\ue021;" // LETTER DDA
|
||||
"\u0c22>\ue022;" // LETTER DDHA
|
||||
"\u0c23>\ue023;" // LETTER NNA
|
||||
"\u0c24>\ue024;" // LETTER TA
|
||||
"\u0c25>\ue025;" // LETTER THA
|
||||
"\u0c26>\ue026;" // LETTER DA
|
||||
"\u0c27>\ue027;" // LETTER DHA
|
||||
"\u0c28>\ue028;" // LETTER NA
|
||||
"\u0c2a>\ue02a;" // LETTER PA
|
||||
"\u0c2b>\ue02b;" // LETTER PHA
|
||||
"\u0c2c>\ue02c;" // LETTER BA
|
||||
"\u0c2d>\ue02d;" // LETTER BHA
|
||||
"\u0c2e>\ue02e;" // LETTER MA
|
||||
"\u0c2f>\ue02f;" // LETTER YA
|
||||
"\u0c30>\ue030;" // LETTER RA
|
||||
"\u0c31>\ue031;" // LETTER RRA
|
||||
"\u0c32>\ue032;" // LETTER LA
|
||||
"\u0c33>\ue033;" // LETTER LLA
|
||||
"\u0c35>\ue035;" // LETTER VA
|
||||
"\u0c36>\ue036;" // LETTER SHA
|
||||
"\u0c37>\ue037;" // LETTER SSA
|
||||
"\u0c38>\ue038;" // LETTER SA
|
||||
"\u0c39>\ue039;" // LETTER HA
|
||||
"\u0c3e>\ue03e;" // VOWEL SIGN AA
|
||||
"\u0c3f>\ue03f;" // VOWEL SIGN I
|
||||
"\u0c40>\ue040;" // VOWEL SIGN II
|
||||
"\u0c41>\ue041;" // VOWEL SIGN U
|
||||
"\u0c42>\ue042;" // VOWEL SIGN UU
|
||||
"\u0c43>\ue043;" // VOWEL SIGN VOCALIC R
|
||||
"\u0c44>\ue044;" // VOWEL SIGN VOCALIC RR
|
||||
"\u0c46>\ue046;" // VOWEL SIGN E
|
||||
"\u0c47>\ue047;" // VOWEL SIGN EE
|
||||
"\u0c48>\ue048;" // VOWEL SIGN AI
|
||||
"\u0c4a>\ue04a;" // VOWEL SIGN O
|
||||
"\u0c4b>\ue04b;" // VOWEL SIGN OO
|
||||
"\u0c4c>\ue04c;" // VOWEL SIGN AU
|
||||
"\u0c4d>\ue04d;" // SIGN VIRAMA
|
||||
"\u0c55>\ue055;" // LENGTH MARK
|
||||
"\u0c56>\ue056;" // AI LENGTH MARK
|
||||
"\u0c60>\ue060;" // LETTER VOCALIC RR
|
||||
"\u0c61>\ue061;" // LETTER VOCALIC LL
|
||||
"\u0c66>\ue066;" // DIGIT ZERO
|
||||
"\u0c67>\ue067;" // DIGIT ONE
|
||||
"\u0c68>\ue068;" // DIGIT TWO
|
||||
"\u0c69>\ue069;" // DIGIT THREE
|
||||
"\u0c6a>\ue06a;" // DIGIT FOUR
|
||||
"\u0c6b>\ue06b;" // DIGIT FIVE
|
||||
"\u0c6c>\ue06c;" // DIGIT SIX
|
||||
"\u0c6d>\ue06d;" // DIGIT SEVEN
|
||||
"\u0c6e>\ue06e;" // DIGIT EIGHT
|
||||
"\u0c6f>\ue06f;" // DIGIT NINE
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
@ -5,7 +5,7 @@
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: Transliterator_index.txt
|
||||
// Date: Fri Mar 2 12:50:49 2001
|
||||
// Date: Thu Oct 25 22:17:22 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
//--------------------------------------------------------------------
|
||||
@ -61,77 +61,70 @@ translit_index {
|
||||
|
||||
// Bidirectional rule files
|
||||
|
||||
{ "Fullwidth-Halfwidth", "file", "fullhalf", "FORWARD" },
|
||||
{ "Halfwidth-Fullwidth", "file", "fullhalf", "REVERSE" },
|
||||
{ "Fullwidth-Halfwidth", "file", "translit_Fullwidth_Halfwidth", "FORWARD" },
|
||||
{ "Halfwidth-Fullwidth", "file", "translit_Fullwidth_Halfwidth", "REVERSE" },
|
||||
|
||||
{ "Latin-Arabic", "file", "larabic", "FORWARD" },
|
||||
{ "Arabic-Latin", "file", "larabic", "REVERSE" },
|
||||
{ "Latin-Cyrillic", "file", "translit_Cyrillic_Latin", "REVERSE" },
|
||||
{ "Cyrillic-Latin", "file", "translit_Cyrillic_Latin", "FORWARD" },
|
||||
|
||||
{ "Latin-Cyrillic", "file", "lcyril", "FORWARD" },
|
||||
{ "Cyrillic-Latin", "file", "lcyril", "REVERSE" },
|
||||
{ "Latin-Greek", "file", "translit_Greek_Latin", "REVERSE" },
|
||||
{ "Greek-Latin", "file", "translit_Greek_Latin", "FORWARD" },
|
||||
|
||||
//{ "Latin-Devanagari", "file", "ldevan", "FORWARD" },
|
||||
//{ "Devanagari-Latin", "file", "ldevan", "REVERSE" },
|
||||
{ "LowerLatin-Jamo", "internal", "translit_Latin_Jamo", "FORWARD" },
|
||||
{ "Latin-Jamo", "alias", "Any-Lower;LowerLatin-Jamo", "" },
|
||||
{ "Jamo-Latin", "file", "translit_Latin_Jamo", "REVERSE" },
|
||||
|
||||
{ "Latin-Greek", "file", "lgreek", "FORWARD" },
|
||||
{ "Greek-Latin", "file", "lgreek", "REVERSE" },
|
||||
{ "Latin-Katakana", "file", "translit_Latin_Katakana", "FORWARD" },
|
||||
{ "Katakana-Latin", "file", "translit_Latin_Katakana", "REVERSE" },
|
||||
|
||||
{ "Latin-Hebrew", "file", "lhebrew", "FORWARD" },
|
||||
{ "Hebrew-Latin", "file", "lhebrew", "REVERSE" },
|
||||
{ "Latin-Hiragana", "file", "translit_Hiragana_Latin", "REVERSE" },
|
||||
{ "Hiragana-Latin", "file", "translit_Hiragana_Latin", "FORWARD" },
|
||||
|
||||
{ "Latin-Jamo", "file", "ljamo", "FORWARD" },
|
||||
{ "Jamo-Latin", "file", "ljamo", "REVERSE" },
|
||||
{ "Hiragana-Katakana", "file", "translit_Hiragana_Katakana", "FORWARD" },
|
||||
{ "Katakana-Hiragana", "file", "translit_Hiragana_Katakana", "REVERSE" },
|
||||
|
||||
{ "Latin-Kana", "file", "lkana", "FORWARD" },
|
||||
{ "Kana-Latin", "file", "lkana", "REVERSE" },
|
||||
{ "Any-Accents", "file", "translit_Any_Accents", "FORWARD" },
|
||||
{ "Accents-Any", "file", "translit_Any_Accents", "REVERSE" },
|
||||
|
||||
{ "Hiragana-Katakana", "file", "kana", "FORWARD" },
|
||||
{ "Katakana-Hiragana", "file", "kana", "REVERSE" },
|
||||
|
||||
{ "StraightQuotes-CurlyQuotes", "file", "quotes", "FORWARD" },
|
||||
{ "CurlyQuotes-StraightQuotes", "file", "quotes", "REVERSE" },
|
||||
{ "Any-Publishing", "file", "translit_Any_Publishing", "FORWARD" },
|
||||
{ "Publishing-Any", "file", "translit_Any_Publishing", "REVERSE" },
|
||||
|
||||
// One way rules (forward only)
|
||||
|
||||
// Java only: { "Han-Pinyin", "file", "-", "FORWARD" },
|
||||
// Java only: { "Kanji-English", "file", "-", "FORWARD" },
|
||||
// Java only: { "Kanji-OnRomaji", "file", "-", "FORWARD" },
|
||||
{ "KeyboardEscape-Latin1", "file", "kbdescl1", "FORWARD" },
|
||||
|
||||
// Replaced by algorithmic transliterator:
|
||||
// { "UnicodeName-UnicodeChar", "file", "ucname", "FORWARD" },
|
||||
|
||||
// Compound rules
|
||||
|
||||
/// TODO
|
||||
{ "Latin-Hangul", "alias", "[:Latin:];Latin-Jamo;[\u1100-\u11FF]NFC", "" },
|
||||
{ "Latin-Hangul", "alias", "[\p{Latin}];Latin-Jamo;[\u1100-\u11FF]NFC", "" },
|
||||
{ "Hangul-Latin", "alias", "[\uAC00-\uD7AF];NFD;Jamo-Latin", "" },
|
||||
|
||||
// Inter-Indic composed rules
|
||||
{ "Latin-InterIndic", "internal", "Latin_InterIndic", "FORWARD" },
|
||||
{ "Devanagari-InterIndic", "internal", "Devanagari_InterIndic", "FORWARD" },
|
||||
{ "Bengali-InterIndic", "internal", "Bengali_InterIndic", "FORWARD" },
|
||||
{ "Gurmukhi-InterIndic", "internal", "Gurmukhi_InterIndic", "FORWARD" },
|
||||
{ "Gujarati-InterIndic", "internal", "Gujarati_InterIndic", "FORWARD" },
|
||||
{ "Oriya-InterIndic", "internal", "Oriya_InterIndic", "FORWARD" },
|
||||
{ "Tamil-InterIndic", "internal", "Tamil_InterIndic", "FORWARD" },
|
||||
{ "Telugu-InterIndic", "internal", "Telugu_InterIndic", "FORWARD" },
|
||||
{ "Kannada-InterIndic", "internal", "Kannada_InterIndic", "FORWARD" },
|
||||
{ "Malayalam-InterIndic", "internal", "Malayalam_InterIndic", "FORWARD" },
|
||||
{ "Latin-InterIndic", "internal", "translit_Latin_InterIndic", "FORWARD" },
|
||||
{ "Devanagari-InterIndic", "internal", "translit_Devanagari_InterIndic", "FORWARD" },
|
||||
{ "Bengali-InterIndic", "internal", "translit_Bengali_InterIndic", "FORWARD" },
|
||||
{ "Gurmukhi-InterIndic", "internal", "translit_Gurmukhi_InterIndic", "FORWARD" },
|
||||
{ "Gujarati-InterIndic", "internal", "translit_Gujarati_InterIndic", "FORWARD" },
|
||||
{ "Oriya-InterIndic", "internal", "translit_Oriya_InterIndic", "FORWARD" },
|
||||
{ "Tamil-InterIndic", "internal", "translit_Tamil_InterIndic", "FORWARD" },
|
||||
{ "Telugu-InterIndic", "internal", "translit_Telugu_InterIndic", "FORWARD" },
|
||||
{ "Kannada-InterIndic", "internal", "translit_Kannada_InterIndic", "FORWARD" },
|
||||
{ "Malayalam-InterIndic", "internal", "translit_Malayalam_InterIndic", "FORWARD" },
|
||||
|
||||
{ "InterIndic-Latin", "internal", "InterIndic_Latin", "FORWARD" },
|
||||
{ "InterIndic-Devanagari", "internal", "InterIndic_Devanagari", "FORWARD" },
|
||||
{ "InterIndic-Bengali", "internal", "InterIndic_Bengali", "FORWARD" },
|
||||
{ "InterIndic-Gurmukhi", "internal", "InterIndic_Gurmukhi", "FORWARD" },
|
||||
{ "InterIndic-Gujarati", "internal", "InterIndic_Gujarati", "FORWARD" },
|
||||
{ "InterIndic-Oriya", "internal", "InterIndic_Oriya", "FORWARD" },
|
||||
{ "InterIndic-Tamil", "internal", "InterIndic_Tamil", "FORWARD" },
|
||||
{ "InterIndic-Telugu", "internal", "InterIndic_Telugu", "FORWARD" },
|
||||
{ "InterIndic-Kannada", "internal", "InterIndic_Kannada", "FORWARD" },
|
||||
{ "InterIndic-Malayalam", "internal", "InterIndic_Malayalam", "FORWARD" },
|
||||
{ "InterIndic-Latin", "internal", "translit_InterIndic_Latin", "FORWARD" },
|
||||
{ "InterIndic-Devanagari", "internal", "translit_InterIndic_Devanagari", "FORWARD" },
|
||||
{ "InterIndic-Bengali", "internal", "translit_InterIndic_Bengali", "FORWARD" },
|
||||
{ "InterIndic-Gurmukhi", "internal", "translit_InterIndic_Gurmukhi", "FORWARD" },
|
||||
{ "InterIndic-Gujarati", "internal", "translit_InterIndic_Gujarati", "FORWARD" },
|
||||
{ "InterIndic-Oriya", "internal", "translit_InterIndic_Oriya", "FORWARD" },
|
||||
{ "InterIndic-Tamil", "internal", "translit_InterIndic_Tamil", "FORWARD" },
|
||||
{ "InterIndic-Telugu", "internal", "translit_InterIndic_Telugu", "FORWARD" },
|
||||
{ "InterIndic-Kannada", "internal", "translit_InterIndic_Kannada", "FORWARD" },
|
||||
{ "InterIndic-Malayalam", "internal", "translit_InterIndic_Malayalam", "FORWARD" },
|
||||
|
||||
//Latin-X transliterators
|
||||
{ "Latin-Devanagari", "alias", "NFD;Latin-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
//Latin-Indic transliterators
|
||||
{ "Latin-Devanagari", "alias", "NFD;Latin-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
{ "Latin-Bengali", "alias", "NFD;Latin-InterIndic;InterIndic-Bengali;NFC", "" },
|
||||
{ "Latin-Gurmukhi", "alias", "NFD;Latin-InterIndic;InterIndic-Gurmukhi;NFC", "" },
|
||||
{ "Latin-Gujarati", "alias", "NFD;Latin-InterIndic;InterIndic-Gujarati;NFC", "" },
|
||||
@ -141,8 +134,8 @@ translit_index {
|
||||
{ "Latin-Kannada", "alias", "NFD;Latin-InterIndic;InterIndic-Kannada;NFC", "" },
|
||||
{ "Latin-Malayalam", "alias", "NFD;Latin-InterIndic;InterIndic-Malayalam;NFC", "" },
|
||||
|
||||
//X-Latin transliterators
|
||||
{ "Devanagari-Latin","alias", "NFD;Devanagari-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
//Indic-Latin transliterators
|
||||
{ "Devanagari-Latin", "alias", "NFD;Devanagari-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
{ "Bengali-Latin", "alias", "NFD;Bengali-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
{ "Gurmukhi-Latin", "alias", "NFD;Gurmukhi-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
{ "Gujarati-Latin", "alias", "NFD;Gujarati-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
@ -152,7 +145,6 @@ translit_index {
|
||||
{ "Kannada-Latin", "alias", "NFD;Kannada-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
{ "Malayalam-Latin", "alias", "NFD;Malayalam-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
|
||||
|
||||
{ "Devanagari-Bengali", "alias", "NFD;Devanagari-InterIndic;InterIndic-Bengali;NFC", "" },
|
||||
{ "Devanagari-Gurmukhi", "alias", "NFD;Devanagari-InterIndic;InterIndic-Gurmukhi;NFC", "" },
|
||||
{ "Devanagari-Gujarati", "alias", "NFD;Devanagari-InterIndic;InterIndic-Gujarati;NFC", "" },
|
||||
@ -177,7 +169,7 @@ translit_index {
|
||||
{ "Gurmukhi-Telugu", "alias", "NFD;Gurmukhi-InterIndic;InterIndic-Telugu;NFC", "" },
|
||||
{ "Gurmukhi-Kannada", "alias", "NFD;Gurmukhi-InterIndic;InterIndic-Kannada;NFC", "" },
|
||||
{ "Gurmukhi-Malayalam", "alias", "NFD;Gurmukhi-InterIndic;InterIndic-Malayalam;NFC", "" },
|
||||
{ "Gujarati-Devanagari", "alias", "NFD;Gujarati-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
{ "Gujarati-Devanagari", "alias", "Gujarati-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
{ "Gujarati-Bengali", "alias", "NFD;Gujarati-InterIndic;InterIndic-Bengali;NFC", "" },
|
||||
{ "Gujarati-Gurmukhi", "alias", "NFD;Gujarati-InterIndic;InterIndic-Gurmukhi;NFC", "" },
|
||||
{ "Gujarati-Oriya", "alias", "NFD;Gujarati-InterIndic;InterIndic-Oriya;NFC", "" },
|
||||
@ -227,5 +219,6 @@ translit_index {
|
||||
{ "Malayalam-Kannada", "alias", "NFD;Malayalam-InterIndic;InterIndic-Kannada;NFC", "" },
|
||||
|
||||
// eof
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -103,28 +103,34 @@ uk.txt uk_UA.txt\
|
||||
vi.txt vi_VN.txt\
|
||||
zh.txt zh__PINYIN.txt zh_CN.txt zh_HK.txt zh_SG.txt zh_TW.txt zh_TW_STROKE.txt
|
||||
|
||||
TRANSLIT_SOURCE=fullhalf.txt translit_index.txt kana.txt kbdescl1.txt\
|
||||
larabic.txt lcyril.txt\
|
||||
lgreek.txt lhebrew.txt ljamo.txt\
|
||||
lkana.txt quotes.txt\
|
||||
Bengali_InterIndic.txt\
|
||||
Devanagari_InterIndic.txt\
|
||||
Gujarati_InterIndic.txt\
|
||||
Gurmukhi_InterIndic.txt\
|
||||
Kannada_InterIndic.txt\
|
||||
Malayalam_InterIndic.txt\
|
||||
Oriya_InterIndic.txt\
|
||||
Tamil_InterIndic.txt\
|
||||
Telugu_InterIndic.txt\
|
||||
InterIndic_Bengali.txt\
|
||||
InterIndic_Devanagari.txt\
|
||||
InterIndic_Gujarati.txt\
|
||||
InterIndic_Gurmukhi.txt\
|
||||
InterIndic_Kannada.txt\
|
||||
InterIndic_Malayalam.txt\
|
||||
InterIndic_Oriya.txt\
|
||||
InterIndic_Tamil.txt\
|
||||
InterIndic_Telugu.txt\
|
||||
Latin_InterIndic.txt\
|
||||
InterIndic_Latin.txt
|
||||
TRANSLIT_SOURCE=translit_Any_Accents.txt\
|
||||
translit_Any_Publishing.txt\
|
||||
translit_Bengali_InterIndic.txt\
|
||||
translit_Cyrillic_Latin.txt\
|
||||
translit_Devanagari_InterIndic.txt\
|
||||
translit_Fullwidth_Halfwidth.txt\
|
||||
translit_Greek_Latin.txt\
|
||||
translit_Gujarati_InterIndic.txt\
|
||||
translit_Gurmukhi_InterIndic.txt\
|
||||
translit_Hiragana_Katakana.txt\
|
||||
translit_Hiragana_Latin.txt\
|
||||
translit_InterIndic_Bengali.txt\
|
||||
translit_InterIndic_Devanagari.txt\
|
||||
translit_InterIndic_Gujarati.txt\
|
||||
translit_InterIndic_Gurmukhi.txt\
|
||||
translit_InterIndic_Kannada.txt\
|
||||
translit_InterIndic_Latin.txt\
|
||||
translit_InterIndic_Malayalam.txt\
|
||||
translit_InterIndic_Oriya.txt\
|
||||
translit_InterIndic_Tamil.txt\
|
||||
translit_InterIndic_Telugu.txt\
|
||||
translit_Kannada_InterIndic.txt\
|
||||
translit_Latin_InterIndic.txt\
|
||||
translit_Latin_Jamo.txt\
|
||||
translit_Latin_Katakana.txt\
|
||||
translit_Malayalam_InterIndic.txt\
|
||||
translit_Oriya_InterIndic.txt\
|
||||
translit_Tamil_InterIndic.txt\
|
||||
translit_Telugu_InterIndic.txt\
|
||||
translit_index.txt
|
||||
|
||||
|
311
icu4c/source/data/translit/t_Any_Accents.txt
Normal file
311
icu4c/source/data/translit/t_Any_Accents.txt
Normal file
@ -0,0 +1,311 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Any_Accents.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Any_Accents
|
||||
|
||||
translit_Any_Accents {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// $Source: /xsrl/Nsvn/icu/icu/source/data/translit/Attic/t_Any_Accents.txt,v $
|
||||
// $Date: 2001/10/26 05:41:15 $
|
||||
// $Revision: 1.1 $
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
":: NFD (NFC) ;"
|
||||
|
||||
// to do: make reversible
|
||||
|
||||
// define special conversion characters.
|
||||
// variants of this could use different characters, or set one or the other to null.
|
||||
|
||||
"$pre = \< ;"
|
||||
"$post = \> ;"
|
||||
|
||||
// Provide keyboard equivalents for common diacritics used in transliteration
|
||||
|
||||
"$pre \` $post <> \u0300 ;" // COMBINING GRAVE ACCENT
|
||||
"$pre \' $post <> \u0301 ;" // COMBINING ACUTE ACCENT
|
||||
"$pre \^ $post <> \u0302 ;" // COMBINING CIRCUMFLEX ACCENT
|
||||
"$pre \~ $post <> \u0303 ;" // COMBINING TILDE
|
||||
"$pre \- $post <> \u0304 ;" // COMBINING MACRON
|
||||
"$pre \" $post <> \u0308 ;" // COMBINING DIAERESIS
|
||||
"$pre \* $post <> \u030A ;" // COMBINING RING ABOVE
|
||||
"$pre \, $post <> \u0327 ;" // COMBINING CEDILLA
|
||||
"$pre '/' $post <> \u0338 ;" // COMBINING LONG SOLIDUS OVERLAY
|
||||
"$pre \. $post <> \u0323 ;" // COMBINING DOT BELOW
|
||||
|
||||
// Combine common characters
|
||||
|
||||
"$pre AE $post <> \u00C6 ;" // LATIN CAPITAL LETTER AE
|
||||
"$pre ae $post <> \u00E6 ;" // LATIN SMALL LETTER AE
|
||||
"$pre D $post <> \u00D0 ;" // LATIN CAPITAL LETTER ETH
|
||||
"$pre d $post <> \u00F0 ;" // LATIN SMALL LETTER ETH
|
||||
"$pre O'/' $post <> \u00D8 ;" // LATIN CAPITAL LETTER O WITH STROKE
|
||||
"$pre o'/' $post <> \u00F8 ;" // LATIN SMALL LETTER O WITH STROKE
|
||||
"$pre TH $post <> \u00DE ;" // LATIN CAPITAL LETTER THORN
|
||||
"$pre th $post <> \u00FE ;" // LATIN SMALL LETTER THORN
|
||||
"$pre OE $post <> \u0152 ;" // LATIN CAPITAL LIGATURE OE
|
||||
"$pre oe $post <> \u0153 ;" // LATIN SMALL LIGATURE OE
|
||||
|
||||
"$pre ss $post <> \u00DF ;" // LATIN SMALL LETTER SHARP S
|
||||
|
||||
"$pre NG $post <> \u014A ;" // LATIN CAPITAL LETTER ENG
|
||||
"$pre ng $post <> \u014B ;" // LATIN SMALL LETTER ENG
|
||||
|
||||
"$pre T $post <> \u0398 ;" // GREEK CAPITAL LETTER THETA
|
||||
"$pre t $post <> \u03B8 ;" // GREEK SMALL LETTER THETA
|
||||
"$pre SH $post <> \u01A9 ;" // LATIN CAPITAL LETTER ESH
|
||||
"$pre sh $post <> \u0283 ;" // LATIN SMALL LETTER ESH
|
||||
"$pre ZH $post <> \u01B7 ;" // LATIN CAPITAL LETTER EZH
|
||||
"$pre zh $post <> \u0292 ;" // LATIN SMALL LETTER EZH
|
||||
|
||||
"$pre U $post <> \u01B1 ;" // LATIN CAPITAL LETTER UPSILON
|
||||
"$pre u $post <> \u028A ;" // LATIN SMALL LETTER UPSILON
|
||||
"$pre A $post <> \u018F ;" // LATIN CAPITAL LETTER SCHWA
|
||||
"$pre a $post <> \u0259 ;" // LATIN SMALL LETTER SCHWA
|
||||
"$pre O $post <> \u0186 ;" // LATIN CAPITAL LETTER OPEN O
|
||||
"$pre o $post <> \u0254 ;" // LATIN SMALL LETTER OPEN O
|
||||
"$pre E $post <> \u0190 ;" // LATIN CAPITAL LETTER OPEN E
|
||||
"$pre e $post <> \u025B ;" // LATIN SMALL LETTER OPEN E
|
||||
|
||||
// three that don't have uppercases
|
||||
|
||||
"$pre '?' $post <> \u0294 ;" // LATIN LETTER GLOTTAL STOP
|
||||
"$pre i $post <> \u026A ;" // LATIN LETTER SMALL CAPITAL I
|
||||
"$pre v $post <> \u028C ;" // LATIN SMALL LETTER TURNED V
|
||||
|
||||
// Additional Characters that may be added in the future
|
||||
|
||||
// $pre XXX $post <> \u0306 ; # COMBINING BREVE
|
||||
// $pre XXX $post <> \u0307 ; # COMBINING DOT ABOVE
|
||||
// $pre XXX $post <> \u0309 ; # COMBINING HOOK ABOVE
|
||||
// $pre XXX $post <> \u030B ; # COMBINING DOUBLE ACUTE ACCENT
|
||||
// $pre XXX $post <> \u030C ; # COMBINING CARON
|
||||
// $pre XXX $post <> \u030F ; # COMBINING DOUBLE GRAVE ACCENT
|
||||
// $pre XXX $post <> \u0311 ; # COMBINING INVERTED BREVE
|
||||
// $pre XXX $post <> \u0313 ; # COMBINING COMMA ABOVE
|
||||
// $pre XXX $post <> \u0314 ; # COMBINING REVERSED COMMA ABOVE
|
||||
// $pre XXX $post <> \u031B ; # COMBINING HORN
|
||||
// $pre XXX $post <> \u0324 ; # COMBINING DIAERESIS BELOW
|
||||
// $pre XXX $post <> \u0325 ; # COMBINING RING BELOW
|
||||
// $pre XXX $post <> \u0326 ; # COMBINING COMMA BELOW
|
||||
// $pre XXX $post <> \u0328 ; # COMBINING OGONEK
|
||||
// $pre XXX $post <> \u032D ; # COMBINING CIRCUMFLEX ACCENT BELOW
|
||||
// $pre XXX $post <> \u032E ; # COMBINING BREVE BELOW
|
||||
// $pre XXX $post <> \u0330 ; # COMBINING TILDE BELOW
|
||||
// $pre XXX $post <> \u0331 ; # COMBINING MACRON BELOW
|
||||
|
||||
// $pre YYY $post <> \u00AA ; # FEMININE ORDINAL INDICATOR
|
||||
// $pre YYY $post <> \u00BA ; # MASCULINE ORDINAL INDICATOR
|
||||
// $pre YYY $post <> \u0110 ; # LATIN CAPITAL LETTER D WITH STROKE
|
||||
// $pre YYY $post <> \u0111 ; # LATIN SMALL LETTER D WITH STROKE
|
||||
// $pre YYY $post <> \u0126 ; # LATIN CAPITAL LETTER H WITH STROKE
|
||||
// $pre YYY $post <> \u0127 ; # LATIN SMALL LETTER H WITH STROKE
|
||||
// $pre YYY $post <> \u0131 ; # LATIN SMALL LETTER DOTLESS I
|
||||
// $pre YYY $post <> \u0138 ; # LATIN SMALL LETTER KRA
|
||||
// $pre YYY $post <> \u013F ; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
|
||||
// $pre YYY $post <> \u0140 ; # LATIN SMALL LETTER L WITH MIDDLE DOT
|
||||
// $pre YYY $post <> \u0141 ; # LATIN CAPITAL LETTER L WITH STROKE
|
||||
// $pre YYY $post <> \u0142 ; # LATIN SMALL LETTER L WITH STROKE
|
||||
// $pre YYY $post <> \u0149 ; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
|
||||
// $pre YYY $post <> \u0166 ; # LATIN CAPITAL LETTER T WITH STROKE
|
||||
// $pre YYY $post <> \u0167 ; # LATIN SMALL LETTER T WITH STROKE
|
||||
// $pre YYY $post <> \u017F ; # LATIN SMALL LETTER LONG S
|
||||
// $pre YYY $post <> \u0180 ; # LATIN SMALL LETTER B WITH STROKE
|
||||
// $pre YYY $post <> \u0181 ; # LATIN CAPITAL LETTER B WITH HOOK
|
||||
// $pre YYY $post <> \u0182 ; # LATIN CAPITAL LETTER B WITH TOPBAR
|
||||
// $pre YYY $post <> \u0183 ; # LATIN SMALL LETTER B WITH TOPBAR
|
||||
// $pre YYY $post <> \u0184 ; # LATIN CAPITAL LETTER TONE SIX
|
||||
// $pre YYY $post <> \u0185 ; # LATIN SMALL LETTER TONE SIX
|
||||
// $pre YYY $post <> \u0187 ; # LATIN CAPITAL LETTER C WITH HOOK
|
||||
// $pre YYY $post <> \u0188 ; # LATIN SMALL LETTER C WITH HOOK
|
||||
// $pre YYY $post <> \u0189 ; # LATIN CAPITAL LETTER AFRICAN D
|
||||
// $pre YYY $post <> \u018A ; # LATIN CAPITAL LETTER D WITH HOOK
|
||||
// $pre YYY $post <> \u018B ; # LATIN CAPITAL LETTER D WITH TOPBAR
|
||||
// $pre YYY $post <> \u018C ; # LATIN SMALL LETTER D WITH TOPBAR
|
||||
// $pre YYY $post <> \u018D ; # LATIN SMALL LETTER TURNED DELTA
|
||||
// $pre YYY $post <> \u018E ; # LATIN CAPITAL LETTER REVERSED E
|
||||
// $pre YYY $post <> \u0191 ; # LATIN CAPITAL LETTER F WITH HOOK
|
||||
// $pre YYY $post <> \u0192 ; # LATIN SMALL LETTER F WITH HOOK
|
||||
// $pre YYY $post <> \u0193 ; # LATIN CAPITAL LETTER G WITH HOOK
|
||||
// $pre YYY $post <> \u0194 ; # LATIN CAPITAL LETTER GAMMA
|
||||
// $pre YYY $post <> \u0195 ; # LATIN SMALL LETTER HV
|
||||
// $pre YYY $post <> \u0196 ; # LATIN CAPITAL LETTER IOTA
|
||||
// $pre YYY $post <> \u0197 ; # LATIN CAPITAL LETTER I WITH STROKE
|
||||
// $pre YYY $post <> \u0198 ; # LATIN CAPITAL LETTER K WITH HOOK
|
||||
// $pre YYY $post <> \u0199 ; # LATIN SMALL LETTER K WITH HOOK
|
||||
// $pre YYY $post <> \u019A ; # LATIN SMALL LETTER L WITH BAR
|
||||
// $pre YYY $post <> \u019B ; # LATIN SMALL LETTER LAMBDA WITH STROKE
|
||||
// $pre YYY $post <> \u019C ; # LATIN CAPITAL LETTER TURNED M
|
||||
// $pre YYY $post <> \u019D ; # LATIN CAPITAL LETTER N WITH LEFT HOOK
|
||||
// $pre YYY $post <> \u019E ; # LATIN SMALL LETTER N WITH LONG RIGHT LEG
|
||||
// $pre YYY $post <> \u019F ; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
|
||||
// $pre YYY $post <> \u01A2 ; # LATIN CAPITAL LETTER OI
|
||||
// $pre YYY $post <> \u01A3 ; # LATIN SMALL LETTER OI
|
||||
// $pre YYY $post <> \u01A4 ; # LATIN CAPITAL LETTER P WITH HOOK
|
||||
// $pre YYY $post <> \u01A5 ; # LATIN SMALL LETTER P WITH HOOK
|
||||
// $pre YYY $post <> \u01A6 ; # LATIN LETTER YR
|
||||
// $pre YYY $post <> \u01A7 ; # LATIN CAPITAL LETTER TONE TWO
|
||||
// $pre YYY $post <> \u01A8 ; # LATIN SMALL LETTER TONE TWO
|
||||
// $pre YYY $post <> \u01AA ; # LATIN LETTER REVERSED ESH LOOP
|
||||
// $pre YYY $post <> \u01AB ; # LATIN SMALL LETTER T WITH PALATAL HOOK
|
||||
// $pre YYY $post <> \u01AC ; # LATIN CAPITAL LETTER T WITH HOOK
|
||||
// $pre YYY $post <> \u01AD ; # LATIN SMALL LETTER T WITH HOOK
|
||||
// $pre YYY $post <> \u01AE ; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
|
||||
// $pre YYY $post <> \u01B2 ; # LATIN CAPITAL LETTER V WITH HOOK
|
||||
// $pre YYY $post <> \u01B3 ; # LATIN CAPITAL LETTER Y WITH HOOK
|
||||
// $pre YYY $post <> \u01B4 ; # LATIN SMALL LETTER Y WITH HOOK
|
||||
// $pre YYY $post <> \u01B5 ; # LATIN CAPITAL LETTER Z WITH STROKE
|
||||
// $pre YYY $post <> \u01B6 ; # LATIN SMALL LETTER Z WITH STROKE
|
||||
// $pre YYY $post <> \u01B8 ; # LATIN CAPITAL LETTER EZH REVERSED
|
||||
// $pre YYY $post <> \u01B9 ; # LATIN SMALL LETTER EZH REVERSED
|
||||
// $pre YYY $post <> \u01BA ; # LATIN SMALL LETTER EZH WITH TAIL
|
||||
// $pre YYY $post <> \u01BB ; # LATIN LETTER TWO WITH STROKE
|
||||
// $pre YYY $post <> \u01BC ; # LATIN CAPITAL LETTER TONE FIVE
|
||||
// $pre YYY $post <> \u01BD ; # LATIN SMALL LETTER TONE FIVE
|
||||
// $pre YYY $post <> \u01BE ; # LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE
|
||||
// $pre YYY $post <> \u01BF ; # LATIN LETTER WYNN
|
||||
// $pre YYY $post <> \u01C0 ; # LATIN LETTER DENTAL CLICK
|
||||
// $pre YYY $post <> \u01C1 ; # LATIN LETTER LATERAL CLICK
|
||||
// $pre YYY $post <> \u01C2 ; # LATIN LETTER ALVEOLAR CLICK
|
||||
// $pre YYY $post <> \u01C3 ; # LATIN LETTER RETROFLEX CLICK
|
||||
// $pre YYY $post <> \u01C4 ; # LATIN CAPITAL LETTER DZ WITH CARON
|
||||
// $pre YYY $post <> \u01C5 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
|
||||
// $pre YYY $post <> \u01C6 ; # LATIN SMALL LETTER DZ WITH CARON
|
||||
// $pre YYY $post <> \u01C7 ; # LATIN CAPITAL LETTER LJ
|
||||
// $pre YYY $post <> \u01C8 ; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
|
||||
// $pre YYY $post <> \u01C9 ; # LATIN SMALL LETTER LJ
|
||||
// $pre YYY $post <> \u01CA ; # LATIN CAPITAL LETTER NJ
|
||||
// $pre YYY $post <> \u01CB ; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
|
||||
// $pre YYY $post <> \u01CC ; # LATIN SMALL LETTER NJ
|
||||
// $pre YYY $post <> \u01DD ; # LATIN SMALL LETTER TURNED E
|
||||
// $pre YYY $post <> \u01E4 ; # LATIN CAPITAL LETTER G WITH STROKE
|
||||
// $pre YYY $post <> \u01E5 ; # LATIN SMALL LETTER G WITH STROKE
|
||||
// $pre YYY $post <> \u01F1 ; # LATIN CAPITAL LETTER DZ
|
||||
// $pre YYY $post <> \u01F2 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
|
||||
// $pre YYY $post <> \u01F3 ; # LATIN SMALL LETTER DZ
|
||||
// $pre YYY $post <> \u01F6 ; # LATIN CAPITAL LETTER HWAIR
|
||||
// $pre YYY $post <> \u01F7 ; # LATIN CAPITAL LETTER WYNN
|
||||
// $pre YYY $post <> \u021C ; # LATIN CAPITAL LETTER YOGH
|
||||
// $pre YYY $post <> \u021D ; # LATIN SMALL LETTER YOGH
|
||||
// $pre YYY $post <> \u0222 ; # LATIN CAPITAL LETTER OU
|
||||
// $pre YYY $post <> \u0223 ; # LATIN SMALL LETTER OU
|
||||
// $pre YYY $post <> \u0224 ; # LATIN CAPITAL LETTER Z WITH HOOK
|
||||
// $pre YYY $post <> \u0225 ; # LATIN SMALL LETTER Z WITH HOOK
|
||||
// $pre YYY $post <> \u0250 ; # LATIN SMALL LETTER TURNED A
|
||||
// $pre YYY $post <> \u0251 ; # LATIN SMALL LETTER ALPHA
|
||||
// $pre YYY $post <> \u0252 ; # LATIN SMALL LETTER TURNED ALPHA
|
||||
// $pre YYY $post <> \u0253 ; # LATIN SMALL LETTER B WITH HOOK
|
||||
// $pre YYY $post <> \u0255 ; # LATIN SMALL LETTER C WITH CURL
|
||||
// $pre YYY $post <> \u0256 ; # LATIN SMALL LETTER D WITH TAIL
|
||||
// $pre YYY $post <> \u0257 ; # LATIN SMALL LETTER D WITH HOOK
|
||||
// $pre YYY $post <> \u0258 ; # LATIN SMALL LETTER REVERSED E
|
||||
// $pre YYY $post <> \u025A ; # LATIN SMALL LETTER SCHWA WITH HOOK
|
||||
// $pre YYY $post <> \u025C ; # LATIN SMALL LETTER REVERSED OPEN E
|
||||
// $pre YYY $post <> \u025D ; # LATIN SMALL LETTER REVERSED OPEN E WITH HOOK
|
||||
// $pre YYY $post <> \u025E ; # LATIN SMALL LETTER CLOSED REVERSED OPEN E
|
||||
// $pre YYY $post <> \u025F ; # LATIN SMALL LETTER DOTLESS J WITH STROKE
|
||||
// $pre YYY $post <> \u0260 ; # LATIN SMALL LETTER G WITH HOOK
|
||||
// $pre YYY $post <> \u0261 ; # LATIN SMALL LETTER SCRIPT G
|
||||
// $pre YYY $post <> \u0262 ; # LATIN LETTER SMALL CAPITAL G
|
||||
// $pre YYY $post <> \u0263 ; # LATIN SMALL LETTER GAMMA
|
||||
// $pre YYY $post <> \u0264 ; # LATIN SMALL LETTER RAMS HORN
|
||||
// $pre YYY $post <> \u0265 ; # LATIN SMALL LETTER TURNED H
|
||||
// $pre YYY $post <> \u0266 ; # LATIN SMALL LETTER H WITH HOOK
|
||||
// $pre YYY $post <> \u0267 ; # LATIN SMALL LETTER HENG WITH HOOK
|
||||
// $pre YYY $post <> \u0268 ; # LATIN SMALL LETTER I WITH STROKE
|
||||
// $pre YYY $post <> \u0269 ; # LATIN SMALL LETTER IOTA
|
||||
// $pre YYY $post <> \u026B ; # LATIN SMALL LETTER L WITH MIDDLE TILDE
|
||||
// $pre YYY $post <> \u026C ; # LATIN SMALL LETTER L WITH BELT
|
||||
// $pre YYY $post <> \u026D ; # LATIN SMALL LETTER L WITH RETROFLEX HOOK
|
||||
// $pre YYY $post <> \u026E ; # LATIN SMALL LETTER LEZH
|
||||
// $pre YYY $post <> \u026F ; # LATIN SMALL LETTER TURNED M
|
||||
// $pre YYY $post <> \u0270 ; # LATIN SMALL LETTER TURNED M WITH LONG LEG
|
||||
// $pre YYY $post <> \u0271 ; # LATIN SMALL LETTER M WITH HOOK
|
||||
// $pre YYY $post <> \u0272 ; # LATIN SMALL LETTER N WITH LEFT HOOK
|
||||
// $pre YYY $post <> \u0273 ; # LATIN SMALL LETTER N WITH RETROFLEX HOOK
|
||||
// $pre YYY $post <> \u0274 ; # LATIN LETTER SMALL CAPITAL N
|
||||
// $pre YYY $post <> \u0275 ; # LATIN SMALL LETTER BARRED O
|
||||
// $pre YYY $post <> \u0276 ; # LATIN LETTER SMALL CAPITAL OE
|
||||
// $pre YYY $post <> \u0277 ; # LATIN SMALL LETTER CLOSED OMEGA
|
||||
// $pre YYY $post <> \u0278 ; # LATIN SMALL LETTER PHI
|
||||
// $pre YYY $post <> \u0279 ; # LATIN SMALL LETTER TURNED R
|
||||
// $pre YYY $post <> \u027A ; # LATIN SMALL LETTER TURNED R WITH LONG LEG
|
||||
// $pre YYY $post <> \u027B ; # LATIN SMALL LETTER TURNED R WITH HOOK
|
||||
// $pre YYY $post <> \u027C ; # LATIN SMALL LETTER R WITH LONG LEG
|
||||
// $pre YYY $post <> \u027D ; # LATIN SMALL LETTER R WITH TAIL
|
||||
// $pre YYY $post <> \u027E ; # LATIN SMALL LETTER R WITH FISHHOOK
|
||||
// $pre YYY $post <> \u027F ; # LATIN SMALL LETTER REVERSED R WITH FISHHOOK
|
||||
// $pre YYY $post <> \u0280 ; # LATIN LETTER SMALL CAPITAL R
|
||||
// $pre YYY $post <> \u0281 ; # LATIN LETTER SMALL CAPITAL INVERTED R
|
||||
// $pre YYY $post <> \u0282 ; # LATIN SMALL LETTER S WITH HOOK
|
||||
// $pre YYY $post <> \u0284 ; # LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
|
||||
// $pre YYY $post <> \u0285 ; # LATIN SMALL LETTER SQUAT REVERSED ESH
|
||||
// $pre YYY $post <> \u0286 ; # LATIN SMALL LETTER ESH WITH CURL
|
||||
// $pre YYY $post <> \u0287 ; # LATIN SMALL LETTER TURNED T
|
||||
// $pre YYY $post <> \u0288 ; # LATIN SMALL LETTER T WITH RETROFLEX HOOK
|
||||
// $pre YYY $post <> \u0289 ; # LATIN SMALL LETTER U BAR
|
||||
// $pre YYY $post <> \u028B ; # LATIN SMALL LETTER V WITH HOOK
|
||||
// $pre YYY $post <> \u028D ; # LATIN SMALL LETTER TURNED W
|
||||
// $pre YYY $post <> \u028E ; # LATIN SMALL LETTER TURNED Y
|
||||
// $pre YYY $post <> \u028F ; # LATIN LETTER SMALL CAPITAL Y
|
||||
// $pre YYY $post <> \u0290 ; # LATIN SMALL LETTER Z WITH RETROFLEX HOOK
|
||||
// $pre YYY $post <> \u0291 ; # LATIN SMALL LETTER Z WITH CURL
|
||||
// $pre YYY $post <> \u0293 ; # LATIN SMALL LETTER EZH WITH CURL
|
||||
// $pre YYY $post <> \u0294 ; # LATIN LETTER GLOTTAL STOP
|
||||
// $pre YYY $post <> \u0295 ; # LATIN LETTER PHARYNGEAL VOICED FRICATIVE
|
||||
// $pre YYY $post <> \u0296 ; # LATIN LETTER INVERTED GLOTTAL STOP
|
||||
// $pre YYY $post <> \u0297 ; # LATIN LETTER STRETCHED C
|
||||
// $pre YYY $post <> \u0298 ; # LATIN LETTER BILABIAL CLICK
|
||||
// $pre YYY $post <> \u0299 ; # LATIN LETTER SMALL CAPITAL B
|
||||
// $pre YYY $post <> \u029A ; # LATIN SMALL LETTER CLOSED OPEN E
|
||||
// $pre YYY $post <> \u029B ; # LATIN LETTER SMALL CAPITAL G WITH HOOK
|
||||
// $pre YYY $post <> \u029C ; # LATIN LETTER SMALL CAPITAL H
|
||||
// $pre YYY $post <> \u029D ; # LATIN SMALL LETTER J WITH CROSSED-TAIL
|
||||
// $pre YYY $post <> \u029E ; # LATIN SMALL LETTER TURNED K
|
||||
// $pre YYY $post <> \u029F ; # LATIN LETTER SMALL CAPITAL L
|
||||
// $pre YYY $post <> \u02A0 ; # LATIN SMALL LETTER Q WITH HOOK
|
||||
// $pre YYY $post <> \u02A1 ; # LATIN LETTER GLOTTAL STOP WITH STROKE
|
||||
// $pre YYY $post <> \u02A2 ; # LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE
|
||||
// $pre YYY $post <> \u02A3 ; # LATIN SMALL LETTER DZ DIGRAPH
|
||||
// $pre YYY $post <> \u02A4 ; # LATIN SMALL LETTER DEZH DIGRAPH
|
||||
// $pre YYY $post <> \u02A5 ; # LATIN SMALL LETTER DZ DIGRAPH WITH CURL
|
||||
// $pre YYY $post <> \u02A6 ; # LATIN SMALL LETTER TS DIGRAPH
|
||||
// $pre YYY $post <> \u02A7 ; # LATIN SMALL LETTER TESH DIGRAPH
|
||||
// $pre YYY $post <> \u02A8 ; # LATIN SMALL LETTER TC DIGRAPH WITH CURL
|
||||
// $pre YYY $post <> \u02A9 ; # LATIN SMALL LETTER FENG DIGRAPH
|
||||
// $pre YYY $post <> \u02AA ; # LATIN SMALL LETTER LS DIGRAPH
|
||||
// $pre YYY $post <> \u02AB ; # LATIN SMALL LETTER LZ DIGRAPH
|
||||
// $pre YYY $post <> \u02AC ; # LATIN LETTER BILABIAL PERCUSSIVE
|
||||
// $pre YYY $post <> \u02AD ; # LATIN LETTER BIDENTAL PERCUSSIVE
|
||||
// $pre YYY $post <> \u02B0 ; # MODIFIER LETTER SMALL H
|
||||
// $pre YYY $post <> \u02B1 ; # MODIFIER LETTER SMALL H WITH HOOK
|
||||
// $pre YYY $post <> \u02B2 ; # MODIFIER LETTER SMALL J
|
||||
// $pre YYY $post <> \u02B3 ; # MODIFIER LETTER SMALL R
|
||||
// $pre YYY $post <> \u02B4 ; # MODIFIER LETTER SMALL TURNED R
|
||||
// $pre YYY $post <> \u02B5 ; # MODIFIER LETTER SMALL TURNED R WITH HOOK
|
||||
// $pre YYY $post <> \u02B6 ; # MODIFIER LETTER SMALL CAPITAL INVERTED R
|
||||
// $pre YYY $post <> \u02B7 ; # MODIFIER LETTER SMALL W
|
||||
// $pre YYY $post <> \u02B8 ; # MODIFIER LETTER SMALL Y
|
||||
// $pre YYY $post <> \u02E0 ; # MODIFIER LETTER SMALL GAMMA
|
||||
// $pre YYY $post <> \u02E1 ; # MODIFIER LETTER SMALL L
|
||||
// $pre YYY $post <> \u02E2 ; # MODIFIER LETTER SMALL S
|
||||
// $pre YYY $post <> \u02E3 ; # MODIFIER LETTER SMALL X
|
||||
// $pre YYY $post <> \u02E4 ; # MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
|
||||
// $pre YYY $post <> \u1E9A ; # LATIN SMALL LETTER A WITH RIGHT HALF RING
|
||||
// $pre YYY $post <> \u207F ; # SUPERSCRIPT LATIN SMALL LETTER N
|
||||
|
||||
":: NFC (NFD) ;"
|
||||
}
|
||||
}
|
55
icu4c/source/data/translit/t_Any_Publishing.txt
Normal file
55
icu4c/source/data/translit/t_Any_Publishing.txt
Normal file
@ -0,0 +1,55 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Any_Publishing.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Any_Publishing
|
||||
|
||||
translit_Any_Publishing {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// $Source: /xsrl/Nsvn/icu/icu/source/data/translit/Attic/t_Any_Publishing.txt,v $
|
||||
// $Date: 2001/10/26 05:41:16 $
|
||||
// $Revision: 1.1 $
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Test case
|
||||
// "The" "(quick)" ('brown') `fox' ` jumped -- "over?"
|
||||
|
||||
// Variables
|
||||
|
||||
"$single = \' ;"
|
||||
"$space = ' ' ;"
|
||||
"$double = \" ;"
|
||||
"$back = \` ;"
|
||||
"$tab = '\u0009' ;"
|
||||
"$makeRight = [[:Z:][:Ps:][:Pi:]$] ;"
|
||||
|
||||
// fix UNIX quotes
|
||||
|
||||
"$back $back > “ ;"
|
||||
"$back > ‘ ;"
|
||||
|
||||
// fix typewriter quotes, by context
|
||||
|
||||
"$makeRight {$double} <> “ ;"
|
||||
"$double <> ” ;"
|
||||
|
||||
"$makeRight {$single} <> ‘ ;"
|
||||
"$single <> ’;"
|
||||
|
||||
// fix multiple spaces and hyphens
|
||||
|
||||
"$space {$space} > ;"
|
||||
"'--' <> — ;"
|
||||
}
|
||||
}
|
121
icu4c/source/data/translit/t_Beng_InterIndic.txt
Normal file
121
icu4c/source/data/translit/t_Beng_InterIndic.txt
Normal file
@ -0,0 +1,121 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Bengali_InterIndic.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Bengali_InterIndic
|
||||
|
||||
translit_Bengali_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Bengali_InterIndic
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:41:57 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Bengali-InterIndic
|
||||
//:: NFD (NFC) ;
|
||||
"\u0981>\ue001;" // SIGN CANDRABINDU
|
||||
"\u0982>\ue002;" // SIGN ANUSVARA
|
||||
"\u0983>\ue003;" // SIGN VISARGA
|
||||
"\u0985>\ue005;" // LETTER A
|
||||
"\u0986>\ue006;" // LETTER AA
|
||||
"\u0987>\ue007;" // LETTER I
|
||||
"\u0988>\ue008;" // LETTER II
|
||||
"\u0989>\ue009;" // LETTER U
|
||||
"\u098a>\ue00a;" // LETTER UU
|
||||
"\u098b>\ue00b;" // LETTER VOCALIC R
|
||||
"\u098c>\ue00c;" // LETTER VOCALIC L
|
||||
"\u098f>\ue00f;" // LETTER E
|
||||
"\u0990>\ue010;" // LETTER AI
|
||||
"\u0993>\ue013;" // LETTER O
|
||||
"\u0994>\ue014;" // LETTER AU
|
||||
"\u0995>\ue015;" // LETTER KA
|
||||
"\u0996>\ue016;" // LETTER KHA
|
||||
"\u0997>\ue017;" // LETTER GA
|
||||
"\u0998>\ue018;" // LETTER GHA
|
||||
"\u0999>\ue019;" // LETTER NGA
|
||||
"\u099a>\ue01a;" // LETTER CA
|
||||
"\u099b>\ue01b;" // LETTER CHA
|
||||
"\u099c>\ue01c;" // LETTER JA
|
||||
"\u099d>\ue01d;" // LETTER JHA
|
||||
"\u099e>\ue01e;" // LETTER NYA
|
||||
"\u099f>\ue01f;" // LETTER TTA
|
||||
"\u09a0>\ue020;" // LETTER TTHA
|
||||
"\u09a1>\ue021;" // LETTER DDA
|
||||
"\u09a2>\ue022;" // LETTER DDHA
|
||||
"\u09a3>\ue023;" // LETTER NNA
|
||||
"\u09a4>\ue024;" // LETTER TA
|
||||
"\u09a5>\ue025;" // LETTER THA
|
||||
"\u09a6>\ue026;" // LETTER DA
|
||||
"\u09a7>\ue027;" // LETTER DHA
|
||||
"\u09a8>\ue028;" // LETTER NA
|
||||
"\u09aa>\ue02a;" // LETTER PA
|
||||
"\u09ab>\ue02b;" // LETTER PHA
|
||||
"\u09ac>\ue02c;" // LETTER BA
|
||||
"\u09ad>\ue02d;" // LETTER BHA
|
||||
"\u09ae>\ue02e;" // LETTER MA
|
||||
"\u09af>\ue02f;" // LETTER YA
|
||||
"\u09b0>\ue030;" // LETTER RA
|
||||
"\u09b2>\ue032;" // LETTER LA
|
||||
"\u09b6>\ue036;" // LETTER SHA
|
||||
"\u09b7>\ue037;" // LETTER SSA
|
||||
"\u09b8>\ue038;" // LETTER SA
|
||||
"\u09b9>\ue039;" // LETTER HA
|
||||
"\u09bc>\ue03c;" // SIGN NUKTA
|
||||
"\u09be>\ue03e;" // VOWEL SIGN AA
|
||||
"\u09bf>\ue03f;" // VOWEL SIGN I
|
||||
"\u09c0>\ue040;" // VOWEL SIGN II
|
||||
"\u09c1>\ue041;" // VOWEL SIGN U
|
||||
"\u09c2>\ue042;" // VOWEL SIGN UU
|
||||
"\u09c3>\ue043;" // VOWEL SIGN VOCALIC R
|
||||
"\u09c4>\ue044;" // VOWEL SIGN VOCALIC RR
|
||||
"\u09c7>\ue047;" // VOWEL SIGN E
|
||||
"\u09c8>\ue048;" // VOWEL SIGN AI
|
||||
"\u09cb>\ue04b;" // VOWEL SIGN O
|
||||
"\u09cc>\ue04c;" // VOWEL SIGN AU
|
||||
"\u09cd>\ue04d;" // SIGN VIRAMA
|
||||
"\u09d7>\ue057;" // AU LENGTH MARK
|
||||
"\u09dc>\ue05c;" // LETTER RRA
|
||||
"\u09dd>\ue05d;" // LETTER RHA
|
||||
"\u09df>\ue05f;" // LETTER YYA
|
||||
"\u09e0>\ue060;" // LETTER VOCALIC RR
|
||||
"\u09e1>\ue061;" // LETTER VOCALIC LL
|
||||
"\u09e2>\ue062;" // VOWEL SIGN VOCALIC L
|
||||
"\u09e3>\ue063;" // VOWEL SIGN VOCALIC LL
|
||||
"\u09e6>\ue066;" // DIGIT ZERO
|
||||
"\u09e7>\ue067;" // DIGIT ONE
|
||||
"\u09e8>\ue068;" // DIGIT TWO
|
||||
"\u09e9>\ue069;" // DIGIT THREE
|
||||
"\u09ea>\ue06a;" // DIGIT FOUR
|
||||
"\u09eb>\ue06b;" // DIGIT FIVE
|
||||
"\u09ec>\ue06c;" // DIGIT SIX
|
||||
"\u09ed>\ue06d;" // DIGIT SEVEN
|
||||
"\u09ee>\ue06e;" // DIGIT EIGHT
|
||||
"\u09ef>\ue06f;" // DIGIT NINE
|
||||
// \u09f0>; # UNMAPPED Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL
|
||||
// \u09f1>; # UNMAPPED Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL
|
||||
// \u09f2>; # UNMAPPED Bengali-InterIndic: RUPEE MARK
|
||||
// \u09f3>; # UNMAPPED Bengali-InterIndic: RUPEE SIGN
|
||||
// \u09f4>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR ONE
|
||||
// \u09f5>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR TWO
|
||||
// \u09f6>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR THREE
|
||||
// \u09f7>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR FOUR
|
||||
// \u09f8>; # UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
// \u09f9>; # UNMAPPED Bengali-InterIndic: CURRENCY DENOMINATOR SIXTEEN
|
||||
"\u09fa>\ue070;" // ISSHAR
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
316
icu4c/source/data/translit/t_Cyrl_Latn.txt
Normal file
316
icu4c/source/data/translit/t_Cyrl_Latn.txt
Normal file
@ -0,0 +1,316 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Cyrillic_Latin.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Cyrillic_Latin
|
||||
|
||||
translit_Cyrillic_Latin {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// $Source: /xsrl/Nsvn/icu/icu/source/data/translit/Attic/t_Cyrl_Latn.txt,v $
|
||||
// $Date: 2001/10/26 05:41:16 $
|
||||
// $Revision: 1.1 $
|
||||
//--------------------------------------------------------------------
|
||||
// TODO: add remaining characters
|
||||
// Should add variants for Russian-English, Russian-German
|
||||
// Those can use this as a base, and then remap cases
|
||||
// like a $hat to ya or ja.
|
||||
|
||||
":: NFD (NFC) ;"
|
||||
|
||||
"$modprime = \u02B9;"
|
||||
"$modprime2 = \u02BA;"
|
||||
|
||||
"$grave = \u0300;"
|
||||
"$acute = \u0301;"
|
||||
"$hat = \u0302;"
|
||||
"$breve = \u0306 ;"
|
||||
"$dot = \u0307 ;"
|
||||
"$caron = \u030C ;"
|
||||
"$comma = \u0326 ;"
|
||||
|
||||
// move up so not masked
|
||||
|
||||
"я <> a $hat ;" // CYRILLIC SMALL LETTER YA
|
||||
"Я <> A $hat ;" // CYRILLIC CAPITAL LETTER YA
|
||||
|
||||
"ч <> c $caron ;" // CYRILLIC SMALL LETTER CHE
|
||||
"Ч <> C $caron;" // CYRILLIC CAPITAL LETTER CHE
|
||||
// ҷ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER
|
||||
// Ҷ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
|
||||
// ӌ <> XXX ; # CYRILLIC SMALL LETTER KHAKASSIAN CHE
|
||||
// Ӌ <> XXX ; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
|
||||
// ҹ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
|
||||
// Ҹ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
|
||||
|
||||
"э <> e $acute;" // CYRILLIC SMALL LETTER E
|
||||
"Э <> E $acute;" // CYRILLIC CAPITAL LETTER E
|
||||
"є <> e $hat;" // CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
"Є <> E $hat;" // CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
|
||||
"ш <> s $caron ;" // CYRILLIC SMALL LETTER SHA
|
||||
"Ш <> S $caron ;" // CYRILLIC CAPITAL LETTER SHA
|
||||
"щ <> s $hat ;" // CYRILLIC SMALL LETTER SHCHA
|
||||
"Щ <> S $hat;" // CYRILLIC CAPITAL LETTER SHCHA
|
||||
|
||||
"ѕ <> z $hat ;" // CYRILLIC SMALL LETTER DZE
|
||||
"Ѕ <> Z $hat;" // CYRILLIC CAPITAL LETTER DZE
|
||||
// ӡ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN DZE
|
||||
// Ӡ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
|
||||
|
||||
"ю <> u $hat ;" // CYRILLIC SMALL LETTER YU
|
||||
"Ю <> U $hat ;" // CYRILLIC CAPITAL LETTER YU
|
||||
|
||||
"і <> i $acute;" // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
"І <> I $acute;" // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
"ј <> j $caron;" // CYRILLIC SMALL LETTER JE
|
||||
"Ј <> J $caron;" // CYRILLIC CAPITAL LETTER JE
|
||||
|
||||
"љ <> l $hat ;" // CYRILLIC SMALL LETTER LJE
|
||||
"Љ <> L $hat ;" // CYRILLIC CAPITAL LETTER LJE
|
||||
"њ <> n $hat ;" // CYRILLIC SMALL LETTER NJE
|
||||
"Њ <> N $hat ;" // CYRILLIC CAPITAL LETTER NJE
|
||||
|
||||
"ћ <> c $acute ;" // CYRILLIC SMALL LETTER TSHE
|
||||
"Ћ <> C $acute ;" // CYRILLIC CAPITAL LETTER TSHE
|
||||
|
||||
"џ <> d $hat ;" // CYRILLIC SMALL LETTER DZHE
|
||||
"Џ <> D $hat ;" // CYRILLIC CAPITAL LETTER DZHE
|
||||
|
||||
// Normal order
|
||||
|
||||
"а <> a ;" // CYRILLIC SMALL LETTER A
|
||||
"А <> A ;" // CYRILLIC CAPITAL LETTER A
|
||||
"ә <> \u0259 ;" // CYRILLIC SMALL LETTER SCHWA
|
||||
"Ә <> \u018F ;" // CYRILLIC CAPITAL LETTER SCHWA
|
||||
"ӕ <> \u00E6 ;" // CYRILLIC SMALL LIGATURE A IE
|
||||
"Ӕ <> \u00C6 ;" // CYRILLIC CAPITAL LIGATURE A IE
|
||||
"б <> b ;" // CYRILLIC SMALL LETTER BE
|
||||
"Б <> B ;" // CYRILLIC CAPITAL LETTER BE
|
||||
"в <> v ;" // CYRILLIC SMALL LETTER VE
|
||||
"В <> V ;" // CYRILLIC CAPITAL LETTER VE
|
||||
|
||||
"ґ <> g $grave ;" // CYRILLIC SMALL LETTER GHE WITH UPTURN
|
||||
"Ґ <> G $grave ;" // CYRILLIC CAPITAL LETTER GHE WITH UPTURN
|
||||
"ғ <> g $dot ;" // CYRILLIC SMALL LETTER GHE WITH STROKE
|
||||
"Ғ <> G $dot;" // CYRILLIC CAPITAL LETTER GHE WITH STROKE
|
||||
"ҕ <> g $breve;" // CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK
|
||||
"Ҕ <> G $breve;" // CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
|
||||
"г <> g ;" // CYRILLIC SMALL LETTER GHE
|
||||
"Г <> G ;" // CYRILLIC CAPITAL LETTER GHE
|
||||
|
||||
"д <> d;" // CYRILLIC SMALL LETTER DE
|
||||
"Д <> D;" // CYRILLIC CAPITAL LETTER DE
|
||||
"ђ <> đ ;" // CYRILLIC SMALL LETTER DJE
|
||||
"Ђ <> Đ ;" // CYRILLIC CAPITAL LETTER DJE
|
||||
"ҙ <> z $comma ;" // CYRILLIC SMALL LETTER ZE WITH DESCENDER
|
||||
"Ҙ <> Z $comma ;" // CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
|
||||
"е <> e ;" // CYRILLIC SMALL LETTER IE
|
||||
"Е <> E;" // CYRILLIC CAPITAL LETTER IE
|
||||
|
||||
"ж <> z $caron;" // CYRILLIC SMALL LETTER ZHE
|
||||
"Ж <> Z $caron;" // CYRILLIC CAPITAL LETTER ZHE
|
||||
|
||||
// җ <> XXX ; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER
|
||||
// Җ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
|
||||
|
||||
"з <> z ;" // CYRILLIC SMALL LETTER ZE
|
||||
"З <> Z;" // CYRILLIC CAPITAL LETTER ZE
|
||||
|
||||
"й <> j ;" // CYRILLIC SMALL LETTER SHORT I
|
||||
"Й <> J ;" // CYRILLIC CAPITAL LETTER SHORT I
|
||||
"и <> i ;" // CYRILLIC SMALL LETTER I
|
||||
"И <> I ;" // CYRILLIC CAPITAL LETTER I
|
||||
|
||||
"к <> k ;" // CYRILLIC SMALL LETTER KA
|
||||
"К <> K;" // CYRILLIC CAPITAL LETTER KA
|
||||
|
||||
// қ <> XXX ; # CYRILLIC SMALL LETTER KA WITH DESCENDER
|
||||
// Қ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
|
||||
// ӄ <> XXX ; # CYRILLIC SMALL LETTER KA WITH HOOK
|
||||
// Ӄ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH HOOK
|
||||
// ҡ <> XXX ; # CYRILLIC SMALL LETTER BASHKIR KA
|
||||
// Ҡ <> XXX ; # CYRILLIC CAPITAL LETTER BASHKIR KA
|
||||
// ҟ <> XXX ; # CYRILLIC SMALL LETTER KA WITH STROKE
|
||||
// Ҟ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH STROKE
|
||||
// ҝ <> XXX ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
|
||||
// Ҝ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
|
||||
"л <> l ;" // CYRILLIC SMALL LETTER EL
|
||||
"Л <> L;" // CYRILLIC CAPITAL LETTER EL
|
||||
|
||||
"м <> m ;" // CYRILLIC SMALL LETTER EM
|
||||
"М <> M ;" // CYRILLIC CAPITAL LETTER EM
|
||||
"н <> n ;" // CYRILLIC SMALL LETTER EN
|
||||
"Н <> N;" // CYRILLIC CAPITAL LETTER EN
|
||||
// ң <> XXX ; # CYRILLIC SMALL LETTER EN WITH DESCENDER
|
||||
// Ң <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
|
||||
// ӈ <> XXX ; # CYRILLIC SMALL LETTER EN WITH HOOK
|
||||
// Ӈ <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH HOOK
|
||||
// ҥ <> XXX ; # CYRILLIC SMALL LIGATURE EN GHE
|
||||
// Ҥ <> XXX ; # CYRILLIC CAPITAL LIGATURE EN GHE
|
||||
|
||||
"о <> o ;" // CYRILLIC SMALL LETTER O
|
||||
"О <> O ;" // CYRILLIC CAPITAL LETTER O
|
||||
// ө <> XXX ; # CYRILLIC SMALL LETTER BARRED O
|
||||
// Ө <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
|
||||
"п <> p ;" // CYRILLIC SMALL LETTER PE
|
||||
"П <> P ;" // CYRILLIC CAPITAL LETTER PE
|
||||
// ҧ <> XXX ; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK
|
||||
// Ҧ <> XXX ; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
|
||||
// ҁ <> XXX ; # CYRILLIC SMALL LETTER KOPPA
|
||||
// Ҁ <> XXX ; # CYRILLIC CAPITAL LETTER KOPPA
|
||||
"р <> r ;" // CYRILLIC SMALL LETTER ER
|
||||
"Р <> R ;" // CYRILLIC CAPITAL LETTER ER
|
||||
// ҏ <> XXX ; # CYRILLIC SMALL LETTER ER WITH TICK
|
||||
// Ҏ <> XXX ; # CYRILLIC CAPITAL LETTER ER WITH TICK
|
||||
"с <> s ;" // CYRILLIC SMALL LETTER ES
|
||||
"С <> S ;" // CYRILLIC CAPITAL LETTER ES
|
||||
// ҫ <> XXX ; # CYRILLIC SMALL LETTER ES WITH DESCENDER
|
||||
// Ҫ <> XXX ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
|
||||
"т <> t ;" // CYRILLIC SMALL LETTER TE
|
||||
"Т <> T ;" // CYRILLIC CAPITAL LETTER TE
|
||||
// ҭ <> XXX ; # CYRILLIC SMALL LETTER TE WITH DESCENDER
|
||||
// Ҭ <> XXX ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
|
||||
|
||||
"у <> u ;" // CYRILLIC SMALL LETTER U
|
||||
"У <> U ;" // CYRILLIC CAPITAL LETTER U
|
||||
// ү <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U
|
||||
// Ү <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U
|
||||
// ұ <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
|
||||
// Ұ <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
|
||||
// ѹ <> XXX ; # CYRILLIC SMALL LETTER UK
|
||||
// Ѹ <> XXX ; # CYRILLIC CAPITAL LETTER UK
|
||||
"ф <> f ;" // CYRILLIC SMALL LETTER EF
|
||||
"Ф <> F ;" // CYRILLIC CAPITAL LETTER EF
|
||||
"х <> h ;" // CYRILLIC SMALL LETTER HA
|
||||
"Х <> H;" // CYRILLIC CAPITAL LETTER HA
|
||||
// ҳ <> XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER
|
||||
// Ҳ <> XXX ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
|
||||
// һ <> XXX ; # CYRILLIC SMALL LETTER SHHA
|
||||
// Һ <> XXX ; # CYRILLIC CAPITAL LETTER SHHA
|
||||
// ѡ <> XXX ; # CYRILLIC SMALL LETTER OMEGA
|
||||
// Ѡ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA
|
||||
// ѿ <> XXX ; # CYRILLIC SMALL LETTER OT
|
||||
// Ѿ <> XXX ; # CYRILLIC CAPITAL LETTER OT
|
||||
// ѽ <> XXX ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO
|
||||
// Ѽ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
|
||||
// ѻ <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA
|
||||
// Ѻ <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA
|
||||
"ц <> c ;" // CYRILLIC SMALL LETTER TSE
|
||||
"Ц <> C;" // CYRILLIC CAPITAL LETTER TSE
|
||||
// ҵ <> XXX ; # CYRILLIC SMALL LIGATURE TE TSE
|
||||
// Ҵ <> XXX ; # CYRILLIC CAPITAL LIGATURE TE TSE
|
||||
|
||||
// ҽ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE
|
||||
// Ҽ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
|
||||
// ҿ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER
|
||||
// Ҿ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
|
||||
|
||||
|
||||
"ъ <> [:Ll:] { $modprime2 ;" // CYRILLIC SMALL LETTER HARD SIGN
|
||||
"Ъ <> $modprime2 ;" // CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
"ы <> y ;" // CYRILLIC SMALL LETTER YERU
|
||||
"Ы <> Y ;" // CYRILLIC CAPITAL LETTER YERU
|
||||
"ь <> [:Ll:] { $modprime ;" // CYRILLIC SMALL LETTER SOFT SIGN
|
||||
"Ь <> $modprime ;" // CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||
// ҍ <> XXX ; # CYRILLIC SMALL LETTER SEMISOFT SIGN
|
||||
// Ҍ <> XXX ; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
|
||||
// ѣ <> XXX ; # CYRILLIC SMALL LETTER YAT
|
||||
// Ѣ <> XXX ; # CYRILLIC CAPITAL LETTER YAT
|
||||
|
||||
// ѥ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED E
|
||||
// Ѥ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED E
|
||||
// ѧ <> XXX ; # CYRILLIC SMALL LETTER LITTLE YUS
|
||||
// Ѧ <> XXX ; # CYRILLIC CAPITAL LETTER LITTLE YUS
|
||||
// ѫ <> XXX ; # CYRILLIC SMALL LETTER BIG YUS
|
||||
// Ѫ <> XXX ; # CYRILLIC CAPITAL LETTER BIG YUS
|
||||
// ѩ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS
|
||||
// Ѩ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
|
||||
// ѭ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED BIG YUS
|
||||
// Ѭ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
|
||||
// ѯ <> XXX ; # CYRILLIC SMALL LETTER KSI
|
||||
// Ѯ <> XXX ; # CYRILLIC CAPITAL LETTER KSI
|
||||
// ѱ <> XXX ; # CYRILLIC SMALL LETTER PSI
|
||||
// Ѱ <> XXX ; # CYRILLIC CAPITAL LETTER PSI
|
||||
// ѳ <> XXX ; # CYRILLIC SMALL LETTER FITA
|
||||
// Ѳ <> XXX ; # CYRILLIC CAPITAL LETTER FITA
|
||||
// ѵ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
|
||||
// Ѵ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
|
||||
// ҩ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN HA
|
||||
// Ҩ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
|
||||
// Ӏ <> XXX ; # CYRILLIC LETTER PALOCHKA
|
||||
//## ӑ <> XXX ; # CYRILLIC SMALL LETTER A
|
||||
//## Ӑ <> XXX ; # CYRILLIC CAPITAL LETTER A
|
||||
//## ӓ <> XXX ; # CYRILLIC SMALL LETTER A
|
||||
//## Ӓ <> XXX ; # CYRILLIC CAPITAL LETTER A
|
||||
//## ӛ <> XXX ; # CYRILLIC SMALL LETTER SCHWA
|
||||
//## Ӛ <> XXX ; # CYRILLIC CAPITAL LETTER SCHWA
|
||||
//## ѓ <> XXX ; # CYRILLIC SMALL LETTER GHE
|
||||
//## Ѓ <> XXX ; # CYRILLIC CAPITAL LETTER GHE
|
||||
//## ѐ <> XXX ; # CYRILLIC SMALL LETTER IE
|
||||
//## Ѐ <> XXX ; # CYRILLIC CAPITAL LETTER IE
|
||||
//## ё <> XXX ; # CYRILLIC SMALL LETTER IE
|
||||
//## Ё <> XXX ; # CYRILLIC CAPITAL LETTER IE
|
||||
//## ӗ <> XXX ; # CYRILLIC SMALL LETTER IE
|
||||
//## Ӗ <> XXX ; # CYRILLIC CAPITAL LETTER IE
|
||||
//## ӂ <> XXX ; # CYRILLIC SMALL LETTER ZHE
|
||||
//## Ӂ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
|
||||
//## ӝ <> XXX ; # CYRILLIC SMALL LETTER ZHE
|
||||
//## Ӝ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
|
||||
//## ӟ <> XXX ; # CYRILLIC SMALL LETTER ZE
|
||||
//## Ӟ <> XXX ; # CYRILLIC CAPITAL LETTER ZE
|
||||
//## ѝ <> XXX ; # CYRILLIC SMALL LETTER I
|
||||
//## Ѝ <> XXX ; # CYRILLIC CAPITAL LETTER I
|
||||
//## ӣ <> XXX ; # CYRILLIC SMALL LETTER I
|
||||
//## Ӣ <> XXX ; # CYRILLIC CAPITAL LETTER I
|
||||
//## ӥ <> XXX ; # CYRILLIC SMALL LETTER I
|
||||
//## Ӥ <> XXX ; # CYRILLIC CAPITAL LETTER I
|
||||
//## ї <> XXX ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
//## Ї <> XXX ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
//## ӧ <> XXX ; # CYRILLIC SMALL LETTER O
|
||||
//## Ӧ <> XXX ; # CYRILLIC CAPITAL LETTER O
|
||||
//## ӫ <> XXX ; # CYRILLIC SMALL LETTER BARRED O
|
||||
//## Ӫ <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
|
||||
//## ќ <> XXX ; # CYRILLIC SMALL LETTER KA
|
||||
//## Ќ <> XXX ; # CYRILLIC CAPITAL LETTER KA
|
||||
//## ӯ <> XXX ; # CYRILLIC SMALL LETTER U
|
||||
//## Ӯ <> XXX ; # CYRILLIC CAPITAL LETTER U
|
||||
//## ў <> XXX ; # CYRILLIC SMALL LETTER U
|
||||
//## Ў <> XXX ; # CYRILLIC CAPITAL LETTER U
|
||||
//## ӱ <> XXX ; # CYRILLIC SMALL LETTER U
|
||||
//## Ӱ <> XXX ; # CYRILLIC CAPITAL LETTER U
|
||||
//## ӳ <> XXX ; # CYRILLIC SMALL LETTER U
|
||||
//## Ӳ <> XXX ; # CYRILLIC CAPITAL LETTER U
|
||||
//## ӵ <> XXX ; # CYRILLIC SMALL LETTER CHE
|
||||
//## Ӵ <> XXX ; # CYRILLIC CAPITAL LETTER CHE
|
||||
//## ӹ <> XXX ; # CYRILLIC SMALL LETTER YERU
|
||||
//## Ӹ <> XXX ; # CYRILLIC CAPITAL LETTER YERU
|
||||
//## ӭ <> XXX ; # CYRILLIC SMALL LETTER E
|
||||
//## Ӭ <> XXX ; # CYRILLIC CAPITAL LETTER E
|
||||
//## ѷ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
|
||||
//## Ѷ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
|
||||
|
||||
// Completeness
|
||||
"$ignore = [[:Mark:]''] * ;"
|
||||
"| k < q ;"
|
||||
"| K < Q ;"
|
||||
"| u < w ;"
|
||||
"| U < W ;"
|
||||
"| KS < X } $ignore [:UppercaseLetter:] ;"
|
||||
"| KS < [:UppercaseLetter:] $ignore { X ;"
|
||||
"| Ks < X ;"
|
||||
"| ks < x ;"
|
||||
|
||||
":: NFC (NFD) ;"
|
||||
}
|
||||
}
|
147
icu4c/source/data/translit/t_Deva_InterIndic.txt
Normal file
147
icu4c/source/data/translit/t_Deva_InterIndic.txt
Normal file
@ -0,0 +1,147 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Devanagari_InterIndic.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Devanagari_InterIndic
|
||||
|
||||
translit_Devanagari_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Devanagari_InterIndic
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:41:57 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Devanagari-InterIndic
|
||||
// :: NFD;
|
||||
//Rules for Decomposed characters
|
||||
"\u0928\u093c > \ue029;" //\u0929
|
||||
"\u0930\u093c > \ue031;" //\u0931
|
||||
"\u0933\u093c > \ue034;" //\u0934
|
||||
"\u0915\u093c > \ue058;" //\u0958 LETTER QA (For Urdu)
|
||||
"\u0916\u093c > \ue059;" //\u0959 LETTER KHHA (For Urdu)
|
||||
"\u0917\u093c > \ue05a;" //\u095a LETTER GHHA (For Urdu)
|
||||
"\u091c\u093c > \ue05b;" //\u095b LETTER ZA (For Urdu)
|
||||
"\u0921\u093c > \ue05c;" //\u095c LETTER DDDHA (pronounced RRA)
|
||||
"\u0922\u093c > \ue05d;" //\u095d LETTER RHA (pronounced RRHA)
|
||||
"\u092b\u093c > \ue05e;" //\u095e LETTER FA
|
||||
"\u092f\u093c > \ue05f;" //\u095f LETTER YYA
|
||||
"\u0901>\ue001;" // SIGN CANDRABINDU
|
||||
"\u0902>\ue002;" // SIGN ANUSVARA
|
||||
"\u0903>\ue003;" // SIGN VISARGA
|
||||
"\u0905>\ue005;" // LETTER A
|
||||
"\u0906>\ue006;" // LETTER AA
|
||||
"\u0907>\ue007;" // LETTER I
|
||||
"\u0908>\ue008;" // LETTER II
|
||||
"\u0909>\ue009;" // LETTER U
|
||||
"\u090a>\ue00a;" // LETTER UU
|
||||
"\u090b>\ue00b;" // LETTER VOCALIC R
|
||||
"\u090c>\ue00c;" // LETTER VOCALIC L
|
||||
"\u090d>\ue00d;" // LETTER CANDRA E (For representing English sounds)
|
||||
//\u090e>\ue00e; # UNMAPPED LETTER SHORT E(For Southern Scripts)
|
||||
"\u090f>\ue00f;" // LETTER E
|
||||
"\u0910>\ue010;" // LETTER AI
|
||||
"\u0911>\ue011;" // LETTER CANDRA O (For representing English sounds)
|
||||
//\u0912>\ue012; # UNMAPPED LETTER SHORT O (For Southern Scripts)
|
||||
"\u0913>\ue013;" // LETTER O
|
||||
"\u0914>\ue014;" // LETTER AU
|
||||
"\u0915>\ue015;" // LETTER KA
|
||||
"\u0916>\ue016;" // LETTER KHA
|
||||
"\u0917>\ue017;" // LETTER GA
|
||||
"\u0918>\ue018;" // LETTER GHA
|
||||
"\u0919>\ue019;" // LETTER NGA
|
||||
"\u091a>\ue01a;" // LETTER CA
|
||||
"\u091b>\ue01b;" // LETTER CHA
|
||||
"\u091c>\ue01c;" // LETTER JA
|
||||
"\u091d>\ue01d;" // LETTER JHA
|
||||
"\u091e>\ue01e;" // LETTER NYA
|
||||
"\u091f>\ue01f;" // LETTER TTA
|
||||
"\u0920>\ue020;" // LETTER TTHA
|
||||
"\u0921>\ue021;" // LETTER DDA
|
||||
"\u0922>\ue022;" // LETTER DDHA
|
||||
"\u0923>\ue023;" // LETTER NNA
|
||||
"\u0924>\ue024;" // LETTER TA
|
||||
"\u0925>\ue025;" // LETTER THA
|
||||
"\u0926>\ue026;" // LETTER DA
|
||||
"\u0927>\ue027;" // LETTER DHA
|
||||
"\u0928>\ue028;" // LETTER NA
|
||||
"\u0929>\ue029;" // LETTER NNNA
|
||||
"\u092a>\ue02a;" // LETTER PA
|
||||
"\u092b>\ue02b;" // LETTER PHA
|
||||
"\u092c>\ue02c;" // LETTER BA
|
||||
"\u092d>\ue02d;" // LETTER BHA
|
||||
"\u092e>\ue02e;" // LETTER MA
|
||||
"\u092f>\ue02f;" // LETTER YA
|
||||
"\u0930>\ue030;" // LETTER RA
|
||||
//\u0931>\ue031; # UNMAPPED LETTER RRA (Eyelash RA for Southern scripts)
|
||||
"\u0932>\ue032;" // LETTER LA
|
||||
"\u0933>\ue033;" // LETTER LLA
|
||||
//\u0934>\ue034; # UNMAPPED LETTER LLLA (LLLA for Southern scripts)
|
||||
"\u0935>\ue035;" // LETTER VA
|
||||
"\u0936>\ue036;" // LETTER SHA
|
||||
"\u0937>\ue037;" // LETTER SSA
|
||||
"\u0938>\ue038;" // LETTER SA
|
||||
"\u0939>\ue039;" // LETTER HA
|
||||
"\u093c>\ue03c;" // SIGN NUKTA
|
||||
"\u093d>\ue03d;" // SIGN AVAGRAHA
|
||||
"\u093e>\ue03e;" // VOWEL SIGN AA
|
||||
"\u093f>\ue03f;" // VOWEL SIGN I
|
||||
"\u0940>\ue040;" // VOWEL SIGN II
|
||||
"\u0941>\ue041;" // VOWEL SIGN U
|
||||
"\u0942>\ue042;" // VOWEL SIGN UU
|
||||
"\u0943>\ue043;" // VOWEL SIGN VOCALIC R
|
||||
"\u0944>\ue044;" // VOWEL SIGN VOCALIC RR
|
||||
"\u0945>\ue045;" // VOWEL SIGN CANDRA E
|
||||
//\u0946>\ue046; # UNMAPPED VOWEL SIGN SHORT E
|
||||
"\u0947>\ue047;" // VOWEL SIGN E
|
||||
"\u0948>\ue048;" // VOWEL SIGN AI
|
||||
"\u0949>\ue049;" // VOWEL SIGN CANDRA O
|
||||
//\u094a>\ue04a; # UNMAPPED VOWEL SIGN SHORT O
|
||||
"\u094b>\ue04b;" // VOWEL SIGN O
|
||||
"\u094c>\ue04c;" // VOWEL SIGN AU
|
||||
"\u094d>\ue04d;" // SIGN VIRAMA
|
||||
"\u0950>\ue050;" // OM
|
||||
// \u0951>; # UNMAPPED STRESS SIGN UDATTA
|
||||
// \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
|
||||
// \u0953>; # UNMAPPED GRAVE ACCENT
|
||||
// \u0954>; # UNMAPPED ACUTE ACCENT
|
||||
"\u0958>\ue058;" // LETTER QA (For Urdu)
|
||||
"\u0959>\ue059;" // LETTER KHHA (For Urdu)
|
||||
"\u095a>\ue05a;" // LETTER GHHA (For Urdu)
|
||||
"\u095b>\ue05b;" // LETTER ZA (For Urdu)
|
||||
"\u095c>\ue05c;" // LETTER DDDHA (pronounced RRA)
|
||||
"\u095d>\ue05d;" // LETTER RHA (pronounced RRHA)
|
||||
"\u095e>\ue05e;" // LETTER FA
|
||||
"\u095f>\ue05f;" // LETTER YYA
|
||||
"\u0960>\ue060;" // LETTER VOCALIC RR
|
||||
"\u0961>\ue061;" // LETTER VOCALIC LL
|
||||
"\u0962>\ue062;" // VOWEL SIGN VOCALIC L
|
||||
"\u0963>\ue063;" // VOWEL SIGN VOCALIC LL
|
||||
// \u0964>; # UNMAPPED Devanagari-InterIndic: DANDA
|
||||
// \u0965>; # UNMAPPED Devanagari-InterIndic: DOUBLE DANDA
|
||||
"\u0966>\ue066;" // DIGIT ZERO
|
||||
"\u0967>\ue067;" // DIGIT ONE
|
||||
"\u0968>\ue068;" // DIGIT TWO
|
||||
"\u0969>\ue069;" // DIGIT THREE
|
||||
"\u096a>\ue06a;" // DIGIT FOUR
|
||||
"\u096b>\ue06b;" // DIGIT FIVE
|
||||
"\u096c>\ue06c;" // DIGIT SIX
|
||||
"\u096d>\ue06d;" // DIGIT SEVEN
|
||||
"\u096e>\ue06e;" // DIGIT EIGHT
|
||||
"\u096f>\ue06f;" // DIGIT NINE
|
||||
// \u0970>; # UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN
|
||||
// :: NFC (NFD) ;
|
||||
}
|
||||
}
|
287
icu4c/source/data/translit/t_FWidth_HWidth.txt
Normal file
287
icu4c/source/data/translit/t_FWidth_HWidth.txt
Normal file
@ -0,0 +1,287 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Fullwidth_Halfwidth.utf8.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Fullwidth_Halfwidth
|
||||
|
||||
translit_Fullwidth_Halfwidth {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:41:57 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Fullwidth-Halfwidth
|
||||
|
||||
// Mechanically generated from Unicode Character Database
|
||||
|
||||
// multicharacter
|
||||
|
||||
"ガ<>ガ;" // to KATAKANA LETTER GA
|
||||
"ギ<>ギ;" // to KATAKANA LETTER GI
|
||||
"グ<>グ;" // to KATAKANA LETTER GU
|
||||
"ゲ<>ゲ;" // to KATAKANA LETTER GE
|
||||
"ゴ<>ゴ;" // to KATAKANA LETTER GO
|
||||
"ザ<>ザ;" // to KATAKANA LETTER ZA
|
||||
"ジ<>ジ;" // to KATAKANA LETTER ZI
|
||||
"ズ<>ズ;" // to KATAKANA LETTER ZU
|
||||
"ゼ<>ゼ;" // to KATAKANA LETTER ZE
|
||||
"ゾ<>ゾ;" // to KATAKANA LETTER ZO
|
||||
"ダ<>ダ;" // to KATAKANA LETTER DA
|
||||
"ヂ<>ヂ;" // to KATAKANA LETTER DI
|
||||
"ヅ<>ヅ;" // to KATAKANA LETTER DU
|
||||
"デ<>デ;" // to KATAKANA LETTER DE
|
||||
"ド<>ド;" // to KATAKANA LETTER DO
|
||||
"バ<>バ;" // to KATAKANA LETTER BA
|
||||
"パ<>パ;" // to KATAKANA LETTER PA
|
||||
"ビ<>ビ;" // to KATAKANA LETTER BI
|
||||
"ピ<>ピ;" // to KATAKANA LETTER PI
|
||||
"ブ<>ブ;" // to KATAKANA LETTER BU
|
||||
"プ<>プ;" // to KATAKANA LETTER PU
|
||||
"ベ<>ベ;" // to KATAKANA LETTER BE
|
||||
"ペ<>ペ;" // to KATAKANA LETTER PE
|
||||
"ボ<>ボ;" // to KATAKANA LETTER BO
|
||||
"ポ<>ポ;" // to KATAKANA LETTER PO
|
||||
"ヴ<>ヴ;" // to KATAKANA LETTER VU
|
||||
"ヷ<>ヷ;" // to KATAKANA LETTER VA
|
||||
"ヺ<>ヺ;" // to KATAKANA LETTER VO
|
||||
|
||||
// single character
|
||||
|
||||
"!<>'!';" // from FULLWIDTH EXCLAMATION MARK
|
||||
""<>'\"';" // from FULLWIDTH QUOTATION MARK
|
||||
"#<>'#';" // from FULLWIDTH NUMBER SIGN
|
||||
"$<>'$';" // from FULLWIDTH DOLLAR SIGN
|
||||
"%<>'%';" // from FULLWIDTH PERCENT SIGN
|
||||
"&<>'&';" // from FULLWIDTH AMPERSAND
|
||||
"'<>'';" // from FULLWIDTH APOSTROPHE
|
||||
"(<>'(';" // from FULLWIDTH LEFT PARENTHESIS
|
||||
")<>')';" // from FULLWIDTH RIGHT PARENTHESIS
|
||||
"*<>'*';" // from FULLWIDTH ASTERISK
|
||||
"+<>'+';" // from FULLWIDTH PLUS SIGN
|
||||
",<>',';" // from FULLWIDTH COMMA
|
||||
"-<>'-';" // from FULLWIDTH HYPHEN-MINUS
|
||||
".<>'.';" // from FULLWIDTH FULL STOP
|
||||
"/<>'/';" // from FULLWIDTH SOLIDUS
|
||||
"0<>'0';" // from FULLWIDTH DIGIT ZERO
|
||||
"1<>'1';" // from FULLWIDTH DIGIT ONE
|
||||
"2<>'2';" // from FULLWIDTH DIGIT TWO
|
||||
"3<>'3';" // from FULLWIDTH DIGIT THREE
|
||||
"4<>'4';" // from FULLWIDTH DIGIT FOUR
|
||||
"5<>'5';" // from FULLWIDTH DIGIT FIVE
|
||||
"6<>'6';" // from FULLWIDTH DIGIT SIX
|
||||
"7<>'7';" // from FULLWIDTH DIGIT SEVEN
|
||||
"8<>'8';" // from FULLWIDTH DIGIT EIGHT
|
||||
"9<>'9';" // from FULLWIDTH DIGIT NINE
|
||||
":<>':';" // from FULLWIDTH COLON
|
||||
";<>';';" // from FULLWIDTH SEMICOLON
|
||||
"<<>'<';" // from FULLWIDTH LESS-THAN SIGN
|
||||
"=<>'=';" // from FULLWIDTH EQUALS SIGN
|
||||
"><>'>';" // from FULLWIDTH GREATER-THAN SIGN
|
||||
"?<>'?';" // from FULLWIDTH QUESTION MARK
|
||||
"@<>'@';" // from FULLWIDTH COMMERCIAL AT
|
||||
"A<>A;" // from FULLWIDTH LATIN CAPITAL LETTER A
|
||||
"B<>B;" // from FULLWIDTH LATIN CAPITAL LETTER B
|
||||
"C<>C;" // from FULLWIDTH LATIN CAPITAL LETTER C
|
||||
"D<>D;" // from FULLWIDTH LATIN CAPITAL LETTER D
|
||||
"E<>E;" // from FULLWIDTH LATIN CAPITAL LETTER E
|
||||
"F<>F;" // from FULLWIDTH LATIN CAPITAL LETTER F
|
||||
"G<>G;" // from FULLWIDTH LATIN CAPITAL LETTER G
|
||||
"H<>H;" // from FULLWIDTH LATIN CAPITAL LETTER H
|
||||
"I<>I;" // from FULLWIDTH LATIN CAPITAL LETTER I
|
||||
"J<>J;" // from FULLWIDTH LATIN CAPITAL LETTER J
|
||||
"K<>K;" // from FULLWIDTH LATIN CAPITAL LETTER K
|
||||
"L<>L;" // from FULLWIDTH LATIN CAPITAL LETTER L
|
||||
"M<>M;" // from FULLWIDTH LATIN CAPITAL LETTER M
|
||||
"N<>N;" // from FULLWIDTH LATIN CAPITAL LETTER N
|
||||
"O<>O;" // from FULLWIDTH LATIN CAPITAL LETTER O
|
||||
"P<>P;" // from FULLWIDTH LATIN CAPITAL LETTER P
|
||||
"Q<>Q;" // from FULLWIDTH LATIN CAPITAL LETTER Q
|
||||
"R<>R;" // from FULLWIDTH LATIN CAPITAL LETTER R
|
||||
"S<>S;" // from FULLWIDTH LATIN CAPITAL LETTER S
|
||||
"T<>T;" // from FULLWIDTH LATIN CAPITAL LETTER T
|
||||
"U<>U;" // from FULLWIDTH LATIN CAPITAL LETTER U
|
||||
"V<>V;" // from FULLWIDTH LATIN CAPITAL LETTER V
|
||||
"W<>W;" // from FULLWIDTH LATIN CAPITAL LETTER W
|
||||
"X<>X;" // from FULLWIDTH LATIN CAPITAL LETTER X
|
||||
"Y<>Y;" // from FULLWIDTH LATIN CAPITAL LETTER Y
|
||||
"Z<>Z;" // from FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
"[<>'[';" // from FULLWIDTH LEFT SQUARE BRACKET
|
||||
"\<>'\\';" // from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
|
||||
"]<>']';" // from FULLWIDTH RIGHT SQUARE BRACKET
|
||||
"^<>'^';" // from FULLWIDTH CIRCUMFLEX ACCENT
|
||||
"_<>'_';" // from FULLWIDTH LOW LINE
|
||||
"`<>'`';" // from FULLWIDTH GRAVE ACCENT
|
||||
"a<>a;" // from FULLWIDTH LATIN SMALL LETTER A
|
||||
"b<>b;" // from FULLWIDTH LATIN SMALL LETTER B
|
||||
"c<>c;" // from FULLWIDTH LATIN SMALL LETTER C
|
||||
"d<>d;" // from FULLWIDTH LATIN SMALL LETTER D
|
||||
"e<>e;" // from FULLWIDTH LATIN SMALL LETTER E
|
||||
"f<>f;" // from FULLWIDTH LATIN SMALL LETTER F
|
||||
"g<>g;" // from FULLWIDTH LATIN SMALL LETTER G
|
||||
"h<>h;" // from FULLWIDTH LATIN SMALL LETTER H
|
||||
"i<>i;" // from FULLWIDTH LATIN SMALL LETTER I
|
||||
"j<>j;" // from FULLWIDTH LATIN SMALL LETTER J
|
||||
"k<>k;" // from FULLWIDTH LATIN SMALL LETTER K
|
||||
"l<>l;" // from FULLWIDTH LATIN SMALL LETTER L
|
||||
"m<>m;" // from FULLWIDTH LATIN SMALL LETTER M
|
||||
"n<>n;" // from FULLWIDTH LATIN SMALL LETTER N
|
||||
"o<>o;" // from FULLWIDTH LATIN SMALL LETTER O
|
||||
"p<>p;" // from FULLWIDTH LATIN SMALL LETTER P
|
||||
"q<>q;" // from FULLWIDTH LATIN SMALL LETTER Q
|
||||
"r<>r;" // from FULLWIDTH LATIN SMALL LETTER R
|
||||
"s<>s;" // from FULLWIDTH LATIN SMALL LETTER S
|
||||
"t<>t;" // from FULLWIDTH LATIN SMALL LETTER T
|
||||
"u<>u;" // from FULLWIDTH LATIN SMALL LETTER U
|
||||
"v<>v;" // from FULLWIDTH LATIN SMALL LETTER V
|
||||
"w<>w;" // from FULLWIDTH LATIN SMALL LETTER W
|
||||
"x<>x;" // from FULLWIDTH LATIN SMALL LETTER X
|
||||
"y<>y;" // from FULLWIDTH LATIN SMALL LETTER Y
|
||||
"z<>z;" // from FULLWIDTH LATIN SMALL LETTER Z
|
||||
"{<>'{';" // from FULLWIDTH LEFT CURLY BRACKET
|
||||
"|<>'|';" // from FULLWIDTH VERTICAL LINE
|
||||
"}<>'}';" // from FULLWIDTH RIGHT CURLY BRACKET
|
||||
"~<>'~';" // from FULLWIDTH TILDE
|
||||
"。<>。;" // to HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
"「<>「;" // to HALFWIDTH LEFT CORNER BRACKET
|
||||
"」<>」;" // to HALFWIDTH RIGHT CORNER BRACKET
|
||||
"、<>、;" // to HALFWIDTH IDEOGRAPHIC COMMA
|
||||
"・<>・;" // to HALFWIDTH KATAKANA MIDDLE DOT
|
||||
"ヲ<>ヲ;" // to HALFWIDTH KATAKANA LETTER WO
|
||||
"ァ<>ァ;" // to HALFWIDTH KATAKANA LETTER SMALL A
|
||||
"ィ<>ィ;" // to HALFWIDTH KATAKANA LETTER SMALL I
|
||||
"ゥ<>ゥ;" // to HALFWIDTH KATAKANA LETTER SMALL U
|
||||
"ェ<>ェ;" // to HALFWIDTH KATAKANA LETTER SMALL E
|
||||
"ォ<>ォ;" // to HALFWIDTH KATAKANA LETTER SMALL O
|
||||
"ャ<>ャ;" // to HALFWIDTH KATAKANA LETTER SMALL YA
|
||||
"ュ<>ュ;" // to HALFWIDTH KATAKANA LETTER SMALL YU
|
||||
"ョ<>ョ;" // to HALFWIDTH KATAKANA LETTER SMALL YO
|
||||
"ッ<>ッ;" // to HALFWIDTH KATAKANA LETTER SMALL TU
|
||||
"ー<>ー;" // to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
"ア<>ア;" // to HALFWIDTH KATAKANA LETTER A
|
||||
"イ<>イ;" // to HALFWIDTH KATAKANA LETTER I
|
||||
"ウ<>ウ;" // to HALFWIDTH KATAKANA LETTER U
|
||||
"エ<>エ;" // to HALFWIDTH KATAKANA LETTER E
|
||||
"オ<>オ;" // to HALFWIDTH KATAKANA LETTER O
|
||||
"カ<>カ;" // to HALFWIDTH KATAKANA LETTER KA
|
||||
"キ<>キ;" // to HALFWIDTH KATAKANA LETTER KI
|
||||
"ク<>ク;" // to HALFWIDTH KATAKANA LETTER KU
|
||||
"ケ<>ケ;" // to HALFWIDTH KATAKANA LETTER KE
|
||||
"コ<>コ;" // to HALFWIDTH KATAKANA LETTER KO
|
||||
"サ<>サ;" // to HALFWIDTH KATAKANA LETTER SA
|
||||
"シ<>シ;" // to HALFWIDTH KATAKANA LETTER SI
|
||||
"ス<>ス;" // to HALFWIDTH KATAKANA LETTER SU
|
||||
"セ<>セ;" // to HALFWIDTH KATAKANA LETTER SE
|
||||
"ソ<>ソ;" // to HALFWIDTH KATAKANA LETTER SO
|
||||
"タ<>タ;" // to HALFWIDTH KATAKANA LETTER TA
|
||||
"チ<>チ;" // to HALFWIDTH KATAKANA LETTER TI
|
||||
"ツ<>ツ;" // to HALFWIDTH KATAKANA LETTER TU
|
||||
"テ<>テ;" // to HALFWIDTH KATAKANA LETTER TE
|
||||
"ト<>ト;" // to HALFWIDTH KATAKANA LETTER TO
|
||||
"ナ<>ナ;" // to HALFWIDTH KATAKANA LETTER NA
|
||||
"ニ<>ニ;" // to HALFWIDTH KATAKANA LETTER NI
|
||||
"ヌ<>ヌ;" // to HALFWIDTH KATAKANA LETTER NU
|
||||
"ネ<>ネ;" // to HALFWIDTH KATAKANA LETTER NE
|
||||
"ノ<>ノ;" // to HALFWIDTH KATAKANA LETTER NO
|
||||
"ハ<>ハ;" // to HALFWIDTH KATAKANA LETTER HA
|
||||
"ヒ<>ヒ;" // to HALFWIDTH KATAKANA LETTER HI
|
||||
"フ<>フ;" // to HALFWIDTH KATAKANA LETTER HU
|
||||
"ヘ<>ヘ;" // to HALFWIDTH KATAKANA LETTER HE
|
||||
"ホ<>ホ;" // to HALFWIDTH KATAKANA LETTER HO
|
||||
"マ<>マ;" // to HALFWIDTH KATAKANA LETTER MA
|
||||
"ミ<>ミ;" // to HALFWIDTH KATAKANA LETTER MI
|
||||
"ム<>ム;" // to HALFWIDTH KATAKANA LETTER MU
|
||||
"メ<>メ;" // to HALFWIDTH KATAKANA LETTER ME
|
||||
"モ<>モ;" // to HALFWIDTH KATAKANA LETTER MO
|
||||
"ヤ<>ヤ;" // to HALFWIDTH KATAKANA LETTER YA
|
||||
"ユ<>ユ;" // to HALFWIDTH KATAKANA LETTER YU
|
||||
"ヨ<>ヨ;" // to HALFWIDTH KATAKANA LETTER YO
|
||||
"ラ<>ラ;" // to HALFWIDTH KATAKANA LETTER RA
|
||||
"リ<>リ;" // to HALFWIDTH KATAKANA LETTER RI
|
||||
"ル<>ル;" // to HALFWIDTH KATAKANA LETTER RU
|
||||
"レ<>レ;" // to HALFWIDTH KATAKANA LETTER RE
|
||||
"ロ<>ロ;" // to HALFWIDTH KATAKANA LETTER RO
|
||||
"ワ<>ワ;" // to HALFWIDTH KATAKANA LETTER WA
|
||||
"ン<>ン;" // to HALFWIDTH KATAKANA LETTER N
|
||||
"゙<>゙;" // to HALFWIDTH KATAKANA VOICED SOUND MARK
|
||||
"゚<>゚;" // to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
"ᅠ<>ᅠ;" // to HALFWIDTH HANGUL FILLER
|
||||
"ᄀ<>ᄀ;" // to HALFWIDTH HANGUL LETTER KIYEOK
|
||||
"ᄁ<>ᄁ;" // to HALFWIDTH HANGUL LETTER SSANGKIYEOK
|
||||
"ᆪ<>ᆪ;" // to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
|
||||
"ᄂ<>ᄂ;" // to HALFWIDTH HANGUL LETTER NIEUN
|
||||
"ᆬ<>ᆬ;" // to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
|
||||
"ᆭ<>ᆭ;" // to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
|
||||
"ᄃ<>ᄃ;" // to HALFWIDTH HANGUL LETTER TIKEUT
|
||||
"ᄄ<>ᄄ;" // to HALFWIDTH HANGUL LETTER SSANGTIKEUT
|
||||
"ᄅ<>ᄅ;" // to HALFWIDTH HANGUL LETTER RIEUL
|
||||
"ᆰ<>ᆰ;" // to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
|
||||
"ᆱ<>ᆱ;" // to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
|
||||
"ᆲ<>ᆲ;" // to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
|
||||
"ᆳ<>ᆳ;" // to HALFWIDTH HANGUL LETTER RIEUL-SIOS
|
||||
"ᆴ<>ᆴ;" // to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
|
||||
"ᆵ<>ᆵ;" // to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
|
||||
"ᄚ<>ᄚ;" // to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
|
||||
"ᄆ<>ᄆ;" // to HALFWIDTH HANGUL LETTER MIEUM
|
||||
"ᄇ<>ᄇ;" // to HALFWIDTH HANGUL LETTER PIEUP
|
||||
"ᄈ<>ᄈ;" // to HALFWIDTH HANGUL LETTER SSANGPIEUP
|
||||
"ᄡ<>ᄡ;" // to HALFWIDTH HANGUL LETTER PIEUP-SIOS
|
||||
"ᄉ<>ᄉ;" // to HALFWIDTH HANGUL LETTER SIOS
|
||||
"ᄊ<>ᄊ;" // to HALFWIDTH HANGUL LETTER SSANGSIOS
|
||||
"ᄋ<>ᄋ;" // to HALFWIDTH HANGUL LETTER IEUNG
|
||||
"ᄌ<>ᄌ;" // to HALFWIDTH HANGUL LETTER CIEUC
|
||||
"ᄍ<>ᄍ;" // to HALFWIDTH HANGUL LETTER SSANGCIEUC
|
||||
"ᄎ<>ᄎ;" // to HALFWIDTH HANGUL LETTER CHIEUCH
|
||||
"ᄏ<>ᄏ;" // to HALFWIDTH HANGUL LETTER KHIEUKH
|
||||
"ᄐ<>ᄐ;" // to HALFWIDTH HANGUL LETTER THIEUTH
|
||||
"ᄑ<>ᄑ;" // to HALFWIDTH HANGUL LETTER PHIEUPH
|
||||
"ᄒ<>ᄒ;" // to HALFWIDTH HANGUL LETTER HIEUH
|
||||
"ᅡ<>ᅡ;" // to HALFWIDTH HANGUL LETTER A
|
||||
"ᅢ<>ᅢ;" // to HALFWIDTH HANGUL LETTER AE
|
||||
"ᅣ<>ᅣ;" // to HALFWIDTH HANGUL LETTER YA
|
||||
"ᅤ<>ᅤ;" // to HALFWIDTH HANGUL LETTER YAE
|
||||
"ᅥ<>ᅥ;" // to HALFWIDTH HANGUL LETTER EO
|
||||
"ᅦ<>ᅦ;" // to HALFWIDTH HANGUL LETTER E
|
||||
"ᅧ<>ᅧ;" // to HALFWIDTH HANGUL LETTER YEO
|
||||
"ᅨ<>ᅨ;" // to HALFWIDTH HANGUL LETTER YE
|
||||
"ᅩ<>ᅩ;" // to HALFWIDTH HANGUL LETTER O
|
||||
"ᅪ<>ᅪ;" // to HALFWIDTH HANGUL LETTER WA
|
||||
"ᅫ<>ᅫ;" // to HALFWIDTH HANGUL LETTER WAE
|
||||
"ᅬ<>ᅬ;" // to HALFWIDTH HANGUL LETTER OE
|
||||
"ᅭ<>ᅭ;" // to HALFWIDTH HANGUL LETTER YO
|
||||
"ᅮ<>ᅮ;" // to HALFWIDTH HANGUL LETTER U
|
||||
"ᅯ<>ᅯ;" // to HALFWIDTH HANGUL LETTER WEO
|
||||
"ᅰ<>ᅰ;" // to HALFWIDTH HANGUL LETTER WE
|
||||
"ᅱ<>ᅱ;" // to HALFWIDTH HANGUL LETTER WI
|
||||
"ᅲ<>ᅲ;" // to HALFWIDTH HANGUL LETTER YU
|
||||
"ᅳ<>ᅳ;" // to HALFWIDTH HANGUL LETTER EU
|
||||
"ᅴ<>ᅴ;" // to HALFWIDTH HANGUL LETTER YI
|
||||
"ᅵ<>ᅵ;" // to HALFWIDTH HANGUL LETTER I
|
||||
"¢<>'¢';" // from FULLWIDTH CENT SIGN
|
||||
"£<>'£';" // from FULLWIDTH POUND SIGN
|
||||
"¬<>'¬';" // from FULLWIDTH NOT SIGN
|
||||
" ̄<>' '̄;" // from FULLWIDTH MACRON
|
||||
"' '<>' ';" // ideographic space (place this after MACRON)
|
||||
"¦<>'¦';" // from FULLWIDTH BROKEN BAR
|
||||
"¥<>'¥';" // from FULLWIDTH YEN SIGN
|
||||
"₩<>₩;" // from FULLWIDTH WON SIGN
|
||||
"│<>│;" // to HALFWIDTH FORMS LIGHT VERTICAL
|
||||
"←<>←;" // to HALFWIDTH LEFTWARDS ARROW
|
||||
"↑<>↑;" // to HALFWIDTH UPWARDS ARROW
|
||||
"→<>→;" // to HALFWIDTH RIGHTWARDS ARROW
|
||||
"↓<>↓;" // to HALFWIDTH DOWNWARDS ARROW
|
||||
"■<>■;" // to HALFWIDTH BLACK SQUARE
|
||||
"○<>○;" // to HALFWIDTH WHITE CIRCLE
|
||||
|
||||
// eof
|
||||
}
|
||||
}
|
283
icu4c/source/data/translit/t_Grek_Latn.txt
Normal file
283
icu4c/source/data/translit/t_Grek_Latn.txt
Normal file
@ -0,0 +1,283 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Greek_Latin.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Greek_Latin
|
||||
|
||||
translit_Greek_Latin {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// $Source: /xsrl/Nsvn/icu/icu/source/data/translit/Attic/t_Grek_Latn.txt,v $
|
||||
// $Date: 2001/10/26 05:41:16 $
|
||||
// $Revision: 1.1 $
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Rules are predicated on running NFD first, and NFC afterwards
|
||||
"::NFD (NFC) ;"
|
||||
|
||||
// TEST CASES
|
||||
|
||||
// Ὀλίγοι ἔμφρονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
|
||||
// ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
|
||||
// ᾳ ῃ ῳ ὃ ὄ
|
||||
// ὠς ὡς ὢς ὣς
|
||||
// Ὠς Ὡς Ὢς Ὣς
|
||||
// ὨΣ ὩΣ ὪΣ ὫΣ
|
||||
// Ạ, ạ, Ẹ, ẹ, Ọ, ọ
|
||||
|
||||
// Useful variables
|
||||
|
||||
"$lower = [:Ll:] ;"
|
||||
"$upper = [:Lu:] ;"
|
||||
"$accent = [:M:] ;"
|
||||
|
||||
"$macron = \u0304 ;"
|
||||
"$ddot = \u0308 ;"
|
||||
|
||||
"$lcgvowel = [αεηιουω] ;"
|
||||
"$ucgvowel = [ΑΕΗΙΟΥΩ] ;"
|
||||
"$gvowel = [$lcgvowel $ucgvowel] ;"
|
||||
"$lcgvowelC = [$lcgvowel $accent] ;"
|
||||
|
||||
"$vowel = [ AEIOUaeiou $gvowel] ;"
|
||||
|
||||
"$beforeLower = $accent * $lower ;"
|
||||
|
||||
"$gammaLike = [ΓΚΞΧγκξχ] ;"
|
||||
"$smooth = ̓ ;"
|
||||
"$rough = ̔ ;"
|
||||
"$iotasub = ͅ ;"
|
||||
|
||||
// Fix punctuation
|
||||
|
||||
"\; <> \? ;"
|
||||
"· <> \: ;"
|
||||
|
||||
// CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
|
||||
|
||||
"\u0342 <> \u0302 ;"
|
||||
|
||||
// IOTA: convert iota subscript to iota
|
||||
// first make previous alpha long!
|
||||
|
||||
"Α } $accent * $iotasub > A $macron ;"
|
||||
"α } $accent * $iotasub > a $macron ;"
|
||||
|
||||
// now convert to uppercase if after uppercase, otherwise to lowercase
|
||||
|
||||
"$upper $accent * { $iotasub > I ;"
|
||||
"$iotasub > i ;"
|
||||
|
||||
"| $1 $iotasub < ([:L:] $macron [:M:]*) i ;"
|
||||
|
||||
// BREATHING
|
||||
|
||||
// Convert rough breathing to h, and move before letters.
|
||||
|
||||
// Make A ` x = > H a x
|
||||
|
||||
"Α $rough } $beforeLower > H | α ;"
|
||||
"Ε $rough } $beforeLower > H | ε;"
|
||||
"Η $rough } $beforeLower > H | η ;"
|
||||
"Ι ($ddot?) $rough } $beforeLower > H | ι $1;"
|
||||
"Ο $rough } $beforeLower > H | ο ;"
|
||||
"Υ $rough } $beforeLower > H | υ ;"
|
||||
"Ω ($ddot?) $rough } $beforeLower > H | ω $1;"
|
||||
|
||||
// Make A x ` = > H a x
|
||||
|
||||
"Α ($lower) $rough > H | α $1 ;"
|
||||
"Ε ($lower) $rough > H | ε $1 ;"
|
||||
"Η ($lower) $rough > H | η $1 ;"
|
||||
"Ι ($lower $ddot?) $rough > H | ι $1 ;"
|
||||
"Ο ($lower) $rough > H | ο $1 ;"
|
||||
"Υ ($lower) $rough > H | υ $1 ;"
|
||||
"Ω ($lower $ddot?) $rough > H | ω $1 ;"
|
||||
|
||||
//Otherwise, make x ` into h x and X ` into H X
|
||||
|
||||
"($lcgvowel + $ddot? ) $rough > h | $1 ;"
|
||||
"($gvowel + $ddot? ) $rough > H | $1 ;"
|
||||
|
||||
// Go backwards with H
|
||||
|
||||
"| $1 $rough < h ([aeiouyAEIOUY] $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
|
||||
"| $1 $rough < h ([aeiouyAEIOUY] $macron? $ddot?) ;"
|
||||
|
||||
"| $1 $rough < H ([AEIOUY] $macron? $ddot?[aeiouyAEIOUY] $macron?) ;"
|
||||
"| $1 $rough < H ([AEIOUY] $macron? $ddot?) ;"
|
||||
|
||||
// titlecase, have to fix individually
|
||||
"| $1 $rough < H (a $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
|
||||
"| $1 $rough < H (e $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
|
||||
"| $1 $rough < H (i $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
|
||||
"| $1 $rough < H (o $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
|
||||
"| $1 $rough < H (u $macron? $ddot? [aeiouyAEIOUY] $macron?) ;"
|
||||
"| $1 $rough < H (y $macron? [aeiouyAEIOUY] $macron?) ;"
|
||||
"| $1 $rough < H (a $macron? $ddot? ) ;"
|
||||
"| $1 $rough < H (e $macron? $ddot? ) ;"
|
||||
"| $1 $rough < H (i $macron? $ddot? ) ;"
|
||||
"| $1 $rough < H (o $macron? $ddot? ) ;"
|
||||
"| $1 $rough < H (u $macron? $ddot? ) ;"
|
||||
"| $1 $rough < H (y $macron? $ddot? ) ;"
|
||||
|
||||
// Now do smooth
|
||||
|
||||
//delete smooth breathing for Latin
|
||||
"$smooth > ;"
|
||||
|
||||
// insert in Greek
|
||||
"| $1 $smooth < [:^L:] { ([aeiouyAEIOUY] $macron? [aeiouyAEIOUY] $macron?) } [^[$smooth]] ;"
|
||||
"| $1 $smooth < [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] ;"
|
||||
|
||||
// TODO: preserve smooth/rough breathing if not
|
||||
// on initial vowel sequence
|
||||
|
||||
// need to have these up here so the rules don't mask
|
||||
|
||||
"η <> e $macron ;"
|
||||
"Η <> E $macron ;"
|
||||
|
||||
"φ <> ph ;"
|
||||
"Ψ } $beforeLower <> Ps ;"
|
||||
"Ψ <> PS ;"
|
||||
|
||||
"Φ } $beforeLower <> Ph ;"
|
||||
"Φ <> PH ;"
|
||||
"ψ <> ps ;"
|
||||
|
||||
"ω <> o $macron ;"
|
||||
"Ω <> O $macron;"
|
||||
|
||||
// NORMAL
|
||||
|
||||
"α <> a ;"
|
||||
"Α <> A ;"
|
||||
|
||||
"β <> b ;"
|
||||
"Β <> B ;"
|
||||
|
||||
"γ } $gammaLike <> n } [gkc] ;"
|
||||
"γ <> g ;"
|
||||
"Γ } $gammaLike <> N } [gkc] ;"
|
||||
"Γ <> G ;"
|
||||
|
||||
"δ <> d ;"
|
||||
"Δ <> D ;"
|
||||
|
||||
"ε <> e ;"
|
||||
"Ε <> E ;"
|
||||
|
||||
"ζ <> z ;"
|
||||
"Ζ <> Z ;"
|
||||
|
||||
"θ <> th ;"
|
||||
"Θ } $beforeLower <> Th ;"
|
||||
"Θ <> TH ;"
|
||||
|
||||
"ι <> i ;"
|
||||
"Ι <> I ;"
|
||||
|
||||
"κ <> k ;"
|
||||
"Κ <> K ;"
|
||||
|
||||
"λ <> l ;"
|
||||
"Λ <> L ;"
|
||||
|
||||
"μ <> m ;"
|
||||
"Μ <> M ;"
|
||||
|
||||
"ν } $gammaLike > n\' ;"
|
||||
"ν <> n ;"
|
||||
"Ν } $gammaLike <> N\' ;"
|
||||
"Ν <> N ;"
|
||||
|
||||
"ξ <> x ;"
|
||||
"Ξ <> X ;"
|
||||
|
||||
"ο <> o ;"
|
||||
"Ο <> O ;"
|
||||
|
||||
"π <> p ;"
|
||||
"Π <> P ;"
|
||||
|
||||
"ρ $rough <> rh;"
|
||||
"Ρ $rough } $beforeLower <> Rh ;"
|
||||
"Ρ $rough <> RH ;"
|
||||
"ρ <> r ;"
|
||||
"Ρ <> R ;"
|
||||
|
||||
"[Pp] {ς > \'s ;"
|
||||
"[Pp] {σ > \'s ;"
|
||||
"σ < [:^L:] [:M:]* { s } [:^L:] ;"
|
||||
"ς <> s } [:^L:] ;"
|
||||
"σ <> s ;"
|
||||
"[Pp] { Σ <> \'S ;"
|
||||
"Σ <> S ;"
|
||||
|
||||
"τ <> t ;"
|
||||
"Τ <> T ;"
|
||||
|
||||
"$vowel {υ } <> u ;"
|
||||
"υ <> y ;"
|
||||
"$vowel { Υ <> U ;"
|
||||
"Υ <> Y ;"
|
||||
|
||||
"χ <> ch ;"
|
||||
"Χ } $beforeLower <> Ch ;"
|
||||
"Χ <> CH ;"
|
||||
|
||||
// Completeness for ASCII
|
||||
|
||||
"$ignore = [[:Mark:]''] * ;"
|
||||
|
||||
"| k < c ;"
|
||||
"| ph < f ;"
|
||||
"| i < j ;"
|
||||
"| k < q ;"
|
||||
"| u < v ;"
|
||||
"| u < w ;"
|
||||
"| K < C ;"
|
||||
"| PH < F } $ignore [:UppercaseLetter:] ;"
|
||||
"| PH < [:UppercaseLetter:] $ignore { F ;"
|
||||
"| PH < F ;"
|
||||
"| I < J ;"
|
||||
"| K < Q ;"
|
||||
"| U < V ;"
|
||||
"| U < W ;"
|
||||
|
||||
"$rough } $ignore [:UppercaseLetter:] > H ;"
|
||||
"$ignore [:UppercaseLetter:] { $rough > H ;"
|
||||
"$rough < H ;"
|
||||
"$rough <> h ;"
|
||||
|
||||
// Completeness for Greek
|
||||
|
||||
"ϐ > | β ;"
|
||||
"ϑ > | θ ;"
|
||||
"ϒ > | Υ ;"
|
||||
"ϕ > | φ ;"
|
||||
"ϖ > | π ;"
|
||||
|
||||
"ϰ > | κ ;"
|
||||
"ϱ > | ρ ;"
|
||||
"ϲ > | σ ;"
|
||||
"ϳ > j ;"
|
||||
"ϴ > | Θ ;"
|
||||
"ϵ > | ε ;"
|
||||
|
||||
"ͺ > i;"
|
||||
|
||||
"::NFC (NFD) ;"
|
||||
}
|
||||
}
|
115
icu4c/source/data/translit/t_Gujr_InterIndic.txt
Normal file
115
icu4c/source/data/translit/t_Gujr_InterIndic.txt
Normal file
@ -0,0 +1,115 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Gujarati_InterIndic.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Gujarati_InterIndic
|
||||
|
||||
translit_Gujarati_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Gujarati_InterIndic.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:54 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Gujarati_InterIndic
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:41:58 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Gujarati-InterIndic
|
||||
//:: NFD (NFC) ;
|
||||
"\u0a81>\ue001;" // SIGN CANDRABINDU
|
||||
"\u0a82>\ue002;" // SIGN ANUSVARA
|
||||
"\u0a83>\ue003;" // SIGN VISARGA
|
||||
"\u0a85>\ue005;" // LETTER A
|
||||
"\u0a86>\ue006;" // LETTER AA
|
||||
"\u0a87>\ue007;" // LETTER I
|
||||
"\u0a88>\ue008;" // LETTER II
|
||||
"\u0a89>\ue009;" // LETTER U
|
||||
"\u0a8a>\ue00a;" // LETTER UU
|
||||
"\u0a8b>\ue00b;" // LETTER VOCALIC R
|
||||
// \u0a8d>; # UNMAPPED Gujarati-InterIndic: VOWEL CANDRA E
|
||||
"\u0a8f>\ue00f;" // LETTER E
|
||||
"\u0a90>\ue010;" // LETTER AI
|
||||
// \u0a91>; # UNMAPPED Gujarati-InterIndic: VOWEL CANDRA O
|
||||
"\u0a93>\ue013;" // LETTER O
|
||||
"\u0a94>\ue014;" // LETTER AU
|
||||
"\u0a95>\ue015;" // LETTER KA
|
||||
"\u0a96>\ue016;" // LETTER KHA
|
||||
"\u0a97>\ue017;" // LETTER GA
|
||||
"\u0a98>\ue018;" // LETTER GHA
|
||||
"\u0a99>\ue019;" // LETTER NGA
|
||||
"\u0a9a>\ue01a;" // LETTER CA
|
||||
"\u0a9b>\ue01b;" // LETTER CHA
|
||||
"\u0a9c>\ue01c;" // LETTER JA
|
||||
"\u0a9d>\ue01d;" // LETTER JHA
|
||||
"\u0a9e>\ue01e;" // LETTER NYA
|
||||
"\u0a9f>\ue01f;" // LETTER TTA
|
||||
"\u0aa0>\ue020;" // LETTER TTHA
|
||||
"\u0aa1>\ue021;" // LETTER DDA
|
||||
"\u0aa2>\ue022;" // LETTER DDHA
|
||||
"\u0aa3>\ue023;" // LETTER NNA
|
||||
"\u0aa4>\ue024;" // LETTER TA
|
||||
"\u0aa5>\ue025;" // LETTER THA
|
||||
"\u0aa6>\ue026;" // LETTER DA
|
||||
"\u0aa7>\ue027;" // LETTER DHA
|
||||
"\u0aa8>\ue028;" // LETTER NA
|
||||
"\u0aaa>\ue02a;" // LETTER PA
|
||||
"\u0aab>\ue02b;" // LETTER PHA
|
||||
"\u0aac>\ue02c;" // LETTER BA
|
||||
"\u0aad>\ue02d;" // LETTER BHA
|
||||
"\u0aae>\ue02e;" // LETTER MA
|
||||
"\u0aaf>\ue02f;" // LETTER YA
|
||||
"\u0ab0>\ue030;" // LETTER RA
|
||||
"\u0ab2>\ue032;" // LETTER LA
|
||||
"\u0ab3>\ue033;" // LETTER LLA
|
||||
"\u0ab5>\ue035;" // LETTER VA
|
||||
"\u0ab6>\ue036;" // LETTER SHA
|
||||
"\u0ab7>\ue037;" // LETTER SSA
|
||||
"\u0ab8>\ue038;" // LETTER SA
|
||||
"\u0ab9>\ue039;" // LETTER HA
|
||||
"\u0abc>\ue03c;" // SIGN NUKTA
|
||||
"\u0abd>\ue03d;" // SIGN AVAGRAHA
|
||||
"\u0abe>\ue03e;" // VOWEL SIGN AA
|
||||
"\u0abf>\ue03f;" // VOWEL SIGN I
|
||||
"\u0ac0>\ue040;" // VOWEL SIGN II
|
||||
"\u0ac1>\ue041;" // VOWEL SIGN U
|
||||
"\u0ac2>\ue042;" // VOWEL SIGN UU
|
||||
"\u0ac3>\ue043;" // VOWEL SIGN VOCALIC R
|
||||
"\u0ac4>\ue044;" // VOWEL SIGN VOCALIC RR
|
||||
"\u0ac5>\ue045;" // VOWEL SIGN CANDRA E
|
||||
"\u0ac7>\ue047;" // VOWEL SIGN E
|
||||
"\u0ac8>\ue048;" // VOWEL SIGN AI
|
||||
"\u0ac9>\ue049;" // VOWEL SIGN CANDRA O
|
||||
"\u0acb>\ue04b;" // VOWEL SIGN O
|
||||
"\u0acc>\ue04c;" // VOWEL SIGN AU
|
||||
"\u0acd>\ue04d;" // SIGN VIRAMA
|
||||
"\u0ad0>\ue050;" // OM
|
||||
"\u0ae0>\ue060;" // LETTER VOCALIC RR
|
||||
"\u0ae6>\ue066;" // DIGIT ZERO
|
||||
"\u0ae7>\ue067;" // DIGIT ONE
|
||||
"\u0ae8>\ue068;" // DIGIT TWO
|
||||
"\u0ae9>\ue069;" // DIGIT THREE
|
||||
"\u0aea>\ue06a;" // DIGIT FOUR
|
||||
"\u0aeb>\ue06b;" // DIGIT FIVE
|
||||
"\u0aec>\ue06c;" // DIGIT SIX
|
||||
"\u0aed>\ue06d;" // DIGIT SEVEN
|
||||
"\u0aee>\ue06e;" // DIGIT EIGHT
|
||||
"\u0aef>\ue06f;" // DIGIT NINE
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
112
icu4c/source/data/translit/t_Guru_InterIndic.txt
Normal file
112
icu4c/source/data/translit/t_Guru_InterIndic.txt
Normal file
@ -0,0 +1,112 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Gurmukhi_InterIndic.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Gurmukhi_InterIndic
|
||||
|
||||
translit_Gurmukhi_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Gurmukhi_InterIndic.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:54 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Gurmukhi_InterIndic
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:41:58 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Gurmukhi-InterIndic
|
||||
//:: NFD (NFC) ;
|
||||
"\u0a02>\ue001;" // REMAP (indicExceptions.txt): \u0a01>\u0a02 = SIGN CANDRABINDU>SIGN BINDI
|
||||
"\u0a05>\ue005;" // LETTER A
|
||||
"\u0a06>\ue006;" // LETTER AA
|
||||
"\u0a07>\ue007;" // LETTER I
|
||||
"\u0a08>\ue008;" // LETTER II
|
||||
"\u0a09>\ue009;" // LETTER U
|
||||
"\u0a0a>\ue00a;" // LETTER UU
|
||||
"\u0a0f>\ue00f;" // LETTER EE
|
||||
"\u0a10>\ue010;" // LETTER AI
|
||||
"\u0a13>\ue013;" // LETTER OO
|
||||
"\u0a14>\ue014;" // LETTER AU
|
||||
"\u0a15>\ue015;" // LETTER KA
|
||||
"\u0a16>\ue016;" // LETTER KHA
|
||||
"\u0a17>\ue017;" // LETTER GA
|
||||
"\u0a18>\ue018;" // LETTER GHA
|
||||
"\u0a19>\ue019;" // LETTER NGA
|
||||
"\u0a1a>\ue01a;" // LETTER CA
|
||||
"\u0a1b>\ue01b;" // LETTER CHA
|
||||
"\u0a1c>\ue01c;" // LETTER JA
|
||||
"\u0a1d>\ue01d;" // LETTER JHA
|
||||
"\u0a1e>\ue01e;" // LETTER NYA
|
||||
"\u0a1f>\ue01f;" // LETTER TTA
|
||||
"\u0a20>\ue020;" // LETTER TTHA
|
||||
"\u0a21>\ue021;" // LETTER DDA
|
||||
"\u0a22>\ue022;" // LETTER DDHA
|
||||
"\u0a23>\ue023;" // LETTER NNA
|
||||
"\u0a24>\ue024;" // LETTER TA
|
||||
"\u0a25>\ue025;" // LETTER THA
|
||||
"\u0a26>\ue026;" // LETTER DA
|
||||
"\u0a27>\ue027;" // LETTER DHA
|
||||
"\u0a28>\ue028;" // LETTER NA
|
||||
"\u0a2a>\ue02a;" // LETTER PA
|
||||
"\u0a2b>\ue02b;" // LETTER PHA
|
||||
"\u0a2c>\ue02c;" // LETTER BA
|
||||
"\u0a2d>\ue02d;" // LETTER BHA
|
||||
"\u0a2e>\ue02e;" // LETTER MA
|
||||
"\u0a2f>\ue02f;" // LETTER YA
|
||||
"\u0a30>\ue030;" // LETTER RA
|
||||
"\u0a32>\ue032;" // LETTER LA
|
||||
"\u0a33>\ue033;" // LETTER LLA
|
||||
"\u0a35>\ue035;" // LETTER VA
|
||||
"\u0a36>\ue036;" // LETTER SHA
|
||||
"\u0a38>\ue038;" // LETTER SA
|
||||
"\u0a39>\ue039;" // LETTER HA
|
||||
"\u0a3c>\ue03c;" // SIGN NUKTA
|
||||
"\u0a3e>\ue03e;" // VOWEL SIGN AA
|
||||
"\u0a3f>\ue03f;" // VOWEL SIGN I
|
||||
"\u0a40>\ue040;" // VOWEL SIGN II
|
||||
"\u0a41>\ue041;" // VOWEL SIGN U
|
||||
"\u0a42>\ue042;" // VOWEL SIGN UU
|
||||
"\u0a47>\ue047;" // VOWEL SIGN EE
|
||||
"\u0a48>\ue048;" // VOWEL SIGN AI
|
||||
"\u0a4b>\ue04b;" // VOWEL SIGN OO
|
||||
"\u0a4c>\ue04c;" // VOWEL SIGN AU
|
||||
"\u0a4d>\ue04d;" // SIGN VIRAMA
|
||||
"\u0a59>\ue059;" // LETTER KHHA
|
||||
"\u0a5a>\ue05a;" // LETTER GHHA
|
||||
"\u0a5b>\ue05b;" // LETTER ZA
|
||||
"\u0a5c>\ue05c;" // LETTER RRA
|
||||
"\u0a5e>\ue05e;" // LETTER FA
|
||||
"\u0a66>\ue066;" // DIGIT ZERO
|
||||
"\u0a67>\ue067;" // DIGIT ONE
|
||||
"\u0a68>\ue068;" // DIGIT TWO
|
||||
"\u0a69>\ue069;" // DIGIT THREE
|
||||
"\u0a6a>\ue06a;" // DIGIT FOUR
|
||||
"\u0a6b>\ue06b;" // DIGIT FIVE
|
||||
"\u0a6c>\ue06c;" // DIGIT SIX
|
||||
"\u0a6d>\ue06d;" // DIGIT SEVEN
|
||||
"\u0a6e>\ue06e;" // DIGIT EIGHT
|
||||
"\u0a6f>\ue06f;" // DIGIT NINE
|
||||
// \u0a70>; # UNMAPPED Gurmukhi-InterIndic: TIPPI
|
||||
// \u0a71>; # UNMAPPED Gurmukhi-InterIndic: ADDAK
|
||||
// \u0a72>; # UNMAPPED Gurmukhi-InterIndic: IRI
|
||||
// \u0a73>; # UNMAPPED Gurmukhi-InterIndic: URA
|
||||
// \u0a74>; # UNMAPPED Gurmukhi-InterIndic: EK ONKAR
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
217
icu4c/source/data/translit/t_Hira_Kana.txt
Normal file
217
icu4c/source/data/translit/t_Hira_Kana.txt
Normal file
@ -0,0 +1,217 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Hiragana_Katakana.utf8.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Hiragana_Katakana
|
||||
|
||||
translit_Hiragana_Katakana {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Hiragana-Katakana
|
||||
|
||||
// This is largely a one-to-one mapping, but it has a
|
||||
// few kinks:
|
||||
|
||||
// 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
|
||||
// Hiragana equivalents. We use Hiragana wa/wi/we/wo
|
||||
// (308F-3092) with a voicing mark (3099), which is
|
||||
// semantically equivalent. However, this is a non-
|
||||
// roundtripping transformation.
|
||||
|
||||
// 2. The Katakana small ka/ke (30F5,30F6) have no
|
||||
// Hiragana equivalents. We convert them to normal
|
||||
// Hiragana ka/ke (304B,3051). This is a one-way
|
||||
// information-losing transformation and precludes
|
||||
// round-tripping of 30F5 and 30F6.
|
||||
|
||||
// 3. The combining marks 3099-309C are in the Hiragana
|
||||
// block, but they apply to Katakana as well, so we
|
||||
// leave them untouched.
|
||||
|
||||
// 4. The Katakana prolonged sound mark 30FC doubles the
|
||||
// preceding vowel. This is a one-way information-
|
||||
// losing transformation from Katakana to Hiragana.
|
||||
|
||||
// 5. The Katakana middle dot separates words in foreign
|
||||
// expressions; we leave this unmodified.
|
||||
|
||||
// The above points preclude successful round-trip
|
||||
// transformations of arbitrary input text. However,
|
||||
// they provide naturalistic results that should conform
|
||||
// to user expectations.
|
||||
|
||||
|
||||
// Combining equivalents va/vi/ve/vo
|
||||
"わ゙ <> ヷ;"
|
||||
"ゐ゙ <> ヸ;"
|
||||
"ゑ゙ <> ヹ;"
|
||||
"を゙ <> ヺ;"
|
||||
|
||||
// One-to-one mappings, main block
|
||||
// 3041:3094 <> 30A1:30F4
|
||||
// 309D,E <> 30FD,E
|
||||
"ぁ <> ァ;"
|
||||
"あ <> ア;"
|
||||
"ぃ <> ィ;"
|
||||
"い <> イ;"
|
||||
"ぅ <> ゥ;"
|
||||
"う <> ウ;"
|
||||
"ぇ <> ェ;"
|
||||
"え <> エ;"
|
||||
"ぉ <> ォ;"
|
||||
"お <> オ;"
|
||||
"か <> カ;"
|
||||
"が <> ガ;"
|
||||
"き <> キ;"
|
||||
"ぎ <> ギ;"
|
||||
"く <> ク;"
|
||||
"ぐ <> グ;"
|
||||
"け <> ケ;"
|
||||
"げ <> ゲ;"
|
||||
"こ <> コ;"
|
||||
"ご <> ゴ;"
|
||||
"さ <> サ;"
|
||||
"ざ <> ザ;"
|
||||
"し <> シ;"
|
||||
"じ <> ジ;"
|
||||
"す <> ス;"
|
||||
"ず <> ズ;"
|
||||
"せ <> セ;"
|
||||
"ぜ <> ゼ;"
|
||||
"そ <> ソ;"
|
||||
"ぞ <> ゾ;"
|
||||
"た <> タ;"
|
||||
"だ <> ダ;"
|
||||
"ち <> チ;"
|
||||
"ぢ <> ヂ;"
|
||||
"っ <> ッ;"
|
||||
"つ <> ツ;"
|
||||
"づ <> ヅ;"
|
||||
"て <> テ;"
|
||||
"で <> デ;"
|
||||
"と <> ト;"
|
||||
"ど <> ド;"
|
||||
"な <> ナ;"
|
||||
"に <> ニ;"
|
||||
"ぬ <> ヌ;"
|
||||
"ね <> ネ;"
|
||||
"の <> ノ;"
|
||||
"は <> ハ;"
|
||||
"ば <> バ;"
|
||||
"ぱ <> パ;"
|
||||
"ひ <> ヒ;"
|
||||
"び <> ビ;"
|
||||
"ぴ <> ピ;"
|
||||
"ふ <> フ;"
|
||||
"ぶ <> ブ;"
|
||||
"ぷ <> プ;"
|
||||
"へ <> ヘ;"
|
||||
"べ <> ベ;"
|
||||
"ぺ <> ペ;"
|
||||
"ほ <> ホ;"
|
||||
"ぼ <> ボ;"
|
||||
"ぽ <> ポ;"
|
||||
"ま <> マ;"
|
||||
"み <> ミ;"
|
||||
"む <> ム;"
|
||||
"め <> メ;"
|
||||
"も <> モ;"
|
||||
"ゃ <> ャ;"
|
||||
"や <> ヤ;"
|
||||
"ゅ <> ュ;"
|
||||
"ゆ <> ユ;"
|
||||
"ょ <> ョ;"
|
||||
"よ <> ヨ;"
|
||||
"ら <> ラ;"
|
||||
"り <> リ;"
|
||||
"る <> ル;"
|
||||
"れ <> レ;"
|
||||
"ろ <> ロ;"
|
||||
"ゎ <> ヮ;"
|
||||
"わ <> ワ;"
|
||||
"ゐ <> ヰ;"
|
||||
"ゑ <> ヱ;"
|
||||
"を <> ヲ;"
|
||||
"ん <> ン;"
|
||||
"ゔ <> ヴ;"
|
||||
"ゝ <> ヽ;"
|
||||
"ゞ <> ヾ;"
|
||||
|
||||
// One-way Katakana-Hiragana xform of small K ka/ke to
|
||||
// normal H ka/ke.
|
||||
"か < ヵ;"
|
||||
"け < ヶ;"
|
||||
|
||||
// Katakana followed by a prolonged sound mark 30FC has
|
||||
// its final vowel doubled. This is a Katakana-Hiragana
|
||||
// one-way information-losing transformation. We
|
||||
// include the small Katakana (e.g., small A 3041) and
|
||||
// do not distinguish them from their large
|
||||
// counterparts. It doesn't make sense to double a
|
||||
// small counterpart vowel as a small Hiragana vowel, so
|
||||
// we don't do so. In natural text this should never
|
||||
// occur anyway. If a 30FC is seen without a preceding
|
||||
// vowel sound (e.g., after n 30F3) we do not change it.
|
||||
|
||||
//## $long = ー;
|
||||
|
||||
// The following categories are Hiragana, not Katakana
|
||||
// as might be expected, since by the time we get to the
|
||||
// 30FC, the preceding character will have already been
|
||||
// transformed to Hiragana.
|
||||
|
||||
// {The following mechanically generated from the
|
||||
// Unicode 3.0 data:}
|
||||
|
||||
"$xa = ["
|
||||
"ぁ あ か が さ ざ"
|
||||
"た だ な は ば ぱ"
|
||||
"ま ゃ や ら ゎ わ"
|
||||
"];"
|
||||
|
||||
"$xi = ["
|
||||
"ぃ い き ぎ し じ"
|
||||
"ち ぢ に ひ び ぴ"
|
||||
"み り ゐ"
|
||||
"];"
|
||||
|
||||
"$xu = ["
|
||||
"ぅ う く ぐ す ず"
|
||||
"っ つ づ ぬ ふ ぶ"
|
||||
"ぷ む ゅ ゆ る ゔ"
|
||||
"];"
|
||||
|
||||
"$xe = ["
|
||||
"ぇ え け げ せ ぜ"
|
||||
"て で ね へ べ ぺ"
|
||||
"め れ ゑ"
|
||||
"];"
|
||||
|
||||
"$xo = ["
|
||||
"ぉ お こ ご そ ぞ"
|
||||
"と ど の ほ ぼ ぽ"
|
||||
"も ょ よ ろ を"
|
||||
"];"
|
||||
|
||||
"あ < $xa {ー};"
|
||||
"い < $xi {ー};"
|
||||
"う < $xu {ー};"
|
||||
"え < $xe {ー};"
|
||||
"お < $xo {ー};"
|
||||
|
||||
// eof
|
||||
}
|
||||
}
|
32
icu4c/source/data/translit/t_Hira_Latn.txt
Normal file
32
icu4c/source/data/translit/t_Hira_Latn.txt
Normal file
@ -0,0 +1,32 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Hiragana_Latin.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Hiragana_Latin
|
||||
|
||||
translit_Hiragana_Latin {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// $Source: /xsrl/Nsvn/icu/icu/source/data/translit/Attic/t_Hira_Latn.txt,v $
|
||||
// $Date: 2001/10/26 05:41:16 $
|
||||
// $Revision: 1.1 $
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
":: [:^Katakana:] ;" // don't touch any katakana that was in the text!
|
||||
|
||||
":: Hiragana-Katakana;"
|
||||
":: Katakana-Latin;"
|
||||
|
||||
":: ([:^Katakana:]) ;" // don't touch any katakana that was in the text!
|
||||
}
|
||||
}
|
134
icu4c/source/data/translit/t_InterIndic_Beng.txt
Normal file
134
icu4c/source/data/translit/t_InterIndic_Beng.txt
Normal file
@ -0,0 +1,134 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_InterIndic_Bengali.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Bengali
|
||||
|
||||
translit_InterIndic_Bengali {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Bengali.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic_Bengali
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:41:59 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic-Bengali
|
||||
//:: NFD (NFC) ;
|
||||
"\ue001>\u0981;" // SIGN CANDRABINDU
|
||||
"\ue002>\u0982;" // SIGN ANUSVARA
|
||||
"\ue003>\u0983;" // SIGN VISARGA
|
||||
"\ue005>\u0985;" // LETTER A
|
||||
"\ue006>\u0986;" // LETTER AA
|
||||
"\ue007>\u0987;" // LETTER I
|
||||
"\ue008>\u0988;" // LETTER II
|
||||
"\ue009>\u0989;" // LETTER U
|
||||
"\ue00a>\u098a;" // LETTER UU
|
||||
"\ue00b>\u098b;" // LETTER VOCALIC R
|
||||
"\ue00c>\u098c;" // LETTER VOCALIC L
|
||||
// \ue00f>; # UNMAPPED InterIndic-Bengali: LETTER EE (\u098f = LETTER E)
|
||||
"\ue010>\u0990;" // LETTER AI
|
||||
// \ue013>; # UNMAPPED InterIndic-Bengali: LETTER OO (\u0993 = LETTER O)
|
||||
"\ue014>\u0994;" // LETTER AU
|
||||
"\ue015>\u0995;" // LETTER KA
|
||||
"\ue016>\u0996;" // LETTER KHA
|
||||
"\ue017>\u0997;" // LETTER GA
|
||||
"\ue018>\u0998;" // LETTER GHA
|
||||
"\ue019>\u0999;" // LETTER NGA
|
||||
"\ue01a>\u099a;" // LETTER CA
|
||||
"\ue01b>\u099b;" // LETTER CHA
|
||||
"\ue01c>\u099c;" // LETTER JA
|
||||
"\ue01d>\u099d;" // LETTER JHA
|
||||
"\ue01e>\u099e;" // LETTER NYA
|
||||
"\ue01f>\u099f;" // LETTER TTA
|
||||
"\ue020>\u09a0;" // LETTER TTHA
|
||||
"\ue021>\u09a1;" // LETTER DDA
|
||||
"\ue022>\u09a2;" // LETTER DDHA
|
||||
"\ue023>\u09a3;" // LETTER NNA
|
||||
"\ue024>\u09a4;" // LETTER TA
|
||||
"\ue025>\u09a5;" // LETTER THA
|
||||
"\ue026>\u09a6;" // LETTER DA
|
||||
"\ue027>\u09a7;" // LETTER DHA
|
||||
"\ue028>\u09a8;" // LETTER NA
|
||||
"\ue029>\u09a8;" // REMAP (indicExceptions.txt): \u09a9>\u09a8 = LETTER NNNA>LETTER NA
|
||||
"\ue02a>\u09aa;" // LETTER PA
|
||||
"\ue02b>\u09ab;" // LETTER PHA
|
||||
"\ue02c>\u09ac;" // LETTER BA
|
||||
"\ue02d>\u09ad;" // LETTER BHA
|
||||
"\ue02e>\u09ae;" // LETTER MA
|
||||
"\ue02f>\u09af;" // LETTER YA
|
||||
"\ue030>\u09b0;" // LETTER RA
|
||||
"\ue032>\u09b2;" // LETTER LA
|
||||
"\ue033>\u09b2;" // REMAP (indicExceptions.txt): \u09b3>\u09b2 = LETTER LLA>LETTER LA
|
||||
"\ue034>\u09b2;" // REMAP (indicExceptions.txt): \u09b4>\u09b2 = LETTER LLLA>LETTER LA
|
||||
"\ue035>\u09ac;" // REMAP (indicExceptions.txt): \u09b5>\u09ac = LETTER VA>LETTER BA
|
||||
"\ue036>\u09b6;" // LETTER SHA
|
||||
"\ue037>\u09b7;" // LETTER SSA
|
||||
"\ue038>\u09b8;" // LETTER SA
|
||||
"\ue039>\u09b9;" // LETTER HA
|
||||
"\ue03c>\u09bc;" // SIGN NUKTA
|
||||
// \ue03d>; # UNMAPPED InterIndic-Bengali: SIGN AVAGRAHA
|
||||
"\ue03e>\u09be;" // VOWEL SIGN AA
|
||||
"\ue03f>\u09bf;" // VOWEL SIGN I
|
||||
"\ue040>\u09c0;" // VOWEL SIGN II
|
||||
"\ue041>\u09c1;" // VOWEL SIGN U
|
||||
"\ue042>\u09c2;" // VOWEL SIGN UU
|
||||
"\ue043>\u09c3;" // VOWEL SIGN VOCALIC R
|
||||
"\ue044>\u09c4;" // VOWEL SIGN VOCALIC RR
|
||||
"\ue045>\u09c7;" // REMAP (indicExceptions.txt): \u09c5>\u09c7 = VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
// \ue047>; # UNMAPPED InterIndic-Bengali: VOWEL SIGN EE (\u09c7 = VOWEL SIGN E)
|
||||
"\ue048>\u09c8;" // VOWEL SIGN AI
|
||||
"\ue049>\u09cb;" // REMAP (indicExceptions.txt): \u09c9>\u09cb = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
// \ue04b>; # UNMAPPED InterIndic-Bengali: VOWEL SIGN OO (\u09cb = VOWEL SIGN O)
|
||||
"\ue04c>\u09cc;" // VOWEL SIGN AU
|
||||
"\ue04d>\u09cd;" // SIGN VIRAMA
|
||||
// \ue050>; # UNMAPPED InterIndic-Bengali: OM
|
||||
// \ue055>; # UNMAPPED InterIndic-Bengali: LENGTH MARK
|
||||
"\ue056>\u09c8;" // REMAP (indicExceptions.txt): \u09d6>\u09c8 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
"\ue057>\u09d7;" // AU LENGTH MARK
|
||||
"\ue059>\u0996;" // REMAP (indicExceptions.txt): \u09d9>\u0996 = LETTER KHHA>LETTER KHA
|
||||
"\ue05a>\u0997;" // REMAP (indicExceptions.txt): \u09da>\u0997 = LETTER GHHA>LETTER GA
|
||||
"\ue05b>\u099c;" // REMAP (indicExceptions.txt): \u09db>\u099c = LETTER ZA>LETTER JA
|
||||
"\ue05d>\u09dd;" // LETTER RHA
|
||||
"\ue05e>\u09ab;" // REMAP (indicExceptions.txt): \u09de>\u09ab = LETTER FA>LETTER PHA
|
||||
"\ue05f>\u09df;" // LETTER YYA
|
||||
"\ue060>\u09e0;" // LETTER VOCALIC RR
|
||||
"\ue061>\u09e1;" // LETTER VOCALIC LL
|
||||
"\ue062>\u09e2;" // VOWEL SIGN VOCALIC L
|
||||
"\ue063>\u09e3;" // VOWEL SIGN VOCALIC LL
|
||||
"\ue066>\u09e6;" // DIGIT ZERO
|
||||
"\ue067>\u09e7;" // DIGIT ONE
|
||||
"\ue068>\u09e8;" // DIGIT TWO
|
||||
"\ue069>\u09e9;" // DIGIT THREE
|
||||
"\ue06a>\u09ea;" // DIGIT FOUR
|
||||
"\ue06b>\u09eb;" // DIGIT FIVE
|
||||
"\ue06c>\u09ec;" // DIGIT SIX
|
||||
"\ue06d>\u09ed;" // DIGIT SEVEN
|
||||
"\ue06e>\u09ee;" // DIGIT EIGHT
|
||||
"\ue06f>\u09ef;" // DIGIT NINE
|
||||
"\ue0fa>\u09fa;" // ISSHAR
|
||||
"\ue00f>\u098f;" // LETTER E
|
||||
"\ue013>\u0993;" // LETTER O
|
||||
"\ue031>\u09dc;" // LETTER RRA
|
||||
"\ue047>\u09c7;" // VOWEL SIGN E
|
||||
"\ue04b>\u09cb;" // VOWEL SIGN O
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
159
icu4c/source/data/translit/t_InterIndic_Deva.txt
Normal file
159
icu4c/source/data/translit/t_InterIndic_Deva.txt
Normal file
@ -0,0 +1,159 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_InterIndic_Devanagari.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Devanagari
|
||||
|
||||
translit_InterIndic_Devanagari {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Devanagari.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic_Devanagari
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:41:59 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic-Devanagari
|
||||
//:: NFD (NFC) ;
|
||||
//Rules for Decomposed characters
|
||||
"\ue028\ue03c > \u0929;" //\ue029
|
||||
"\ue030\ue03c > \u0931;" //\ue031
|
||||
"\ue033\ue03c > \u0934;" //\ue034
|
||||
"\ue015\ue03c > \u0958;" //\ue058 LETTER QA (For Urdu)
|
||||
"\ue016\ue03c > \u0959;" //\ue059 LETTER KHHA (For Urdu)
|
||||
"\ue017\ue03c > \u095a;" //\ue05a LETTER GHHA (For Urdu)
|
||||
"\ue01c\ue03c > \u095b;" //\ue05b LETTER ZA (For Urdu)
|
||||
"\ue021\ue03c > \u095c;" //\ue05c LETTER DDDHA (pronounced RRA)
|
||||
"\ue022\ue03c > \u095d;" //\ue05d LETTER RHA (pronounced RRHA)
|
||||
"\ue02b\ue03c > \u095e;" //\ue05e LETTER FA
|
||||
"\ue02f\ue03c > \u095f;" //\ue05f LETTER YYA
|
||||
"\ue001 > \u0901;" // SIGN CANDRABINDU
|
||||
"\ue002 > \u0902;" // SIGN ANUSVARA
|
||||
"\ue003 > \u0903;" // SIGN VISARGA
|
||||
"\ue005 > \u0905;" // LETTER A
|
||||
"\ue006 > \u0906;" // LETTER AA
|
||||
"\ue007 > \u0907;" // LETTER I
|
||||
"\ue008 > \u0908;" // LETTER II
|
||||
"\ue009 > \u0909;" // LETTER U
|
||||
"\ue00a > \u090a;" // LETTER UU
|
||||
"\ue00b > \u090b;" // LETTER VOCALIC R
|
||||
"\ue00c > \u090c;" // LETTER VOCALIC L
|
||||
"\ue00d > \u090d;" // LETTER CANDRA E (For representing English sounds)
|
||||
//\ue00e > \u090e; # UNMAPPED LETTER SHORT E(For Southern Scripts)
|
||||
"\ue00e > \u090f;"
|
||||
"\ue00f > \u090f;" // LETTER E
|
||||
"\ue010 > \u0910;" // LETTER AI
|
||||
"\ue011 > \u0911;" // LETTER CANDRA O (For representing English sounds)
|
||||
//\ue012 > \u0912; # UNMAPPED LETTER SHORT O (For Southern Scripts)
|
||||
"\ue012 > \u0913;"
|
||||
"\ue013 > \u0913;" // LETTER O
|
||||
"\ue014 > \u0914;" // LETTER AU
|
||||
"\ue015 > \u0915;" // LETTER KA
|
||||
"\ue016 > \u0916;" // LETTER KHA
|
||||
"\ue017 > \u0917;" // LETTER GA
|
||||
"\ue018 > \u0918;" // LETTER GHA
|
||||
"\ue019 > \u0919;" // LETTER NGA
|
||||
"\ue01a > \u091a;" // LETTER CA
|
||||
"\ue01b > \u091b;" // LETTER CHA
|
||||
"\ue01c > \u091c;" // LETTER JA
|
||||
"\ue01d > \u091d;" // LETTER JHA
|
||||
"\ue01e > \u091e;" // LETTER NYA
|
||||
"\ue01f > \u091f;" // LETTER TTA
|
||||
"\ue020 > \u0920;" // LETTER TTHA
|
||||
"\ue021 > \u0921;" // LETTER DDA
|
||||
"\ue022 > \u0922;" // LETTER DDHA
|
||||
"\ue023 > \u0923;" // LETTER NNA
|
||||
"\ue024 > \u0924;" // LETTER TA
|
||||
"\ue025 > \u0925;" // LETTER THA
|
||||
"\ue026 > \u0926;" // LETTER DA
|
||||
"\ue027 > \u0927;" // LETTER DHA
|
||||
"\ue028 > \u0928;" // LETTER NA
|
||||
"\ue029 > \u0929;" // LETTER NNNA
|
||||
"\ue02a > \u092a;" // LETTER PA
|
||||
"\ue02b > \u092b;" // LETTER PHA
|
||||
"\ue02c > \u092c;" // LETTER BA
|
||||
"\ue02d > \u092d;" // LETTER BHA
|
||||
"\ue02e > \u092e;" // LETTER MA
|
||||
"\ue02f > \u092f;" // LETTER YA
|
||||
"\ue030 > \u0930;" // LETTER RA
|
||||
//\ue031 > \u0931; # LETTER RRA (Eyelash RA for Southern scripts)
|
||||
"\ue031 > \u0930;"
|
||||
"\ue032 > \u0932;" // LETTER LA
|
||||
"\ue033 > \u0933;" // LETTER LLA
|
||||
//\ue034 > \u0934; # LETTER LLLA (LLLA for Southern scripts)
|
||||
"\ue034 > \u0933;"
|
||||
"\ue035 > \u0935;" // LETTER VA
|
||||
"\ue036 > \u0936;" // LETTER SHA
|
||||
"\ue037 > \u0937;" // LETTER SSA
|
||||
"\ue038 > \u0938;" // LETTER SA
|
||||
"\ue039 > \u0939;" // LETTER HA
|
||||
"\ue03c > \u093c;" // SIGN NUKTA
|
||||
"\ue03d > \u093d;" // SIGN AVAGRAHA
|
||||
"\ue03e > \u093e;" // VOWEL SIGN AA
|
||||
"\ue03f > \u093f;" // VOWEL SIGN I
|
||||
"\ue040 > \u0940;" // VOWEL SIGN II
|
||||
"\ue041 > \u0941;" // VOWEL SIGN U
|
||||
"\ue042 > \u0942;" // VOWEL SIGN UU
|
||||
"\ue043 > \u0943;" // VOWEL SIGN VOCALIC R
|
||||
"\ue044 > \u0944;" // VOWEL SIGN VOCALIC RR
|
||||
"\ue045 > \u0945;" // VOWEL SIGN CANDRA E
|
||||
//\ue046 > \u0946; # UNMAPPED VOWEL SIGN SHORT E
|
||||
"\ue046 > \u0947;"
|
||||
"\ue047 > \u0947;" // VOWEL SIGN E
|
||||
"\ue048 > \u0948;" // VOWEL SIGN AI
|
||||
"\ue049 > \u0949;" // VOWEL SIGN CANDRA O
|
||||
//\ue04a > \u094a; # UNMAPPED VOWEL SIGN SHORT O
|
||||
"\ue04a > \u094b;"
|
||||
"\ue04b > \u094b;" // VOWEL SIGN O
|
||||
"\ue04c > \u094c;" // VOWEL SIGN AU
|
||||
"\ue04d > \u094d;" // SIGN VIRAMA
|
||||
"\ue050 > \u0950;" // OM
|
||||
// \u0951 # UNMAPPED STRESS SIGN UDATTA
|
||||
// \u0952 # UNMAPPED STRESS SIGN ANUDATTA
|
||||
// \u0953 # UNMAPPED GRAVE ACCENT
|
||||
// \u0954 # UNMAPPED ACUTE ACCENT
|
||||
"\ue058 > \u0958;" // LETTER QA (For Urdu)
|
||||
"\ue059 > \u0959;" // LETTER KHHA (For Urdu)
|
||||
"\ue05a > \u095a;" // LETTER GHHA (For Urdu)
|
||||
"\ue05b > \u095b;" // LETTER ZA (For Urdu)
|
||||
"\ue05c > \u095c;" // LETTER DDDHA (pronounced RRA)
|
||||
"\ue05d > \u095d;" // LETTER RHA (pronounced RRHA)
|
||||
"\ue05e > \u095e;" // LETTER FA
|
||||
"\ue05f > \u095f;" // LETTER YYA
|
||||
"\ue060 > \u0960;" // LETTER VOCALIC RR
|
||||
"\ue061 > \u0961;" // LETTER VOCALIC LL
|
||||
"\ue062 > \u0962;" // VOWEL SIGN VOCALIC L
|
||||
"\ue063 > \u0963;" // VOWEL SIGN VOCALIC LL
|
||||
// > ; \u0964 # UNMAPPED Devanagari-InterIndic: DANDA
|
||||
// > ; \u0965 # UNMAPPED Devanagari-InterIndic: DOUBLE DANDA
|
||||
"\ue066 > \u0966;" // DIGIT ZERO
|
||||
"\ue067 > \u0967;" // DIGIT ONE
|
||||
"\ue068 > \u0968;" // DIGIT TWO
|
||||
"\ue069 > \u0969;" // DIGIT THREE
|
||||
"\ue06a > \u096a;" // DIGIT FOUR
|
||||
"\ue06b > \u096b;" // DIGIT FIVE
|
||||
"\ue06c > \u096c;" // DIGIT SIX
|
||||
"\ue06d > \u096d;" // DIGIT SEVEN
|
||||
"\ue06e > \u096e;" // DIGIT EIGHT
|
||||
"\ue06f > \u096f;" // DIGIT NINE
|
||||
// \u0970 # UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN
|
||||
// :: NFC;
|
||||
// eof
|
||||
}
|
||||
}
|
134
icu4c/source/data/translit/t_InterIndic_Gujr.txt
Normal file
134
icu4c/source/data/translit/t_InterIndic_Gujr.txt
Normal file
@ -0,0 +1,134 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_InterIndic_Gujarati.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Gujarati
|
||||
|
||||
translit_InterIndic_Gujarati {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Gujarati.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic_Gujarati
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:41:59 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic-Gujarati
|
||||
//:: NFD (NFC) ;
|
||||
"\ue001>\u0a81;" // SIGN CANDRABINDU
|
||||
"\ue002>\u0a82;" // SIGN ANUSVARA
|
||||
"\ue003>\u0a83;" // SIGN VISARGA
|
||||
"\ue005>\u0a85;" // LETTER A
|
||||
"\ue006>\u0a86;" // LETTER AA
|
||||
"\ue007>\u0a87;" // LETTER I
|
||||
"\ue008>\u0a88;" // LETTER II
|
||||
"\ue009>\u0a89;" // LETTER U
|
||||
"\ue00a>\u0a8a;" // LETTER UU
|
||||
"\ue00b>\u0a8b;" // LETTER VOCALIC R
|
||||
"\ue00c>\u0ab2\u0ac3;" // REMAP (indicExceptions.txt): \u0a8c>\u0ab2\u0ac3 = LETTER VOCALIC L>LETTER LA.VOWEL SIGN VOCALIC R
|
||||
// \ue00f>; # UNMAPPED InterIndic-Gujarati: LETTER EE (\u0a8f = LETTER E)
|
||||
"\ue010>\u0a90;" // LETTER AI
|
||||
// \ue013>; # UNMAPPED InterIndic-Gujarati: LETTER OO (\u0a93 = LETTER O)
|
||||
"\ue014>\u0a94;" // LETTER AU
|
||||
"\ue015>\u0a95;" // LETTER KA
|
||||
"\ue016>\u0a96;" // LETTER KHA
|
||||
"\ue017>\u0a97;" // LETTER GA
|
||||
"\ue018>\u0a98;" // LETTER GHA
|
||||
"\ue019>\u0a99;" // LETTER NGA
|
||||
"\ue01a>\u0a9a;" // LETTER CA
|
||||
"\ue01b>\u0a9b;" // LETTER CHA
|
||||
"\ue01c>\u0a9c;" // LETTER JA
|
||||
"\ue01d>\u0a9d;" // LETTER JHA
|
||||
"\ue01e>\u0a9e;" // LETTER NYA
|
||||
"\ue01f>\u0a9f;" // LETTER TTA
|
||||
"\ue020>\u0aa0;" // LETTER TTHA
|
||||
"\ue021>\u0aa1;" // LETTER DDA
|
||||
"\ue022>\u0aa2;" // LETTER DDHA
|
||||
"\ue023>\u0aa3;" // LETTER NNA
|
||||
"\ue024>\u0aa4;" // LETTER TA
|
||||
"\ue025>\u0aa5;" // LETTER THA
|
||||
"\ue026>\u0aa6;" // LETTER DA
|
||||
"\ue027>\u0aa7;" // LETTER DHA
|
||||
"\ue028>\u0aa8;" // LETTER NA
|
||||
"\ue029>\u0aa8;" // REMAP (indicExceptions.txt): \u0aa9>\u0aa8 = LETTER NNNA>LETTER NA
|
||||
"\ue02a>\u0aaa;" // LETTER PA
|
||||
"\ue02b>\u0aab;" // LETTER PHA
|
||||
"\ue02c>\u0aac;" // LETTER BA
|
||||
"\ue02d>\u0aad;" // LETTER BHA
|
||||
"\ue02e>\u0aae;" // LETTER MA
|
||||
"\ue02f>\u0aaf;" // LETTER YA
|
||||
"\ue030>\u0ab0;" // LETTER RA
|
||||
"\ue032>\u0ab2;" // LETTER LA
|
||||
"\ue033>\u0ab3;" // LETTER LLA
|
||||
"\ue034>\u0ab3;" // REMAP (indicExceptions.txt): \u0ab4>\u0ab3 = LETTER LLLA>LETTER LLA
|
||||
"\ue035>\u0ab5;" // LETTER VA
|
||||
"\ue036>\u0ab6;" // LETTER SHA
|
||||
"\ue037>\u0ab7;" // LETTER SSA
|
||||
"\ue038>\u0ab8;" // LETTER SA
|
||||
"\ue039>\u0ab9;" // LETTER HA
|
||||
"\ue03c>\u0abc;" // SIGN NUKTA
|
||||
"\ue03d>\u0abd;" // SIGN AVAGRAHA
|
||||
"\ue03e>\u0abe;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0abf;" // VOWEL SIGN I
|
||||
"\ue040>\u0ac0;" // VOWEL SIGN II
|
||||
"\ue041>\u0ac1;" // VOWEL SIGN U
|
||||
"\ue042>\u0ac2;" // VOWEL SIGN UU
|
||||
"\ue043>\u0ac3;" // VOWEL SIGN VOCALIC R
|
||||
"\ue044>\u0ac4;" // VOWEL SIGN VOCALIC RR
|
||||
"\ue045>\u0ac5;" // VOWEL SIGN CANDRA E
|
||||
// \ue047>; # UNMAPPED InterIndic-Gujarati: VOWEL SIGN EE (\u0ac7 = VOWEL SIGN E)
|
||||
"\ue048>\u0ac8;" // VOWEL SIGN AI
|
||||
"\ue049>\u0ac9;" // VOWEL SIGN CANDRA O
|
||||
// \ue04b>; # UNMAPPED InterIndic-Gujarati: VOWEL SIGN OO (\u0acb = VOWEL SIGN O)
|
||||
"\ue04c>\u0acc;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0acd;" // SIGN VIRAMA
|
||||
"\ue050>\u0ad0;" // OM
|
||||
// \ue055>; # UNMAPPED InterIndic-Gujarati: LENGTH MARK
|
||||
"\ue056>\u0ac8;" // REMAP (indicExceptions.txt): \u0ad6>\u0ac8 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
"\ue057>\u0acc;" // REMAP (indicExceptions.txt): \u0ad7>\u0acc = AU LENGTH MARK>VOWEL SIGN AU
|
||||
"\ue059>\u0a96\u0abc;" // REMAP (indicExceptions.txt): \u0ad9>\u0a96\u0abc = LETTER KHHA>LETTER KHA.SIGN NUKTA
|
||||
"\ue05a>\u0a97\u0abc;" // REMAP (indicExceptions.txt): \u0ada>\u0a97\u0abc = LETTER GHHA>LETTER GA.SIGN NUKTA
|
||||
"\ue05b>\u0a9c\u0abc;" // REMAP (indicExceptions.txt): \u0adb>\u0a9c\u0abc = LETTER ZA>LETTER JA.SIGN NUKTA
|
||||
"\ue05d>\u0aa2\u0abc;" // REMAP (indicExceptions.txt): \u0add>\u0aa2\u0abc = LETTER RHA>LETTER DDHA.SIGN NUKTA
|
||||
"\ue05e>\u0aab\u0abc;" // REMAP (indicExceptions.txt): \u0ade>\u0aab\u0abc = LETTER FA>LETTER PHA.SIGN NUKTA
|
||||
"\ue05f>\u0aaf\u0abc;" // REMAP (indicExceptions.txt): \u0adf>\u0aaf\u0abc = LETTER YYA>LETTER YA.SIGN NUKTA
|
||||
"\ue060>\u0ae0;" // LETTER VOCALIC RR
|
||||
"\ue061>\u0ab2\u0ac3;" // REMAP (indicExceptions.txt): \u0ae1>\u0ab2\u0ac3 = LETTER VOCALIC LL>LETTER LA.VOWEL SIGN VOCALIC R
|
||||
"\ue062>\u0abf\u0abc;" // REMAP (indicExceptions.txt): \u0ae2>\u0abf\u0abc = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
|
||||
"\ue063>\u0ac0\u0abc;" // REMAP (indicExceptions.txt): \u0ae3>\u0ac0\u0abc = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
|
||||
"\ue066>\u0ae6;" // DIGIT ZERO
|
||||
"\ue067>\u0ae7;" // DIGIT ONE
|
||||
"\ue068>\u0ae8;" // DIGIT TWO
|
||||
"\ue069>\u0ae9;" // DIGIT THREE
|
||||
"\ue06a>\u0aea;" // DIGIT FOUR
|
||||
"\ue06b>\u0aeb;" // DIGIT FIVE
|
||||
"\ue06c>\u0aec;" // DIGIT SIX
|
||||
"\ue06d>\u0aed;" // DIGIT SEVEN
|
||||
"\ue06e>\u0aee;" // DIGIT EIGHT
|
||||
"\ue06f>\u0aef;" // DIGIT NINE
|
||||
// \ue080>; # UNMAPPED InterIndic-Gujarati: ISSHAR
|
||||
"\ue00f>\u0a8f;" // LETTER E
|
||||
"\ue013>\u0a93;" // LETTER O
|
||||
// \ue083>; # UNMAPPED InterIndic-Gujarati: LETTER RRA (\u0a83 = SIGN VISARGA)
|
||||
"\ue047>\u0ac7;" // VOWEL SIGN E
|
||||
"\ue04b>\u0acb;" // VOWEL SIGN O
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
134
icu4c/source/data/translit/t_InterIndic_Guru.txt
Normal file
134
icu4c/source/data/translit/t_InterIndic_Guru.txt
Normal file
@ -0,0 +1,134 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_InterIndic_Gurmukhi.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Gurmukhi
|
||||
|
||||
translit_InterIndic_Gurmukhi {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Gurmukhi.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic_Gurmukhi
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:00 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic-Gurmukhi
|
||||
//:: NFD (NFC) ;
|
||||
"\ue001>\u0a02;" // REMAP (indicExceptions.txt): \u0a01>\u0a02 = SIGN CANDRABINDU>SIGN BINDI
|
||||
// \ue002>; # UNMAPPED InterIndic-Gurmukhi: SIGN ANUSVARA (\u0a02 = SIGN BINDI)
|
||||
// \ue003>; # UNMAPPED InterIndic-Gurmukhi: SIGN VISARGA
|
||||
"\ue005>\u0a05;" // LETTER A
|
||||
"\ue006>\u0a06;" // LETTER AA
|
||||
"\ue007>\u0a07;" // LETTER I
|
||||
"\ue008>\u0a08;" // LETTER II
|
||||
"\ue009>\u0a09;" // LETTER U
|
||||
"\ue00a>\u0a0a;" // LETTER UU
|
||||
"\ue00b>\u0a30\u0a3f;" // REMAP (indicExceptions.txt): \u0a0b>\u0a30\u0a3f = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
|
||||
"\ue00c>\u0a07;" // REMAP (indicExceptions.txt): \u0a0c>\u0a07 = LETTER VOCALIC L>LETTER I
|
||||
"\ue00f>\u0a0f;" // LETTER EE
|
||||
"\ue010>\u0a10;" // LETTER AI
|
||||
"\ue013>\u0a13;" // LETTER OO
|
||||
"\ue014>\u0a14;" // LETTER AU
|
||||
"\ue015>\u0a15;" // LETTER KA
|
||||
"\ue016>\u0a16;" // LETTER KHA
|
||||
"\ue017>\u0a17;" // LETTER GA
|
||||
"\ue018>\u0a18;" // LETTER GHA
|
||||
"\ue019>\u0a19;" // LETTER NGA
|
||||
"\ue01a>\u0a1a;" // LETTER CA
|
||||
"\ue01b>\u0a1b;" // LETTER CHA
|
||||
"\ue01c>\u0a1c;" // LETTER JA
|
||||
"\ue01d>\u0a1d;" // LETTER JHA
|
||||
"\ue01e>\u0a1e;" // LETTER NYA
|
||||
"\ue01f>\u0a1f;" // LETTER TTA
|
||||
"\ue020>\u0a20;" // LETTER TTHA
|
||||
"\ue021>\u0a21;" // LETTER DDA
|
||||
"\ue022>\u0a22;" // LETTER DDHA
|
||||
"\ue023>\u0a23;" // LETTER NNA
|
||||
"\ue024>\u0a24;" // LETTER TA
|
||||
"\ue025>\u0a25;" // LETTER THA
|
||||
"\ue026>\u0a26;" // LETTER DA
|
||||
"\ue027>\u0a27;" // LETTER DHA
|
||||
"\ue028>\u0a28;" // LETTER NA
|
||||
"\ue029>\u0a28;" // REMAP (indicExceptions.txt): \u0a29>\u0a28 = LETTER NNNA>LETTER NA
|
||||
"\ue02a>\u0a2a;" // LETTER PA
|
||||
"\ue02b>\u0a2b;" // LETTER PHA
|
||||
"\ue02c>\u0a2c;" // LETTER BA
|
||||
"\ue02d>\u0a2d;" // LETTER BHA
|
||||
"\ue02e>\u0a2e;" // LETTER MA
|
||||
"\ue02f>\u0a2f;" // LETTER YA
|
||||
"\ue030>\u0a30;" // LETTER RA
|
||||
"\ue032>\u0a32;" // LETTER LA
|
||||
"\ue033>\u0a33;" // LETTER LLA
|
||||
"\ue034>\u0a33;" // REMAP (indicExceptions.txt): \u0a34>\u0a33 = LETTER LLLA>LETTER LLA
|
||||
"\ue035>\u0a35;" // LETTER VA
|
||||
"\ue036>\u0a36;" // LETTER SHA
|
||||
"\ue037>\u0a36;" // REMAP (indicExceptions.txt): \u0a37>\u0a36 = LETTER SSA>LETTER SHA
|
||||
"\ue038>\u0a38;" // LETTER SA
|
||||
"\ue039>\u0a39;" // LETTER HA
|
||||
"\ue03c>\u0a3c;" // SIGN NUKTA
|
||||
// \ue03d>; # UNMAPPED InterIndic-Gurmukhi: SIGN AVAGRAHA
|
||||
"\ue03e>\u0a3e;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0a3f;" // VOWEL SIGN I
|
||||
"\ue040>\u0a40;" // VOWEL SIGN II
|
||||
"\ue041>\u0a41;" // VOWEL SIGN U
|
||||
"\ue042>\u0a42;" // VOWEL SIGN UU
|
||||
// \ue043>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN VOCALIC R
|
||||
// \ue044>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN VOCALIC RR
|
||||
"\ue045>\u0a48;" // REMAP (indicExceptions.txt): \u0a45>\u0a48 = VOWEL SIGN CANDRA E>VOWEL SIGN AI
|
||||
"\ue047>\u0a47;" // VOWEL SIGN EE
|
||||
"\ue048>\u0a48;" // VOWEL SIGN AI
|
||||
"\ue049>\u0a4c;" // REMAP (indicExceptions.txt): \u0a49>\u0a4c = VOWEL SIGN CANDRA O>VOWEL SIGN AU
|
||||
"\ue04b>\u0a4b;" // VOWEL SIGN OO
|
||||
"\ue04c>\u0a4c;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0a4d;" // SIGN VIRAMA
|
||||
// \ue050>; # UNMAPPED InterIndic-Gurmukhi: OM
|
||||
// \ue055>; # UNMAPPED InterIndic-Gurmukhi: LENGTH MARK
|
||||
"\ue056>\u0a48;" // REMAP (indicExceptions.txt): \u0a56>\u0a48 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
"\ue057>\u0a4c;" // REMAP (indicExceptions.txt): \u0a57>\u0a4c = AU LENGTH MARK>VOWEL SIGN AU
|
||||
"\ue059>\u0a59;" // LETTER KHHA
|
||||
"\ue05a>\u0a5a;" // LETTER GHHA
|
||||
"\ue05b>\u0a5b;" // LETTER ZA
|
||||
"\ue05d>\u0a22\u0a3c;" // REMAP (indicExceptions.txt): \u0a5d>\u0a22\u0a3c = LETTER RHA>LETTER DDHA.SIGN NUKTA
|
||||
"\ue05e>\u0a5e;" // LETTER FA
|
||||
"\ue05f>\u0a2f;" // REMAP (indicExceptions.txt): \u0a5f>\u0a2f = LETTER YYA>LETTER YA
|
||||
"\ue060>\u0a30\u0a3f;" // REMAP (indicExceptions.txt): \u0a60>\u0a30\u0a3f = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
|
||||
"\ue061>\u0a08\u0a3c;" // REMAP (indicExceptions.txt): \u0a61>\u0a08\u0a3c = LETTER VOCALIC LL>LETTER II.SIGN NUKTA
|
||||
"\ue062>\u0a3f\u0a3c;" // REMAP (indicExceptions.txt): \u0a62>\u0a3f\u0a3c = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
|
||||
"\ue063>\u0a40\u0a3c;" // REMAP (indicExceptions.txt): \u0a63>\u0a40\u0a3c = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
|
||||
"\ue066>\u0a66;" // DIGIT ZERO
|
||||
"\ue067>\u0a67;" // DIGIT ONE
|
||||
"\ue068>\u0a68;" // DIGIT TWO
|
||||
"\ue069>\u0a69;" // DIGIT THREE
|
||||
"\ue06a>\u0a6a;" // DIGIT FOUR
|
||||
"\ue06b>\u0a6b;" // DIGIT FIVE
|
||||
"\ue06c>\u0a6c;" // DIGIT SIX
|
||||
"\ue06d>\u0a6d;" // DIGIT SEVEN
|
||||
"\ue06e>\u0a6e;" // DIGIT EIGHT
|
||||
"\ue06f>\u0a6f;" // DIGIT NINE
|
||||
// \ue080>; # UNMAPPED InterIndic-Gurmukhi: ISSHAR
|
||||
// \ue081>; # UNMAPPED InterIndic-Gurmukhi: LETTER E
|
||||
// \ue082>; # UNMAPPED InterIndic-Gurmukhi: LETTER O (\u0a02 = SIGN BINDI)
|
||||
"\ue05c>\u0a5c;" // LETTER RRA
|
||||
// \ue084>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN E
|
||||
// \ue085>; # UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN O (\u0a05 = LETTER A)
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
134
icu4c/source/data/translit/t_InterIndic_Knda.txt
Normal file
134
icu4c/source/data/translit/t_InterIndic_Knda.txt
Normal file
@ -0,0 +1,134 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_InterIndic_Kannada.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Kannada
|
||||
|
||||
translit_InterIndic_Kannada {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Kannada.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic_Kannada
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:00 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic-Kannada
|
||||
//:: NFD (NFC) ;
|
||||
"\ue001>\u0c82;" // REMAP (indicExceptions.txt): \u0c81>\u0c82 = SIGN CANDRABINDU>SIGN ANUSVARA
|
||||
"\ue002>\u0c82;" // SIGN ANUSVARA
|
||||
"\ue003>\u0c83;" // SIGN VISARGA
|
||||
"\ue005>\u0c85;" // LETTER A
|
||||
"\ue006>\u0c86;" // LETTER AA
|
||||
"\ue007>\u0c87;" // LETTER I
|
||||
"\ue008>\u0c88;" // LETTER II
|
||||
"\ue009>\u0c89;" // LETTER U
|
||||
"\ue00a>\u0c8a;" // LETTER UU
|
||||
"\ue00b>\u0c8b;" // LETTER VOCALIC R
|
||||
"\ue00c>\u0c8c;" // LETTER VOCALIC L
|
||||
"\ue00f>\u0c8f;" // LETTER EE
|
||||
"\ue010>\u0c90;" // LETTER AI
|
||||
"\ue013>\u0c93;" // LETTER OO
|
||||
"\ue014>\u0c94;" // LETTER AU
|
||||
"\ue015>\u0c95;" // LETTER KA
|
||||
"\ue016>\u0c96;" // LETTER KHA
|
||||
"\ue017>\u0c97;" // LETTER GA
|
||||
"\ue018>\u0c98;" // LETTER GHA
|
||||
"\ue019>\u0c99;" // LETTER NGA
|
||||
"\ue01a>\u0c9a;" // LETTER CA
|
||||
"\ue01b>\u0c9b;" // LETTER CHA
|
||||
"\ue01c>\u0c9c;" // LETTER JA
|
||||
"\ue01d>\u0c9d;" // LETTER JHA
|
||||
"\ue01e>\u0c9e;" // LETTER NYA
|
||||
"\ue01f>\u0c9f;" // LETTER TTA
|
||||
"\ue020>\u0ca0;" // LETTER TTHA
|
||||
"\ue021>\u0ca1;" // LETTER DDA
|
||||
"\ue022>\u0ca2;" // LETTER DDHA
|
||||
"\ue023>\u0ca3;" // LETTER NNA
|
||||
"\ue024>\u0ca4;" // LETTER TA
|
||||
"\ue025>\u0ca5;" // LETTER THA
|
||||
"\ue026>\u0ca6;" // LETTER DA
|
||||
"\ue027>\u0ca7;" // LETTER DHA
|
||||
"\ue028>\u0ca8;" // LETTER NA
|
||||
"\ue029>\u0ca8;" // REMAP (indicExceptions.txt): \u0ca9>\u0ca8 = LETTER NNNA>LETTER NA
|
||||
"\ue02a>\u0caa;" // LETTER PA
|
||||
"\ue02b>\u0cab;" // LETTER PHA
|
||||
"\ue02c>\u0cac;" // LETTER BA
|
||||
"\ue02d>\u0cad;" // LETTER BHA
|
||||
"\ue02e>\u0cae;" // LETTER MA
|
||||
"\ue02f>\u0caf;" // LETTER YA
|
||||
"\ue030>\u0cb0;" // LETTER RA
|
||||
"\ue032>\u0cb2;" // LETTER LA
|
||||
"\ue033>\u0cb3;" // LETTER LLA
|
||||
"\ue034>\u0cb3;" // REMAP (indicExceptions.txt): \u0cb4>\u0cb3 = LETTER LLLA>LETTER LLA
|
||||
"\ue035>\u0cb5;" // LETTER VA
|
||||
"\ue036>\u0cb6;" // LETTER SHA
|
||||
"\ue037>\u0cb7;" // LETTER SSA
|
||||
"\ue038>\u0cb8;" // LETTER SA
|
||||
"\ue039>\u0cb9;" // LETTER HA
|
||||
// \ue03c>; # UNMAPPED InterIndic-Kannada: SIGN NUKTA
|
||||
// \ue03d>; # UNMAPPED InterIndic-Kannada: SIGN AVAGRAHA
|
||||
"\ue03e>\u0cbe;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0cbf;" // VOWEL SIGN I
|
||||
"\ue040>\u0cc0;" // VOWEL SIGN II
|
||||
"\ue041>\u0cc1;" // VOWEL SIGN U
|
||||
"\ue042>\u0cc2;" // VOWEL SIGN UU
|
||||
"\ue043>\u0cc3;" // VOWEL SIGN VOCALIC R
|
||||
"\ue044>\u0cc4;" // VOWEL SIGN VOCALIC RR
|
||||
"\ue045>\u0cc6;" // REMAP (indicExceptions.txt): \u0cc5>\u0cc6 = VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
"\ue047>\u0cc7;" // VOWEL SIGN EE
|
||||
"\ue048>\u0cc8;" // VOWEL SIGN AI
|
||||
"\ue049>\u0cca;" // REMAP (indicExceptions.txt): \u0cc9>\u0cca = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
"\ue04b>\u0ccb;" // VOWEL SIGN OO
|
||||
"\ue04c>\u0ccc;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0ccd;" // SIGN VIRAMA
|
||||
"\ue050>\u0c93\u0c82;" // REMAP (indicExceptions.txt): \u0cd0>\u0c93\u0c82 = OM>LETTER OO.SIGN ANUSVARA
|
||||
"\ue055>\u0cd5;" // LENGTH MARK
|
||||
"\ue056>\u0cd6;" // AI LENGTH MARK
|
||||
"\ue057>\u0ccc;" // REMAP (indicExceptions.txt): \u0cd7>\u0ccc = AU LENGTH MARK>VOWEL SIGN AU
|
||||
"\ue059>\u0c96;" // REMAP (indicExceptions.txt): \u0cd9>\u0c96 = LETTER KHHA>LETTER KHA
|
||||
"\ue05a>\u0c97;" // REMAP (indicExceptions.txt): \u0cda>\u0c97 = LETTER GHHA>LETTER GA
|
||||
"\ue05b>\u0c9c;" // REMAP (indicExceptions.txt): \u0cdb>\u0c9c = LETTER ZA>LETTER JA
|
||||
"\ue05d>\u0ca2;" // REMAP (indicExceptions.txt): \u0cdd>\u0ca2 = LETTER RHA>LETTER DDHA
|
||||
"\ue05e>\u0cde;" // LETTER FA
|
||||
"\ue05f>\u0caf;" // REMAP (indicExceptions.txt): \u0cdf>\u0caf = LETTER YYA>LETTER YA
|
||||
"\ue060>\u0ce0;" // LETTER VOCALIC RR
|
||||
"\ue061>\u0ce1;" // LETTER VOCALIC LL
|
||||
"\ue062>\u0cbf;" // REMAP (indicExceptions.txt): \u0ce2>\u0cbf = VOWEL SIGN VOCALIC L>VOWEL SIGN I
|
||||
"\ue063>\u0cc0;" // REMAP (indicExceptions.txt): \u0ce3>\u0cc0 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
|
||||
"\ue066>\u0ce6;" // DIGIT ZERO
|
||||
"\ue067>\u0ce7;" // DIGIT ONE
|
||||
"\ue068>\u0ce8;" // DIGIT TWO
|
||||
"\ue069>\u0ce9;" // DIGIT THREE
|
||||
"\ue06a>\u0cea;" // DIGIT FOUR
|
||||
"\ue06b>\u0ceb;" // DIGIT FIVE
|
||||
"\ue06c>\u0cec;" // DIGIT SIX
|
||||
"\ue06d>\u0ced;" // DIGIT SEVEN
|
||||
"\ue06e>\u0cee;" // DIGIT EIGHT
|
||||
"\ue06f>\u0cef;" // DIGIT NINE
|
||||
// \ue080>; # UNMAPPED InterIndic-Kannada: ISSHAR
|
||||
"\ue00e>\u0c8e;" // LETTER E
|
||||
"\ue012>\u0c92;" // LETTER O
|
||||
"\ue031>\u0cb1;" // LETTER RRA
|
||||
"\ue046>\u0cc6;" // VOWEL SIGN E
|
||||
"\ue04a>\u0cca;" // VOWEL SIGN O
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
385
icu4c/source/data/translit/t_InterIndic_Latn.txt
Normal file
385
icu4c/source/data/translit/t_InterIndic_Latn.txt
Normal file
@ -0,0 +1,385 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_InterIndic_Latin.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Latin
|
||||
|
||||
translit_InterIndic_Latin {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 2001-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic-Latin
|
||||
// :: NFD (NFC) ;
|
||||
//\ue000 reserved
|
||||
//consonants
|
||||
"$chandrabindu=\ue001;"
|
||||
"$anusvara=\ue002;"
|
||||
"$visarga=\ue003;"
|
||||
//\ue004 reserved
|
||||
// w<vowel> represents the stand-alone form
|
||||
"$wa=\ue005;"
|
||||
"$waa=\ue006;"
|
||||
"$wi=\ue007;"
|
||||
"$wii=\ue008;"
|
||||
"$wu=\ue009;"
|
||||
"$wuu=\ue00a;"
|
||||
"$wr=\ue00b;"
|
||||
"$wl=\ue00c;"
|
||||
"$wce=\ue00d;" // LETTER CANDRA E
|
||||
"$wse=\ue00e;" // LETTER SHORT E
|
||||
"$we=\ue00f;" // \u090f LETTER E
|
||||
"$wai=\ue010;"
|
||||
"$wco=\ue011;" // LETTER CANDRA O
|
||||
"$wso=\ue012;" // LETTER SHORT O
|
||||
"$wo=\ue013;" // \u0913 LETTER O
|
||||
"$wau=\ue014;"
|
||||
"$ka=\ue015;"
|
||||
"$kha=\ue016;"
|
||||
"$ga=\ue017;"
|
||||
"$gha=\ue018;"
|
||||
"$nga=\ue019;"
|
||||
"$ca=\ue01a;"
|
||||
"$cha=\ue01b;"
|
||||
"$ja=\ue01c;"
|
||||
"$jha=\ue01d;"
|
||||
"$nya=\ue01e;"
|
||||
"$tta=\ue01f;"
|
||||
"$ttha=\ue020;"
|
||||
"$dda=\ue021;"
|
||||
"$ddha=\ue022;"
|
||||
"$nna=\ue023;"
|
||||
"$ta=\ue024;"
|
||||
"$tha=\ue025;"
|
||||
"$da=\ue026;"
|
||||
"$dha=\ue027;"
|
||||
"$na=\ue028;"
|
||||
"$ena=\ue029;" //compatibility
|
||||
"$pa=\ue02a;"
|
||||
"$pha=\ue02b;"
|
||||
"$ba=\ue02c;"
|
||||
"$bha=\ue02d;"
|
||||
"$ma=\ue02e;"
|
||||
"$ya=\ue02f;"
|
||||
"$ra=\ue030;"
|
||||
"$rra=\ue031;"
|
||||
"$la=\ue032;"
|
||||
"$lla=\ue033;"
|
||||
"$ela=\ue034;" //compatibility
|
||||
"$va=\ue035;"
|
||||
"$sha=\ue036;"
|
||||
"$ssa=\ue037;"
|
||||
"$sa=\ue038;"
|
||||
"$ha=\ue039;"
|
||||
//\u093a Reserved
|
||||
//\u093b Reserved
|
||||
"$nukta=\ue03c;"
|
||||
"$avagraha=\ue03d;" // SIGN AVAGRAHA
|
||||
// <vowel> represents the dependent form
|
||||
"$aa=\ue03e;"
|
||||
"$i=\ue03f;"
|
||||
"$ii=\ue040;"
|
||||
"$u=\ue041;"
|
||||
"$uu=\ue042;"
|
||||
"$rh=\ue043;"
|
||||
"$lh=\ue044;"
|
||||
"$ce=\ue045;" //VOWEL SIGN CANDRA E
|
||||
"$se=\ue046;" //VOWEL SIGN SHORT E
|
||||
"$e=\ue047;"
|
||||
"$ai=\ue048;"
|
||||
"$co=\ue049;" // VOWEL SIGN CANDRA O
|
||||
"$so=\ue04a;" // VOWEL SIGN SHORT O
|
||||
"$o=\ue04b;" // \u094b
|
||||
"$au=\ue04c;"
|
||||
"$virama=\ue04d;"
|
||||
// \u094e Reserved
|
||||
// \u094f Reserved
|
||||
//\u0950>\ue050; # OM
|
||||
// \u0951>; # UNMAPPED STRESS SIGN UDATTA
|
||||
// \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
|
||||
// \u0953>; # UNMAPPED GRAVE ACCENT
|
||||
// \u0954>; # UNMAPPED ACUTE ACCENT
|
||||
"$lm = \ue055;"// Telugu Length Mark
|
||||
"$ailm=\ue056;"// AI Length Mark
|
||||
"$aulm=\ue057;"// AU Length Mark
|
||||
//Urdu compatibility forms
|
||||
"$uka=\ue058;"
|
||||
"$ukha=\ue059;"
|
||||
"$ugha=\ue05a;"
|
||||
"$ujha=\ue05b;"
|
||||
"$uddha=\ue05c;"
|
||||
"$udha=\ue05d;"
|
||||
"$ufa=\ue05e;"
|
||||
"$uya=\ue05f;"
|
||||
"$wrr=\ue060;"
|
||||
"$wll=\ue061;"
|
||||
"$rrh=\ue062;"
|
||||
"$llh=\ue063;"
|
||||
"$danda=\ue064;"
|
||||
"$doubleDanda=\ue065;"
|
||||
"$zero=\ue066;" // DIGIT ZERO
|
||||
"$one=\ue067;" // DIGIT ONE
|
||||
"$two=\ue068;" // DIGIT TWO
|
||||
"$three=\ue069;" // DIGIT THREE
|
||||
"$four=\ue06a;" // DIGIT FOUR
|
||||
"$five=\ue06b;" // DIGIT FIVE
|
||||
"$six=\ue06c;" // DIGIT SIX
|
||||
"$seven=\ue06d;" // DIGIT SEVEN
|
||||
"$eight=\ue06e;" // DIGIT EIGHT
|
||||
"$nine=\ue06f;" // DIGIT NINE
|
||||
// For all other scripts
|
||||
"$ecp0=\ue070;"
|
||||
"$ecp1=\ue071;"
|
||||
"$ecp2=\ue072;"
|
||||
"$ecp3=\ue073;"
|
||||
"$ecp4=\ue074;"
|
||||
"$ecp5=\ue075;"
|
||||
"$ecp6=\ue076;"
|
||||
"$ecp7=\ue077;"
|
||||
"$ecp8=\ue078;"
|
||||
"$ecp9=\ue079;"
|
||||
"$ecpA=\ue07a;"
|
||||
"$ecpB=\ue07b;"
|
||||
"$ecpC=\ue07c;"
|
||||
"$ecpD=\ue07d;"
|
||||
"$ecpE=\ue07e;"
|
||||
"$ecpF=\ue07f;"
|
||||
// \u0970>; # UNMAPPED ABBREVIATION SIGN
|
||||
"$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
|
||||
"$depVowelBelow=[\ue041-\ue044];"
|
||||
"$endThing=[$danda$doubleDanda \u005c\u005cu0000-\udfff\ue080-\ufffd];"
|
||||
// $x was originally called '&'; $z was '%'
|
||||
"$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];"
|
||||
"$z=[bcdfghjklmnpqrstvwxyz];"
|
||||
"$consonants=[$ka-$ha $virama];"
|
||||
//#####################################################################
|
||||
// convert from Native letters to Latin letters
|
||||
//#####################################################################
|
||||
//transliterations for anusvara
|
||||
"$anusvara} [$ka$kha$ga$gha$nga] > n\u0307;"
|
||||
"$anusvara} [$ca$cha$ja$jha$nya] > n\u0304;"
|
||||
"$anusvara} [$tta$ttha$dda$ddha$nna] > n\u0323;"
|
||||
"$anusvara} [$ta$tha$da$dha$na] > n ;"
|
||||
"$anusvara} [$pa$pha$ba$bha$ma] > m ;"
|
||||
"$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] > n ;"
|
||||
"$anusvara>'-'m\u0307;"
|
||||
// normal consonants
|
||||
"$cha}$x>ch;"
|
||||
"$cha>cha;"
|
||||
"$ca$virama}$ha>c'';"
|
||||
"$ca}$x>c;"
|
||||
"$ca>ca;"
|
||||
"$jha}$x>jh;"
|
||||
"$jha>jha;"
|
||||
"$ja$virama}$ha>j'';"
|
||||
"$ja}$x>j;"
|
||||
"$ja>ja;"
|
||||
//$nya}$x>ny;
|
||||
//$nya>nya;
|
||||
"$nya }$x>n\u0303 ;"
|
||||
"$nya > n\u0303a ;"
|
||||
"$ttha}$x>t\u0323h;"
|
||||
"$tta$virama}$ha>t\u0323'';"
|
||||
"$tta}$x>t\u0323;"
|
||||
"$ddha}$x>d\u0323h;"
|
||||
// dda + virama followed by ha: emit d-with-dot-below plus '' so the following
// 'h' is not read back as the aspirate (d-dot-below + h). Mirrors the
// "$tta$virama}$ha" rule; the look-ahead is limited to $ha so that
// dda + vowel sign + ha no longer picks up a stray ''.
"$dda$virama}$ha>d\u0323'';"
|
||||
"$dda}$x>d\u0323;"
|
||||
"$dha}$x>dh;"
|
||||
"$da$virama}$ha>d'';"
|
||||
"$da$virama}$ddha>d'';"
|
||||
"$da$virama}$dda>d'';"
|
||||
"$da$virama}$dha>d'';"
|
||||
//$da$virama}$da>dda;
|
||||
"$da}$x>d;"
|
||||
"$tha}$x>th;"
|
||||
"$ta$virama}$ha>t'';"
|
||||
"$ta$virama}$ttha>t'';"
|
||||
"$ta$virama}$tta>t'';"
|
||||
"$ta$virama}$tha>t'';"
|
||||
"$tta>t\u0323a;"
|
||||
"$ttha>t\u0323ha;"
|
||||
//$ta$virama}$ta>tta;
|
||||
"$ta}$x>t;"
|
||||
"$tha>tha;"
|
||||
"$ta>ta;"
|
||||
"$dda>d\u0323a;"
|
||||
"$dha>dha;"
|
||||
"$ddha>d\u0323ha;"
|
||||
"$da>da;"
|
||||
"$nna}$x>n\u0323 ;"
|
||||
"$nna>n\u0323a ;"
|
||||
"$na$virama}$ga>n'';"
|
||||
"$na$virama}$ya>n'';"
|
||||
"$na}$x>n;"
|
||||
"$na>na;"
|
||||
"$kha}$x>kh;"
|
||||
"$kha>kha;"
|
||||
"$ka$virama}$ha>k'';"
|
||||
"$ka}$x>k;"
|
||||
"$ka>ka;"
|
||||
"$gha}$x>gh;"
|
||||
"$gha>gha;"
|
||||
"$ga$virama}$ha>g'';"
|
||||
"$ga}$x>g;"
|
||||
"$ga>ga;"
|
||||
//ng<$nga}$x;
|
||||
//nga<$nga;
|
||||
"$nga}$x>n\u0307;"
|
||||
"$nga>n\u0307a ;"
|
||||
"$pha}$x>ph;"
|
||||
"$pha>pha;"
|
||||
"$pa$virama}$ha>p'';"
|
||||
"$pa}$x>p;"
|
||||
"$pa>pa;"
|
||||
"$bha}$x>bh;"
|
||||
"$bha>bha;"
|
||||
"$ba$virama}$ha>b'';"
|
||||
"$ba}$x>b;"
|
||||
"$ba>ba;"
|
||||
"$ma$virama}$ma>m'';"
|
||||
//$ma$virama}$anusvara>m'';
|
||||
"$ma}$x>m;"
|
||||
"$ma>ma;"
|
||||
"$ya}$x>y;"
|
||||
"$ya>ya;"
|
||||
"$ra$virama}$ha>r'';"
|
||||
"$ra}$x>r;"
|
||||
"$ra>ra;"
|
||||
"$la$virama}$ha>l'';"
|
||||
"$la}$x>l;"
|
||||
"$la>la;"
|
||||
"$lla$virama}$ha>l\u0323'';"
|
||||
"$lla}$x>l\u0323;"
|
||||
"$lla>l\u0323a;"
|
||||
"$va}$x>v;"
|
||||
"$va>va;"
|
||||
"$sha}$x>s\u0301;"
|
||||
"$ssa}$x>s\u0323;"
|
||||
"$sa$virama}$ha>s'';"
|
||||
"$sa$virama}$sha>s'';"
|
||||
"$sa$virama}$ssa>s'';"
|
||||
"$sa$virama}$sa>s'';"
|
||||
"$sa}$x>s;"
|
||||
"$sha>s\u0301a;"
|
||||
"$ssa>s\u0323a;"
|
||||
"$sa>sa;"
|
||||
"$ha}$x>h;"
|
||||
"$ha>ha;"
|
||||
// Urdu compatibility
|
||||
"$uya}$x > y\u0307 ;"
|
||||
"$uya > y\u0307a ;"
|
||||
"$ela}$x > l\u0331 ;"
|
||||
"$ela > l\u0331a ;"
|
||||
"$ena}$x > n\u0331 ;"
|
||||
"$ena > n\u0331a ;"
|
||||
"$uka}$x > q ;"
|
||||
"$uka > qa ;"
|
||||
"$ukha}$x > k\u0323 ;"
|
||||
"$ukha > k\u0323a ;"
|
||||
"$ugha}$x > g\u0307 ;"
|
||||
"$ugha > g\u0307a ;"
|
||||
"$ujha}$x > z ;"
|
||||
"$ujha > za ;"
|
||||
"$udha}$x > r\u0323h ;"
|
||||
"$udha > r\u0323ha;"
|
||||
"$uddha}$x> r\u0323 ;"
|
||||
"$uddha > r\u0323a ;"
|
||||
"$ufa}$x > f\u0323 ;"
|
||||
"$ufa > f\u0323a ;"
|
||||
// dependent vowels (should never occur except following consonants)
|
||||
"$aa > a\u0304 ;"
|
||||
"$ai > ai ;"
|
||||
"$au > au ;"
|
||||
"$ii > i\u0304 ;"
|
||||
"$i > i ;"
|
||||
"$uu > u\u0304 ;"
|
||||
"$u > u ;"
|
||||
"$rrh > r\u0325\u0304 ;"
|
||||
"$rh}$consonants>r\u0325;"
|
||||
"$rh > r\u0325a ;"
|
||||
"$llh > l\u0325\u0304 ;"
|
||||
"$lh > l\u0325 ;"
|
||||
"$e > e\u0304 ;"
|
||||
"$o > o\u0304 ;"
|
||||
//extra vowels
|
||||
"$ce > e\u0306 ;"
|
||||
"$co > o\u0306 ;"
|
||||
"$se > e ;"
|
||||
"$so > o ;"
|
||||
// independent vowels (when following consonants)
|
||||
"a}$waa > ''a\u0304 ;"
|
||||
"$z}$waa > ''a\u0304 ;"
|
||||
"a}$wai > ''ai ;"
|
||||
"$z}$wai > ''ai ;"
|
||||
"a}$wau > ''au ;"
|
||||
"$z}$wau > ''au ;"
|
||||
"a}$wii > ''i\u0304 ;"
|
||||
"$z}$wii > ''i\u0304 ;"
|
||||
"a}$wi > ''i ;"
|
||||
"$z}$wi > ''i ;"
|
||||
"a}$wuu > ''u\u0304 ;"
|
||||
"$z}$wuu > ''u\u0304 ;"
|
||||
"a}$wu > ''u ;"
|
||||
"$z}$wu > ''u ;"
|
||||
"$z}$wrr > ''r\u0325\u0304 ;"
|
||||
"$z}$wr > ''r\u0325 ;"
|
||||
"$z}$wll > ''l\u0325\u0304 ;"
|
||||
"$z}$wl > ''l\u0325 ;"
|
||||
"$z}$we > ''e\u0304 ;"
|
||||
"$z}$wo > ''o\u0304 ;"
|
||||
"a}$wa > ''a ;"
|
||||
"$z}$wa > ''a ;"
|
||||
//extra vowels
|
||||
"$z}$wce > ''e\u0306 ;"
|
||||
"$z}$wco > ''o\u0306 ;"
|
||||
"$z}$wse > ''e ;"
|
||||
"$z}$wso > ''o ;"
|
||||
// independent vowels (otherwise)
|
||||
"$waa > a\u0304 ;"
|
||||
"$wai > ai ;"
|
||||
"$wau > au ;"
|
||||
"$wii > i\u0304 ;"
|
||||
"$wi > i ;"
|
||||
"$wuu > u\u0304 ;"
|
||||
"$wu > u ;"
|
||||
"$wrr > r\u0325\u0304 ;"
|
||||
"$wr > r\u0325 ;"
|
||||
"$wll > l\u0325\u0304 ;"
|
||||
"$wl > l\u0325 ;"
|
||||
"$we > e\u0304 ;"
|
||||
"$wo > o\u0304 ;"
|
||||
"$wa > a ;"
|
||||
//extra vowels
|
||||
"$wce > e\u0306 ;"
|
||||
"$wco > o\u0306 ;"
|
||||
"$wse > e ;"
|
||||
"$wso > o ;"
|
||||
//stress marks
|
||||
"$avagraha > \u0315;"
|
||||
"$chandrabindu$anusvara>'-'\u0303;"
|
||||
"$chandrabindu > '-'m\u0310;"
|
||||
"$visarga>'-'h\u0323;"
|
||||
//numbers
|
||||
"$zero > 0;"
|
||||
"$one > 1;"
|
||||
"$two > 2;"
|
||||
"$three > 3;"
|
||||
"$four > 4;"
|
||||
"$five > 5;"
|
||||
"$six > 6;"
|
||||
"$seven > 7;"
|
||||
"$eight > 8;"
|
||||
"$nine > 9;"
|
||||
// blow away any remaining viramas
|
||||
"$virama>;"
|
||||
// :: NFC;
|
||||
}
|
||||
}
|
134
icu4c/source/data/translit/t_InterIndic_Mlym.txt
Normal file
134
icu4c/source/data/translit/t_InterIndic_Mlym.txt
Normal file
@ -0,0 +1,134 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_InterIndic_Malayalam.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Malayalam
|
||||
|
||||
translit_InterIndic_Malayalam {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Malayalam.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic_Malayalam
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:00 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic-Malayalam
|
||||
//:: NFD (NFC) ;
|
||||
"\ue001>\u0d02;" // REMAP (indicExceptions.txt): \u0d01>\u0d02 = SIGN CANDRABINDU>SIGN ANUSVARA
|
||||
"\ue002>\u0d02;" // SIGN ANUSVARA
|
||||
"\ue003>\u0d03;" // SIGN VISARGA
|
||||
"\ue005>\u0d05;" // LETTER A
|
||||
"\ue006>\u0d06;" // LETTER AA
|
||||
"\ue007>\u0d07;" // LETTER I
|
||||
"\ue008>\u0d08;" // LETTER II
|
||||
"\ue009>\u0d09;" // LETTER U
|
||||
"\ue00a>\u0d0a;" // LETTER UU
|
||||
"\ue00b>\u0d0b;" // LETTER VOCALIC R
|
||||
"\ue00c>\u0d0c;" // LETTER VOCALIC L
|
||||
"\ue00f>\u0d0f;" // LETTER EE
|
||||
"\ue010>\u0d10;" // LETTER AI
|
||||
"\ue013>\u0d13;" // LETTER OO
|
||||
"\ue014>\u0d14;" // LETTER AU
|
||||
"\ue015>\u0d15;" // LETTER KA
|
||||
"\ue016>\u0d16;" // LETTER KHA
|
||||
"\ue017>\u0d17;" // LETTER GA
|
||||
"\ue018>\u0d18;" // LETTER GHA
|
||||
"\ue019>\u0d19;" // LETTER NGA
|
||||
"\ue01a>\u0d1a;" // LETTER CA
|
||||
"\ue01b>\u0d1b;" // LETTER CHA
|
||||
"\ue01c>\u0d1c;" // LETTER JA
|
||||
"\ue01d>\u0d1d;" // LETTER JHA
|
||||
"\ue01e>\u0d1e;" // LETTER NYA
|
||||
"\ue01f>\u0d1f;" // LETTER TTA
|
||||
"\ue020>\u0d20;" // LETTER TTHA
|
||||
"\ue021>\u0d21;" // LETTER DDA
|
||||
"\ue022>\u0d22;" // LETTER DDHA
|
||||
"\ue023>\u0d23;" // LETTER NNA
|
||||
"\ue024>\u0d24;" // LETTER TA
|
||||
"\ue025>\u0d25;" // LETTER THA
|
||||
"\ue026>\u0d26;" // LETTER DA
|
||||
"\ue027>\u0d27;" // LETTER DHA
|
||||
"\ue028>\u0d28;" // LETTER NA
|
||||
"\ue029>\u0d28;" // REMAP (indicExceptions.txt): \u0d29>\u0d28 = LETTER NNNA>LETTER NA
|
||||
"\ue02a>\u0d2a;" // LETTER PA
|
||||
"\ue02b>\u0d2b;" // LETTER PHA
|
||||
"\ue02c>\u0d2c;" // LETTER BA
|
||||
"\ue02d>\u0d2d;" // LETTER BHA
|
||||
"\ue02e>\u0d2e;" // LETTER MA
|
||||
"\ue02f>\u0d2f;" // LETTER YA
|
||||
"\ue030>\u0d30;" // LETTER RA
|
||||
"\ue032>\u0d32;" // LETTER LA
|
||||
"\ue033>\u0d33;" // LETTER LLA
|
||||
"\ue034>\u0d34;" // LETTER LLLA
|
||||
"\ue035>\u0d35;" // LETTER VA
|
||||
"\ue036>\u0d36;" // LETTER SHA
|
||||
"\ue037>\u0d37;" // LETTER SSA
|
||||
"\ue038>\u0d38;" // LETTER SA
|
||||
"\ue039>\u0d39;" // LETTER HA
|
||||
// \ue03c>; # UNMAPPED InterIndic-Malayalam: SIGN NUKTA
|
||||
// \ue03d>; # UNMAPPED InterIndic-Malayalam: SIGN AVAGRAHA
|
||||
"\ue03e>\u0d3e;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0d3f;" // VOWEL SIGN I
|
||||
"\ue040>\u0d40;" // VOWEL SIGN II
|
||||
"\ue041>\u0d41;" // VOWEL SIGN U
|
||||
"\ue042>\u0d42;" // VOWEL SIGN UU
|
||||
"\ue043>\u0d43;" // VOWEL SIGN VOCALIC R
|
||||
// \ue044>; # UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC RR
|
||||
"\ue045>\u0d3e;" // REMAP (indicExceptions.txt): \u0d45>\u0d3e = VOWEL SIGN CANDRA E>VOWEL SIGN AA
|
||||
"\ue047>\u0d47;" // VOWEL SIGN EE
|
||||
"\ue048>\u0d48;" // VOWEL SIGN AI
|
||||
"\ue049>\u0d4b;" // REMAP (indicExceptions.txt): \u0d49>\u0d4b = VOWEL SIGN CANDRA O>VOWEL SIGN OO
|
||||
"\ue04b>\u0d4b;" // VOWEL SIGN OO
|
||||
"\ue04c>\u0d4c;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0d4d;" // SIGN VIRAMA
|
||||
// \ue050>; # UNMAPPED InterIndic-Malayalam: OM
|
||||
// \ue055>; # UNMAPPED InterIndic-Malayalam: LENGTH MARK
|
||||
"\ue056>\u0d48;" // REMAP (indicExceptions.txt): \u0d56>\u0d48 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
"\ue057>\u0d57;" // AU LENGTH MARK
|
||||
"\ue059>\u0d16;" // REMAP (indicExceptions.txt): \u0d59>\u0d16 = LETTER KHHA>LETTER KHA
|
||||
"\ue05a>\u0d17;" // REMAP (indicExceptions.txt): \u0d5a>\u0d17 = LETTER GHHA>LETTER GA
|
||||
"\ue05b>\u0d1c;" // REMAP (indicExceptions.txt): \u0d5b>\u0d1c = LETTER ZA>LETTER JA
|
||||
"\ue05d>\u0d22;" // REMAP (indicExceptions.txt): \u0d5d>\u0d22 = LETTER RHA>LETTER DDHA
|
||||
"\ue05e>\u0d2b;" // REMAP (indicExceptions.txt): \u0d5e>\u0d2b = LETTER FA>LETTER PHA
|
||||
"\ue05f>\u0d2f;" // REMAP (indicExceptions.txt): \u0d5f>\u0d2f = LETTER YYA>LETTER YA
|
||||
"\ue060>\u0d60;" // LETTER VOCALIC RR
|
||||
"\ue061>\u0d61;" // LETTER VOCALIC LL
|
||||
// \ue062>; # UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC L
|
||||
// \ue063>; # UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC LL
|
||||
"\ue066>\u0d66;" // DIGIT ZERO
|
||||
"\ue067>\u0d67;" // DIGIT ONE
|
||||
"\ue068>\u0d68;" // DIGIT TWO
|
||||
"\ue069>\u0d69;" // DIGIT THREE
|
||||
"\ue06a>\u0d6a;" // DIGIT FOUR
|
||||
"\ue06b>\u0d6b;" // DIGIT FIVE
|
||||
"\ue06c>\u0d6c;" // DIGIT SIX
|
||||
"\ue06d>\u0d6d;" // DIGIT SEVEN
|
||||
"\ue06e>\u0d6e;" // DIGIT EIGHT
|
||||
"\ue06f>\u0d6f;" // DIGIT NINE
|
||||
// \ue080>; # UNMAPPED InterIndic-Malayalam: ISSHAR
|
||||
"\ue00e>\u0d0e;" // LETTER E
|
||||
"\ue012>\u0d12;" // LETTER O
|
||||
"\ue031>\u0d31;" // LETTER RRA
|
||||
"\ue046>\u0d46;" // VOWEL SIGN E
|
||||
"\ue04a>\u0d4a;" // VOWEL SIGN O
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
134
icu4c/source/data/translit/t_InterIndic_Orya.txt
Normal file
134
icu4c/source/data/translit/t_InterIndic_Orya.txt
Normal file
@ -0,0 +1,134 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_InterIndic_Oriya.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Oriya
|
||||
|
||||
translit_InterIndic_Oriya {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Oriya.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic_Oriya
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:01 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic-Oriya
|
||||
//:: NFD (NFC) ;
|
||||
"\ue001>\u0b01;" // SIGN CANDRABINDU
|
||||
"\ue002>\u0b02;" // SIGN ANUSVARA
|
||||
"\ue003>\u0b03;" // SIGN VISARGA
|
||||
"\ue005>\u0b05;" // LETTER A
|
||||
"\ue006>\u0b06;" // LETTER AA
|
||||
"\ue007>\u0b07;" // LETTER I
|
||||
"\ue008>\u0b08;" // LETTER II
|
||||
"\ue009>\u0b09;" // LETTER U
|
||||
"\ue00a>\u0b0a;" // LETTER UU
|
||||
"\ue00b>\u0b0b;" // LETTER VOCALIC R
|
||||
"\ue00c>\u0b0c;" // LETTER VOCALIC L
|
||||
// \ue00f>; # UNMAPPED InterIndic-Oriya: LETTER EE (\u0b0f = LETTER E)
|
||||
"\ue010>\u0b10;" // LETTER AI
|
||||
// \ue013>; # UNMAPPED InterIndic-Oriya: LETTER OO (\u0b13 = LETTER O)
|
||||
"\ue014>\u0b14;" // LETTER AU
|
||||
"\ue015>\u0b15;" // LETTER KA
|
||||
"\ue016>\u0b16;" // LETTER KHA
|
||||
"\ue017>\u0b17;" // LETTER GA
|
||||
"\ue018>\u0b18;" // LETTER GHA
|
||||
"\ue019>\u0b19;" // LETTER NGA
|
||||
"\ue01a>\u0b1a;" // LETTER CA
|
||||
"\ue01b>\u0b1b;" // LETTER CHA
|
||||
"\ue01c>\u0b1c;" // LETTER JA
|
||||
"\ue01d>\u0b1d;" // LETTER JHA
|
||||
"\ue01e>\u0b1e;" // LETTER NYA
|
||||
"\ue01f>\u0b1f;" // LETTER TTA
|
||||
"\ue020>\u0b20;" // LETTER TTHA
|
||||
"\ue021>\u0b21;" // LETTER DDA
|
||||
"\ue022>\u0b22;" // LETTER DDHA
|
||||
"\ue023>\u0b23;" // LETTER NNA
|
||||
"\ue024>\u0b24;" // LETTER TA
|
||||
"\ue025>\u0b25;" // LETTER THA
|
||||
"\ue026>\u0b26;" // LETTER DA
|
||||
"\ue027>\u0b27;" // LETTER DHA
|
||||
"\ue028>\u0b28;" // LETTER NA
|
||||
"\ue029>\u0b28;" // REMAP (indicExceptions.txt): \u0b29>\u0b28 = LETTER NNNA>LETTER NA
|
||||
"\ue02a>\u0b2a;" // LETTER PA
|
||||
"\ue02b>\u0b2b;" // LETTER PHA
|
||||
"\ue02c>\u0b2c;" // LETTER BA
|
||||
"\ue02d>\u0b2d;" // LETTER BHA
|
||||
"\ue02e>\u0b2e;" // LETTER MA
|
||||
"\ue02f>\u0b2f;" // LETTER YA
|
||||
"\ue030>\u0b30;" // LETTER RA
|
||||
"\ue032>\u0b32;" // LETTER LA
|
||||
"\ue033>\u0b33;" // LETTER LLA
|
||||
"\ue034>\u0b33;" // REMAP (indicExceptions.txt): \u0b34>\u0b33 = LETTER LLLA>LETTER LLA
|
||||
"\ue035>\u0b2c;" // REMAP (indicExceptions.txt): \u0b35>\u0b2c = LETTER VA>LETTER BA
|
||||
"\ue036>\u0b36;" // LETTER SHA
|
||||
"\ue037>\u0b37;" // LETTER SSA
|
||||
"\ue038>\u0b38;" // LETTER SA
|
||||
"\ue039>\u0b39;" // LETTER HA
|
||||
"\ue03c>\u0b3c;" // SIGN NUKTA
|
||||
"\ue03d>\u0b3d;" // SIGN AVAGRAHA
|
||||
"\ue03e>\u0b3e;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0b3f;" // VOWEL SIGN I
|
||||
"\ue040>\u0b40;" // VOWEL SIGN II
|
||||
"\ue041>\u0b41;" // VOWEL SIGN U
|
||||
"\ue042>\u0b42;" // VOWEL SIGN UU
|
||||
"\ue043>\u0b43;" // VOWEL SIGN VOCALIC R
|
||||
"\ue044>\u0b43\u0b3c;" // REMAP (indicExceptions.txt): \u0b44>\u0b43\u0b3c = VOWEL SIGN VOCALIC RR>VOWEL SIGN VOCALIC R.SIGN NUKTA
|
||||
"\ue045>\u0b47;" // REMAP (indicExceptions.txt): \u0b45>\u0b47 = VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
// \ue047>; # UNMAPPED InterIndic-Oriya: VOWEL SIGN EE (\u0b47 = VOWEL SIGN E)
|
||||
"\ue048>\u0b48;" // VOWEL SIGN AI
|
||||
"\ue049>\u0b4b;" // REMAP (indicExceptions.txt): \u0b49>\u0b4b = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
// \ue04b>; # UNMAPPED InterIndic-Oriya: VOWEL SIGN OO (\u0b4b = VOWEL SIGN O)
|
||||
"\ue04c>\u0b4c;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0b4d;" // SIGN VIRAMA
|
||||
"\ue050>\u0b13\u0b01;" // REMAP (indicExceptions.txt): \u0b50>\u0b13\u0b01 = OM>LETTER O.SIGN CANDRABINDU
|
||||
// \ue055>; # UNMAPPED InterIndic-Oriya: LENGTH MARK
|
||||
"\ue056>\u0b56;" // AI LENGTH MARK
|
||||
"\ue057>\u0b57;" // AU LENGTH MARK
|
||||
"\ue059>\u0b16\u0b3c;" // REMAP (indicExceptions.txt): \u0b59>\u0b16\u0b3c = LETTER KHHA>LETTER KHA.SIGN NUKTA
|
||||
"\ue05a>\u0b17\u0b3c;" // REMAP (indicExceptions.txt): \u0b5a>\u0b17\u0b3c = LETTER GHHA>LETTER GA.SIGN NUKTA
|
||||
"\ue05b>\u0b1c\u0b3c;" // REMAP (indicExceptions.txt): \u0b5b>\u0b1c\u0b3c = LETTER ZA>LETTER JA.SIGN NUKTA
|
||||
"\ue05d>\u0b5d;" // LETTER RHA
|
||||
"\ue05e>\u0b2b\u0b3c;" // REMAP (indicExceptions.txt): \u0b5e>\u0b2b\u0b3c = LETTER FA>LETTER PHA.SIGN NUKTA
|
||||
"\ue05f>\u0b5f;" // LETTER YYA
|
||||
"\ue060>\u0b60;" // LETTER VOCALIC RR
|
||||
"\ue061>\u0b61;" // LETTER VOCALIC LL
|
||||
"\ue062>\u0b56\u0b3c;" // REMAP (indicExceptions.txt): \u0b62>\u0b56\u0b3c = VOWEL SIGN VOCALIC L>AI LENGTH MARK.SIGN NUKTA
|
||||
"\ue063>\u0b57\u0b3c;" // REMAP (indicExceptions.txt): \u0b63>\u0b57\u0b3c = VOWEL SIGN VOCALIC LL>AU LENGTH MARK.SIGN NUKTA
|
||||
"\ue066>\u0b66;" // DIGIT ZERO
|
||||
"\ue067>\u0b67;" // DIGIT ONE
|
||||
"\ue068>\u0b68;" // DIGIT TWO
|
||||
"\ue069>\u0b69;" // DIGIT THREE
|
||||
"\ue06a>\u0b6a;" // DIGIT FOUR
|
||||
"\ue06b>\u0b6b;" // DIGIT FIVE
|
||||
"\ue06c>\u0b6c;" // DIGIT SIX
|
||||
"\ue06d>\u0b6d;" // DIGIT SEVEN
|
||||
"\ue06e>\u0b6e;" // DIGIT EIGHT
|
||||
"\ue06f>\u0b6f;" // DIGIT NINE
|
||||
"\ue070>\u0b70;" // ISSHAR
|
||||
"\ue00e>\u0b0f;" // LETTER E
|
||||
"\ue013>\u0b13;" // LETTER O
|
||||
"\ue031>\u0b5c;" // LETTER RRA
|
||||
"\ue047>\u0b47;" // VOWEL SIGN E
|
||||
"\ue04b>\u0b4b;" // VOWEL SIGN O
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
134
icu4c/source/data/translit/t_InterIndic_Taml.txt
Normal file
134
icu4c/source/data/translit/t_InterIndic_Taml.txt
Normal file
@ -0,0 +1,134 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_InterIndic_Tamil.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Tamil
|
||||
|
||||
translit_InterIndic_Tamil {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Tamil.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic_Tamil
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:01 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic-Tamil
|
||||
//:: NFD (NFC) ;
|
||||
// \ue001>; # UNMAPPED InterIndic-Tamil: SIGN CANDRABINDU
|
||||
"\ue002>\u0b82;" // SIGN ANUSVARA
|
||||
"\ue003>\u0b83;" // SIGN VISARGA
|
||||
"\ue005>\u0b85;" // LETTER A
|
||||
"\ue006>\u0b86;" // LETTER AA
|
||||
"\ue007>\u0b87;" // LETTER I
|
||||
"\ue008>\u0b88;" // LETTER II
|
||||
"\ue009>\u0b89;" // LETTER U
|
||||
"\ue00a>\u0b8a;" // LETTER UU
|
||||
"\ue00b>\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0b8b>\u0bb0\u0bbf = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
|
||||
"\ue00c>\u0b87;" // REMAP (indicExceptions.txt): \u0b8c>\u0b87 = LETTER VOCALIC L>LETTER I
|
||||
"\ue00f>\u0b8f;" // LETTER EE
|
||||
"\ue010>\u0b90;" // LETTER AI
|
||||
"\ue013>\u0b93;" // LETTER OO
|
||||
"\ue014>\u0b94;" // LETTER AU
|
||||
"\ue015>\u0b95;" // LETTER KA
|
||||
"\ue016>\u0b95;" // REMAP (indicExceptions.txt): \u0b96>\u0b95 = LETTER KHA>LETTER KA
|
||||
"\ue017>\u0b95;" // REMAP (indicExceptions.txt): \u0b97>\u0b95 = LETTER GA>LETTER KA
|
||||
"\ue018>\u0b95;" // REMAP (indicExceptions.txt): \u0b98>\u0b95 = LETTER GHA>LETTER KA
|
||||
"\ue019>\u0b99;" // LETTER NGA
|
||||
"\ue01a>\u0b9a;" // LETTER CA
|
||||
"\ue01b>\u0b9a;" // REMAP (indicExceptions.txt): \u0b9b>\u0b9a = LETTER CHA>LETTER CA
|
||||
"\ue01c>\u0b9c;" // LETTER JA
|
||||
"\ue01d>\u0b9a;" // REMAP (indicExceptions.txt): \u0b9d>\u0b9a = LETTER JHA>LETTER CA
|
||||
"\ue01e>\u0b9e;" // LETTER NYA
|
||||
"\ue01f>\u0b9f;" // LETTER TTA
|
||||
"\ue020>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba0>\u0b9f = LETTER TTHA>LETTER TTA
|
||||
"\ue021>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba1>\u0b9f = LETTER DDA>LETTER TTA
|
||||
"\ue022>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba2>\u0b9f = LETTER DDHA>LETTER TTA
|
||||
"\ue023>\u0ba3;" // LETTER NNA
|
||||
"\ue024>\u0ba4;" // LETTER TA
|
||||
"\ue025>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba5>\u0ba4 = LETTER THA>LETTER TA
|
||||
"\ue026>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba6>\u0ba4 = LETTER DA>LETTER TA
|
||||
"\ue027>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba7>\u0ba4 = LETTER DHA>LETTER TA
|
||||
"\ue028>\u0ba8;" // LETTER NA
|
||||
"\ue029>\u0ba9;" // LETTER NNNA
|
||||
"\ue02a>\u0baa;" // LETTER PA
|
||||
"\ue02b>\u0baa;" // REMAP (indicExceptions.txt): \u0bab>\u0baa = LETTER PHA>LETTER PA
|
||||
"\ue02c>\u0baa;" // REMAP (indicExceptions.txt): \u0bac>\u0baa = LETTER BA>LETTER PA
|
||||
"\ue02d>\u0baa;" // REMAP (indicExceptions.txt): \u0bad>\u0baa = LETTER BHA>LETTER PA
|
||||
"\ue02e>\u0bae;" // LETTER MA
|
||||
"\ue02f>\u0baf;" // LETTER YA
|
||||
"\ue030>\u0bb0;" // LETTER RA
|
||||
"\ue032>\u0bb2;" // LETTER LA
|
||||
"\ue033>\u0bb3;" // LETTER LLA
|
||||
"\ue034>\u0bb4;" // LETTER LLLA
|
||||
"\ue035>\u0bb5;" // LETTER VA
|
||||
"\ue036>\u0bb7;" // REMAP (indicExceptions.txt): \u0bb6>\u0bb7 = LETTER SHA>LETTER SSA
|
||||
"\ue037>\u0bb7;" // LETTER SSA
|
||||
"\ue038>\u0bb8;" // LETTER SA
|
||||
"\ue039>\u0bb9;" // LETTER HA
|
||||
// \ue03c>; # UNMAPPED InterIndic-Tamil: SIGN NUKTA
|
||||
// \ue03d>; # UNMAPPED InterIndic-Tamil: SIGN AVAGRAHA
|
||||
"\ue03e>\u0bbe;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0bbf;" // VOWEL SIGN I
|
||||
"\ue040>\u0bc0;" // VOWEL SIGN II
|
||||
"\ue041>\u0bc1;" // VOWEL SIGN U
|
||||
"\ue042>\u0bc2;" // VOWEL SIGN UU
|
||||
"\ue043>\u0bcd\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0bc3>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC R>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
|
||||
"\ue044>\u0bcd\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0bc4>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC RR>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
|
||||
"\ue045>\u0bbe;" // REMAP (indicExceptions.txt): \u0bc5>\u0bbe = VOWEL SIGN CANDRA E>VOWEL SIGN AA
|
||||
"\ue047>\u0bc7;" // VOWEL SIGN EE
|
||||
"\ue048>\u0bc8;" // VOWEL SIGN AI
|
||||
"\ue049>\u0bbe;" // REMAP (indicExceptions.txt): \u0bc9>\u0bbe = VOWEL SIGN CANDRA O>VOWEL SIGN AA
|
||||
"\ue04b>\u0bcb;" // VOWEL SIGN OO
|
||||
"\ue04c>\u0bcc;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0bcd;" // SIGN VIRAMA
|
||||
"\ue050>\u0b93\u0bae\u0bcd;" // REMAP (indicExceptions.txt): \u0bd0>\u0b93\u0bae\u0bcd = OM>LETTER OO.LETTER MA.SIGN VIRAMA
|
||||
// \ue055>; # UNMAPPED InterIndic-Tamil: LENGTH MARK
|
||||
"\ue056>\u0bc8;" // REMAP (indicExceptions.txt): \u0bd6>\u0bc8 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
"\ue057>\u0bd7;" // AU LENGTH MARK
|
||||
"\ue059>\u0b95;" // REMAP (indicExceptions.txt): \u0bd9>\u0b95 = LETTER KHHA>LETTER KA
|
||||
"\ue05a>\u0b95;" // REMAP (indicExceptions.txt): \u0bda>\u0b95 = LETTER GHHA>LETTER KA
|
||||
"\ue05b>\u0b9c;" // REMAP (indicExceptions.txt): \u0bdb>\u0b9c = LETTER ZA>LETTER JA
|
||||
"\ue05d>\u0b9f;" // REMAP (indicExceptions.txt): \u0bdd>\u0b9f = LETTER RHA>LETTER TTA
|
||||
"\ue05e>\u0baa;" // REMAP (indicExceptions.txt): \u0bde>\u0baa = LETTER FA>LETTER PA
|
||||
"\ue05f>\u0baf;" // REMAP (indicExceptions.txt): \u0bdf>\u0baf = LETTER YYA>LETTER YA
|
||||
"\ue060>\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0be0>\u0bb0\u0bbf = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
|
||||
"\ue061>\u0b88;" // REMAP (indicExceptions.txt): \u0be1>\u0b88 = LETTER VOCALIC LL>LETTER II
|
||||
// \ue062>; # UNMAPPED InterIndic-Tamil: VOWEL SIGN VOCALIC L
|
||||
// \ue063>; # UNMAPPED InterIndic-Tamil: VOWEL SIGN VOCALIC LL
|
||||
// \ue066>; # UNMAPPED InterIndic-Tamil: DIGIT ZERO
|
||||
"\ue067>\u0be7;" // DIGIT ONE
|
||||
"\ue068>\u0be8;" // DIGIT TWO
|
||||
"\ue069>\u0be9;" // DIGIT THREE
|
||||
"\ue06a>\u0bea;" // DIGIT FOUR
|
||||
"\ue06b>\u0beb;" // DIGIT FIVE
|
||||
"\ue06c>\u0bec;" // DIGIT SIX
|
||||
"\ue06d>\u0bed;" // DIGIT SEVEN
|
||||
"\ue06e>\u0bee;" // DIGIT EIGHT
|
||||
"\ue06f>\u0bef;" // DIGIT NINE
|
||||
// \ue080>; # UNMAPPED InterIndic-Tamil: ISSHAR
|
||||
"\ue00e>\u0b8e;" // LETTER E
|
||||
"\ue012>\u0b92;" // LETTER O
|
||||
"\ue031>\u0bb1;" // LETTER RRA
|
||||
"\ue046>\u0bc6;" // VOWEL SIGN E
|
||||
"\ue04a>\u0bca;" // VOWEL SIGN O
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
134
icu4c/source/data/translit/t_InterIndic_Telu.txt
Normal file
134
icu4c/source/data/translit/t_InterIndic_Telu.txt
Normal file
@ -0,0 +1,134 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_InterIndic_Telugu.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Telugu
|
||||
|
||||
translit_InterIndic_Telugu {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_InterIndic_Telugu.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic_Telugu
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:01 2001
|
||||
//--------------------------------------------------------------------
|
||||
// InterIndic-Telugu
|
||||
//:: NFD (NFC) ;
|
||||
"\ue001>\u0c01;" // SIGN CANDRABINDU
|
||||
"\ue002>\u0c02;" // SIGN ANUSVARA
|
||||
"\ue003>\u0c03;" // SIGN VISARGA
|
||||
"\ue005>\u0c05;" // LETTER A
|
||||
"\ue006>\u0c06;" // LETTER AA
|
||||
"\ue007>\u0c07;" // LETTER I
|
||||
"\ue008>\u0c08;" // LETTER II
|
||||
"\ue009>\u0c09;" // LETTER U
|
||||
"\ue00a>\u0c0a;" // LETTER UU
|
||||
"\ue00b>\u0c0b;" // LETTER VOCALIC R
|
||||
"\ue00c>\u0c0c;" // LETTER VOCALIC L
|
||||
"\ue00f>\u0c0f;" // LETTER EE
|
||||
"\ue010>\u0c10;" // LETTER AI
|
||||
"\ue013>\u0c13;" // LETTER OO
|
||||
"\ue014>\u0c14;" // LETTER AU
|
||||
"\ue015>\u0c15;" // LETTER KA
|
||||
"\ue016>\u0c16;" // LETTER KHA
|
||||
"\ue017>\u0c17;" // LETTER GA
|
||||
"\ue018>\u0c18;" // LETTER GHA
|
||||
"\ue019>\u0c19;" // LETTER NGA
|
||||
"\ue01a>\u0c1a;" // LETTER CA
|
||||
"\ue01b>\u0c1b;" // LETTER CHA
|
||||
"\ue01c>\u0c1c;" // LETTER JA
|
||||
"\ue01d>\u0c1d;" // LETTER JHA
|
||||
"\ue01e>\u0c1e;" // LETTER NYA
|
||||
"\ue01f>\u0c1f;" // LETTER TTA
|
||||
"\ue020>\u0c20;" // LETTER TTHA
|
||||
"\ue021>\u0c21;" // LETTER DDA
|
||||
"\ue022>\u0c22;" // LETTER DDHA
|
||||
"\ue023>\u0c23;" // LETTER NNA
|
||||
"\ue024>\u0c24;" // LETTER TA
|
||||
"\ue025>\u0c25;" // LETTER THA
|
||||
"\ue026>\u0c26;" // LETTER DA
|
||||
"\ue027>\u0c27;" // LETTER DHA
|
||||
"\ue028>\u0c28;" // LETTER NA
|
||||
"\ue029>\u0c28;" // REMAP (indicExceptions.txt): \u0c29>\u0c28 = LETTER NNNA>LETTER NA
|
||||
"\ue02a>\u0c2a;" // LETTER PA
|
||||
"\ue02b>\u0c2b;" // LETTER PHA
|
||||
"\ue02c>\u0c2c;" // LETTER BA
|
||||
"\ue02d>\u0c2d;" // LETTER BHA
|
||||
"\ue02e>\u0c2e;" // LETTER MA
|
||||
"\ue02f>\u0c2f;" // LETTER YA
|
||||
"\ue030>\u0c30;" // LETTER RA
|
||||
"\ue032>\u0c32;" // LETTER LA
|
||||
"\ue033>\u0c33;" // LETTER LLA
|
||||
"\ue034>\u0c33;" // REMAP (indicExceptions.txt): \u0c34>\u0c33 = LETTER LLLA>LETTER LLA
|
||||
"\ue035>\u0c35;" // LETTER VA
|
||||
"\ue036>\u0c36;" // LETTER SHA
|
||||
"\ue037>\u0c37;" // LETTER SSA
|
||||
"\ue038>\u0c38;" // LETTER SA
|
||||
"\ue039>\u0c39;" // LETTER HA
|
||||
// \ue03c>; # UNMAPPED InterIndic-Telugu: SIGN NUKTA
|
||||
// \ue03d>; # UNMAPPED InterIndic-Telugu: SIGN AVAGRAHA
|
||||
"\ue03e>\u0c3e;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0c3f;" // VOWEL SIGN I
|
||||
"\ue040>\u0c40;" // VOWEL SIGN II
|
||||
"\ue041>\u0c41;" // VOWEL SIGN U
|
||||
"\ue042>\u0c42;" // VOWEL SIGN UU
|
||||
"\ue043>\u0c43;" // VOWEL SIGN VOCALIC R
|
||||
"\ue044>\u0c44;" // VOWEL SIGN VOCALIC RR
|
||||
"\ue045>\u0c46;" // REMAP (indicExceptions.txt): \u0c45>\u0c46 = VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
"\ue047>\u0c47;" // VOWEL SIGN EE
|
||||
"\ue048>\u0c48;" // VOWEL SIGN AI
|
||||
"\ue049>\u0c4a;" // REMAP (indicExceptions.txt): \u0c49>\u0c4a = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
"\ue04b>\u0c4b;" // VOWEL SIGN OO
|
||||
"\ue04c>\u0c4c;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0c4d;" // SIGN VIRAMA
|
||||
"\ue050>\u0c13\u0c02;" // REMAP (indicExceptions.txt): \u0c50>\u0c13\u0c02 = OM>LETTER OO.SIGN ANUSVARA
|
||||
"\ue055>\u0c55;" // LENGTH MARK
|
||||
"\ue056>\u0c56;" // AI LENGTH MARK
|
||||
"\ue057>\u0c4c;" // REMAP (indicExceptions.txt): \u0c57>\u0c4c = AU LENGTH MARK>VOWEL SIGN AU
|
||||
"\ue059>\u0c16;" // REMAP (indicExceptions.txt): \u0c59>\u0c16 = LETTER KHHA>LETTER KHA
|
||||
"\ue05a>\u0c17;" // REMAP (indicExceptions.txt): \u0c5a>\u0c17 = LETTER GHHA>LETTER GA
|
||||
"\ue05b>\u0c1c;" // REMAP (indicExceptions.txt): \u0c5b>\u0c1c = LETTER ZA>LETTER JA
|
||||
"\ue05d>\u0c22;" // REMAP (indicExceptions.txt): \u0c5d>\u0c22 = LETTER RHA>LETTER DDHA
|
||||
"\ue05e>\u0c2b;" // REMAP (indicExceptions.txt): \u0c5e>\u0c2b = LETTER FA>LETTER PHA
|
||||
"\ue05f>\u0c2f;" // REMAP (indicExceptions.txt): \u0c5f>\u0c2f = LETTER YYA>LETTER YA
|
||||
"\ue060>\u0c60;" // LETTER VOCALIC RR
|
||||
"\ue061>\u0c61;" // LETTER VOCALIC LL
|
||||
"\ue062>\u0c3f;" // REMAP (indicExceptions.txt): \u0c62>\u0c3f = VOWEL SIGN VOCALIC L>VOWEL SIGN I
|
||||
"\ue063>\u0c40;" // REMAP (indicExceptions.txt): \u0c63>\u0c40 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
|
||||
"\ue066>\u0c66;" // DIGIT ZERO
|
||||
"\ue067>\u0c67;" // DIGIT ONE
|
||||
"\ue068>\u0c68;" // DIGIT TWO
|
||||
"\ue069>\u0c69;" // DIGIT THREE
|
||||
"\ue06a>\u0c6a;" // DIGIT FOUR
|
||||
"\ue06b>\u0c6b;" // DIGIT FIVE
|
||||
"\ue06c>\u0c6c;" // DIGIT SIX
|
||||
"\ue06d>\u0c6d;" // DIGIT SEVEN
|
||||
"\ue06e>\u0c6e;" // DIGIT EIGHT
|
||||
"\ue06f>\u0c6f;" // DIGIT NINE
|
||||
// \ue080>; # UNMAPPED InterIndic-Telugu: ISSHAR
|
||||
"\ue00e>\u0c0e;" // LETTER E
|
||||
"\ue012>\u0c12;" // LETTER O
|
||||
"\ue031>\u0c31;" // LETTER RRA
|
||||
"\ue046>\u0c46;" // VOWEL SIGN E
|
||||
"\ue04a>\u0c4a;" // VOWEL SIGN O
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
117
icu4c/source/data/translit/t_Knda_InterIndic.txt
Normal file
117
icu4c/source/data/translit/t_Knda_InterIndic.txt
Normal file
@ -0,0 +1,117 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Kannada_InterIndic.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Kannada_InterIndic
|
||||
|
||||
translit_Kannada_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Kannada_InterIndic.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Kannada_InterIndic
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:05 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Kannada-InterIndic
|
||||
//:: NFD (NFC) ;
|
||||
"\u0c82>\ue002;" // SIGN ANUSVARA
|
||||
"\u0c83>\ue003;" // SIGN VISARGA
|
||||
"\u0c85>\ue005;" // LETTER A
|
||||
"\u0c86>\ue006;" // LETTER AA
|
||||
"\u0c87>\ue007;" // LETTER I
|
||||
"\u0c88>\ue008;" // LETTER II
|
||||
"\u0c89>\ue009;" // LETTER U
|
||||
"\u0c8a>\ue00a;" // LETTER UU
|
||||
"\u0c8b>\ue00b;" // LETTER VOCALIC R
|
||||
"\u0c8c>\ue00c;" // LETTER VOCALIC L
|
||||
"\u0c8e>\ue00e;" // LETTER E
|
||||
"\u0c8f>\ue00f;" // LETTER EE
|
||||
"\u0c90>\ue010;" // LETTER AI
|
||||
"\u0c92>\ue012;" // LETTER O
|
||||
"\u0c93>\ue013;" // LETTER OO
|
||||
"\u0c94>\ue014;" // LETTER AU
|
||||
"\u0c95>\ue015;" // LETTER KA
|
||||
"\u0c96>\ue016;" // LETTER KHA
|
||||
"\u0c97>\ue017;" // LETTER GA
|
||||
"\u0c98>\ue018;" // LETTER GHA
|
||||
"\u0c99>\ue019;" // LETTER NGA
|
||||
"\u0c9a>\ue01a;" // LETTER CA
|
||||
"\u0c9b>\ue01b;" // LETTER CHA
|
||||
"\u0c9c>\ue01c;" // LETTER JA
|
||||
"\u0c9d>\ue01d;" // LETTER JHA
|
||||
"\u0c9e>\ue01e;" // LETTER NYA
|
||||
"\u0c9f>\ue01f;" // LETTER TTA
|
||||
"\u0ca0>\ue020;" // LETTER TTHA
|
||||
"\u0ca1>\ue021;" // LETTER DDA
|
||||
"\u0ca2>\ue022;" // LETTER DDHA
|
||||
"\u0ca3>\ue023;" // LETTER NNA
|
||||
"\u0ca4>\ue024;" // LETTER TA
|
||||
"\u0ca5>\ue025;" // LETTER THA
|
||||
"\u0ca6>\ue026;" // LETTER DA
|
||||
"\u0ca7>\ue027;" // LETTER DHA
|
||||
"\u0ca8>\ue028;" // LETTER NA
|
||||
"\u0caa>\ue02a;" // LETTER PA
|
||||
"\u0cab>\ue02b;" // LETTER PHA
|
||||
"\u0cac>\ue02c;" // LETTER BA
|
||||
"\u0cad>\ue02d;" // LETTER BHA
|
||||
"\u0cae>\ue02e;" // LETTER MA
|
||||
"\u0caf>\ue02f;" // LETTER YA
|
||||
"\u0cb0>\ue030;" // LETTER RA
|
||||
"\u0cb1>\ue031;" // LETTER RRA
|
||||
"\u0cb2>\ue032;" // LETTER LA
|
||||
"\u0cb3>\ue033;" // LETTER LLA
|
||||
"\u0cb5>\ue035;" // LETTER VA
|
||||
"\u0cb6>\ue036;" // LETTER SHA
|
||||
"\u0cb7>\ue037;" // LETTER SSA
|
||||
"\u0cb8>\ue038;" // LETTER SA
|
||||
"\u0cb9>\ue039;" // LETTER HA
|
||||
"\u0cbe>\ue03e;" // VOWEL SIGN AA
|
||||
"\u0cbf>\ue03f;" // VOWEL SIGN I
|
||||
"\u0cc0>\ue040;" // VOWEL SIGN II
|
||||
"\u0cc1>\ue041;" // VOWEL SIGN U
|
||||
"\u0cc2>\ue042;" // VOWEL SIGN UU
|
||||
"\u0cc3>\ue043;" // VOWEL SIGN VOCALIC R
|
||||
"\u0cc4>\ue044;" // VOWEL SIGN VOCALIC RR
|
||||
"\u0cc6>\ue046;" // VOWEL SIGN E
|
||||
"\u0cc7>\ue047;" // VOWEL SIGN EE
|
||||
"\u0cc8>\ue048;" // VOWEL SIGN AI
|
||||
"\u0cca>\ue04a;" // VOWEL SIGN O
|
||||
"\u0ccb>\ue04b;" // VOWEL SIGN OO
|
||||
"\u0ccc>\ue04c;" // VOWEL SIGN AU
|
||||
"\u0ccd>\ue04d;" // SIGN VIRAMA
|
||||
"\u0cd5>\ue055;" // LENGTH MARK
|
||||
"\u0cd6>\ue056;" // AI LENGTH MARK
|
||||
"\u0cde>\ue05e;" // LETTER FA
|
||||
"\u0ce0>\ue060;" // LETTER VOCALIC RR
|
||||
"\u0ce1>\ue061;" // LETTER VOCALIC LL
|
||||
"\u0ce6>\ue066;" // DIGIT ZERO
|
||||
"\u0ce7>\ue067;" // DIGIT ONE
|
||||
"\u0ce8>\ue068;" // DIGIT TWO
|
||||
"\u0ce9>\ue069;" // DIGIT THREE
|
||||
"\u0cea>\ue06a;" // DIGIT FOUR
|
||||
"\u0ceb>\ue06b;" // DIGIT FIVE
|
||||
"\u0cec>\ue06c;" // DIGIT SIX
|
||||
"\u0ced>\ue06d;" // DIGIT SEVEN
|
||||
"\u0cee>\ue06e;" // DIGIT EIGHT
|
||||
"\u0cef>\ue06f;" // DIGIT NINE
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
319
icu4c/source/data/translit/t_Latn_InterIndic.txt
Normal file
319
icu4c/source/data/translit/t_Latn_InterIndic.txt
Normal file
@ -0,0 +1,319 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Latin_InterIndic.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Latin_InterIndic
|
||||
|
||||
translit_Latin_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 2001-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Latin-InterIndic
|
||||
//:: NFD;
|
||||
//\ue000 reserved
|
||||
//consonants
|
||||
"$chandrabindu=\ue001;"
|
||||
"$anusvara=\ue002;"
|
||||
"$visarga=\ue003;"
|
||||
//\ue004 reserved
|
||||
// w<vowel> represents the stand-alone form
|
||||
"$wa=\ue005;"
|
||||
"$waa=\ue006;"
|
||||
"$wi=\ue007;"
|
||||
"$wii=\ue008;"
|
||||
"$wu=\ue009;"
|
||||
"$wuu=\ue00a;"
|
||||
"$wr=\ue00b;"
|
||||
"$wl=\ue00c;"
|
||||
"$wce=\ue00d;" // LETTER CANDRA E
|
||||
"$wse=\ue00e;" // LETTER SHORT E
|
||||
"$we=\ue00f;" // \u090f LETTER E
|
||||
"$wai=\ue010;"
|
||||
"$wco=\ue011;" // LETTER CANDRA O
|
||||
"$wso=\ue012;" // LETTER SHORT O
|
||||
"$wo=\ue013;" // \u0913 LETTER O
|
||||
"$wau=\ue014;"
|
||||
"$ka=\ue015;"
|
||||
"$kha=\ue016;"
|
||||
"$ga=\ue017;"
|
||||
"$gha=\ue018;"
|
||||
"$nga=\ue019;"
|
||||
"$ca=\ue01a;"
|
||||
"$cha=\ue01b;"
|
||||
"$ja=\ue01c;"
|
||||
"$jha=\ue01d;"
|
||||
"$nya=\ue01e;"
|
||||
"$tta=\ue01f;"
|
||||
"$ttha=\ue020;"
|
||||
"$dda=\ue021;"
|
||||
"$ddha=\ue022;"
|
||||
"$nna=\ue023;"
|
||||
"$ta=\ue024;"
|
||||
"$tha=\ue025;"
|
||||
"$da=\ue026;"
|
||||
"$dha=\ue027;"
|
||||
"$na=\ue028;"
|
||||
"$ena=\ue029;" //compatibility
|
||||
"$pa=\ue02a;"
|
||||
"$pha=\ue02b;"
|
||||
"$ba=\ue02c;"
|
||||
"$bha=\ue02d;"
|
||||
"$ma=\ue02e;"
|
||||
"$ya=\ue02f;"
|
||||
"$ra=\ue030;"
|
||||
"$rra=\ue031;"
|
||||
"$la=\ue032;"
|
||||
"$lla=\ue033;"
|
||||
"$ela=\ue034;" //compatibility
|
||||
"$va=\ue035;"
|
||||
"$sha=\ue036;"
|
||||
"$ssa=\ue037;"
|
||||
"$sa=\ue038;"
|
||||
"$ha=\ue039;"
|
||||
//\u093a Reserved
|
||||
//\u093b Reserved
|
||||
"$nukta=\ue03c;"
|
||||
"$avagraha=\ue03d;" // SIGN AVAGRAHA
|
||||
// <vowel> represents the dependent form
|
||||
"$aa=\ue03e;"
|
||||
"$i=\ue03f;"
|
||||
"$ii=\ue040;"
|
||||
"$u=\ue041;"
|
||||
"$uu=\ue042;"
|
||||
"$rh=\ue043;"
|
||||
"$lh=\ue044;"
|
||||
"$ce=\ue045;" //VOWEL SIGN CANDRA E
|
||||
"$se=\ue046;" //VOWEL SIGN SHORT E
|
||||
"$e=\ue047;"
|
||||
"$ai=\ue048;"
|
||||
"$co=\ue049;" // VOWEL SIGN CANDRA O
|
||||
"$so=\ue04a;" // VOWEL SIGN SHORT O
|
||||
"$o=\ue04b;" // \u094b
|
||||
"$au=\ue04c;"
|
||||
"$virama=\ue04d;"
|
||||
// \u094e Reserved
|
||||
// \u094f Reserved
|
||||
//\u0950>\ue050; # OM
|
||||
// \u0951>; # UNMAPPED STRESS SIGN UDATTA
|
||||
// \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
|
||||
// \u0953>; # UNMAPPED GRAVE ACCENT
|
||||
// \u0954>; # UNMAPPED ACUTE ACCENT
|
||||
"$lm = \ue055;"// Telugu Length Mark
|
||||
"$ailm=\ue056;"// AI Length Mark
|
||||
"$aulm=\ue057;"// AU Length Mark
|
||||
//Urdu compatibility forms
|
||||
"$uka=\ue058;"
|
||||
"$ukha=\ue059;"
|
||||
"$ugha=\ue05a;"
|
||||
"$ujha=\ue05b;"
|
||||
"$uddha=\ue05c;"
|
||||
"$udha=\ue05d;"
|
||||
"$ufa=\ue05e;"
|
||||
"$uya=\ue05f;"
|
||||
"$wrr=\ue060;"
|
||||
"$wll=\ue061;"
|
||||
"$rrh=\ue062;"
|
||||
"$llh=\ue063;"
|
||||
"$danda=\ue064;"
|
||||
"$doubleDanda=\ue065;"
|
||||
"$zero=\ue066;" // DIGIT ZERO
|
||||
"$one=\ue067;" // DIGIT ONE
|
||||
"$two=\ue068;" // DIGIT TWO
|
||||
"$three=\ue069;" // DIGIT THREE
|
||||
"$four=\ue06a;" // DIGIT FOUR
|
||||
"$five=\ue06b;" // DIGIT FIVE
|
||||
"$six=\ue06c;" // DIGIT SIX
|
||||
"$seven=\ue06d;" // DIGIT SEVEN
|
||||
"$eight=\ue06e;" // DIGIT EIGHT
|
||||
"$nine=\ue06f;" // DIGIT NINE
|
||||
// For all other scripts
|
||||
"$ecp0=\ue070;"
|
||||
"$ecp1=\ue071;"
|
||||
"$ecp2=\ue072;"
|
||||
"$ecp3=\ue073;"
|
||||
"$ecp4=\ue074;"
|
||||
"$ecp5=\ue075;"
|
||||
"$ecp6=\ue076;"
|
||||
"$ecp7=\ue077;"
|
||||
"$ecp8=\ue078;"
|
||||
"$ecp9=\ue079;"
|
||||
"$ecpA=\ue07a;"
|
||||
"$ecpB=\ue07b;"
|
||||
"$ecpC=\ue07c;"
|
||||
"$ecpD=\ue07d;"
|
||||
"$ecpE=\ue07e;"
|
||||
"$ecpF=\ue07f;"
|
||||
// \u0970>; # UNMAPPED ABBREVIATION SIGN
|
||||
"$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
|
||||
"$depVowelBelow=[\ue041-\ue044];"
|
||||
"$endThing=[$danda$doubleDanda];"
|
||||
// $x was originally called '&'; $z was '%'
|
||||
"$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];"
|
||||
"$z=[bcdfghjklmnpqrstvwxyz];"
|
||||
//DEBUG: $consonants=[$ka-$ha $virama];
|
||||
"\u0315 > $avagraha;"
|
||||
"'-'\u0303>$chandrabindu$anusvara;"
|
||||
"'-'m\u0310>$chandrabindu;"
|
||||
"'-'h\u0323>$visarga;"
|
||||
"mm>$anusvara;"
|
||||
"x>$visarga;"
|
||||
"aa>$waa;"
|
||||
"a\u0304>$waa;"
|
||||
"ai>$wai;"
|
||||
"au>$wau;"
|
||||
"ii>$wii;"
|
||||
"i\u0304>$wii;"
|
||||
"i>$wi;"
|
||||
"uu>$wuu;"
|
||||
"u\u0304>$wuu;"
|
||||
"u>$wu;"
|
||||
"rrh>$wrr;"
|
||||
"r\u0325\u0304>$wrr;"
|
||||
"rh>$wr;"
|
||||
"r\u0325>$wr;"
|
||||
"l\u0325\u0304>$wll;"
|
||||
"lh>$wl;l\u0325>$wl;"
|
||||
"e\u0304>$we;"
|
||||
"o\u0304>$wo;"
|
||||
"a>$wa;"
|
||||
"e\u0306>$wce;"
|
||||
"o\u0306>$wco;"
|
||||
"e>$wse;"
|
||||
"o>$wso;"
|
||||
"n}na > $na|$virama;"
|
||||
"n\u0307}[kg] > $anusvara;"
|
||||
"n\u0307}n\u0303 > $anusvara;"
|
||||
"n\u0304}[cj] > $anusvara;"
|
||||
"n\u0304}n\u0307 > $anusvara;"
|
||||
"n\u0323}[tdn]\u0323 > $anusvara;"
|
||||
"n}[tdn] > $anusvara;"
|
||||
"m}[pbm] > $anusvara;"
|
||||
"n} [yrlvsh] > $anusvara;"
|
||||
"'-'m\u0307 > $anusvara;"
|
||||
"y\u0307>$uya|$virama;"
|
||||
"l\u0331>$ela|$virama;"
|
||||
"n\u0331>$ena|$virama;"
|
||||
"n\u0307>$nga|$virama;"
|
||||
"n\u0303>$nya|$virama;"
|
||||
"n\u0323>$nna|$virama;"
|
||||
"t\u0323h>$ttha|$virama;"
|
||||
"t\u0323>$tta|$virama;"
|
||||
"r\u0323h>$udha|$virama;"
|
||||
"r\u0323>$uddha|$virama;"
|
||||
"d\u0323h>$ddha|$virama;"
|
||||
"d\u0323>$dda|$virama;"
|
||||
"kh>$kha|$virama;"
|
||||
"k>$ka|$virama;"
|
||||
"q>$ka|$virama;"
|
||||
"gh>$gha|$virama;"
|
||||
"g>$ga|$virama;"
|
||||
"ch>$cha|$virama;"
|
||||
"c>$ca|$virama;"
|
||||
"jh>$jha|$virama;"
|
||||
"j>$ja|$virama;"
|
||||
"ny>$nya|$virama;"
|
||||
"tth>$ttha|$virama;"
|
||||
"ddh>$ddha|$virama;"
|
||||
"th>$tha|$virama;"
|
||||
"t>$ta|$virama;"
|
||||
"dh>$dha|$virama;"
|
||||
"d>$da|$virama;"
|
||||
"n>$na|$virama;"
|
||||
"ph>$pha|$virama;"
|
||||
"p>$pa|$virama;"
|
||||
"bh>$bha|$virama;"
|
||||
"b>$ba|$virama;"
|
||||
"m>$ma|$virama;"
|
||||
"y>$ya|$virama;"
|
||||
"r>$ra|$virama;"
|
||||
"l\u0323a>$lla;"
|
||||
"l>$la|$virama;"
|
||||
"v>$va|$virama;"
|
||||
"f>$va|$virama;"
|
||||
"w>$va|$virama;"
|
||||
"sh>$sha|$virama;"
|
||||
"ss>$ssa|$virama;"
|
||||
"s\u0323>$ssa|$virama;"
|
||||
"s\u0301>$sha|$virama;"
|
||||
"s>$sa|$virama;"
|
||||
"z>$sa|$virama;"
|
||||
"h>$ha|$virama;"
|
||||
"'.'>$danda;"
|
||||
"$danda'.'>$doubleDanda;"
|
||||
"$depVowelAbove{'~'>$anusvara;"
|
||||
"$depVowelBelow{'~'>$chandrabindu;"
|
||||
"$virama aa>$aa;"
|
||||
"$virama a\u0304>$aa;"
|
||||
"$virama ai>$ai;"
|
||||
"$virama au>$au;"
|
||||
"$virama ii>$ii;"
|
||||
"$virama i\u0304>$ii;"
|
||||
"$virama i>$i;"
|
||||
"$virama uu>$uu;"
|
||||
"$virama u\u0304>$uu;"
|
||||
"$virama u>$u;"
|
||||
"$virama rrh>$rrh;"
|
||||
"$virama r\u0325\u0304>$rrh;"
|
||||
"$virama rh>$rh;"
|
||||
"$virama r\u0325a>$rh;"
|
||||
"$virama r\u0325>$rh;"
|
||||
"$virama l\u0325\u0304>$llh;"
|
||||
"$virama lh>$lh;"
|
||||
"$virama l\u0325>$lh;"
|
||||
"$virama e\u0304>$e;"
|
||||
"$virama o\u0304>$o;"
|
||||
"$virama a>;"
|
||||
"$virama e\u0306>$ce;"
|
||||
"$virama o\u0306>$co;"
|
||||
"$virama e>$se;"
|
||||
"$virama o>$so;"
|
||||
"$virama''aa>$waa;"
|
||||
"$virama''a\u0304>$waa;"
|
||||
"$virama''ai>$wai;"
|
||||
"$virama''au>$wau;"
|
||||
"$virama''ii>$wii;"
|
||||
"$virama''i\u0304>$wii;"
|
||||
"$virama''i>$wi;"
|
||||
"$virama''uu>$wuu;"
|
||||
"$virama''u\u0304>$wuu;"
|
||||
"$virama''u>$wu;"
|
||||
"$virama''rrh>$wrr;"
|
||||
"$virama''r\u0325\u0304>$wrr;"
|
||||
"$virama''rh>$wr;"
|
||||
"$virama''r\u0325>$wr;"
|
||||
"$virama''l\u0325\u0304>$wll;"
|
||||
"$virama''lh>$wl;"
|
||||
"$virama''l\u0325>$wl;"
|
||||
"$virama''e\u0304>$we;"
|
||||
"$virama''o\u0304>$wo;"
|
||||
"$virama''a>$wa;"
|
||||
"$virama''e\u0306>$wce;"
|
||||
"$virama''o\u0306>$wco;"
|
||||
"$virama''e>$wse;"
|
||||
"$virama''o>$wso;"
|
||||
"$virama } [$z] > $virama;"
|
||||
"$virama } ' ' > $virama ;"
|
||||
"$virama}$endThing>;"
|
||||
"0>$zero;"
|
||||
"1>$one;"
|
||||
"2>$two;"
|
||||
"3>$three;"
|
||||
"4>$four;"
|
||||
"5>$five;"
|
||||
"6>$six;"
|
||||
"7>$seven;"
|
||||
"8>$eight;"
|
||||
"9>$nine;"
|
||||
"''>;"
|
||||
//:: NFC (NFD) ;
|
||||
}
|
||||
}
|
528
icu4c/source/data/translit/t_Latn_Jamo.txt
Normal file
528
icu4c/source/data/translit/t_Latn_Jamo.txt
Normal file
@ -0,0 +1,528 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Latin_Jamo.utf8.txt
|
||||
// Date: Thu Oct 25 22:17:22 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Latin_Jamo
|
||||
|
||||
translit_Latin_Jamo {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Latin-Jamo
|
||||
|
||||
// Transliteration from Latin characters to Korean script is done in
|
||||
// two steps: Latin to Jamo, then Jamo to Hangul. The Jamo-Hangul
|
||||
// transliteration is done algorithmically following Unicode 3.0
|
||||
// section 3.11. This file implements the Latin to Jamo
|
||||
// transliteration using rules.
|
||||
|
||||
// Jamo occupy the block 1100-11FF. Within this block there are three
|
||||
// groups of characters: initial consonants or choseong (I), medial
|
||||
// vowels or jungseong (M), and trailing consonants or jongseong (F).
|
||||
// Standard Korean syllables are of the form I+M+F*.
|
||||
|
||||
// Section 3.11 describes the use of 'filler' jamo to convert
|
||||
// nonstandard syllables to standard form: the choseong filler 115F and
|
||||
// the jungseong filler 1160. In this transliterator, we will not use
|
||||
// 115F or 1160.
|
||||
|
||||
// We will, however, insert two 'null' jamo to make foreign words
|
||||
// conform to Korean syllable structure. These are the null initial
|
||||
// consonant 110B (IEUNG) and the null vowel 1173 (EU). In Latin text,
|
||||
// we will use the hyphen in order to disambiguate strings,
|
||||
// e.g. "kan-ggan" (initial GG) vs. "kanggan" (final NG + initial G).
|
||||
|
||||
// We will not use all of the characters in the jamo block. We will
|
||||
// only use the 19 initials, 21 medials, and 27 finals possessing a
|
||||
// jamo short name as defined in section 4.4 of the Unicode book.
|
||||
|
||||
// Rules of thumb. These guidelines provide the basic framework
|
||||
// for the rules. They are phrased in terms of Latin-Jamo transliteration.
|
||||
// The Jamo-Latin rules derive from these, since the Jamo-Latin rules are
|
||||
// just context-free transliteration of jamo to corresponding short names,
|
||||
// with the addition of hyphens to maintain round-trip integrity
|
||||
// in the context of the Latin-Jamo rules.
|
||||
|
||||
// A sequence of vowels:
|
||||
// - Take the longest sequence you can. If there are too many, or you don't
|
||||
// have a starting consonant, introduce a 110B as necessary.
|
||||
|
||||
// A sequence of consonants.
|
||||
// - First join the double consonants: G + G -> GG
|
||||
// - In the remaining list,
|
||||
// -- If there is no preceding vowel, take the first consonant, and insert EU
|
||||
// after it. Continue with the rest of the consonants.
|
||||
// -- If there is one consonant, attach to the following vowel
|
||||
// -- If there are two consonants and a following vowel, attach one to the
|
||||
// preceding vowel, and one to the following vowel.
|
||||
// -- If there are more than two consonants, join the first two together if you
|
||||
// can: L + G => LG
|
||||
// -- If you still end up with more than 2 consonants, insert EU after the
|
||||
// first one, and continue with the rest of the consonants.
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Variables
|
||||
|
||||
// Some latin consonants or consonant pairs only occur as initials, and
|
||||
// some only as finals, but some occur as both. This makes some jamo
|
||||
// consonants ambiguous when transliterated into latin.
|
||||
// Initial only: IEUNG BB DD JJ R
|
||||
// Final only: BS GS L LB LG LH LM LP LS LT NG NH NJ
|
||||
// Initial and Final: B C D G GG H J K M N P S SS T
|
||||
|
||||
// Initial consonants (choseong): the 19 code points U+1100..U+1112, i.e. the
// same range that $jamoInitial is defined over further down.
// Naming: a trailing 'i' marks a consonant that also has a final form
// declared with a trailing 'f' (e.g. $Gi here vs. $Gf in the finals table).
// Names without a suffix ($DD, $R, $BB, $IEUNG, $JJ) are the "Initial only"
// consonants listed in the comment above.
"$Gi = \u1100;"
|
||||
"$GGi = \u1101;"
|
||||
"$Ni = \u1102;"
|
||||
"$Di = \u1103;"
|
||||
"$DD = \u1104;"
|
||||
"$R = \u1105;"
|
||||
"$Mi = \u1106;"
|
||||
"$Bi = \u1107;"
|
||||
"$BB = \u1108;"
|
||||
"$Si = \u1109;"
|
||||
"$SSi = \u110A;"
|
||||
"$IEUNG = \u110B;" // null initial, inserted during Latin-Jamo
|
||||
"$Ji = \u110C;"
|
||||
"$JJ = \u110D;"
|
||||
"$Ci = \u110E;"
|
||||
"$Ki = \u110F;"
|
||||
"$Ti = \u1110;"
|
||||
"$Pi = \u1111;"
|
||||
"$Hi = \u1112;"
|
||||
|
||||
// Medial vowels (jungseong): the 21 code points U+1161..U+1175, i.e. the
// same range that $jamoMedial is defined over further down.
// Each variable is named after the Latin spelling used for that vowel in
// the medial rules later in this file (e.g. $WAE <-> "wae").
"$A = \u1161;"
|
||||
"$AE = \u1162;"
|
||||
"$YA = \u1163;"
|
||||
"$YAE = \u1164;"
|
||||
"$EO = \u1165;"
|
||||
"$E = \u1166;"
|
||||
"$YEO = \u1167;"
|
||||
"$YE = \u1168;"
|
||||
"$O = \u1169;"
|
||||
"$WA = \u116A;"
|
||||
"$WAE = \u116B;"
|
||||
"$OE = \u116C;"
|
||||
"$YO = \u116D;"
|
||||
"$U = \u116E;"
|
||||
"$WEO = \u116F;"
|
||||
"$WE = \u1170;"
|
||||
"$WI = \u1171;"
|
||||
"$YU = \u1172;"
|
||||
"$EU = \u1173;" // null medial, inserted during Latin-Jamo
|
||||
"$YI = \u1174;"
|
||||
"$I = \u1175;"
|
||||
|
||||
// Final consonants (jongseong): the 27 code points U+11A8..U+11C2.
// Naming: a trailing 'f' marks a consonant that also has an initial form
// declared with a trailing 'i' (e.g. $Gf here vs. $Gi in the initials table).
// Names without a suffix ($GS, $NJ, $NH, $L, $LG, $LM, $LB, $LS, $LT, $LP,
// $LH, $BS, $NG) are the "Final only" consonants listed in the comment above.
"$Gf = \u11A8;"
|
||||
"$GGf = \u11A9;"
|
||||
"$GS = \u11AA;"
|
||||
"$Nf = \u11AB;"
|
||||
"$NJ = \u11AC;"
|
||||
"$NH = \u11AD;"
|
||||
"$Df = \u11AE;"
|
||||
"$L = \u11AF;"
|
||||
"$LG = \u11B0;"
|
||||
"$LM = \u11B1;"
|
||||
"$LB = \u11B2;"
|
||||
"$LS = \u11B3;"
|
||||
"$LT = \u11B4;"
|
||||
"$LP = \u11B5;"
|
||||
"$LH = \u11B6;"
|
||||
"$Mf = \u11B7;"
|
||||
"$Bf = \u11B8;"
|
||||
"$BS = \u11B9;"
|
||||
"$Sf = \u11BA;"
|
||||
"$SSf = \u11BB;"
|
||||
"$NG = \u11BC;"
|
||||
"$Jf = \u11BD;"
|
||||
"$Cf = \u11BE;"
|
||||
"$Kf = \u11BF;"
|
||||
"$Tf = \u11C0;"
|
||||
"$Pf = \u11C1;"
|
||||
"$Hf = \u11C2;"
|
||||
|
||||
"$jamoInitial = [\u1100-\u1112];"
|
||||
|
||||
"$jamoMedial = [\u1161-\u1175];"
|
||||
|
||||
"$latinInitial = [bcdghjkmnprst];"
|
||||
|
||||
// Any character in the latin transliteration of a medial
|
||||
"$latinMedial = [aeiouwy];"
|
||||
|
||||
// The last character of the latin transliteration of a medial
|
||||
"$latinMedialEnd = [aeiou];"
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Jamo-Latin
|
||||
|
||||
// Jamo to latin is relatively simple, since it is the latin that is
|
||||
// ambiguous. Most rules are straightforward, and we encode them below
|
||||
// as simple add-on back rules, e.g.:
|
||||
|
||||
// $jamoMedial {bs} > $BS;
|
||||
|
||||
// becomes
|
||||
|
||||
// $jamoMedial {bs} <> $BS;
|
||||
|
||||
// Furthermore, we don't care about the ordering for Jamo-Latin because
|
||||
// we are going from single characters, so we can very easily piggyback
|
||||
// on the Latin-Jamo.
|
||||
|
||||
// The main issue with Jamo-Latin is when to insert hyphens.
|
||||
// Hyphens are inserted to obtain correct round trip behavior. For
|
||||
// example, the sequence Ki A Gf Gi E, if transliterated to "kagge",
|
||||
// would then round trip to Ki A GGi E. To prevent this, we insert a
|
||||
// hyphen: "kag-ge". IMPORTANT: The need for hyphens depends
|
||||
// very specifically on the behavior of the Latin-Jamo rules. A change
|
||||
// in the Latin-Jamo behavior can completely change the way the
|
||||
// hyphen insertion must be done.
|
||||
|
||||
// First try to preserve actual hyphens in the jamo text by doubling
|
||||
// them. This fixes problems like:
|
||||
// (Di)(A)(Ji)(U)(NG)-(IEUNG)(YEO)(Nf)(Gi)(YEO)(L) => dajung-yeongyeol
|
||||
// => (Di)(A)(Ji)(U)(NG)(IEUNG)(YEO)(Nf)(Gi)(YEO)(L). This is optional
|
||||
// -- if we don't care about losing hyphens in the jamo, we can delete
|
||||
// this rule.
|
||||
|
||||
"'--' <> '-';"
|
||||
|
||||
// Triple consonants. For three consonants "axxx" we insert a
|
||||
// hyphen between the first and second "x" if XXf, Xf, and Xi all
|
||||
// exist, and we have A Xf XXi. This prevents the reverse
|
||||
// transliteration to A XXf Xi.
|
||||
|
||||
"'-' < $latinMedialEnd g {} $GGi;"
|
||||
"'-' < $latinMedialEnd s {} $SSi;"
|
||||
|
||||
// For vowels the rule is similar. If there is a vowel "ae" such that
|
||||
// "a" by itself and "e" by itself are vowels, then we want to map A E
|
||||
// to "a-e" so as not to round trip to AE. However, in the text Ki EO
|
||||
// IEUNG E we don't need to map to "keo-e". "keoe" suffices. For
|
||||
// vowels of the form "aei", both "ae" + "i" and "a" + "ei" must be
|
||||
// tested. NOTE: These rules used to have a left context of
|
||||
// $latinInitial instead of [^$latinMedial]. The problem with this is
|
||||
// sequences where an initial IEUNG is transliterated away:
|
||||
// (IEUNG)(A)(IEUNG)(EO) => aeo => (IEUNG)(AE)(IEUNG)(O)
|
||||
|
||||
"'-' < [^$latinMedial] [y w] e {} [$O $OE];"
|
||||
"'-' < [^$latinMedial] e {} [$O $OE $U];"
|
||||
"'-' < [^$latinMedial] [o a] {} [$E $EO $EU];"
|
||||
"'-' < [^$latinMedial] [w y] a {} [$E $EO $EU];"
|
||||
|
||||
// Similar to the above, but with an intervening $IEUNG.
|
||||
|
||||
"'-' < [^$latinMedial] [y w] e {} $IEUNG [$O $OE];"
|
||||
"'-' < [^$latinMedial] e {} $IEUNG [$O $OE $U];"
|
||||
"'-' < [^$latinMedial] [o a] {} $IEUNG [$E $EO $EU];"
|
||||
"'-' < [^$latinMedial] [w y] a {} $IEUNG [$E $EO $EU];"
|
||||
|
||||
// Single finals followed by IEUNG. The jamo sequence A Xf IEUNG E,
|
||||
// where Xi also exists, must be transliterated as "ax-e" to prevent
|
||||
// the round trip conversion to A Xi E.
|
||||
|
||||
"'-' < $latinMedialEnd b {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd c {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd d {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd g {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd h {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd j {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd k {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd m {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd n {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd p {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd s {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd t {} $IEUNG $jamoMedial;"
|
||||
|
||||
// Double finals followed by IEUNG. Similar to the single finals
|
||||
// followed by IEUNG. Any latin consonant pair X Y, between medials,
|
||||
// that we would split by Latin-Jamo, we must handle when it occurs as
|
||||
// part of A XYf IEUNG E, to prevent round trip conversion to A Xf Yi
|
||||
// E.
|
||||
|
||||
"'-' < $latinMedialEnd b s {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd g g {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd g s {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd l b {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd l g {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd l h {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd l m {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd l p {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd l s {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd l t {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd n g {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd n h {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd n j {} $IEUNG $jamoMedial;"
|
||||
"'-' < $latinMedialEnd s s {} $IEUNG $jamoMedial;"
|
||||
|
||||
// Split doubles. Text of the form A Xf Xi E, where XXi also occurs,
|
||||
// we transliterate as "ax-xe" to prevent round trip transliteration as
|
||||
// A XXi E.
|
||||
|
||||
"'-' < $latinMedialEnd b {} $Bi $jamoMedial;"
|
||||
"'-' < $latinMedialEnd d {} $Di $jamoMedial;"
|
||||
"'-' < $latinMedialEnd j {} $Ji $jamoMedial;"
|
||||
"'-' < $latinMedialEnd g {} $Gi $jamoMedial;"
|
||||
"'-' < $latinMedialEnd s {} $Si $jamoMedial;"
|
||||
|
||||
// XYY. This corresponds to the XYY rule in Latin-Jamo. By default
|
||||
// Latin-Jamo maps "xyy" to Xf YYi, to keep YY together. As a result,
|
||||
// "xyy" forms that correspond to XYf Yi must be transliterated as
|
||||
// "xy-y".
|
||||
|
||||
"'-' < $latinMedialEnd b s {} [$Si $SSi];"
|
||||
"'-' < $latinMedialEnd g s {} [$Si $SSi];"
|
||||
"'-' < $latinMedialEnd l b {} [$Bi $BB];"
|
||||
"'-' < $latinMedialEnd l g {} [$Gi $GGi];"
|
||||
"'-' < $latinMedialEnd l s {} [$Si $SSi];"
|
||||
"'-' < $latinMedialEnd n g {} [$Gi $GGi];"
|
||||
"'-' < $latinMedialEnd n j {} [$Ji $JJ];"
|
||||
|
||||
// Deletion of IEUNG is handled below.
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Latin-Jamo
|
||||
|
||||
// [Basic, context-free Jamo-Latin rules are embedded here too. See
|
||||
// above.]
|
||||
|
||||
// Split digraphs: Text of the form 'axye', where 'xy' is a final
|
||||
// digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and
|
||||
// 'e' are medials, we want to transliterate this as A Xf Yi E rather
|
||||
// than A XYf IEUNG E. We do NOT include text of the form "axxe",
|
||||
// since that is handled differently below. These rules are generated
|
||||
// programmatically from the jamo data.
|
||||
|
||||
"$jamoMedial {b s} $latinMedial > $Bf $Si;"
|
||||
"$jamoMedial {g s} $latinMedial > $Gf $Si;"
|
||||
"$jamoMedial {l b} $latinMedial > $L $Bi;"
|
||||
"$jamoMedial {l g} $latinMedial > $L $Gi;"
|
||||
"$jamoMedial {l h} $latinMedial > $L $Hi;"
|
||||
"$jamoMedial {l m} $latinMedial > $L $Mi;"
|
||||
"$jamoMedial {l p} $latinMedial > $L $Pi;"
|
||||
"$jamoMedial {l s} $latinMedial > $L $Si;"
|
||||
"$jamoMedial {l t} $latinMedial > $L $Ti;"
|
||||
"$jamoMedial {n g} $latinMedial > $Nf $Gi;"
|
||||
"$jamoMedial {n h} $latinMedial > $Nf $Hi;"
|
||||
"$jamoMedial {n j} $latinMedial > $Nf $Ji;"
|
||||
|
||||
// Single consonants are initials: Text of the form 'axe', where 'x'
|
||||
// can be an initial or a final, and 'a' and 'e' are medials, we want
|
||||
// to transliterate as A Xi E rather than A Xf IEUNG E.
|
||||
|
||||
"$jamoMedial {b} $latinMedial > $Bi;"
|
||||
"$jamoMedial {c} $latinMedial > $Ci;"
|
||||
"$jamoMedial {d} $latinMedial > $Di;"
|
||||
"$jamoMedial {g} $latinMedial > $Gi;"
|
||||
"$jamoMedial {h} $latinMedial > $Hi;"
|
||||
"$jamoMedial {j} $latinMedial > $Ji;"
|
||||
"$jamoMedial {k} $latinMedial > $Ki;"
|
||||
"$jamoMedial {m} $latinMedial > $Mi;"
|
||||
"$jamoMedial {n} $latinMedial > $Ni;"
|
||||
"$jamoMedial {p} $latinMedial > $Pi;"
|
||||
"$jamoMedial {s} $latinMedial > $Si;"
|
||||
"$jamoMedial {t} $latinMedial > $Ti;"
|
||||
|
||||
// Doubled initials. The sequence "axxe", where XX exists as an initial
|
||||
// (XXi), and also Xi and Xf exist (true of all digraphs XX), we want
|
||||
// to transliterate as A XXi E, rather than split to A Xf Xi E.
|
||||
|
||||
"$jamoMedial {b b} $latinMedial > $BB;"
|
||||
"$jamoMedial {d d} $latinMedial > $DD;"
|
||||
"$jamoMedial {j j} $latinMedial > $JJ;"
|
||||
"$jamoMedial {g g} $latinMedial > $GGi;"
|
||||
"$jamoMedial {s s} $latinMedial > $SSi;"
|
||||
|
||||
// XYY. Because doubled consonants bind more strongly than XY
|
||||
// consonants, we must handle the sequence "axyy" specially. Here XYf
|
||||
// and YYi must exist. In these cases, we map to Xf YYi rather than
|
||||
// XYf.
|
||||
|
||||
"$jamoMedial {b} s s > $Bf;"
|
||||
"$jamoMedial {g} s s > $Gf;"
|
||||
"$jamoMedial {l} b b > $L;"
|
||||
"$jamoMedial {l} g g > $L;"
|
||||
"$jamoMedial {l} s s > $L;"
|
||||
"$jamoMedial {n} g g > $Nf;"
|
||||
"$jamoMedial {n} j j > $Nf;"
|
||||
|
||||
// Finals: Attach consonant with preceding medial to preceding medial.
|
||||
// Do this BEFORE mapping consonants to initials. Longer keys must
|
||||
// precede shorter keys that they start with, e.g., the rule for 'bs'
|
||||
// must precede 'b'.
|
||||
|
||||
// [BASIC Jamo-Latin FINALS handled here. Order irrelevant within this
|
||||
// block for Jamo-Latin.]
|
||||
|
||||
"$jamoMedial {bs} <> $BS;"
|
||||
"$jamoMedial {b} <> $Bf;"
|
||||
"$jamoMedial {c} <> $Cf;"
|
||||
"$jamoMedial {d} <> $Df;"
|
||||
"$jamoMedial {gg} <> $GGf;"
|
||||
"$jamoMedial {gs} <> $GS;"
|
||||
"$jamoMedial {g} <> $Gf;"
|
||||
"$jamoMedial {h} <> $Hf;"
|
||||
"$jamoMedial {j} <> $Jf;"
|
||||
"$jamoMedial {k} <> $Kf;"
|
||||
"$jamoMedial {lb} <> $LB; $jamoMedial {lg} <> $LG;"
|
||||
"$jamoMedial {lh} <> $LH;"
|
||||
"$jamoMedial {lm} <> $LM;"
|
||||
"$jamoMedial {lp} <> $LP;"
|
||||
"$jamoMedial {ls} <> $LS;"
|
||||
"$jamoMedial {lt} <> $LT;"
|
||||
"$jamoMedial {l} <> $L;"
|
||||
"$jamoMedial {m} <> $Mf;"
|
||||
"$jamoMedial {ng} <> $NG;"
|
||||
"$jamoMedial {nh} <> $NH;"
|
||||
"$jamoMedial {nj} <> $NJ;"
|
||||
"$jamoMedial {n} <> $Nf;"
|
||||
"$jamoMedial {p} <> $Pf;"
|
||||
"$jamoMedial {ss} <> $SSf;"
|
||||
"$jamoMedial {s} <> $Sf;"
|
||||
"$jamoMedial {t} <> $Tf;"
|
||||
|
||||
// Initials: Attach single consonant to following medial. Do this
|
||||
// AFTER mapping finals. Longer keys must precede shorter keys that
|
||||
// they start with, e.g., the rule for 'gg' must precede 'g'.
|
||||
|
||||
// [BASIC Jamo-Latin INITIALS handled here. Order irrelevant within
|
||||
// this block for Jamo-Latin.]
|
||||
|
||||
"{gg} $latinMedial <> $GGi;"
|
||||
"{g} $latinMedial <> $Gi;"
|
||||
"{n} $latinMedial <> $Ni;"
|
||||
"{dd} $latinMedial <> $DD;"
|
||||
"{d} $latinMedial <> $Di;"
|
||||
"{r} $latinMedial <> $R;"
|
||||
"{m} $latinMedial <> $Mi;"
|
||||
"{bb} $latinMedial <> $BB;"
|
||||
"{b} $latinMedial <> $Bi;"
|
||||
"{ss} $latinMedial <> $SSi;"
|
||||
"{s} $latinMedial <> $Si;"
|
||||
"{jj} $latinMedial <> $JJ;"
|
||||
"{j} $latinMedial <> $Ji;"
|
||||
"{c} $latinMedial <> $Ci;"
|
||||
"{k} $latinMedial <> $Ki;"
|
||||
"{t} $latinMedial <> $Ti;"
|
||||
"{p} $latinMedial <> $Pi;"
|
||||
"{h} $latinMedial <> $Hi;"
|
||||
|
||||
// 'r' in final position. Because of the equivalency of the 'l' and
|
||||
// 'r' jamo (the glyphs are the same), we try to provide the same
|
||||
// equivalency in Latin-Jamo. The 'l' to 'r' conversion is handled
|
||||
// below. If we see an 'r' in an apparent final position, treat it
|
||||
// like 'l'. For example, "karka" => Ki A R EU Ki A without this rule.
|
||||
// Instead, we want Ki A L Ki A.
|
||||
|
||||
"$jamoMedial {r} $latinInitial > | l;"
|
||||
|
||||
// Initial + Final: If we match the next rule, we have initial then
|
||||
// final consonant with no intervening medial. We insert the null
|
||||
// vowel BEFORE it to create a well-formed syllable. (In the next rule
|
||||
// we insert a null vowel AFTER an anomalous initial.)
|
||||
|
||||
"$jamoInitial {} [bcdghjklmnpst] > $EU;"
|
||||
|
||||
// Initial + X: This block matches an initial consonant not followed by
|
||||
// a medial. We insert the null vowel after it. We handle double
|
||||
// initials explicitly here; for single initial consonants we insert EU
|
||||
// (as Latin) after them and let standard rules do the rest.
|
||||
|
||||
// BREAKS ROUND TRIP INTEGRITY
|
||||
|
||||
"gg > $GGi $EU;"
|
||||
"dd > $DD $EU;"
|
||||
"bb > $BB $EU;"
|
||||
"ss > $SSi $EU;"
|
||||
"jj > $JJ $EU;"
|
||||
|
||||
"([bcdghjkmnprst]) > | $1 eu;"
|
||||
|
||||
// X + Final: Finally we have to deal with a consonant that can only be
|
||||
// interpreted as a final (not an initial) and which is preceded
|
||||
// neither by an initial nor a medial. It is the start of the
|
||||
// syllable, but cannot be. Most of these will already be handled by
|
||||
// the above rules. 'bs' splits into Bi EU Sf. Similar for 'gs' 'ng'
|
||||
// 'nh' 'nj'. The only problem is 'l' and digraphs starting with 'l'.
|
||||
// For this isolated case, we could add a null initial and medial,
|
||||
// which would give "la" => IEUNG EU L IEUNG A, for example. A more
|
||||
// economical solution is to transliterate isolated "l" (that is,
|
||||
// initial "l") to "r". (Other similar conversions of consonants that
|
||||
// occur neither as initials nor as finals are handled below.)
|
||||
|
||||
"l > | r;"
|
||||
|
||||
// Medials. If a medial is preceded by an initial, then we proceed
|
||||
// normally. As usual, longer keys must precede shorter ones.
|
||||
|
||||
// [BASIC Jamo-Latin MEDIALS handled here. Order irrelevant within
|
||||
// this block for Jamo-Latin.]
|
||||
|
||||
"$jamoInitial {ae} <> $AE;"
|
||||
"$jamoInitial {a} <> $A;"
|
||||
"$jamoInitial {eo} <> $EO;"
|
||||
"$jamoInitial {eu} <> $EU;"
|
||||
"$jamoInitial {e} <> $E;"
|
||||
"$jamoInitial {i} <> $I;"
|
||||
"$jamoInitial {oe} <> $OE;"
|
||||
"$jamoInitial {o} <> $O;"
|
||||
"$jamoInitial {u} <> $U;"
|
||||
"$jamoInitial {wae} <> $WAE;"
|
||||
"$jamoInitial {wa} <> $WA;"
|
||||
"$jamoInitial {weo} <> $WEO;"
|
||||
"$jamoInitial {we} <> $WE;"
|
||||
"$jamoInitial {wi} <> $WI;"
|
||||
"$jamoInitial {yae} <> $YAE;"
|
||||
"$jamoInitial {ya} <> $YA;"
|
||||
"$jamoInitial {yeo} <> $YEO;"
|
||||
"$jamoInitial {ye} <> $YE;"
|
||||
"$jamoInitial {yi} <> $YI;"
|
||||
"$jamoInitial {yo} <> $YO;"
|
||||
"$jamoInitial {yu} <> $YU;"
|
||||
|
||||
// We may see an anomalous isolated 'w' or 'y'. In that case, we
|
||||
// interpret it as 'wi' and 'yu', respectively.
|
||||
|
||||
// BREAKS ROUND TRIP INTEGRITY
|
||||
|
||||
"$jamoInitial {w} > | wi;"
|
||||
"$jamoInitial {y} > | yu;"
|
||||
|
||||
// Otherwise, insert a null consonant IEUNG before the medial (which is
|
||||
// still an untransliterated latin vowel).
|
||||
|
||||
"($latinMedial) > $IEUNG | $1;"
|
||||
|
||||
// Convert non-jamo latin consonants to equivalents. These occur as
|
||||
// neither initials nor finals in jamo. 'l' occurs as a final, but not
|
||||
// an initial; it is handled above. The following letters (left hand
|
||||
// side) will never be output by Jamo-Latin.
|
||||
|
||||
"f > | p;"
|
||||
"q > | k;"
|
||||
"v > | b;"
|
||||
"x > | ks;"
|
||||
"z > | s;"
|
||||
|
||||
// Delete hyphens (Latin-Jamo).
|
||||
|
||||
"'-' > ;"
|
||||
|
||||
// Delete null consonants (Jamo-Latin). Do NOT delete null EU vowels,
|
||||
// since these may also occur in text.
|
||||
|
||||
"< $IEUNG;"
|
||||
|
||||
// eof
|
||||
}
|
||||
}
|
470
icu4c/source/data/translit/t_Latn_Kana.txt
Normal file
470
icu4c/source/data/translit/t_Latn_Kana.txt
Normal file
@ -0,0 +1,470 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Latin_Katakana.txt
|
||||
// Date: Thu Oct 25 22:17:22 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Latin_Katakana
|
||||
|
||||
translit_Latin_Katakana {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// $Source: /xsrl/Nsvn/icu/icu/source/data/translit/Attic/t_Latn_Kana.txt,v $
|
||||
// $Date: 2001/10/26 05:41:16 $
|
||||
// $Revision: 1.1 $
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
"::NFD (NFC) ;"
|
||||
":: [:Latin:] Lower ();"
|
||||
|
||||
// Uses modified Hepburn. Small changes to make unambiguous.
|
||||
|
||||
// | Kunrei-shiki: Hepburn/MHepburn
|
||||
// | ------------------------------
|
||||
// | si: shi
|
||||
// | si ~ya: sha
|
||||
// | si ~yu: shu
|
||||
// | si ~yo: sho
|
||||
// | zi: ji
|
||||
// | zi ~ya: ja
|
||||
// | zi ~yu: ju
|
||||
// | zi ~yo: jo
|
||||
// | ti: chi
|
||||
// | ti ~ya: cha
|
||||
// | ti ~yu: chu
|
||||
// | ti ~yo: cho
|
||||
// | tu: tsu
|
||||
// | di: ji/dji
|
||||
// | du: zu/dzu
|
||||
// | hu: fu
|
||||
|
||||
// | For foreign words:
|
||||
// | -----------------
|
||||
// | se ~i si
|
||||
// | si ~e she
|
||||
// |
|
||||
// | ze ~i zi
|
||||
// | zi ~e je
|
||||
// |
|
||||
// | te ~i ti
|
||||
// | ti ~e che
|
||||
// | te ~u tu
|
||||
// |
|
||||
// | de ~i di
|
||||
// | de ~u du
|
||||
// | de ~i di
|
||||
// |
|
||||
// | he ~u: hu
|
||||
// | hu ~a fa
|
||||
// | hu ~i fi
|
||||
// | hu ~e fe
|
||||
// | hu ~o fo
|
||||
|
||||
// Most small forms are generated, but if necessary
|
||||
// explicit small forms are given with ~a, ~ya, etc.
|
||||
|
||||
//------------------------------------------------------
|
||||
// Variables
|
||||
|
||||
"$vowel = [aeiou] ;"
|
||||
"$macron = \u0304 ;"
|
||||
|
||||
// Variables used for doubled-consonants with tsu
|
||||
|
||||
"$kana = [\u3041-\u3094] ;"
|
||||
|
||||
"$voice = [\u3099\u309B];"
|
||||
"$semivoice = [\u309A\u309C];"
|
||||
|
||||
"$k_start = [カキクケコかきくけこ] ;"
|
||||
|
||||
"$s_start = [サシスセソさしすせそ] ;"
|
||||
|
||||
"$j_start = [シし] $voice ;"
|
||||
|
||||
"$t_start = [タチツテトたちつてと] ;"
|
||||
|
||||
"$n_start = [ナニヌネノンなにぬねの] ;"
|
||||
|
||||
"$h_start = [ハヒヘホはひへほ] ;"
|
||||
"$f_start = [フふ] ;"
|
||||
|
||||
"$m_start = [マミムメモまみむめも] ;"
|
||||
|
||||
"$y_start = [ヤユヨやゆよ] ;"
|
||||
|
||||
"$r_start = [ラリルレロらりるれろ] ;"
|
||||
|
||||
"$w_start = [ワヰヱヲわゐゑを] ;"
|
||||
|
||||
"$v_start = [ワヰヱヲ]゙ ;"
|
||||
|
||||
// if ン is followed by $n_quoter, then it needs an
|
||||
// apostrophe after its romaji form to disambiguate it.
|
||||
// e.g., ン ア ! = ナ, so represent as "n'a", not "na".
|
||||
|
||||
"$n_quoter = [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ;"
|
||||
|
||||
"$small_y = [ャィュェョ] ;"
|
||||
|
||||
"$iteration = \u309D ;"
|
||||
|
||||
//------------------------------------------------------
|
||||
// katakana rules
|
||||
|
||||
// Punctuation
|
||||
|
||||
"'.' <> 。;"
|
||||
"',' <> 、;"
|
||||
// ' ' } [a-z] > ; # delete spaces before latin
|
||||
// ' ' < [^' '\u30A0-\u30ff] {} ['\u30A0-\u30ff] ; #insert spaces before katakana
|
||||
|
||||
// Iteration Mark
|
||||
// Copy previous letter & marks
|
||||
|
||||
// TODO
|
||||
// | $1 $1 < ($kana [[:M:]$voice$semivoice]?) $iteration
|
||||
|
||||
// Specials for katakana -- not shared with hiragana
|
||||
|
||||
"va <> ヷ ;"
|
||||
"vi <> ヸ ;"
|
||||
"ve <> ヹ ;"
|
||||
"vo <> ヺ ;"
|
||||
"'~ka' <> ヵ ;"
|
||||
"'~ke' <> ヶ ;"
|
||||
|
||||
// ~~~ begin shared rules ~~~
|
||||
|
||||
//special
|
||||
|
||||
"ya < '~'ャ;"
|
||||
"yi < '~'ィ ;"
|
||||
"yu < '~'ュ;"
|
||||
"ye < '~'ェ;"
|
||||
"yo < '~'ョ;"
|
||||
|
||||
//normal
|
||||
|
||||
"a <> ア ;"
|
||||
|
||||
"b | '~' < ヒ ゙} $small_y ;"
|
||||
"by } $vowel > ビ | '~y' ;"
|
||||
|
||||
"ba <> バ ;"
|
||||
"bi <> ビ ;"
|
||||
"bu <> ブ ;"
|
||||
"be <> ベ ;"
|
||||
"bo <> ボ ;"
|
||||
|
||||
"c } i > | s ;"
|
||||
"c } e > | s ;"
|
||||
|
||||
"da <> ダ ;"
|
||||
"di <> ディ ;"
|
||||
"du <> デゥ ;"
|
||||
"de <> デ ;"
|
||||
"do <> ド ;"
|
||||
"dzu <> ヅ ;"
|
||||
"dja < ヂャ ;"
|
||||
"dji'~i' < ヂィ ;" // liu
|
||||
"dju < ヂュ ;"
|
||||
"dje < ヂェ ;"
|
||||
"djo < ヂョ ;"
|
||||
"dji <> ヂ ;"
|
||||
"dj } $vowel > ヂ | '~y' ;"
|
||||
|
||||
// TODO: QUESTION: use ĵĴżŻ instead of dj, dz
|
||||
|
||||
"cha < チャ ;"
|
||||
"chi'~i' < チィ ;" // liu
|
||||
"chu < チュ ;"
|
||||
"che < チェ ;"
|
||||
"cho < チョ ;"
|
||||
"chi <> チ ;"
|
||||
"ch } $vowel > チ | '~y' ;"
|
||||
|
||||
"e <> エ ;"
|
||||
|
||||
"g | '~' < ギ} $small_y ;"
|
||||
"gy } $vowel > ギ | '~y' ;"
|
||||
|
||||
"ga <> ガ ;"
|
||||
"gi <> ギ ;"
|
||||
"gu <> グ ;"
|
||||
"ge <> ゲ ;"
|
||||
"go <> ゴ ;"
|
||||
|
||||
"i <> イ ;"
|
||||
|
||||
// j } $vowel > ジ | '~y' ;
|
||||
|
||||
"ja <> ジャ ;"
|
||||
"ji'~i' < ジィ ;" // liu
|
||||
"ju <> ジュ ;"
|
||||
"je <> ジェ ;"
|
||||
"jo <> ジョ ;"
|
||||
"ji <> ジ ;"
|
||||
|
||||
"k | '~' < キ} $small_y ;"
|
||||
"ky } $vowel > キ | '~y' ;"
|
||||
|
||||
"ka <> カ ;"
|
||||
"ki <> キ ;"
|
||||
"ku <> ク ;"
|
||||
"ke <> ケ ;"
|
||||
"ko <> コ ;"
|
||||
|
||||
"m | '~' < ミ} $small_y ;"
|
||||
"my } $vowel > ミ | '~y' ;"
|
||||
|
||||
"ma <> マ ;"
|
||||
"mi <> ミ ;"
|
||||
"mu <> ム ;"
|
||||
"me <> メ ;"
|
||||
"mo <> モ ;"
|
||||
|
||||
"m } [pbfv] > ン ;"
|
||||
|
||||
"n | '~' < ニ } $small_y ;"
|
||||
"ny } $vowel > ニ | '~y' ;"
|
||||
|
||||
"na <> ナ ;"
|
||||
"ni <> ニ ;"
|
||||
"nu <> ヌ ;"
|
||||
"ne <> ネ ;"
|
||||
"no <> ノ ;"
|
||||
|
||||
"o <> オ ;"
|
||||
|
||||
"p | '~' < ピ } $small_y ;"
|
||||
"py } $vowel > ピ | '~y' ;"
|
||||
|
||||
"pa <> パ ;"
|
||||
"pi <> ピ ;"
|
||||
"pu <> プ ;"
|
||||
"pe <> ペ ;"
|
||||
"po <> ポ ;"
|
||||
|
||||
"h | '~' < ヒ } $small_y ;"
|
||||
"hy } $vowel > ヒ | '~y' ;"
|
||||
|
||||
"ha <> ハ ;"
|
||||
"hi <> ヒ ;"
|
||||
"hu <> ヘゥ ;"
|
||||
"he <> ヘ ;"
|
||||
"ho <> ホ ;"
|
||||
|
||||
// f | '~' < フ } $small_y ;
|
||||
// f } $vowel > フ | '~' ;
|
||||
|
||||
"fa <> ファ ;"
|
||||
"fi <> フィ ;"
|
||||
"fe <> フェ ;"
|
||||
"fo <> フォ ;"
|
||||
"fu <> フ ;"
|
||||
|
||||
"r | '~' < リ } $small_y ;"
|
||||
"ry } $vowel > リ | '~y' ;"
|
||||
|
||||
"ra <> ラ ;"
|
||||
"ri <> リ ;"
|
||||
"ru <> ル ;"
|
||||
"re <> レ ;"
|
||||
"ro <> ロ ;"
|
||||
|
||||
"za <> ザ ;"
|
||||
"zi <> ゼィ ;"
|
||||
"zu <> ズ ;"
|
||||
"ze <> ゼ ;"
|
||||
"zo <> ゾ ;"
|
||||
|
||||
"sa <> サ ;"
|
||||
"si <> セィ ;"
|
||||
"su <> ス ;"
|
||||
"se <> セ ;"
|
||||
"so <> ソ ;"
|
||||
|
||||
"sha < シャ ;"
|
||||
"shi'~i' < シィ ;" // liu
|
||||
"shu < シュ ;"
|
||||
"she < シェ ;"
|
||||
"sho < ショ ;"
|
||||
"shi <> シ ;"
|
||||
"sh } $vowel > シ | '~y' ;"
|
||||
|
||||
"ta <> タ ;"
|
||||
"ti <> ティ ;"
|
||||
"tu <> テゥ ;"
|
||||
"te <> テ ;"
|
||||
"to <> ト ;"
|
||||
|
||||
"tsu <> ツ ;"
|
||||
|
||||
// v } $vowel > ヴ | '~' ;
|
||||
|
||||
//'v~a' < ヴァ ; # liu
|
||||
//'v~i' < ヴィ ; # liu
|
||||
//'v~e' < ヴェ ; # liu
|
||||
//'v~o' < ヴォ ; # liu
|
||||
"vu <> ヴ ;"
|
||||
|
||||
"u <> ウ ;"
|
||||
|
||||
// w } $vowel > ウ | '~' ;
|
||||
|
||||
"wa <> ワ ;"
|
||||
"wi <> ヰ ;"
|
||||
"wu > ウ ;"
|
||||
"we <> ヱ ;"
|
||||
"wo <> ヲ ;"
|
||||
|
||||
"ya <> ヤ ;"
|
||||
"yi > イ ;"
|
||||
"yu <> ユ ;"
|
||||
"ye > エ ;"
|
||||
"yo <> ヨ ;"
|
||||
|
||||
// double consonants
|
||||
|
||||
//specials
|
||||
"s } sh > ッ ;"
|
||||
"t } ch > ッ ;"
|
||||
|
||||
//voiced
|
||||
|
||||
"j } j <> ッ } $j_start ;"
|
||||
"b } b <> ッ } [$h_start$f_start] $voice;"
|
||||
"d } d <> ッ } $t_start $voice;"
|
||||
"g } g <> ッ } $k_start $voice;"
|
||||
"p } p <> ッ } [$h_start$f_start] $semivoice;"
|
||||
// v } v <> ッ } [ワヰウヱヲう] $voice ;
|
||||
"z } z <> ッ } $s_start $voice;"
|
||||
"v } v <> ッ } $v_start;"
|
||||
|
||||
// normal
|
||||
|
||||
"k } k <> ッ } $k_start ;"
|
||||
"m } m <> ッ } $m_start ;"
|
||||
"n } n <> ッ } $n_start ;"
|
||||
"h } h <> ッ } $h_start ;"
|
||||
"f } f <> ッ } $f_start ;"
|
||||
"r } r <> ッ } $r_start ;"
|
||||
"t } t <> ッ } $t_start ;"
|
||||
"s } s <> ッ } $s_start ;"
|
||||
|
||||
"w } w <> ッ } $w_start;"
|
||||
"y } y <> ッ } $y_start;"
|
||||
|
||||
// completeness
|
||||
"x } x > ッ ;"
|
||||
"c } k > ッ ;"
|
||||
"c } c > ッ ;"
|
||||
"c } q > ッ ;"
|
||||
"l } l > ッ ;"
|
||||
"q } q > ッ ;"
|
||||
// y } y > ッ ;
|
||||
// w } w > ッ ;
|
||||
|
||||
// prolonged vowel mark. this indicates a doubling of
|
||||
// the preceding vowel sound
|
||||
|
||||
//a < a { ー ; # liu
|
||||
//e < e { ー ; # liu
|
||||
//i < i { ー ; # liu
|
||||
//o < o { ー ; # liu
|
||||
//u < u { ー ; # liu
|
||||
|
||||
"$macron <> ー ;"
|
||||
|
||||
// small forms
|
||||
|
||||
"'~a' <> ァ ;"
|
||||
"'~i' <> ィ ;"
|
||||
"'~u' <> ゥ ;"
|
||||
"'~e' <> ェ ;"
|
||||
"'~o' <> ォ ;"
|
||||
"'~tsu' <> ッ ;"
|
||||
"'~wa' <> ヮ ;"
|
||||
"'~ya' <> ャ ;"
|
||||
"'~yi' > ィ ;"
|
||||
"'~yu' <> ュ ;"
|
||||
"'~ye' > ェ ;"
|
||||
"'~yo' <> ョ ;"
|
||||
|
||||
// h- rule: lengthens vowel if not followed by a vowel
|
||||
|
||||
"[aeiou] } h > ー ;"
|
||||
|
||||
// one-way latin- > kana rules. these do not occur in
|
||||
// well-formed romaji representing actual japanese text.
|
||||
// their purpose is to make all romaji map to kana of
|
||||
// some sort.
|
||||
|
||||
// the following are not really necessary, but produce
|
||||
// slightly more natural results.
|
||||
|
||||
"cy > セィ ;"
|
||||
"dy > ディ ;"
|
||||
"hy > ヒ ;"
|
||||
"sy > セィ ;"
|
||||
"ty > ティ ;"
|
||||
"zy > ゼィ ;"
|
||||
|
||||
"h > ヘ ;"
|
||||
|
||||
// isolated consonants listed here so as not to mask
|
||||
// longer rules above.
|
||||
|
||||
"ch > チ;"
|
||||
"sh > シ ;"
|
||||
"dz > ヅ ;"
|
||||
"dj > ヂ;"
|
||||
|
||||
"b > ブ ;"
|
||||
"d > デ ;"
|
||||
"g > グ ;"
|
||||
"k > ク ;"
|
||||
"m > ム ;"
|
||||
"n'' < ン } $n_quoter ;"
|
||||
"n <> ン ;"
|
||||
"p > プ ;"
|
||||
"r > ル ;"
|
||||
"s > ス ;"
|
||||
"t > テ ;"
|
||||
"y > イ ;"
|
||||
"z > ズ ;"
|
||||
"v > ヴ ;"
|
||||
|
||||
"f > フ;"
|
||||
"j > ジ;"
|
||||
"w > ウ;"
|
||||
|
||||
// simple substitutions using backup
|
||||
|
||||
"c > | k ;"
|
||||
"l > | r ;"
|
||||
"q > | k ;"
|
||||
"x > | ks ;"
|
||||
|
||||
// ~~~ END shared rules ~~~
|
||||
|
||||
//------------------------------------------------------
|
||||
// Final cleanup
|
||||
|
||||
"'~' > ;" // delete stray tildes between letters
|
||||
"[:Katakana:] { '' } [:Latin:] > ;" // delete stray quotes between letters
|
||||
|
||||
":: NFC (NFD) ;"
|
||||
|
||||
// eof
|
||||
}
|
||||
}
|
115
icu4c/source/data/translit/t_Mlym_InterIndic.txt
Normal file
115
icu4c/source/data/translit/t_Mlym_InterIndic.txt
Normal file
@ -0,0 +1,115 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Malayalam_InterIndic.txt
|
||||
// Date: Thu Oct 25 22:17:22 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Malayalam_InterIndic
|
||||
|
||||
translit_Malayalam_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Malayalam_InterIndic.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Malayalam_InterIndic
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:06 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Malayalam-InterIndic: forward-only ('>') rules, one per code point. Each maps a Malayalam character U+0Dxx to the InterIndic pivot code point U+E0xx (Unicode Private Use Area) with the same low byte; code points not listed below have no rule in this table.
|
||||
//:: NFD (NFC) ;
|
||||
"\u0d02>\ue002;" // SIGN ANUSVARA
|
||||
"\u0d03>\ue003;" // SIGN VISARGA
|
||||
"\u0d05>\ue005;" // LETTER A
|
||||
"\u0d06>\ue006;" // LETTER AA
|
||||
"\u0d07>\ue007;" // LETTER I
|
||||
"\u0d08>\ue008;" // LETTER II
|
||||
"\u0d09>\ue009;" // LETTER U
|
||||
"\u0d0a>\ue00a;" // LETTER UU
|
||||
"\u0d0b>\ue00b;" // LETTER VOCALIC R
|
||||
"\u0d0c>\ue00c;" // LETTER VOCALIC L
|
||||
"\u0d0e>\ue00e;" // LETTER E
|
||||
"\u0d0f>\ue00f;" // LETTER EE
|
||||
"\u0d10>\ue010;" // LETTER AI
|
||||
"\u0d12>\ue012;" // LETTER O
|
||||
"\u0d13>\ue013;" // LETTER OO
|
||||
"\u0d14>\ue014;" // LETTER AU
|
||||
"\u0d15>\ue015;" // LETTER KA
|
||||
"\u0d16>\ue016;" // LETTER KHA
|
||||
"\u0d17>\ue017;" // LETTER GA
|
||||
"\u0d18>\ue018;" // LETTER GHA
|
||||
"\u0d19>\ue019;" // LETTER NGA
|
||||
"\u0d1a>\ue01a;" // LETTER CA
|
||||
"\u0d1b>\ue01b;" // LETTER CHA
|
||||
"\u0d1c>\ue01c;" // LETTER JA
|
||||
"\u0d1d>\ue01d;" // LETTER JHA
|
||||
"\u0d1e>\ue01e;" // LETTER NYA
|
||||
"\u0d1f>\ue01f;" // LETTER TTA
|
||||
"\u0d20>\ue020;" // LETTER TTHA
|
||||
"\u0d21>\ue021;" // LETTER DDA
|
||||
"\u0d22>\ue022;" // LETTER DDHA
|
||||
"\u0d23>\ue023;" // LETTER NNA
|
||||
"\u0d24>\ue024;" // LETTER TA
|
||||
"\u0d25>\ue025;" // LETTER THA
|
||||
"\u0d26>\ue026;" // LETTER DA
|
||||
"\u0d27>\ue027;" // LETTER DHA
|
||||
"\u0d28>\ue028;" // LETTER NA
|
||||
"\u0d2a>\ue02a;" // LETTER PA
|
||||
"\u0d2b>\ue02b;" // LETTER PHA
|
||||
"\u0d2c>\ue02c;" // LETTER BA
|
||||
"\u0d2d>\ue02d;" // LETTER BHA
|
||||
"\u0d2e>\ue02e;" // LETTER MA
|
||||
"\u0d2f>\ue02f;" // LETTER YA
|
||||
"\u0d30>\ue030;" // LETTER RA
|
||||
"\u0d31>\ue031;" // LETTER RRA
|
||||
"\u0d32>\ue032;" // LETTER LA
|
||||
"\u0d33>\ue033;" // LETTER LLA
|
||||
"\u0d34>\ue034;" // LETTER LLLA
|
||||
"\u0d35>\ue035;" // LETTER VA
|
||||
"\u0d36>\ue036;" // LETTER SHA
|
||||
"\u0d37>\ue037;" // LETTER SSA
|
||||
"\u0d38>\ue038;" // LETTER SA
|
||||
"\u0d39>\ue039;" // LETTER HA
|
||||
"\u0d3e>\ue03e;" // VOWEL SIGN AA
|
||||
"\u0d3f>\ue03f;" // VOWEL SIGN I
|
||||
"\u0d40>\ue040;" // VOWEL SIGN II
|
||||
"\u0d41>\ue041;" // VOWEL SIGN U
|
||||
"\u0d42>\ue042;" // VOWEL SIGN UU
|
||||
"\u0d43>\ue043;" // VOWEL SIGN VOCALIC R
|
||||
"\u0d46>\ue046;" // VOWEL SIGN E
|
||||
"\u0d47>\ue047;" // VOWEL SIGN EE
|
||||
"\u0d48>\ue048;" // VOWEL SIGN AI
|
||||
"\u0d4a>\ue04a;" // VOWEL SIGN O
|
||||
"\u0d4b>\ue04b;" // VOWEL SIGN OO
|
||||
"\u0d4c>\ue04c;" // VOWEL SIGN AU
|
||||
"\u0d4d>\ue04d;" // SIGN VIRAMA
|
||||
"\u0d57>\ue057;" // AU LENGTH MARK
|
||||
"\u0d60>\ue060;" // LETTER VOCALIC RR
|
||||
"\u0d61>\ue061;" // LETTER VOCALIC LL
|
||||
"\u0d66>\ue066;" // DIGIT ZERO
|
||||
"\u0d67>\ue067;" // DIGIT ONE
|
||||
"\u0d68>\ue068;" // DIGIT TWO
|
||||
"\u0d69>\ue069;" // DIGIT THREE
|
||||
"\u0d6a>\ue06a;" // DIGIT FOUR
|
||||
"\u0d6b>\ue06b;" // DIGIT FIVE
|
||||
"\u0d6c>\ue06c;" // DIGIT SIX
|
||||
"\u0d6d>\ue06d;" // DIGIT SEVEN
|
||||
"\u0d6e>\ue06e;" // DIGIT EIGHT
|
||||
"\u0d6f>\ue06f;" // DIGIT NINE
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
116
icu4c/source/data/translit/t_Orya_InterIndic.txt
Normal file
116
icu4c/source/data/translit/t_Orya_InterIndic.txt
Normal file
@ -0,0 +1,116 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Oriya_InterIndic.txt
|
||||
// Date: Thu Oct 25 22:17:22 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Oriya_InterIndic
|
||||
|
||||
translit_Oriya_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Oriya_InterIndic.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Oriya_InterIndic
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:07 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Oriya-InterIndic: forward-only ('>') rules, one per code point. Each maps an Oriya character U+0Bxx to the InterIndic pivot code point U+E0xx (Unicode Private Use Area) with the same low byte; code points not listed below have no rule in this table.
|
||||
//:: NFD (NFC) ;
|
||||
"\u0b01>\ue001;" // SIGN CANDRABINDU
|
||||
"\u0b02>\ue002;" // SIGN ANUSVARA
|
||||
"\u0b03>\ue003;" // SIGN VISARGA
|
||||
"\u0b05>\ue005;" // LETTER A
|
||||
"\u0b06>\ue006;" // LETTER AA
|
||||
"\u0b07>\ue007;" // LETTER I
|
||||
"\u0b08>\ue008;" // LETTER II
|
||||
"\u0b09>\ue009;" // LETTER U
|
||||
"\u0b0a>\ue00a;" // LETTER UU
|
||||
"\u0b0b>\ue00b;" // LETTER VOCALIC R
|
||||
"\u0b0c>\ue00c;" // LETTER VOCALIC L
|
||||
"\u0b0f>\ue00f;" // LETTER E
|
||||
"\u0b10>\ue010;" // LETTER AI
|
||||
"\u0b13>\ue013;" // LETTER O
|
||||
"\u0b14>\ue014;" // LETTER AU
|
||||
"\u0b15>\ue015;" // LETTER KA
|
||||
"\u0b16>\ue016;" // LETTER KHA
|
||||
"\u0b17>\ue017;" // LETTER GA
|
||||
"\u0b18>\ue018;" // LETTER GHA
|
||||
"\u0b19>\ue019;" // LETTER NGA
|
||||
"\u0b1a>\ue01a;" // LETTER CA
|
||||
"\u0b1b>\ue01b;" // LETTER CHA
|
||||
"\u0b1c>\ue01c;" // LETTER JA
|
||||
"\u0b1d>\ue01d;" // LETTER JHA
|
||||
"\u0b1e>\ue01e;" // LETTER NYA
|
||||
"\u0b1f>\ue01f;" // LETTER TTA
|
||||
"\u0b20>\ue020;" // LETTER TTHA
|
||||
"\u0b21>\ue021;" // LETTER DDA
|
||||
"\u0b22>\ue022;" // LETTER DDHA
|
||||
"\u0b23>\ue023;" // LETTER NNA
|
||||
"\u0b24>\ue024;" // LETTER TA
|
||||
"\u0b25>\ue025;" // LETTER THA
|
||||
"\u0b26>\ue026;" // LETTER DA
|
||||
"\u0b27>\ue027;" // LETTER DHA
|
||||
"\u0b28>\ue028;" // LETTER NA
|
||||
"\u0b2a>\ue02a;" // LETTER PA
|
||||
"\u0b2b>\ue02b;" // LETTER PHA
|
||||
"\u0b2c>\ue02c;" // LETTER BA
|
||||
"\u0b2d>\ue02d;" // LETTER BHA
|
||||
"\u0b2e>\ue02e;" // LETTER MA
|
||||
"\u0b2f>\ue02f;" // LETTER YA
|
||||
"\u0b30>\ue030;" // LETTER RA
|
||||
"\u0b32>\ue032;" // LETTER LA
|
||||
"\u0b33>\ue033;" // LETTER LLA
|
||||
"\u0b36>\ue036;" // LETTER SHA
|
||||
"\u0b37>\ue037;" // LETTER SSA
|
||||
"\u0b38>\ue038;" // LETTER SA
|
||||
"\u0b39>\ue039;" // LETTER HA
|
||||
"\u0b3c>\ue03c;" // SIGN NUKTA
|
||||
"\u0b3d>\ue03d;" // SIGN AVAGRAHA
|
||||
"\u0b3e>\ue03e;" // VOWEL SIGN AA
|
||||
"\u0b3f>\ue03f;" // VOWEL SIGN I
|
||||
"\u0b40>\ue040;" // VOWEL SIGN II
|
||||
"\u0b41>\ue041;" // VOWEL SIGN U
|
||||
"\u0b42>\ue042;" // VOWEL SIGN UU
|
||||
"\u0b43>\ue043;" // VOWEL SIGN VOCALIC R
|
||||
"\u0b47>\ue047;" // VOWEL SIGN E
|
||||
"\u0b48>\ue048;" // VOWEL SIGN AI
|
||||
"\u0b4b>\ue04b;" // VOWEL SIGN O
|
||||
"\u0b4c>\ue04c;" // VOWEL SIGN AU
|
||||
"\u0b4d>\ue04d;" // SIGN VIRAMA
|
||||
"\u0b56>\ue056;" // AI LENGTH MARK
|
||||
"\u0b57>\ue057;" // AU LENGTH MARK
|
||||
"\u0b5c>\ue05c;" // LETTER RRA
|
||||
"\u0b5d>\ue05d;" // LETTER RHA
|
||||
"\u0b5f>\ue05f;" // LETTER YYA
|
||||
"\u0b60>\ue060;" // LETTER VOCALIC RR
|
||||
"\u0b61>\ue061;" // LETTER VOCALIC LL
|
||||
"\u0b66>\ue066;" // DIGIT ZERO
|
||||
"\u0b67>\ue067;" // DIGIT ONE
|
||||
"\u0b68>\ue068;" // DIGIT TWO
|
||||
"\u0b69>\ue069;" // DIGIT THREE
|
||||
"\u0b6a>\ue06a;" // DIGIT FOUR
|
||||
"\u0b6b>\ue06b;" // DIGIT FIVE
|
||||
"\u0b6c>\ue06c;" // DIGIT SIX
|
||||
"\u0b6d>\ue06d;" // DIGIT SEVEN
|
||||
"\u0b6e>\ue06e;" // DIGIT EIGHT
|
||||
"\u0b6f>\ue06f;" // DIGIT NINE
|
||||
"\u0b70>\ue070;" // ISSHAR
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
98
icu4c/source/data/translit/t_Taml_InterIndic.txt
Normal file
98
icu4c/source/data/translit/t_Taml_InterIndic.txt
Normal file
@ -0,0 +1,98 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Tamil_InterIndic.txt
|
||||
// Date: Thu Oct 25 22:17:22 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Tamil_InterIndic
|
||||
|
||||
translit_Tamil_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Tamil_InterIndic.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Tamil_InterIndic
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:07 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Tamil-InterIndic: forward-only ('>') rules, one per code point. Each maps a Tamil character U+0B82..U+0BEF to the InterIndic pivot code point U+E002..U+E06F (Unicode Private Use Area), i.e. low byte minus 0x80; code points not listed below have no rule in this table (U+0BF0..U+0BF2 are explicitly left unmapped near the end).
|
||||
//:: NFD (NFC) ;
|
||||
"\u0b82>\ue002;" // SIGN ANUSVARA
|
||||
"\u0b83>\ue003;" // SIGN VISARGA
|
||||
"\u0b85>\ue005;" // LETTER A
|
||||
"\u0b86>\ue006;" // LETTER AA
|
||||
"\u0b87>\ue007;" // LETTER I
|
||||
"\u0b88>\ue008;" // LETTER II
|
||||
"\u0b89>\ue009;" // LETTER U
|
||||
"\u0b8a>\ue00a;" // LETTER UU
|
||||
"\u0b8e>\ue00e;" // LETTER E
|
||||
"\u0b8f>\ue00f;" // LETTER EE
|
||||
"\u0b90>\ue010;" // LETTER AI
|
||||
"\u0b92>\ue012;" // LETTER O
|
||||
"\u0b93>\ue013;" // LETTER OO
|
||||
"\u0b94>\ue014;" // LETTER AU
|
||||
"\u0b95>\ue015;" // LETTER KA
|
||||
"\u0b99>\ue019;" // LETTER NGA
|
||||
"\u0b9a>\ue01a;" // LETTER CA
|
||||
"\u0b9c>\ue01c;" // LETTER JA
|
||||
"\u0b9e>\ue01e;" // LETTER NYA
|
||||
"\u0b9f>\ue01f;" // LETTER TTA
|
||||
"\u0ba3>\ue023;" // LETTER NNA
|
||||
"\u0ba4>\ue024;" // LETTER TA
|
||||
"\u0ba8>\ue028;" // LETTER NA
|
||||
"\u0ba9>\ue029;" // LETTER NNNA
|
||||
"\u0baa>\ue02a;" // LETTER PA
|
||||
"\u0bae>\ue02e;" // LETTER MA
|
||||
"\u0baf>\ue02f;" // LETTER YA
|
||||
"\u0bb0>\ue030;" // LETTER RA
|
||||
"\u0bb1>\ue031;" // LETTER RRA
|
||||
"\u0bb2>\ue032;" // LETTER LA
|
||||
"\u0bb3>\ue033;" // LETTER LLA
|
||||
"\u0bb4>\ue034;" // LETTER LLLA
|
||||
"\u0bb5>\ue035;" // LETTER VA
|
||||
"\u0bb7>\ue037;" // LETTER SSA
|
||||
"\u0bb8>\ue038;" // LETTER SA
|
||||
"\u0bb9>\ue039;" // LETTER HA
|
||||
"\u0bbe>\ue03e;" // VOWEL SIGN AA
|
||||
"\u0bbf>\ue03f;" // VOWEL SIGN I
|
||||
"\u0bc0>\ue040;" // VOWEL SIGN II
|
||||
"\u0bc1>\ue041;" // VOWEL SIGN U
|
||||
"\u0bc2>\ue042;" // VOWEL SIGN UU
|
||||
"\u0bc6>\ue046;" // VOWEL SIGN E
|
||||
"\u0bc7>\ue047;" // VOWEL SIGN EE
|
||||
"\u0bc8>\ue048;" // VOWEL SIGN AI
|
||||
"\u0bca>\ue04a;" // VOWEL SIGN O
|
||||
"\u0bcb>\ue04b;" // VOWEL SIGN OO
|
||||
"\u0bcc>\ue04c;" // VOWEL SIGN AU
|
||||
"\u0bcd>\ue04d;" // SIGN VIRAMA
|
||||
"\u0bd7>\ue057;" // AU LENGTH MARK
|
||||
"\u0be7>\ue067;" // DIGIT ONE
|
||||
"\u0be8>\ue068;" // DIGIT TWO
|
||||
"\u0be9>\ue069;" // DIGIT THREE
|
||||
"\u0bea>\ue06a;" // DIGIT FOUR
|
||||
"\u0beb>\ue06b;" // DIGIT FIVE
|
||||
"\u0bec>\ue06c;" // DIGIT SIX
|
||||
"\u0bed>\ue06d;" // DIGIT SEVEN
|
||||
"\u0bee>\ue06e;" // DIGIT EIGHT
|
||||
"\u0bef>\ue06f;" // DIGIT NINE
|
||||
// \u0bf0>; # UNMAPPED Tamil-InterIndic: NUMBER TEN
|
||||
// \u0bf1>; # UNMAPPED Tamil-InterIndic: NUMBER ONE HUNDRED
|
||||
// \u0bf2>; # UNMAPPED Tamil-InterIndic: NUMBER ONE THOUSAND
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
117
icu4c/source/data/translit/t_Telu_InterIndic.txt
Normal file
117
icu4c/source/data/translit/t_Telu_InterIndic.txt
Normal file
@ -0,0 +1,117 @@
|
||||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Telugu_InterIndic.txt
|
||||
// Date: Thu Oct 25 22:17:22 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Telugu_InterIndic
|
||||
|
||||
translit_Telugu_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: \u005cicu4j\u005csrc\u005ccom\u005cibm\u005ctext\u005cresources/Transliterator_Telugu_InterIndic.utf8.txt
|
||||
// Date: Thu Mar 1 20:03:55 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Telugu_InterIndic
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2001, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date: Tue Jan 23 12:42:07 2001
|
||||
//--------------------------------------------------------------------
|
||||
// Telugu-InterIndic: forward-only ('>') rules, one per code point. Each maps a Telugu character U+0Cxx to the InterIndic pivot code point U+E0xx (Unicode Private Use Area) with the same low byte; code points not listed below have no rule in this table.
|
||||
//:: NFD (NFC) ;
|
||||
"\u0c01>\ue001;" // SIGN CANDRABINDU
|
||||
"\u0c02>\ue002;" // SIGN ANUSVARA
|
||||
"\u0c03>\ue003;" // SIGN VISARGA
|
||||
"\u0c05>\ue005;" // LETTER A
|
||||
"\u0c06>\ue006;" // LETTER AA
|
||||
"\u0c07>\ue007;" // LETTER I
|
||||
"\u0c08>\ue008;" // LETTER II
|
||||
"\u0c09>\ue009;" // LETTER U
|
||||
"\u0c0a>\ue00a;" // LETTER UU
|
||||
"\u0c0b>\ue00b;" // LETTER VOCALIC R
|
||||
"\u0c0c>\ue00c;" // LETTER VOCALIC L
|
||||
"\u0c0e>\ue00e;" // LETTER E
|
||||
"\u0c0f>\ue00f;" // LETTER EE
|
||||
"\u0c10>\ue010;" // LETTER AI
|
||||
"\u0c12>\ue012;" // LETTER O
|
||||
"\u0c13>\ue013;" // LETTER OO
|
||||
"\u0c14>\ue014;" // LETTER AU
|
||||
"\u0c15>\ue015;" // LETTER KA
|
||||
"\u0c16>\ue016;" // LETTER KHA
|
||||
"\u0c17>\ue017;" // LETTER GA
|
||||
"\u0c18>\ue018;" // LETTER GHA
|
||||
"\u0c19>\ue019;" // LETTER NGA
|
||||
"\u0c1a>\ue01a;" // LETTER CA
|
||||
"\u0c1b>\ue01b;" // LETTER CHA
|
||||
"\u0c1c>\ue01c;" // LETTER JA
|
||||
"\u0c1d>\ue01d;" // LETTER JHA
|
||||
"\u0c1e>\ue01e;" // LETTER NYA
|
||||
"\u0c1f>\ue01f;" // LETTER TTA
|
||||
"\u0c20>\ue020;" // LETTER TTHA
|
||||
"\u0c21>\ue021;" // LETTER DDA
|
||||
"\u0c22>\ue022;" // LETTER DDHA
|
||||
"\u0c23>\ue023;" // LETTER NNA
|
||||
"\u0c24>\ue024;" // LETTER TA
|
||||
"\u0c25>\ue025;" // LETTER THA
|
||||
"\u0c26>\ue026;" // LETTER DA
|
||||
"\u0c27>\ue027;" // LETTER DHA
|
||||
"\u0c28>\ue028;" // LETTER NA
|
||||
"\u0c2a>\ue02a;" // LETTER PA
|
||||
"\u0c2b>\ue02b;" // LETTER PHA
|
||||
"\u0c2c>\ue02c;" // LETTER BA
|
||||
"\u0c2d>\ue02d;" // LETTER BHA
|
||||
"\u0c2e>\ue02e;" // LETTER MA
|
||||
"\u0c2f>\ue02f;" // LETTER YA
|
||||
"\u0c30>\ue030;" // LETTER RA
|
||||
"\u0c31>\ue031;" // LETTER RRA
|
||||
"\u0c32>\ue032;" // LETTER LA
|
||||
"\u0c33>\ue033;" // LETTER LLA
|
||||
"\u0c35>\ue035;" // LETTER VA
|
||||
"\u0c36>\ue036;" // LETTER SHA
|
||||
"\u0c37>\ue037;" // LETTER SSA
|
||||
"\u0c38>\ue038;" // LETTER SA
|
||||
"\u0c39>\ue039;" // LETTER HA
|
||||
"\u0c3e>\ue03e;" // VOWEL SIGN AA
|
||||
"\u0c3f>\ue03f;" // VOWEL SIGN I
|
||||
"\u0c40>\ue040;" // VOWEL SIGN II
|
||||
"\u0c41>\ue041;" // VOWEL SIGN U
|
||||
"\u0c42>\ue042;" // VOWEL SIGN UU
|
||||
"\u0c43>\ue043;" // VOWEL SIGN VOCALIC R
|
||||
"\u0c44>\ue044;" // VOWEL SIGN VOCALIC RR
|
||||
"\u0c46>\ue046;" // VOWEL SIGN E
|
||||
"\u0c47>\ue047;" // VOWEL SIGN EE
|
||||
"\u0c48>\ue048;" // VOWEL SIGN AI
|
||||
"\u0c4a>\ue04a;" // VOWEL SIGN O
|
||||
"\u0c4b>\ue04b;" // VOWEL SIGN OO
|
||||
"\u0c4c>\ue04c;" // VOWEL SIGN AU
|
||||
"\u0c4d>\ue04d;" // SIGN VIRAMA
|
||||
"\u0c55>\ue055;" // LENGTH MARK
|
||||
"\u0c56>\ue056;" // AI LENGTH MARK
|
||||
"\u0c60>\ue060;" // LETTER VOCALIC RR
|
||||
"\u0c61>\ue061;" // LETTER VOCALIC LL
|
||||
"\u0c66>\ue066;" // DIGIT ZERO
|
||||
"\u0c67>\ue067;" // DIGIT ONE
|
||||
"\u0c68>\ue068;" // DIGIT TWO
|
||||
"\u0c69>\ue069;" // DIGIT THREE
|
||||
"\u0c6a>\ue06a;" // DIGIT FOUR
|
||||
"\u0c6b>\ue06b;" // DIGIT FIVE
|
||||
"\u0c6c>\ue06c;" // DIGIT SIX
|
||||
"\u0c6d>\ue06d;" // DIGIT SEVEN
|
||||
"\u0c6e>\ue06e;" // DIGIT EIGHT
|
||||
"\u0c6f>\ue06f;" // DIGIT NINE
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
@ -5,7 +5,7 @@
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: Transliterator_index.txt
|
||||
// Date: Fri Mar 2 12:50:49 2001
|
||||
// Date: Thu Oct 25 22:17:22 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
//--------------------------------------------------------------------
|
||||
@ -61,77 +61,70 @@ translit_index {
|
||||
|
||||
// Bidirectional rule files
|
||||
|
||||
{ "Fullwidth-Halfwidth", "file", "fullhalf", "FORWARD" },
|
||||
{ "Halfwidth-Fullwidth", "file", "fullhalf", "REVERSE" },
|
||||
{ "Fullwidth-Halfwidth", "file", "translit_Fullwidth_Halfwidth", "FORWARD" },
|
||||
{ "Halfwidth-Fullwidth", "file", "translit_Fullwidth_Halfwidth", "REVERSE" },
|
||||
|
||||
{ "Latin-Arabic", "file", "larabic", "FORWARD" },
|
||||
{ "Arabic-Latin", "file", "larabic", "REVERSE" },
|
||||
{ "Latin-Cyrillic", "file", "translit_Cyrillic_Latin", "REVERSE" },
|
||||
{ "Cyrillic-Latin", "file", "translit_Cyrillic_Latin", "FORWARD" },
|
||||
|
||||
{ "Latin-Cyrillic", "file", "lcyril", "FORWARD" },
|
||||
{ "Cyrillic-Latin", "file", "lcyril", "REVERSE" },
|
||||
{ "Latin-Greek", "file", "translit_Greek_Latin", "REVERSE" },
|
||||
{ "Greek-Latin", "file", "translit_Greek_Latin", "FORWARD" },
|
||||
|
||||
//{ "Latin-Devanagari", "file", "ldevan", "FORWARD" },
|
||||
//{ "Devanagari-Latin", "file", "ldevan", "REVERSE" },
|
||||
{ "LowerLatin-Jamo", "internal", "translit_Latin_Jamo", "FORWARD" },
|
||||
{ "Latin-Jamo", "alias", "Any-Lower;LowerLatin-Jamo", "" },
|
||||
{ "Jamo-Latin", "file", "translit_Latin_Jamo", "REVERSE" },
|
||||
|
||||
{ "Latin-Greek", "file", "lgreek", "FORWARD" },
|
||||
{ "Greek-Latin", "file", "lgreek", "REVERSE" },
|
||||
{ "Latin-Katakana", "file", "translit_Latin_Katakana", "FORWARD" },
|
||||
{ "Katakana-Latin", "file", "translit_Latin_Katakana", "REVERSE" },
|
||||
|
||||
{ "Latin-Hebrew", "file", "lhebrew", "FORWARD" },
|
||||
{ "Hebrew-Latin", "file", "lhebrew", "REVERSE" },
|
||||
{ "Latin-Hiragana", "file", "translit_Hiragana_Latin", "REVERSE" },
|
||||
{ "Hiragana-Latin", "file", "translit_Hiragana_Latin", "FORWARD" },
|
||||
|
||||
{ "Latin-Jamo", "file", "ljamo", "FORWARD" },
|
||||
{ "Jamo-Latin", "file", "ljamo", "REVERSE" },
|
||||
{ "Hiragana-Katakana", "file", "translit_Hiragana_Katakana", "FORWARD" },
|
||||
{ "Katakana-Hiragana", "file", "translit_Hiragana_Katakana", "REVERSE" },
|
||||
|
||||
{ "Latin-Kana", "file", "lkana", "FORWARD" },
|
||||
{ "Kana-Latin", "file", "lkana", "REVERSE" },
|
||||
{ "Any-Accents", "file", "translit_Any_Accents", "FORWARD" },
|
||||
{ "Accents-Any", "file", "translit_Any_Accents", "REVERSE" },
|
||||
|
||||
{ "Hiragana-Katakana", "file", "kana", "FORWARD" },
|
||||
{ "Katakana-Hiragana", "file", "kana", "REVERSE" },
|
||||
|
||||
{ "StraightQuotes-CurlyQuotes", "file", "quotes", "FORWARD" },
|
||||
{ "CurlyQuotes-StraightQuotes", "file", "quotes", "REVERSE" },
|
||||
{ "Any-Publishing", "file", "translit_Any_Publishing", "FORWARD" },
|
||||
{ "Publishing-Any", "file", "translit_Any_Publishing", "REVERSE" },
|
||||
|
||||
// One way rules (forward only)
|
||||
|
||||
// Java only: { "Han-Pinyin", "file", "-", "FORWARD" },
|
||||
// Java only: { "Kanji-English", "file", "-", "FORWARD" },
|
||||
// Java only: { "Kanji-OnRomaji", "file", "-", "FORWARD" },
|
||||
{ "KeyboardEscape-Latin1", "file", "kbdescl1", "FORWARD" },
|
||||
|
||||
// Replaced by algorithmic transliterator:
|
||||
// { "UnicodeName-UnicodeChar", "file", "ucname", "FORWARD" },
|
||||
|
||||
// Compound rules
|
||||
|
||||
/// TODO
|
||||
{ "Latin-Hangul", "alias", "[:Latin:];Latin-Jamo;[\u1100-\u11FF]NFC", "" },
|
||||
{ "Latin-Hangul", "alias", "[\p{Latin}];Latin-Jamo;[\u1100-\u11FF]NFC", "" },
|
||||
{ "Hangul-Latin", "alias", "[\uAC00-\uD7AF];NFD;Jamo-Latin", "" },
|
||||
|
||||
// Inter-Indic composed rules
|
||||
{ "Latin-InterIndic", "internal", "Latin_InterIndic", "FORWARD" },
|
||||
{ "Devanagari-InterIndic", "internal", "Devanagari_InterIndic", "FORWARD" },
|
||||
{ "Bengali-InterIndic", "internal", "Bengali_InterIndic", "FORWARD" },
|
||||
{ "Gurmukhi-InterIndic", "internal", "Gurmukhi_InterIndic", "FORWARD" },
|
||||
{ "Gujarati-InterIndic", "internal", "Gujarati_InterIndic", "FORWARD" },
|
||||
{ "Oriya-InterIndic", "internal", "Oriya_InterIndic", "FORWARD" },
|
||||
{ "Tamil-InterIndic", "internal", "Tamil_InterIndic", "FORWARD" },
|
||||
{ "Telugu-InterIndic", "internal", "Telugu_InterIndic", "FORWARD" },
|
||||
{ "Kannada-InterIndic", "internal", "Kannada_InterIndic", "FORWARD" },
|
||||
{ "Malayalam-InterIndic", "internal", "Malayalam_InterIndic", "FORWARD" },
|
||||
{ "Latin-InterIndic", "internal", "translit_Latin_InterIndic", "FORWARD" },
|
||||
{ "Devanagari-InterIndic", "internal", "translit_Devanagari_InterIndic", "FORWARD" },
|
||||
{ "Bengali-InterIndic", "internal", "translit_Bengali_InterIndic", "FORWARD" },
|
||||
{ "Gurmukhi-InterIndic", "internal", "translit_Gurmukhi_InterIndic", "FORWARD" },
|
||||
{ "Gujarati-InterIndic", "internal", "translit_Gujarati_InterIndic", "FORWARD" },
|
||||
{ "Oriya-InterIndic", "internal", "translit_Oriya_InterIndic", "FORWARD" },
|
||||
{ "Tamil-InterIndic", "internal", "translit_Tamil_InterIndic", "FORWARD" },
|
||||
{ "Telugu-InterIndic", "internal", "translit_Telugu_InterIndic", "FORWARD" },
|
||||
{ "Kannada-InterIndic", "internal", "translit_Kannada_InterIndic", "FORWARD" },
|
||||
{ "Malayalam-InterIndic", "internal", "translit_Malayalam_InterIndic", "FORWARD" },
|
||||
|
||||
{ "InterIndic-Latin", "internal", "InterIndic_Latin", "FORWARD" },
|
||||
{ "InterIndic-Devanagari", "internal", "InterIndic_Devanagari", "FORWARD" },
|
||||
{ "InterIndic-Bengali", "internal", "InterIndic_Bengali", "FORWARD" },
|
||||
{ "InterIndic-Gurmukhi", "internal", "InterIndic_Gurmukhi", "FORWARD" },
|
||||
{ "InterIndic-Gujarati", "internal", "InterIndic_Gujarati", "FORWARD" },
|
||||
{ "InterIndic-Oriya", "internal", "InterIndic_Oriya", "FORWARD" },
|
||||
{ "InterIndic-Tamil", "internal", "InterIndic_Tamil", "FORWARD" },
|
||||
{ "InterIndic-Telugu", "internal", "InterIndic_Telugu", "FORWARD" },
|
||||
{ "InterIndic-Kannada", "internal", "InterIndic_Kannada", "FORWARD" },
|
||||
{ "InterIndic-Malayalam", "internal", "InterIndic_Malayalam", "FORWARD" },
|
||||
{ "InterIndic-Latin", "internal", "translit_InterIndic_Latin", "FORWARD" },
|
||||
{ "InterIndic-Devanagari", "internal", "translit_InterIndic_Devanagari", "FORWARD" },
|
||||
{ "InterIndic-Bengali", "internal", "translit_InterIndic_Bengali", "FORWARD" },
|
||||
{ "InterIndic-Gurmukhi", "internal", "translit_InterIndic_Gurmukhi", "FORWARD" },
|
||||
{ "InterIndic-Gujarati", "internal", "translit_InterIndic_Gujarati", "FORWARD" },
|
||||
{ "InterIndic-Oriya", "internal", "translit_InterIndic_Oriya", "FORWARD" },
|
||||
{ "InterIndic-Tamil", "internal", "translit_InterIndic_Tamil", "FORWARD" },
|
||||
{ "InterIndic-Telugu", "internal", "translit_InterIndic_Telugu", "FORWARD" },
|
||||
{ "InterIndic-Kannada", "internal", "translit_InterIndic_Kannada", "FORWARD" },
|
||||
{ "InterIndic-Malayalam", "internal", "translit_InterIndic_Malayalam", "FORWARD" },
|
||||
|
||||
//Latin-X transliterators
|
||||
{ "Latin-Devanagari", "alias", "NFD;Latin-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
//Latin-Indic transliterators
|
||||
{ "Latin-Devanagari", "alias", "NFD;Latin-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
{ "Latin-Bengali", "alias", "NFD;Latin-InterIndic;InterIndic-Bengali;NFC", "" },
|
||||
{ "Latin-Gurmukhi", "alias", "NFD;Latin-InterIndic;InterIndic-Gurmukhi;NFC", "" },
|
||||
{ "Latin-Gujarati", "alias", "NFD;Latin-InterIndic;InterIndic-Gujarati;NFC", "" },
|
||||
@ -141,8 +134,8 @@ translit_index {
|
||||
{ "Latin-Kannada", "alias", "NFD;Latin-InterIndic;InterIndic-Kannada;NFC", "" },
|
||||
{ "Latin-Malayalam", "alias", "NFD;Latin-InterIndic;InterIndic-Malayalam;NFC", "" },
|
||||
|
||||
//X-Latin transliterators
|
||||
{ "Devanagari-Latin","alias", "NFD;Devanagari-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
//Indic-Latin transliterators
|
||||
{ "Devanagari-Latin", "alias", "NFD;Devanagari-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
{ "Bengali-Latin", "alias", "NFD;Bengali-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
{ "Gurmukhi-Latin", "alias", "NFD;Gurmukhi-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
{ "Gujarati-Latin", "alias", "NFD;Gujarati-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
@ -152,7 +145,6 @@ translit_index {
|
||||
{ "Kannada-Latin", "alias", "NFD;Kannada-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
{ "Malayalam-Latin", "alias", "NFD;Malayalam-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
|
||||
|
||||
{ "Devanagari-Bengali", "alias", "NFD;Devanagari-InterIndic;InterIndic-Bengali;NFC", "" },
|
||||
{ "Devanagari-Gurmukhi", "alias", "NFD;Devanagari-InterIndic;InterIndic-Gurmukhi;NFC", "" },
|
||||
{ "Devanagari-Gujarati", "alias", "NFD;Devanagari-InterIndic;InterIndic-Gujarati;NFC", "" },
|
||||
@ -177,7 +169,7 @@ translit_index {
|
||||
{ "Gurmukhi-Telugu", "alias", "NFD;Gurmukhi-InterIndic;InterIndic-Telugu;NFC", "" },
|
||||
{ "Gurmukhi-Kannada", "alias", "NFD;Gurmukhi-InterIndic;InterIndic-Kannada;NFC", "" },
|
||||
{ "Gurmukhi-Malayalam", "alias", "NFD;Gurmukhi-InterIndic;InterIndic-Malayalam;NFC", "" },
|
||||
{ "Gujarati-Devanagari", "alias", "NFD;Gujarati-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
{ "Gujarati-Devanagari", "alias", "Gujarati-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
{ "Gujarati-Bengali", "alias", "NFD;Gujarati-InterIndic;InterIndic-Bengali;NFC", "" },
|
||||
{ "Gujarati-Gurmukhi", "alias", "NFD;Gujarati-InterIndic;InterIndic-Gurmukhi;NFC", "" },
|
||||
{ "Gujarati-Oriya", "alias", "NFD;Gujarati-InterIndic;InterIndic-Oriya;NFC", "" },
|
||||
@ -227,5 +219,6 @@ translit_index {
|
||||
{ "Malayalam-Kannada", "alias", "NFD;Malayalam-InterIndic;InterIndic-Kannada;NFC", "" },
|
||||
|
||||
// eof
|
||||
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user