ICU-199 update rules
X-SVN-Rev: 565
This commit is contained in:
parent
8171577fca
commit
169072e0d5
File diff suppressed because it is too large
Load Diff
270
icu4c/data/translit/fullhalf.txt
Normal file
270
icu4c/data/translit/fullhalf.txt
Normal file
@ -0,0 +1,270 @@
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 2000, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date Name Description
|
||||
// 01/13/2000 aliu Creation.
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
fullhalf {
|
||||
Rule {
|
||||
// Mechanically generated from the Unicode Character Database
|
||||
|
||||
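// Multi-character rules: a fullwidth voiced or semi-voiced katakana letter <> the halfwidth base letter followed by the halfwidth voiced or semi-voiced sound mark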
|
||||
|
||||
"\u30AC<>\uFF76\uFF9E;" // to KATAKANA LETTER GA
|
||||
"\u30AE<>\uFF77\uFF9E;" // to KATAKANA LETTER GI
|
||||
"\u30B0<>\uFF78\uFF9E;" // to KATAKANA LETTER GU
|
||||
"\u30B2<>\uFF79\uFF9E;" // to KATAKANA LETTER GE
|
||||
"\u30B4<>\uFF7A\uFF9E;" // to KATAKANA LETTER GO
|
||||
"\u30B6<>\uFF7B\uFF9E;" // to KATAKANA LETTER ZA
|
||||
"\u30B8<>\uFF7C\uFF9E;" // to KATAKANA LETTER ZI
|
||||
"\u30BA<>\uFF7D\uFF9E;" // to KATAKANA LETTER ZU
|
||||
"\u30BC<>\uFF7E\uFF9E;" // to KATAKANA LETTER ZE
|
||||
"\u30BE<>\uFF7F\uFF9E;" // to KATAKANA LETTER ZO
|
||||
"\u30C0<>\uFF80\uFF9E;" // to KATAKANA LETTER DA
|
||||
"\u30C2<>\uFF81\uFF9E;" // to KATAKANA LETTER DI
|
||||
"\u30C5<>\uFF82\uFF9E;" // to KATAKANA LETTER DU
|
||||
"\u30C7<>\uFF83\uFF9E;" // to KATAKANA LETTER DE
|
||||
"\u30C9<>\uFF84\uFF9E;" // to KATAKANA LETTER DO
|
||||
"\u30D0<>\uFF8A\uFF9E;" // to KATAKANA LETTER BA
|
||||
"\u30D1<>\uFF8A\uFF9F;" // to KATAKANA LETTER PA
|
||||
"\u30D3<>\uFF8B\uFF9E;" // to KATAKANA LETTER BI
|
||||
"\u30D4<>\uFF8B\uFF9F;" // to KATAKANA LETTER PI
|
||||
"\u30D6<>\uFF8C\uFF9E;" // to KATAKANA LETTER BU
|
||||
"\u30D7<>\uFF8C\uFF9F;" // to KATAKANA LETTER PU
|
||||
"\u30D9<>\uFF8D\uFF9E;" // to KATAKANA LETTER BE
|
||||
"\u30DA<>\uFF8D\uFF9F;" // to KATAKANA LETTER PE
|
||||
"\u30DC<>\uFF8E\uFF9E;" // to KATAKANA LETTER BO
|
||||
"\u30DD<>\uFF8E\uFF9F;" // to KATAKANA LETTER PO
|
||||
"\u30F4<>\uFF73\uFF9E;" // to KATAKANA LETTER VU
|
||||
"\u30F7<>\uFF9C\uFF9E;" // to KATAKANA LETTER VA
|
||||
"\u30FA<>\uFF66\uFF9E;" // to KATAKANA LETTER VO
|
||||
|
||||
// single character
|
||||
|
||||
"\uFF01<>'!';" // from FULLWIDTH EXCLAMATION MARK
|
||||
"\uFF02<>'\"';" // from FULLWIDTH QUOTATION MARK
|
||||
"\uFF03<>'#';" // from FULLWIDTH NUMBER SIGN
|
||||
"\uFF04<>'$';" // from FULLWIDTH DOLLAR SIGN
|
||||
"\uFF05<>'%';" // from FULLWIDTH PERCENT SIGN
|
||||
"\uFF06<>'&';" // from FULLWIDTH AMPERSAND
|
||||
"\uFF07<>'';" // from FULLWIDTH APOSTROPHE
|
||||
"\uFF08<>'(';" // from FULLWIDTH LEFT PARENTHESIS
|
||||
"\uFF09<>')';" // from FULLWIDTH RIGHT PARENTHESIS
|
||||
"\uFF0A<>'*';" // from FULLWIDTH ASTERISK
|
||||
"\uFF0B<>'+';" // from FULLWIDTH PLUS SIGN
|
||||
"\uFF0C<>',';" // from FULLWIDTH COMMA
|
||||
"\uFF0D<>'-';" // from FULLWIDTH HYPHEN-MINUS
|
||||
"\uFF0E<>'.';" // from FULLWIDTH FULL STOP
|
||||
"\uFF0F<>'/';" // from FULLWIDTH SOLIDUS
|
||||
"\uFF10<>'0';" // from FULLWIDTH DIGIT ZERO
|
||||
"\uFF11<>'1';" // from FULLWIDTH DIGIT ONE
|
||||
"\uFF12<>'2';" // from FULLWIDTH DIGIT TWO
|
||||
"\uFF13<>'3';" // from FULLWIDTH DIGIT THREE
|
||||
"\uFF14<>'4';" // from FULLWIDTH DIGIT FOUR
|
||||
"\uFF15<>'5';" // from FULLWIDTH DIGIT FIVE
|
||||
"\uFF16<>'6';" // from FULLWIDTH DIGIT SIX
|
||||
"\uFF17<>'7';" // from FULLWIDTH DIGIT SEVEN
|
||||
"\uFF18<>'8';" // from FULLWIDTH DIGIT EIGHT
|
||||
"\uFF19<>'9';" // from FULLWIDTH DIGIT NINE
|
||||
"\uFF1A<>':';" // from FULLWIDTH COLON
|
||||
"\uFF1B<>';';" // from FULLWIDTH SEMICOLON
|
||||
"\uFF1C<>'<';" // from FULLWIDTH LESS-THAN SIGN
|
||||
"\uFF1D<>'=';" // from FULLWIDTH EQUALS SIGN
|
||||
"\uFF1E<>'>';" // from FULLWIDTH GREATER-THAN SIGN
|
||||
"\uFF1F<>'?';" // from FULLWIDTH QUESTION MARK
|
||||
"\uFF20<>'@';" // from FULLWIDTH COMMERCIAL AT
|
||||
"\uFF21<>A;" // from FULLWIDTH LATIN CAPITAL LETTER A
|
||||
"\uFF22<>B;" // from FULLWIDTH LATIN CAPITAL LETTER B
|
||||
"\uFF23<>C;" // from FULLWIDTH LATIN CAPITAL LETTER C
|
||||
"\uFF24<>D;" // from FULLWIDTH LATIN CAPITAL LETTER D
|
||||
"\uFF25<>E;" // from FULLWIDTH LATIN CAPITAL LETTER E
|
||||
"\uFF26<>F;" // from FULLWIDTH LATIN CAPITAL LETTER F
|
||||
"\uFF27<>G;" // from FULLWIDTH LATIN CAPITAL LETTER G
|
||||
"\uFF28<>H;" // from FULLWIDTH LATIN CAPITAL LETTER H
|
||||
"\uFF29<>I;" // from FULLWIDTH LATIN CAPITAL LETTER I
|
||||
"\uFF2A<>J;" // from FULLWIDTH LATIN CAPITAL LETTER J
|
||||
"\uFF2B<>K;" // from FULLWIDTH LATIN CAPITAL LETTER K
|
||||
"\uFF2C<>L;" // from FULLWIDTH LATIN CAPITAL LETTER L
|
||||
"\uFF2D<>M;" // from FULLWIDTH LATIN CAPITAL LETTER M
|
||||
"\uFF2E<>N;" // from FULLWIDTH LATIN CAPITAL LETTER N
|
||||
"\uFF2F<>O;" // from FULLWIDTH LATIN CAPITAL LETTER O
|
||||
"\uFF30<>P;" // from FULLWIDTH LATIN CAPITAL LETTER P
|
||||
"\uFF31<>Q;" // from FULLWIDTH LATIN CAPITAL LETTER Q
|
||||
"\uFF32<>R;" // from FULLWIDTH LATIN CAPITAL LETTER R
|
||||
"\uFF33<>S;" // from FULLWIDTH LATIN CAPITAL LETTER S
|
||||
"\uFF34<>T;" // from FULLWIDTH LATIN CAPITAL LETTER T
|
||||
"\uFF35<>U;" // from FULLWIDTH LATIN CAPITAL LETTER U
|
||||
"\uFF36<>V;" // from FULLWIDTH LATIN CAPITAL LETTER V
|
||||
"\uFF37<>W;" // from FULLWIDTH LATIN CAPITAL LETTER W
|
||||
"\uFF38<>X;" // from FULLWIDTH LATIN CAPITAL LETTER X
|
||||
"\uFF39<>Y;" // from FULLWIDTH LATIN CAPITAL LETTER Y
|
||||
"\uFF3A<>Z;" // from FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
"\uFF3B<>'[';" // from FULLWIDTH LEFT SQUARE BRACKET
|
||||
"\uFF3C<>'\\';" // from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
|
||||
"\uFF3D<>']';" // from FULLWIDTH RIGHT SQUARE BRACKET
|
||||
"\uFF3E<>'^';" // from FULLWIDTH CIRCUMFLEX ACCENT
|
||||
"\uFF3F<>'_';" // from FULLWIDTH LOW LINE
|
||||
"\uFF40<>'`';" // from FULLWIDTH GRAVE ACCENT
|
||||
"\uFF41<>a;" // from FULLWIDTH LATIN SMALL LETTER A
|
||||
"\uFF42<>b;" // from FULLWIDTH LATIN SMALL LETTER B
|
||||
"\uFF43<>c;" // from FULLWIDTH LATIN SMALL LETTER C
|
||||
"\uFF44<>d;" // from FULLWIDTH LATIN SMALL LETTER D
|
||||
"\uFF45<>e;" // from FULLWIDTH LATIN SMALL LETTER E
|
||||
"\uFF46<>f;" // from FULLWIDTH LATIN SMALL LETTER F
|
||||
"\uFF47<>g;" // from FULLWIDTH LATIN SMALL LETTER G
|
||||
"\uFF48<>h;" // from FULLWIDTH LATIN SMALL LETTER H
|
||||
"\uFF49<>i;" // from FULLWIDTH LATIN SMALL LETTER I
|
||||
"\uFF4A<>j;" // from FULLWIDTH LATIN SMALL LETTER J
|
||||
"\uFF4B<>k;" // from FULLWIDTH LATIN SMALL LETTER K
|
||||
"\uFF4C<>l;" // from FULLWIDTH LATIN SMALL LETTER L
|
||||
"\uFF4D<>m;" // from FULLWIDTH LATIN SMALL LETTER M
|
||||
"\uFF4E<>n;" // from FULLWIDTH LATIN SMALL LETTER N
|
||||
"\uFF4F<>o;" // from FULLWIDTH LATIN SMALL LETTER O
|
||||
"\uFF50<>p;" // from FULLWIDTH LATIN SMALL LETTER P
|
||||
"\uFF51<>q;" // from FULLWIDTH LATIN SMALL LETTER Q
|
||||
"\uFF52<>r;" // from FULLWIDTH LATIN SMALL LETTER R
|
||||
"\uFF53<>s;" // from FULLWIDTH LATIN SMALL LETTER S
|
||||
"\uFF54<>t;" // from FULLWIDTH LATIN SMALL LETTER T
|
||||
"\uFF55<>u;" // from FULLWIDTH LATIN SMALL LETTER U
|
||||
"\uFF56<>v;" // from FULLWIDTH LATIN SMALL LETTER V
|
||||
"\uFF57<>w;" // from FULLWIDTH LATIN SMALL LETTER W
|
||||
"\uFF58<>x;" // from FULLWIDTH LATIN SMALL LETTER X
|
||||
"\uFF59<>y;" // from FULLWIDTH LATIN SMALL LETTER Y
|
||||
"\uFF5A<>z;" // from FULLWIDTH LATIN SMALL LETTER Z
|
||||
"\uFF5B<>'{';" // from FULLWIDTH LEFT CURLY BRACKET
|
||||
"\uFF5C<>'|';" // from FULLWIDTH VERTICAL LINE
|
||||
"\uFF5D<>'}';" // from FULLWIDTH RIGHT CURLY BRACKET
|
||||
"\uFF5E<>'~';" // from FULLWIDTH TILDE
|
||||
"\u3002<>\uFF61;" // to HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
"\u300C<>\uFF62;" // to HALFWIDTH LEFT CORNER BRACKET
|
||||
"\u300D<>\uFF63;" // to HALFWIDTH RIGHT CORNER BRACKET
|
||||
"\u3001<>\uFF64;" // to HALFWIDTH IDEOGRAPHIC COMMA
|
||||
"\u30FB<>\uFF65;" // to HALFWIDTH KATAKANA MIDDLE DOT
|
||||
"\u30F2<>\uFF66;" // to HALFWIDTH KATAKANA LETTER WO
|
||||
"\u30A1<>\uFF67;" // to HALFWIDTH KATAKANA LETTER SMALL A
|
||||
"\u30A3<>\uFF68;" // to HALFWIDTH KATAKANA LETTER SMALL I
|
||||
"\u30A5<>\uFF69;" // to HALFWIDTH KATAKANA LETTER SMALL U
|
||||
"\u30A7<>\uFF6A;" // to HALFWIDTH KATAKANA LETTER SMALL E
|
||||
"\u30A9<>\uFF6B;" // to HALFWIDTH KATAKANA LETTER SMALL O
|
||||
"\u30E3<>\uFF6C;" // to HALFWIDTH KATAKANA LETTER SMALL YA
|
||||
"\u30E5<>\uFF6D;" // to HALFWIDTH KATAKANA LETTER SMALL YU
|
||||
"\u30E7<>\uFF6E;" // to HALFWIDTH KATAKANA LETTER SMALL YO
|
||||
"\u30C3<>\uFF6F;" // to HALFWIDTH KATAKANA LETTER SMALL TU
|
||||
"\u30FC<>\uFF70;" // to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
"\u30A2<>\uFF71;" // to HALFWIDTH KATAKANA LETTER A
|
||||
"\u30A4<>\uFF72;" // to HALFWIDTH KATAKANA LETTER I
|
||||
"\u30A6<>\uFF73;" // to HALFWIDTH KATAKANA LETTER U
|
||||
"\u30A8<>\uFF74;" // to HALFWIDTH KATAKANA LETTER E
|
||||
"\u30AA<>\uFF75;" // to HALFWIDTH KATAKANA LETTER O
|
||||
"\u30AB<>\uFF76;" // to HALFWIDTH KATAKANA LETTER KA
|
||||
"\u30AD<>\uFF77;" // to HALFWIDTH KATAKANA LETTER KI
|
||||
"\u30AF<>\uFF78;" // to HALFWIDTH KATAKANA LETTER KU
|
||||
"\u30B1<>\uFF79;" // to HALFWIDTH KATAKANA LETTER KE
|
||||
"\u30B3<>\uFF7A;" // to HALFWIDTH KATAKANA LETTER KO
|
||||
"\u30B5<>\uFF7B;" // to HALFWIDTH KATAKANA LETTER SA
|
||||
"\u30B7<>\uFF7C;" // to HALFWIDTH KATAKANA LETTER SI
|
||||
"\u30B9<>\uFF7D;" // to HALFWIDTH KATAKANA LETTER SU
|
||||
"\u30BB<>\uFF7E;" // to HALFWIDTH KATAKANA LETTER SE
|
||||
"\u30BD<>\uFF7F;" // to HALFWIDTH KATAKANA LETTER SO
|
||||
"\u30BF<>\uFF80;" // to HALFWIDTH KATAKANA LETTER TA
|
||||
"\u30C1<>\uFF81;" // to HALFWIDTH KATAKANA LETTER TI
|
||||
"\u30C4<>\uFF82;" // to HALFWIDTH KATAKANA LETTER TU
|
||||
"\u30C6<>\uFF83;" // to HALFWIDTH KATAKANA LETTER TE
|
||||
"\u30C8<>\uFF84;" // to HALFWIDTH KATAKANA LETTER TO
|
||||
"\u30CA<>\uFF85;" // to HALFWIDTH KATAKANA LETTER NA
|
||||
"\u30CB<>\uFF86;" // to HALFWIDTH KATAKANA LETTER NI
|
||||
"\u30CC<>\uFF87;" // to HALFWIDTH KATAKANA LETTER NU
|
||||
"\u30CD<>\uFF88;" // to HALFWIDTH KATAKANA LETTER NE
|
||||
"\u30CE<>\uFF89;" // to HALFWIDTH KATAKANA LETTER NO
|
||||
"\u30CF<>\uFF8A;" // to HALFWIDTH KATAKANA LETTER HA
|
||||
"\u30D2<>\uFF8B;" // to HALFWIDTH KATAKANA LETTER HI
|
||||
"\u30D5<>\uFF8C;" // to HALFWIDTH KATAKANA LETTER HU
|
||||
"\u30D8<>\uFF8D;" // to HALFWIDTH KATAKANA LETTER HE
|
||||
"\u30DB<>\uFF8E;" // to HALFWIDTH KATAKANA LETTER HO
|
||||
"\u30DE<>\uFF8F;" // to HALFWIDTH KATAKANA LETTER MA
|
||||
"\u30DF<>\uFF90;" // to HALFWIDTH KATAKANA LETTER MI
|
||||
"\u30E0<>\uFF91;" // to HALFWIDTH KATAKANA LETTER MU
|
||||
"\u30E1<>\uFF92;" // to HALFWIDTH KATAKANA LETTER ME
|
||||
"\u30E2<>\uFF93;" // to HALFWIDTH KATAKANA LETTER MO
|
||||
"\u30E4<>\uFF94;" // to HALFWIDTH KATAKANA LETTER YA
|
||||
"\u30E6<>\uFF95;" // to HALFWIDTH KATAKANA LETTER YU
|
||||
"\u30E8<>\uFF96;" // to HALFWIDTH KATAKANA LETTER YO
|
||||
"\u30E9<>\uFF97;" // to HALFWIDTH KATAKANA LETTER RA
|
||||
"\u30EA<>\uFF98;" // to HALFWIDTH KATAKANA LETTER RI
|
||||
"\u30EB<>\uFF99;" // to HALFWIDTH KATAKANA LETTER RU
|
||||
"\u30EC<>\uFF9A;" // to HALFWIDTH KATAKANA LETTER RE
|
||||
"\u30ED<>\uFF9B;" // to HALFWIDTH KATAKANA LETTER RO
|
||||
"\u30EF<>\uFF9C;" // to HALFWIDTH KATAKANA LETTER WA
|
||||
"\u30F3<>\uFF9D;" // to HALFWIDTH KATAKANA LETTER N
|
||||
"\u3099<>\uFF9E;" // to HALFWIDTH KATAKANA VOICED SOUND MARK
|
||||
"\u309A<>\uFF9F;" // to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
"\u1160<>\uFFA0;" // to HALFWIDTH HANGUL FILLER
|
||||
"\u1100<>\uFFA1;" // to HALFWIDTH HANGUL LETTER KIYEOK
|
||||
"\u1101<>\uFFA2;" // to HALFWIDTH HANGUL LETTER SSANGKIYEOK
|
||||
"\u11AA<>\uFFA3;" // to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
|
||||
"\u1102<>\uFFA4;" // to HALFWIDTH HANGUL LETTER NIEUN
|
||||
"\u11AC<>\uFFA5;" // to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
|
||||
"\u11AD<>\uFFA6;" // to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
|
||||
"\u1103<>\uFFA7;" // to HALFWIDTH HANGUL LETTER TIKEUT
|
||||
"\u1104<>\uFFA8;" // to HALFWIDTH HANGUL LETTER SSANGTIKEUT
|
||||
"\u1105<>\uFFA9;" // to HALFWIDTH HANGUL LETTER RIEUL
|
||||
"\u11B0<>\uFFAA;" // to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
|
||||
"\u11B1<>\uFFAB;" // to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
|
||||
"\u11B2<>\uFFAC;" // to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
|
||||
"\u11B3<>\uFFAD;" // to HALFWIDTH HANGUL LETTER RIEUL-SIOS
|
||||
"\u11B4<>\uFFAE;" // to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
|
||||
"\u11B5<>\uFFAF;" // to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
|
||||
"\u111A<>\uFFB0;" // to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
|
||||
"\u1106<>\uFFB1;" // to HALFWIDTH HANGUL LETTER MIEUM
|
||||
"\u1107<>\uFFB2;" // to HALFWIDTH HANGUL LETTER PIEUP
|
||||
"\u1108<>\uFFB3;" // to HALFWIDTH HANGUL LETTER SSANGPIEUP
|
||||
"\u1121<>\uFFB4;" // to HALFWIDTH HANGUL LETTER PIEUP-SIOS
|
||||
"\u1109<>\uFFB5;" // to HALFWIDTH HANGUL LETTER SIOS
|
||||
"\u110A<>\uFFB6;" // to HALFWIDTH HANGUL LETTER SSANGSIOS
|
||||
"\u110B<>\uFFB7;" // to HALFWIDTH HANGUL LETTER IEUNG
|
||||
"\u110C<>\uFFB8;" // to HALFWIDTH HANGUL LETTER CIEUC
|
||||
"\u110D<>\uFFB9;" // to HALFWIDTH HANGUL LETTER SSANGCIEUC
|
||||
"\u110E<>\uFFBA;" // to HALFWIDTH HANGUL LETTER CHIEUCH
|
||||
"\u110F<>\uFFBB;" // to HALFWIDTH HANGUL LETTER KHIEUKH
|
||||
"\u1110<>\uFFBC;" // to HALFWIDTH HANGUL LETTER THIEUTH
|
||||
"\u1111<>\uFFBD;" // to HALFWIDTH HANGUL LETTER PHIEUPH
|
||||
"\u1112<>\uFFBE;" // to HALFWIDTH HANGUL LETTER HIEUH
|
||||
"\u1161<>\uFFC2;" // to HALFWIDTH HANGUL LETTER A
|
||||
"\u1162<>\uFFC3;" // to HALFWIDTH HANGUL LETTER AE
|
||||
"\u1163<>\uFFC4;" // to HALFWIDTH HANGUL LETTER YA
|
||||
"\u1164<>\uFFC5;" // to HALFWIDTH HANGUL LETTER YAE
|
||||
"\u1165<>\uFFC6;" // to HALFWIDTH HANGUL LETTER EO
|
||||
"\u1166<>\uFFC7;" // to HALFWIDTH HANGUL LETTER E
|
||||
"\u1167<>\uFFCA;" // to HALFWIDTH HANGUL LETTER YEO
|
||||
"\u1168<>\uFFCB;" // to HALFWIDTH HANGUL LETTER YE
|
||||
"\u1169<>\uFFCC;" // to HALFWIDTH HANGUL LETTER O
|
||||
"\u116A<>\uFFCD;" // to HALFWIDTH HANGUL LETTER WA
|
||||
"\u116B<>\uFFCE;" // to HALFWIDTH HANGUL LETTER WAE
|
||||
"\u116C<>\uFFCF;" // to HALFWIDTH HANGUL LETTER OE
|
||||
"\u116D<>\uFFD2;" // to HALFWIDTH HANGUL LETTER YO
|
||||
"\u116E<>\uFFD3;" // to HALFWIDTH HANGUL LETTER U
|
||||
"\u116F<>\uFFD4;" // to HALFWIDTH HANGUL LETTER WEO
|
||||
"\u1170<>\uFFD5;" // to HALFWIDTH HANGUL LETTER WE
|
||||
"\u1171<>\uFFD6;" // to HALFWIDTH HANGUL LETTER WI
|
||||
"\u1172<>\uFFD7;" // to HALFWIDTH HANGUL LETTER YU
|
||||
"\u1173<>\uFFDA;" // to HALFWIDTH HANGUL LETTER EU
|
||||
"\u1174<>\uFFDB;" // to HALFWIDTH HANGUL LETTER YI
|
||||
"\u1175<>\uFFDC;" // to HALFWIDTH HANGUL LETTER I
|
||||
"\uFFE0<>'\u00a2';" // from FULLWIDTH CENT SIGN
|
||||
"\uFFE1<>'\u00a3';" // from FULLWIDTH POUND SIGN
|
||||
"\uFFE2<>'\u00ac';" // from FULLWIDTH NOT SIGN
|
||||
"\uFFE3<>' '\u0304;" // from FULLWIDTH MACRON
|
||||
"\uFFE4<>'\u00a6';" // from FULLWIDTH BROKEN BAR
|
||||
"\uFFE5<>'\u00a5';" // from FULLWIDTH YEN SIGN
|
||||
"\uFFE6<>\u20A9;" // from FULLWIDTH WON SIGN
|
||||
"\u2502<>\uFFE8;" // to HALFWIDTH FORMS LIGHT VERTICAL
|
||||
"\u2190<>\uFFE9;" // to HALFWIDTH LEFTWARDS ARROW
|
||||
"\u2191<>\uFFEA;" // to HALFWIDTH UPWARDS ARROW
|
||||
"\u2192<>\uFFEB;" // to HALFWIDTH RIGHTWARDS ARROW
|
||||
"\u2193<>\uFFEC;" // to HALFWIDTH DOWNWARDS ARROW
|
||||
"\u25A0<>\uFFED;" // to HALFWIDTH BLACK SQUARE
|
||||
"\u25CB<>\uFFEE;" // to HALFWIDTH WHITE CIRCLE
|
||||
}
|
||||
}
|
307
icu4c/data/translit/lcyril.txt
Normal file
307
icu4c/data/translit/lcyril.txt
Normal file
@ -0,0 +1,307 @@
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (C) 1999, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date Name Description
|
||||
// 11/17/99 aliu Creation.
|
||||
// 12/10/99 aliu Fix case handling.
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
lcyril {
|
||||
Rule {
|
||||
//* /* This rule set is designed to be a general Latin-Cyrillic
|
||||
//* transliteration. The standard Russian transliterations
|
||||
//* are generally used for the letters from Russian,
|
||||
//* with additional Cyrillic characters given consistent
|
||||
//* mappings.
|
||||
//* */
|
||||
|
||||
"S-hacek=\u0160;"
|
||||
"s-hacek=\u0161;"
|
||||
|
||||
"YO=\u0401;"
|
||||
"J=\u0408;"
|
||||
"A=\u0410;"
|
||||
"B=\u0411;"
|
||||
"V=\u0412;"
|
||||
"G=\u0413;"
|
||||
"D=\u0414;"
|
||||
"YE=\u0415;"
|
||||
"ZH=\u0416;"
|
||||
"Z=\u0417;"
|
||||
"YI=\u0418;"
|
||||
"Y=\u0419;"
|
||||
"K=\u041A;"
|
||||
"L=\u041B;"
|
||||
"M=\u041C;"
|
||||
"N=\u041D;"
|
||||
"O=\u041E;"
|
||||
"P=\u041F;"
|
||||
"R=\u0420;"
|
||||
"S=\u0421;"
|
||||
"T=\u0422;"
|
||||
"U=\u0423;"
|
||||
"F=\u0424;"
|
||||
"KH=\u0425;"
|
||||
"TS=\u0426;"
|
||||
"CH=\u0427;"
|
||||
"SH=\u0428;"
|
||||
"SHCH=\u0429;"
|
||||
"HARD=\u042A;"
|
||||
"I=\u042B;"
|
||||
"SOFT=\u042C;"
|
||||
"E=\u042D;"
|
||||
"YU=\u042E;"
|
||||
"YA=\u042F;"
|
||||
|
||||
// Lowercase
|
||||
|
||||
"a=\u0430;"
|
||||
"b=\u0431;"
|
||||
"v=\u0432;"
|
||||
"g=\u0433;"
|
||||
"d=\u0434;"
|
||||
"ye=\u0435;"
|
||||
"zh=\u0436;"
|
||||
"z=\u0437;"
|
||||
"yi=\u0438;"
|
||||
"y=\u0439;"
|
||||
"k=\u043a;"
|
||||
"l=\u043b;"
|
||||
"m=\u043c;"
|
||||
"n=\u043d;"
|
||||
"o=\u043e;"
|
||||
"p=\u043f;"
|
||||
"r=\u0440;"
|
||||
"s=\u0441;"
|
||||
"t=\u0442;"
|
||||
"u=\u0443;"
|
||||
"f=\u0444;"
|
||||
"kh=\u0445;"
|
||||
"ts=\u0446;"
|
||||
"ch=\u0447;"
|
||||
"sh=\u0448;"
|
||||
"shch=\u0449;"
|
||||
"hard=\u044a;"
|
||||
"i=\u044b;"
|
||||
"soft=\u044c;"
|
||||
"e=\u044d;"
|
||||
"yu=\u044e;"
|
||||
"ya=\u044f;"
|
||||
|
||||
"yo=\u0451;"
|
||||
"j=\u0458;"
|
||||
|
||||
// variables
|
||||
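// Some are defined twice, under an uppercase and a lowercase name, so that the rules below still resolve their variables after being copied and lowercased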
|
||||
|
||||
"csoft=[eiyEIY];"
|
||||
"CSOFT=[eiyEIY];"
|
||||
|
||||
"BECOMES_H=[{HARD}{hard}];"
|
||||
"becomes_h=[{HARD}{hard}];"
|
||||
|
||||
"BECOMES_S=[{S}{s}];"
|
||||
"becomes_s=[{S}{s}];"
|
||||
|
||||
"BECOMES_C=[{CH}{ch}];"
|
||||
"becomes_c=[{CH}{ch}];"
|
||||
|
||||
"BECOMES_VOWEL=[{A}{E}{I}{O}{U}{a}{e}{i}{o}{u}];"
|
||||
"becomes_vowel=[{A}{E}{I}{O}{U}{a}{e}{i}{o}{u}];"
|
||||
|
||||
"letter=[[:Lu:][:Ll:]];"
|
||||
"lower=[[:Ll:]];"
|
||||
|
||||
//* /*
|
||||
//* Modified to combine display transliterator and typing transliterator.
|
||||
//* The display mapping uses accents for the "soft" vowels.
|
||||
//* It does not, although it could, use characters like \u0161 instead of digraphs
|
||||
//* like sh.
|
||||
//* */
|
||||
|
||||
// #############################################
|
||||
// Special titlecase forms, not duplicated
|
||||
// #############################################
|
||||
|
||||
"Ch>{CH};" "Ch<{CH}({lower};"
|
||||
"Kh>{KH};" "Kh<{KH}({lower};"
|
||||
"Shch>{SHCH};" "Shch<{SHCH}({lower};"
|
||||
"Sh>{SH};" "Sh<{SH}({lower};"
|
||||
"Ts>{TS};" "Ts<{TS}({lower};"
|
||||
"Zh>{ZH};" "Zh<{ZH}({lower};"
|
||||
"Yi>{YI};" //+ "Yi<{YI}({lower};"
|
||||
"Ye>{YE};" //+ "Ye<{YE}({lower};"
|
||||
"Yo>{YO};" //+ "Yo<{YO}({lower};"
|
||||
"Yu>{YU};" //+ "Yu<{YU}({lower};"
|
||||
"Ya>{YA};" //+ "Ya<{YA}({lower};"
|
||||
|
||||
// #############################################
|
||||
// Rules to Duplicate
|
||||
// To get the lowercase versions, copy these and lowercase
|
||||
// #############################################
|
||||
|
||||
// variant spellings in English
|
||||
|
||||
"SHTCH>{SHCH};"
|
||||
"TCH>{CH};"
|
||||
"TH>{Z};"
|
||||
"Q>{K};"
|
||||
"WH>{V};"
|
||||
"W>{V};"
|
||||
"X>{K}{S};" //+ "X<{K}{S};"
|
||||
|
||||
// Separate letters that would otherwise join
|
||||
|
||||
"SH''<{SH}({BECOMES_C};"
|
||||
"T''<{T}({BECOMES_S};"
|
||||
|
||||
"K''<{K}({BECOMES_H};"
|
||||
"S''<{S}({BECOMES_H};"
|
||||
"T''<{T}({BECOMES_H};"
|
||||
"Z''<{Z}({BECOMES_H};"
|
||||
|
||||
"Y''<{Y}({BECOMES_VOWEL};"
|
||||
|
||||
// Main letters
|
||||
|
||||
"A<>{A};"
|
||||
"B<>{B};"
|
||||
"CH<>{CH};"
|
||||
"D<>{D};"
|
||||
"E<>{E};"
|
||||
"F<>{F};"
|
||||
"G<>{G};"
|
||||
"\u00cc<>{YI};"
|
||||
"I<>{I};"
|
||||
"KH<>{KH};"
|
||||
"K<>{K};"
|
||||
"L<>{L};"
|
||||
"M<>{M};"
|
||||
"N<>{N};"
|
||||
"O<>{O};"
|
||||
"P<>{P};"
|
||||
"R<>{R};"
|
||||
"SHCH<>{SHCH};"
|
||||
"SH>{SH};" //+ "SH<{SH};"
|
||||
"{S-hacek}<>{SH};"
|
||||
"S<>{S};"
|
||||
"TS<>{TS};"
|
||||
"T<>{T};"
|
||||
"U<>{U};"
|
||||
"V<>{V};"
|
||||
// Accented Latin forms used below: \u00cc = YI, \u00c0 = YA, \u00c8 = YE, \u00d2 = YO, \u00d9 = YU
|
||||
"YE>{YE};" //+ "YE<{YE};"
|
||||
"\u00c8<>{YE};"
|
||||
"YO>{YO};" //+ "YO<{YO};"
|
||||
"\u00d2<>{YO};"
|
||||
"YU>{YU};" //+ "YU<{YU};"
|
||||
"\u00d9<>{YU};"
|
||||
"YA>{YA};" //+ "YA<{YA};"
|
||||
"\u00c0<>{YA};"
|
||||
"Y<>{Y};"
|
||||
"ZH<>{ZH};"
|
||||
"Z<>{Z};"
|
||||
|
||||
"H<>{HARD};"
|
||||
"\u0178<>{SOFT};"
|
||||
|
||||
// Non-Russian
|
||||
|
||||
"J<>{J};"
|
||||
|
||||
// variant spellings in English
|
||||
|
||||
"C({csoft}>{S};"
|
||||
"C>{K};"
|
||||
|
||||
// #############################################
|
||||
// Duplicated Rules
|
||||
// Copy and lowercase the above rules
|
||||
// #############################################
|
||||
|
||||
// variant spellings in English
|
||||
|
||||
"shtch>{shch};"
|
||||
"tch>{ch};"
|
||||
"th>{z};"
|
||||
"q>{k};"
|
||||
"wh>{v};"
|
||||
"w>{v};"
|
||||
"x>{k}{s};" //+ "x<{k}{s};"
|
||||
|
||||
// separate letters that would otherwise join
|
||||
|
||||
"sh''<{sh}({becomes_c};"
|
||||
"t''<{t}({becomes_s};"
|
||||
|
||||
"k''<{k}({becomes_h};"
|
||||
"s''<{s}({becomes_h};"
|
||||
"t''<{t}({becomes_h};"
|
||||
"z''<{z}({becomes_h};"
|
||||
|
||||
"y''<{y}({becomes_vowel};"
|
||||
|
||||
// main letters
|
||||
|
||||
"a<>{a};"
|
||||
"b<>{b};"
|
||||
"ch<>{ch};"
|
||||
"d<>{d};"
|
||||
"e<>{e};"
|
||||
"f<>{f};"
|
||||
"g<>{g};"
|
||||
"\u00ec<>{yi};"
|
||||
"i<>{i};"
|
||||
"kh<>{kh};"
|
||||
"k<>{k};"
|
||||
"l<>{l};"
|
||||
"m<>{m};"
|
||||
"n<>{n};"
|
||||
"o<>{o};"
|
||||
"p<>{p};"
|
||||
"r<>{r};"
|
||||
"shch<>{shch};"
|
||||
"sh>{sh};" //+ "sh<{sh};"
|
||||
"{s-hacek}<>{sh};"
|
||||
"s<>{s};"
|
||||
"ts<>{ts};"
|
||||
"t<>{t};"
|
||||
"u<>{u};"
|
||||
"v<>{v};"
|
||||
// Accented Latin forms used below: \u00ec = yi, \u00e0 = ya, \u00e8 = ye, \u00f2 = yo, \u00f9 = yu
|
||||
"ye>{ye};" //+ "ye<{ye};"
|
||||
"\u00e8<>{ye};"
|
||||
"yo>{yo};" //+ "yo<{yo};"
|
||||
"\u00f2<>{yo};"
|
||||
"yu>{yu};" //+ "yu<{yu};"
|
||||
"\u00f9<>{yu};"
|
||||
"ya>{ya};" //+ "ya<{ya};"
|
||||
"\u00e0<>{ya};"
|
||||
"y<>{y};"
|
||||
"zh<>{zh};"
|
||||
"z<>{z};"
|
||||
|
||||
"h<>{hard};"
|
||||
"\u00ff<>{soft};"
|
||||
|
||||
// non-Russian
|
||||
|
||||
"j<>{j};"
|
||||
|
||||
// variant spellings in English
|
||||
|
||||
"c({csoft}>{s};"
|
||||
"c>{k};"
|
||||
|
||||
|
||||
|
||||
// #############################################
|
||||
// End of Duplicated Rules
|
||||
// #############################################
|
||||
|
||||
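// Generally the last rule: in the forward (Latin to Cyrillic) direction, map '' to nothing (presumably a literal apostrophe, i.e. the separator the reverse rules insert -- confirm against the rule-syntax quoting convention)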
|
||||
"''>;"
|
||||
//the end
|
||||
}
|
||||
}
|
@ -1,270 +1,2 @@
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (C) 1999, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date Name Description
|
||||
// 11/17/99 aliu Creation.
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
lhalfwid {
|
||||
Rule {
|
||||
// Mechanically generated from Unicode Character Database
|
||||
|
||||
// multicharacter
|
||||
|
||||
"\u30AC<>\uFF76\uFF9E;" // to KATAKANA LETTER GA
|
||||
"\u30AE<>\uFF77\uFF9E;" // to KATAKANA LETTER GI
|
||||
"\u30B0<>\uFF78\uFF9E;" // to KATAKANA LETTER GU
|
||||
"\u30B2<>\uFF79\uFF9E;" // to KATAKANA LETTER GE
|
||||
"\u30B4<>\uFF7A\uFF9E;" // to KATAKANA LETTER GO
|
||||
"\u30B6<>\uFF7B\uFF9E;" // to KATAKANA LETTER ZA
|
||||
"\u30B8<>\uFF7C\uFF9E;" // to KATAKANA LETTER ZI
|
||||
"\u30BA<>\uFF7D\uFF9E;" // to KATAKANA LETTER ZU
|
||||
"\u30BC<>\uFF7E\uFF9E;" // to KATAKANA LETTER ZE
|
||||
"\u30BE<>\uFF7F\uFF9E;" // to KATAKANA LETTER ZO
|
||||
"\u30C0<>\uFF80\uFF9E;" // to KATAKANA LETTER DA
|
||||
"\u30C2<>\uFF81\uFF9E;" // to KATAKANA LETTER DI
|
||||
"\u30C5<>\uFF82\uFF9E;" // to KATAKANA LETTER DU
|
||||
"\u30C7<>\uFF83\uFF9E;" // to KATAKANA LETTER DE
|
||||
"\u30C9<>\uFF84\uFF9E;" // to KATAKANA LETTER DO
|
||||
"\u30D0<>\uFF8A\uFF9E;" // to KATAKANA LETTER BA
|
||||
"\u30D1<>\uFF8A\uFF9F;" // to KATAKANA LETTER PA
|
||||
"\u30D3<>\uFF8B\uFF9E;" // to KATAKANA LETTER BI
|
||||
"\u30D4<>\uFF8B\uFF9F;" // to KATAKANA LETTER PI
|
||||
"\u30D6<>\uFF8C\uFF9E;" // to KATAKANA LETTER BU
|
||||
"\u30D7<>\uFF8C\uFF9F;" // to KATAKANA LETTER PU
|
||||
"\u30D9<>\uFF8D\uFF9E;" // to KATAKANA LETTER BE
|
||||
"\u30DA<>\uFF8D\uFF9F;" // to KATAKANA LETTER PE
|
||||
"\u30DC<>\uFF8E\uFF9E;" // to KATAKANA LETTER BO
|
||||
"\u30DD<>\uFF8E\uFF9F;" // to KATAKANA LETTER PO
|
||||
"\u30F4<>\uFF73\uFF9E;" // to KATAKANA LETTER VU
|
||||
"\u30F7<>\uFF9C\uFF9E;" // to KATAKANA LETTER VA
|
||||
"\u30FA<>\uFF66\uFF9E;" // to KATAKANA LETTER VO
|
||||
|
||||
// single character
|
||||
|
||||
"\uFF01<>'!';" // from FULLWIDTH EXCLAMATION MARK
|
||||
"\uFF02<>'\"';" // from FULLWIDTH QUOTATION MARK
|
||||
"\uFF03<>'#';" // from FULLWIDTH NUMBER SIGN
|
||||
"\uFF04<>'$';" // from FULLWIDTH DOLLAR SIGN
|
||||
"\uFF05<>'%';" // from FULLWIDTH PERCENT SIGN
|
||||
"\uFF06<>'&';" // from FULLWIDTH AMPERSAND
|
||||
"\uFF07<>'';" // from FULLWIDTH APOSTROPHE
|
||||
"\uFF08<>'(';" // from FULLWIDTH LEFT PARENTHESIS
|
||||
"\uFF09<>')';" // from FULLWIDTH RIGHT PARENTHESIS
|
||||
"\uFF0A<>'*';" // from FULLWIDTH ASTERISK
|
||||
"\uFF0B<>'+';" // from FULLWIDTH PLUS SIGN
|
||||
"\uFF0C<>',';" // from FULLWIDTH COMMA
|
||||
"\uFF0D<>'-';" // from FULLWIDTH HYPHEN-MINUS
|
||||
"\uFF0E<>'.';" // from FULLWIDTH FULL STOP
|
||||
"\uFF0F<>'/';" // from FULLWIDTH SOLIDUS
|
||||
"\uFF10<>'0';" // from FULLWIDTH DIGIT ZERO
|
||||
"\uFF11<>'1';" // from FULLWIDTH DIGIT ONE
|
||||
"\uFF12<>'2';" // from FULLWIDTH DIGIT TWO
|
||||
"\uFF13<>'3';" // from FULLWIDTH DIGIT THREE
|
||||
"\uFF14<>'4';" // from FULLWIDTH DIGIT FOUR
|
||||
"\uFF15<>'5';" // from FULLWIDTH DIGIT FIVE
|
||||
"\uFF16<>'6';" // from FULLWIDTH DIGIT SIX
|
||||
"\uFF17<>'7';" // from FULLWIDTH DIGIT SEVEN
|
||||
"\uFF18<>'8';" // from FULLWIDTH DIGIT EIGHT
|
||||
"\uFF19<>'9';" // from FULLWIDTH DIGIT NINE
|
||||
"\uFF1A<>':';" // from FULLWIDTH COLON
|
||||
"\uFF1B<>';';" // from FULLWIDTH SEMICOLON
|
||||
"\uFF1C<>'<';" // from FULLWIDTH LESS-THAN SIGN
|
||||
"\uFF1D<>'=';" // from FULLWIDTH EQUALS SIGN
|
||||
"\uFF1E<>'>';" // from FULLWIDTH GREATER-THAN SIGN
|
||||
"\uFF1F<>'?';" // from FULLWIDTH QUESTION MARK
|
||||
"\uFF20<>'@';" // from FULLWIDTH COMMERCIAL AT
|
||||
"\uFF21<>A;" // from FULLWIDTH LATIN CAPITAL LETTER A
|
||||
"\uFF22<>B;" // from FULLWIDTH LATIN CAPITAL LETTER B
|
||||
"\uFF23<>C;" // from FULLWIDTH LATIN CAPITAL LETTER C
|
||||
"\uFF24<>D;" // from FULLWIDTH LATIN CAPITAL LETTER D
|
||||
"\uFF25<>E;" // from FULLWIDTH LATIN CAPITAL LETTER E
|
||||
"\uFF26<>F;" // from FULLWIDTH LATIN CAPITAL LETTER F
|
||||
"\uFF27<>G;" // from FULLWIDTH LATIN CAPITAL LETTER G
|
||||
"\uFF28<>H;" // from FULLWIDTH LATIN CAPITAL LETTER H
|
||||
"\uFF29<>I;" // from FULLWIDTH LATIN CAPITAL LETTER I
|
||||
"\uFF2A<>J;" // from FULLWIDTH LATIN CAPITAL LETTER J
|
||||
"\uFF2B<>K;" // from FULLWIDTH LATIN CAPITAL LETTER K
|
||||
"\uFF2C<>L;" // from FULLWIDTH LATIN CAPITAL LETTER L
|
||||
"\uFF2D<>M;" // from FULLWIDTH LATIN CAPITAL LETTER M
|
||||
"\uFF2E<>N;" // from FULLWIDTH LATIN CAPITAL LETTER N
|
||||
"\uFF2F<>O;" // from FULLWIDTH LATIN CAPITAL LETTER O
|
||||
"\uFF30<>P;" // from FULLWIDTH LATIN CAPITAL LETTER P
|
||||
"\uFF31<>Q;" // from FULLWIDTH LATIN CAPITAL LETTER Q
|
||||
"\uFF32<>R;" // from FULLWIDTH LATIN CAPITAL LETTER R
|
||||
"\uFF33<>S;" // from FULLWIDTH LATIN CAPITAL LETTER S
|
||||
"\uFF34<>T;" // from FULLWIDTH LATIN CAPITAL LETTER T
|
||||
"\uFF35<>U;" // from FULLWIDTH LATIN CAPITAL LETTER U
|
||||
"\uFF36<>V;" // from FULLWIDTH LATIN CAPITAL LETTER V
|
||||
"\uFF37<>W;" // from FULLWIDTH LATIN CAPITAL LETTER W
|
||||
"\uFF38<>X;" // from FULLWIDTH LATIN CAPITAL LETTER X
|
||||
"\uFF39<>Y;" // from FULLWIDTH LATIN CAPITAL LETTER Y
|
||||
"\uFF3A<>Z;" // from FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
"\uFF3B<>'[';" // from FULLWIDTH LEFT SQUARE BRACKET
|
||||
"\uFF3C<>'\\';" // from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
|
||||
"\uFF3D<>']';" // from FULLWIDTH RIGHT SQUARE BRACKET
|
||||
"\uFF3E<>'^';" // from FULLWIDTH CIRCUMFLEX ACCENT
|
||||
"\uFF3F<>'_';" // from FULLWIDTH LOW LINE
|
||||
"\uFF40<>'`';" // from FULLWIDTH GRAVE ACCENT
|
||||
"\uFF41<>a;" // from FULLWIDTH LATIN SMALL LETTER A
|
||||
"\uFF42<>b;" // from FULLWIDTH LATIN SMALL LETTER B
|
||||
"\uFF43<>c;" // from FULLWIDTH LATIN SMALL LETTER C
|
||||
"\uFF44<>d;" // from FULLWIDTH LATIN SMALL LETTER D
|
||||
"\uFF45<>e;" // from FULLWIDTH LATIN SMALL LETTER E
|
||||
"\uFF46<>f;" // from FULLWIDTH LATIN SMALL LETTER F
|
||||
"\uFF47<>g;" // from FULLWIDTH LATIN SMALL LETTER G
|
||||
"\uFF48<>h;" // from FULLWIDTH LATIN SMALL LETTER H
|
||||
"\uFF49<>i;" // from FULLWIDTH LATIN SMALL LETTER I
|
||||
"\uFF4A<>j;" // from FULLWIDTH LATIN SMALL LETTER J
|
||||
"\uFF4B<>k;" // from FULLWIDTH LATIN SMALL LETTER K
|
||||
"\uFF4C<>l;" // from FULLWIDTH LATIN SMALL LETTER L
|
||||
"\uFF4D<>m;" // from FULLWIDTH LATIN SMALL LETTER M
|
||||
"\uFF4E<>n;" // from FULLWIDTH LATIN SMALL LETTER N
|
||||
"\uFF4F<>o;" // from FULLWIDTH LATIN SMALL LETTER O
|
||||
"\uFF50<>p;" // from FULLWIDTH LATIN SMALL LETTER P
|
||||
"\uFF51<>q;" // from FULLWIDTH LATIN SMALL LETTER Q
|
||||
"\uFF52<>r;" // from FULLWIDTH LATIN SMALL LETTER R
|
||||
"\uFF53<>s;" // from FULLWIDTH LATIN SMALL LETTER S
|
||||
"\uFF54<>t;" // from FULLWIDTH LATIN SMALL LETTER T
|
||||
"\uFF55<>u;" // from FULLWIDTH LATIN SMALL LETTER U
|
||||
"\uFF56<>v;" // from FULLWIDTH LATIN SMALL LETTER V
|
||||
"\uFF57<>w;" // from FULLWIDTH LATIN SMALL LETTER W
|
||||
"\uFF58<>x;" // from FULLWIDTH LATIN SMALL LETTER X
|
||||
"\uFF59<>y;" // from FULLWIDTH LATIN SMALL LETTER Y
|
||||
"\uFF5A<>z;" // from FULLWIDTH LATIN SMALL LETTER Z
|
||||
"\uFF5B<>'{';" // from FULLWIDTH LEFT CURLY BRACKET
|
||||
"\uFF5C<>'|';" // from FULLWIDTH VERTICAL LINE
|
||||
"\uFF5D<>'}';" // from FULLWIDTH RIGHT CURLY BRACKET
|
||||
"\uFF5E<>'~';" // from FULLWIDTH TILDE
|
||||
"\u3002<>\uFF61;" // to HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
"\u300C<>\uFF62;" // to HALFWIDTH LEFT CORNER BRACKET
|
||||
"\u300D<>\uFF63;" // to HALFWIDTH RIGHT CORNER BRACKET
|
||||
"\u3001<>\uFF64;" // to HALFWIDTH IDEOGRAPHIC COMMA
|
||||
"\u30FB<>\uFF65;" // to HALFWIDTH KATAKANA MIDDLE DOT
|
||||
"\u30F2<>\uFF66;" // to HALFWIDTH KATAKANA LETTER WO
|
||||
"\u30A1<>\uFF67;" // to HALFWIDTH KATAKANA LETTER SMALL A
|
||||
"\u30A3<>\uFF68;" // to HALFWIDTH KATAKANA LETTER SMALL I
|
||||
"\u30A5<>\uFF69;" // to HALFWIDTH KATAKANA LETTER SMALL U
|
||||
"\u30A7<>\uFF6A;" // to HALFWIDTH KATAKANA LETTER SMALL E
|
||||
"\u30A9<>\uFF6B;" // to HALFWIDTH KATAKANA LETTER SMALL O
|
||||
"\u30E3<>\uFF6C;" // to HALFWIDTH KATAKANA LETTER SMALL YA
|
||||
"\u30E5<>\uFF6D;" // to HALFWIDTH KATAKANA LETTER SMALL YU
|
||||
"\u30E7<>\uFF6E;" // to HALFWIDTH KATAKANA LETTER SMALL YO
|
||||
"\u30C3<>\uFF6F;" // to HALFWIDTH KATAKANA LETTER SMALL TU
|
||||
"\u30FC<>\uFF70;" // to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
"\u30A2<>\uFF71;" // to HALFWIDTH KATAKANA LETTER A
|
||||
"\u30A4<>\uFF72;" // to HALFWIDTH KATAKANA LETTER I
|
||||
"\u30A6<>\uFF73;" // to HALFWIDTH KATAKANA LETTER U
|
||||
"\u30A8<>\uFF74;" // to HALFWIDTH KATAKANA LETTER E
|
||||
"\u30AA<>\uFF75;" // to HALFWIDTH KATAKANA LETTER O
|
||||
"\u30AB<>\uFF76;" // to HALFWIDTH KATAKANA LETTER KA
|
||||
"\u30AD<>\uFF77;" // to HALFWIDTH KATAKANA LETTER KI
|
||||
"\u30AF<>\uFF78;" // to HALFWIDTH KATAKANA LETTER KU
|
||||
"\u30B1<>\uFF79;" // to HALFWIDTH KATAKANA LETTER KE
|
||||
"\u30B3<>\uFF7A;" // to HALFWIDTH KATAKANA LETTER KO
|
||||
"\u30B5<>\uFF7B;" // to HALFWIDTH KATAKANA LETTER SA
|
||||
"\u30B7<>\uFF7C;" // to HALFWIDTH KATAKANA LETTER SI
|
||||
"\u30B9<>\uFF7D;" // to HALFWIDTH KATAKANA LETTER SU
|
||||
"\u30BB<>\uFF7E;" // to HALFWIDTH KATAKANA LETTER SE
|
||||
"\u30BD<>\uFF7F;" // to HALFWIDTH KATAKANA LETTER SO
|
||||
"\u30BF<>\uFF80;" // to HALFWIDTH KATAKANA LETTER TA
|
||||
"\u30C1<>\uFF81;" // to HALFWIDTH KATAKANA LETTER TI
|
||||
"\u30C4<>\uFF82;" // to HALFWIDTH KATAKANA LETTER TU
|
||||
"\u30C6<>\uFF83;" // to HALFWIDTH KATAKANA LETTER TE
|
||||
"\u30C8<>\uFF84;" // to HALFWIDTH KATAKANA LETTER TO
|
||||
"\u30CA<>\uFF85;" // to HALFWIDTH KATAKANA LETTER NA
|
||||
"\u30CB<>\uFF86;" // to HALFWIDTH KATAKANA LETTER NI
|
||||
"\u30CC<>\uFF87;" // to HALFWIDTH KATAKANA LETTER NU
|
||||
"\u30CD<>\uFF88;" // to HALFWIDTH KATAKANA LETTER NE
|
||||
"\u30CE<>\uFF89;" // to HALFWIDTH KATAKANA LETTER NO
|
||||
"\u30CF<>\uFF8A;" // to HALFWIDTH KATAKANA LETTER HA
|
||||
"\u30D2<>\uFF8B;" // to HALFWIDTH KATAKANA LETTER HI
|
||||
"\u30D5<>\uFF8C;" // to HALFWIDTH KATAKANA LETTER HU
|
||||
"\u30D8<>\uFF8D;" // to HALFWIDTH KATAKANA LETTER HE
|
||||
"\u30DB<>\uFF8E;" // to HALFWIDTH KATAKANA LETTER HO
|
||||
"\u30DE<>\uFF8F;" // to HALFWIDTH KATAKANA LETTER MA
|
||||
"\u30DF<>\uFF90;" // to HALFWIDTH KATAKANA LETTER MI
|
||||
"\u30E0<>\uFF91;" // to HALFWIDTH KATAKANA LETTER MU
|
||||
"\u30E1<>\uFF92;" // to HALFWIDTH KATAKANA LETTER ME
|
||||
"\u30E2<>\uFF93;" // to HALFWIDTH KATAKANA LETTER MO
|
||||
"\u30E4<>\uFF94;" // to HALFWIDTH KATAKANA LETTER YA
|
||||
"\u30E6<>\uFF95;" // to HALFWIDTH KATAKANA LETTER YU
|
||||
"\u30E8<>\uFF96;" // to HALFWIDTH KATAKANA LETTER YO
|
||||
"\u30E9<>\uFF97;" // to HALFWIDTH KATAKANA LETTER RA
|
||||
"\u30EA<>\uFF98;" // to HALFWIDTH KATAKANA LETTER RI
|
||||
"\u30EB<>\uFF99;" // to HALFWIDTH KATAKANA LETTER RU
|
||||
"\u30EC<>\uFF9A;" // to HALFWIDTH KATAKANA LETTER RE
|
||||
"\u30ED<>\uFF9B;" // to HALFWIDTH KATAKANA LETTER RO
|
||||
"\u30EF<>\uFF9C;" // to HALFWIDTH KATAKANA LETTER WA
|
||||
"\u30F3<>\uFF9D;" // to HALFWIDTH KATAKANA LETTER N
|
||||
"\u3099<>\uFF9E;" // to HALFWIDTH KATAKANA VOICED SOUND MARK
|
||||
"\u309A<>\uFF9F;" // to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
"\u1160<>\uFFA0;" // to HALFWIDTH HANGUL FILLER
|
||||
"\u1100<>\uFFA1;" // to HALFWIDTH HANGUL LETTER KIYEOK
|
||||
"\u1101<>\uFFA2;" // to HALFWIDTH HANGUL LETTER SSANGKIYEOK
|
||||
"\u11AA<>\uFFA3;" // to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
|
||||
"\u1102<>\uFFA4;" // to HALFWIDTH HANGUL LETTER NIEUN
|
||||
"\u11AC<>\uFFA5;" // to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
|
||||
"\u11AD<>\uFFA6;" // to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
|
||||
"\u1103<>\uFFA7;" // to HALFWIDTH HANGUL LETTER TIKEUT
|
||||
"\u1104<>\uFFA8;" // to HALFWIDTH HANGUL LETTER SSANGTIKEUT
|
||||
"\u1105<>\uFFA9;" // to HALFWIDTH HANGUL LETTER RIEUL
|
||||
"\u11B0<>\uFFAA;" // to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
|
||||
"\u11B1<>\uFFAB;" // to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
|
||||
"\u11B2<>\uFFAC;" // to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
|
||||
"\u11B3<>\uFFAD;" // to HALFWIDTH HANGUL LETTER RIEUL-SIOS
|
||||
"\u11B4<>\uFFAE;" // to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
|
||||
"\u11B5<>\uFFAF;" // to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
|
||||
"\u111A<>\uFFB0;" // to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
|
||||
"\u1106<>\uFFB1;" // to HALFWIDTH HANGUL LETTER MIEUM
|
||||
"\u1107<>\uFFB2;" // to HALFWIDTH HANGUL LETTER PIEUP
|
||||
"\u1108<>\uFFB3;" // to HALFWIDTH HANGUL LETTER SSANGPIEUP
|
||||
"\u1121<>\uFFB4;" // to HALFWIDTH HANGUL LETTER PIEUP-SIOS
|
||||
"\u1109<>\uFFB5;" // to HALFWIDTH HANGUL LETTER SIOS
|
||||
"\u110A<>\uFFB6;" // to HALFWIDTH HANGUL LETTER SSANGSIOS
|
||||
"\u110B<>\uFFB7;" // to HALFWIDTH HANGUL LETTER IEUNG
|
||||
"\u110C<>\uFFB8;" // to HALFWIDTH HANGUL LETTER CIEUC
|
||||
"\u110D<>\uFFB9;" // to HALFWIDTH HANGUL LETTER SSANGCIEUC
|
||||
"\u110E<>\uFFBA;" // to HALFWIDTH HANGUL LETTER CHIEUCH
|
||||
"\u110F<>\uFFBB;" // to HALFWIDTH HANGUL LETTER KHIEUKH
|
||||
"\u1110<>\uFFBC;" // to HALFWIDTH HANGUL LETTER THIEUTH
|
||||
"\u1111<>\uFFBD;" // to HALFWIDTH HANGUL LETTER PHIEUPH
|
||||
"\u1112<>\uFFBE;" // to HALFWIDTH HANGUL LETTER HIEUH
|
||||
"\u1161<>\uFFC2;" // to HALFWIDTH HANGUL LETTER A
|
||||
"\u1162<>\uFFC3;" // to HALFWIDTH HANGUL LETTER AE
|
||||
"\u1163<>\uFFC4;" // to HALFWIDTH HANGUL LETTER YA
|
||||
"\u1164<>\uFFC5;" // to HALFWIDTH HANGUL LETTER YAE
|
||||
"\u1165<>\uFFC6;" // to HALFWIDTH HANGUL LETTER EO
|
||||
"\u1166<>\uFFC7;" // to HALFWIDTH HANGUL LETTER E
|
||||
"\u1167<>\uFFCA;" // to HALFWIDTH HANGUL LETTER YEO
|
||||
"\u1168<>\uFFCB;" // to HALFWIDTH HANGUL LETTER YE
|
||||
"\u1169<>\uFFCC;" // to HALFWIDTH HANGUL LETTER O
|
||||
"\u116A<>\uFFCD;" // to HALFWIDTH HANGUL LETTER WA
|
||||
"\u116B<>\uFFCE;" // to HALFWIDTH HANGUL LETTER WAE
|
||||
"\u116C<>\uFFCF;" // to HALFWIDTH HANGUL LETTER OE
|
||||
"\u116D<>\uFFD2;" // to HALFWIDTH HANGUL LETTER YO
|
||||
"\u116E<>\uFFD3;" // to HALFWIDTH HANGUL LETTER U
|
||||
"\u116F<>\uFFD4;" // to HALFWIDTH HANGUL LETTER WEO
|
||||
"\u1170<>\uFFD5;" // to HALFWIDTH HANGUL LETTER WE
|
||||
"\u1171<>\uFFD6;" // to HALFWIDTH HANGUL LETTER WI
|
||||
"\u1172<>\uFFD7;" // to HALFWIDTH HANGUL LETTER YU
|
||||
"\u1173<>\uFFDA;" // to HALFWIDTH HANGUL LETTER EU
|
||||
"\u1174<>\uFFDB;" // to HALFWIDTH HANGUL LETTER YI
|
||||
"\u1175<>\uFFDC;" // to HALFWIDTH HANGUL LETTER I
|
||||
"\uFFE0<>'\u00a2';" // from FULLWIDTH CENT SIGN
|
||||
"\uFFE1<>'\u00a3';" // from FULLWIDTH POUND SIGN
|
||||
"\uFFE2<>'\u00ac';" // from FULLWIDTH NOT SIGN
|
||||
"\uFFE3<>' '\u0304;" // from FULLWIDTH MACRON
|
||||
"\uFFE4<>'\u00a6';" // from FULLWIDTH BROKEN BAR
|
||||
"\uFFE5<>'\u00a5';" // from FULLWIDTH YEN SIGN
|
||||
"\uFFE6<>\u20A9;" // from FULLWIDTH WON SIGN
|
||||
"\u2502<>\uFFE8;" // to HALFWIDTH FORMS LIGHT VERTICAL
|
||||
"\u2190<>\uFFE9;" // to HALFWIDTH LEFTWARDS ARROW
|
||||
"\u2191<>\uFFEA;" // to HALFWIDTH UPWARDS ARROW
|
||||
"\u2192<>\uFFEB;" // to HALFWIDTH RIGHTWARDS ARROW
|
||||
"\u2193<>\uFFEC;" // to HALFWIDTH DOWNWARDS ARROW
|
||||
"\u25A0<>\uFFED;" // to HALFWIDTH BLACK SQUARE
|
||||
"\u25CB<>\uFFEE;" // to HALFWIDTH WHITE CIRCLE
|
||||
}
|
||||
}
|
||||
// This file is obsolete
|
||||
// See fullhalf
|
||||
|
274
icu4c/data/translit/ljamo.txt
Normal file
274
icu4c/data/translit/ljamo.txt
Normal file
@ -0,0 +1,274 @@
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 2000, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date Name Description
|
||||
// 01/13/2000 aliu Creation.
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
ljamo {
|
||||
Rule {
|
||||
// VARIABLES
|
||||
|
||||
"medial=[\u1160-\u11A7];" // medial (M) jamo range; left context of the IEUNG-insertion rule
|
||||
"final=[\u11A8-\u11F9];" // added - aliu; final (F) jamo range; left context of the "({final}) X" rules
|
||||
"vowel=[aeiouwyAEIOUWY\u1160-\u11A7];" // Latin vowel letters (incl. w, y, both cases) plus the medial range; right context of the initial-consonant rules
|
||||
"ye=[yeYE];" // letters before which a following 'u' needs an apostrophe (see the ''u rule)
|
||||
"ywe=[yweYWE];" // left context of the ''o rule
|
||||
"yw=[ywYW];" // left context of the ''i and ''a rules
|
||||
"nl=[nlNL];" // left context of the ''h rules
|
||||
"gnl=[gnlGNL];" // left context of the ''g rules
|
||||
"lsgb=[lsgbLSGB];" // left context of the ''s rules
|
||||
"ywao=[ywaoYWAO];" // left context of the ''e rule
|
||||
"bl=[blBL];" // left context of the ''b rules
|
||||
|
||||
// RULES
|
||||
|
||||
// Hangul structure is IMF or IM
|
||||
// So you can have, because of adjacent sequences
|
||||
// IM, but not II or IF
|
||||
// MF or MI, but not MM
|
||||
// FI, but not FF or FM
|
||||
|
||||
// For English, we just have C or V.
|
||||
// To generate valid Hangul:
|
||||
// Vowels:
|
||||
// We insert IEUNG between VV, and otherwise map V to M
|
||||
// We also insert IEUNG if there is no initial consonant before a vowel (TODO confirm: the original sentence is cut off here)
|
||||
// Consonants:
|
||||
// We don't break doubles
|
||||
// Cases like lmgg, we have to break at lm
|
||||
// So to guess whether a consonant is I or F
|
||||
// we map all C's to F, except when followed by a vowel, e.g.
|
||||
// X[{vowel}>CHOSEONG (initial)
|
||||
// X>JONGSEONG (final)
|
||||
|
||||
// special insertion for funny sequences of vowels
|
||||
|
||||
"({medial}) ({vowel}) > \u110B;" // HANGUL CHOSEONG IEUNG
|
||||
|
||||
// Fix casing.
|
||||
// Because Korean is caseless, we just want to treat everything as
|
||||
// lowercase.
|
||||
// we could do this by always preceding this transliterator with
|
||||
// an upper-lowercase transformation, but that wouldn't invert nicely.
|
||||
// We use the "revisit" syntax to just convert latin to latin
|
||||
// so that we can avoid
|
||||
// having to restate all the Latin=>Jamo rules, with the I/F handling.
|
||||
|
||||
// We don't have to add titlecase, since that will be picked up
|
||||
// since the first letter is converted, then revisited. E.g.
|
||||
// |Gg => |gg => {ssang kiyeok}
|
||||
// We do have to have all caps, since otherwise we could get:
|
||||
// |GG => |gG => {kiyeok}|G => {kiyeok}|g => {kiyeok}{kiyeok}
|
||||
|
||||
"Z > |z;"
|
||||
"YU > |yu;"
|
||||
"YO > |yo;"
|
||||
"YI > |yi;"
|
||||
"YEO > |yeo;"
|
||||
"YE > |ye;"
|
||||
"YAE > |yae;"
|
||||
"YA > |ya;"
|
||||
"Y > |y;"
|
||||
"WI > |wi;"
|
||||
"WEO > |weo;"
|
||||
"WE > |we;"
|
||||
"WAE > |wae;"
|
||||
"WA > |wa;"
|
||||
"W > |w;"
|
||||
"U > |u;"
|
||||
"T > |t;"
|
||||
"SS > |ss;"
|
||||
"S > |s;"
|
||||
"P > |p;"
|
||||
"OE > |oe;"
|
||||
"O > |o;"
|
||||
"NJ > |nj;"
|
||||
"NH > |nh;"
|
||||
"NG > |ng;"
|
||||
"N > |n;"
|
||||
"M > |m;"
|
||||
"LT > |lt;"
|
||||
"LS > |ls;"
|
||||
"LP > |lp;"
|
||||
"LM > |lm;"
|
||||
"LH > |lh;"
|
||||
"LG > |lg;"
|
||||
"LB > |lb;"
|
||||
"L > |l;"
|
||||
"K > |k;"
|
||||
"JJ > |jj;"
|
||||
"J > |j;"
|
||||
"I > |i;"
|
||||
"H > |h;"
|
||||
"GS > |gs;"
|
||||
"GG > |gg;"
|
||||
"G > |g;"
|
||||
"EU > |eu;"
|
||||
"EO > |eo;"
|
||||
"E > |e;"
|
||||
"DD > |dd;"
|
||||
"D > |d;"
|
||||
"BS > |bs;"
|
||||
"BB > |bb;"
|
||||
"B > |b;"
|
||||
"AE > |ae;"
|
||||
"A > |a;"
|
||||
|
||||
// APOSTROPHE
|
||||
|
||||
// As always, an apostrophe is used to separate digraphs into
|
||||
// singles. That is, if you really wanted [KAN][GGAN], instead
|
||||
// of [KANG][GAN] you would write "kan'ggan".
|
||||
|
||||
// Rules for inserting ' when mapping separated digraphs back
|
||||
// from Hangul to Latin. Catch every letter that can be the
|
||||
// LAST of a digraph (or multigraph)
// Each consonant appears twice: once for its single-letter final
// (jongseong) jamo and once for its single-letter initial (choseong)
// jamo. The parenthesized left context lists the Latin letters that
// would otherwise fuse with the emitted letter into a digraph.
|
||||
|
||||
"''u < ({ye}) \u116e;" // hangul jungseong u
|
||||
"''t < (l) \u11c0;" // hangul jongseong thieuth
|
||||
"''t < (l) \u1110;" // hangul choseong thieuth
|
||||
"''s < ({lsgb}) \u11ba;" // hangul jongseong sios
|
||||
"''s < ({lsgb}) \u1109;" // hangul choseong sios
|
||||
"''p < (l) \u11c1;" // hangul jongseong phieuph
|
||||
"''p < (l) \u1111;" // hangul choseong phieuph
|
||||
"''o < ({ywe}) \u1169;" // hangul jungseong o
|
||||
"''m < (l) \u11b7;" // hangul jongseong mieum
|
||||
"''m < (l) \u1106;" // hangul choseong mieum
|
||||
"''j < (n) \u11bd;" // hangul jongseong cieuc
|
||||
"''j < (n) \u110c;" // hangul choseong cieuc
|
||||
"''i < ({yw}) \u1175;" // hangul jungseong i
|
||||
"''h < ({nl}) \u11c2;" // hangul jongseong hieuh
|
||||
"''h < ({nl}) \u1112;" // hangul choseong hieuh
|
||||
"''g < ({gnl}) \u11a8;" // hangul jongseong kiyeok (was \u11a9 ssangkiyeok, which is "gg", not "g")
|
||||
"''g < ({gnl}) \u1100;" // hangul choseong kiyeok
|
||||
"''e < ({ywao}) \u1166;" // hangul jungseong e
|
||||
"''d < (d) \u11ae;" // hangul jongseong tikeut
|
||||
"''d < (d) \u1103;" // hangul choseong tikeut
|
||||
"''b < ({bl}) \u11b8;" // hangul jongseong pieup
|
||||
"''b < ({bl}) \u1107;" // hangul choseong pieup
|
||||
"''a < ({yw}) \u1161;" // hangul jungseong a
|
||||
|
||||
// INITIALS
|
||||
|
||||
"t ({vowel}) <> \u1110;" // hangul choseong thieuth
|
||||
"ss ({vowel}) <> \u110a;" // hangul choseong ssangsios
|
||||
"s ({vowel}) <> \u1109;" // hangul choseong sios
|
||||
"p ({vowel}) <> \u1111;" // hangul choseong phieuph
|
||||
"n ({vowel}) <> \u1102;" // hangul choseong nieun
|
||||
"m ({vowel}) <> \u1106;" // hangul choseong mieum
|
||||
"l ({vowel}) <> \u1105;" // hangul choseong rieul
|
||||
"k ({vowel}) <> \u110f;" // hangul choseong khieukh
|
||||
"j ({vowel}) <> \u110c;" // hangul choseong cieuc
|
||||
"h ({vowel}) <> \u1112;" // hangul choseong hieuh
|
||||
"gg ({vowel}) <> \u1101;" // hangul choseong ssangkiyeok
|
||||
"g ({vowel}) <> \u1100;" // hangul choseong kiyeok
|
||||
"d ({vowel}) <> \u1103;" // hangul choseong tikeut
|
||||
"c ({vowel}) <> \u110e;" // hangul choseong chieuch
|
||||
"bb ({vowel}) <> \u1108;" // hangul choseong ssangpieup
|
||||
"b ({vowel}) <> \u1107;" // hangul choseong pieup
|
||||
|
||||
// If we have gotten through to these rules, and we start with
|
||||
// a consonant, then the remaining mappings would be to F,
|
||||
// because we must have CC (or C<non-letter>), not CV.
|
||||
// If we have F before us, then
|
||||
// we would end up with FF, which is wrong. The simplest fix is
|
||||
// to still make it an initial, but also insert an "u",
|
||||
// so we end up with F, I, u, and then continue with the C
|
||||
|
||||
"({final}) t > \u1110\u116e;" // hangul choseong thieuth
|
||||
"({final}) ss > \u110a\u116e;" // hangul choseong ssangsios
|
||||
"({final}) s > \u1109\u116e;" // hangul choseong sios
|
||||
"({final}) p > \u1111\u116e;" // hangul choseong phieuph
|
||||
"({final}) n > \u1102\u116e;" // hangul choseong nieun
|
||||
"({final}) m > \u1106\u116e;" // hangul choseong mieum
|
||||
"({final}) l > \u1105\u116e;" // hangul choseong rieul
|
||||
"({final}) k > \u110f\u116e;" // hangul choseong khieukh
|
||||
"({final}) j > \u110c\u116e;" // hangul choseong cieuc
|
||||
"({final}) h > \u1112\u116e;" // hangul choseong hieuh
|
||||
"({final}) gg > \u1101\u116e;" // hangul choseong ssangkiyeok
|
||||
"({final}) g > \u1100\u116e;" // hangul choseong kiyeok
|
||||
"({final}) d > \u1103\u116e;" // hangul choseong tikeut
|
||||
"({final}) c > \u110e\u116e;" // hangul choseong chieuch
|
||||
"({final}) bb > \u1108\u116e;" // hangul choseong ssangpieup
|
||||
"({final}) b > \u1107\u116e;" // hangul choseong pieup
|
||||
|
||||
// MEDIALS (vowels) and FINALS
|
||||
|
||||
"yu <> \u1172;" // hangul jungseong yu
|
||||
"yo <> \u116d;" // hangul jungseong yo
|
||||
"yi <> \u1174;" // hangul jungseong yi
|
||||
"yeo <> \u1167;" // hangul jungseong yeo
|
||||
"ye <> \u1168;" // hangul jungseong ye
|
||||
"yae <> \u1164;" // hangul jungseong yae
|
||||
"ya <> \u1163;" // hangul jungseong ya
|
||||
"wi <> \u1171;" // hangul jungseong wi
|
||||
"weo <> \u116f;" // hangul jungseong weo
|
||||
"we <> \u1170;" // hangul jungseong we
|
||||
"wae <> \u116b;" // hangul jungseong wae
|
||||
"wa <> \u116a;" // hangul jungseong wa
|
||||
"u <> \u116e;" // hangul jungseong u
|
||||
"t <> \u11c0;" // hangul jongseong thieuth
|
||||
"ss <> \u11bb;" // hangul jongseong ssangsios
|
||||
"s <> \u11ba;" // hangul jongseong sios
|
||||
"p <> \u11c1;" // hangul jongseong phieuph
|
||||
"oe <> \u116c;" // hangul jungseong oe
|
||||
"o <> \u1169;" // hangul jungseong o
|
||||
"nj <> \u11ac;" // hangul jongseong nieun-cieuc
|
||||
"nh <> \u11ad;" // hangul jongseong nieun-hieuh
|
||||
"ng <> \u11bc;" // hangul jongseong ieung
|
||||
"n <> \u11ab;" // hangul jongseong nieun
|
||||
"m <> \u11b7;" // hangul jongseong mieum
|
||||
"lt <> \u11b4;" // hangul jongseong rieul-thieuth
|
||||
"ls <> \u11b3;" // hangul jongseong rieul-sios
|
||||
"lp <> \u11b5;" // hangul jongseong rieul-phieuph
|
||||
"lm <> \u11b1;" // hangul jongseong rieul-mieum
|
||||
"lh <> \u11b6;" // hangul jongseong rieul-hieuh
|
||||
"lg <> \u11b0;" // hangul jongseong rieul-kiyeok
|
||||
"lb <> \u11b2;" // hangul jongseong rieul-pieup
|
||||
"l <> \u11af;" // hangul jongseong rieul
|
||||
"k <> \u11bf;" // hangul jongseong khieukh
|
||||
"jj <> \u110d;" // hangul choseong ssangcieuc
|
||||
"j <> \u11bd;" // hangul jongseong cieuc
|
||||
"i <> \u1175;" // hangul jungseong i
|
||||
"h <> \u11c2;" // hangul jongseong hieuh
|
||||
"gs <> \u11aa;" // hangul jongseong kiyeok-sios
|
||||
"gg <> \u11a9;" // hangul jongseong ssangkiyeok
|
||||
"g <> \u11a8;" // hangul jongseong kiyeok
|
||||
"eu <> \u1173;" // hangul jungseong eu
|
||||
"eo <> \u1165;" // hangul jungseong eo
|
||||
"e <> \u1166;" // hangul jungseong e
|
||||
"dd <> \u1104;" // hangul choseong ssangtikeut
|
||||
"d <> \u11ae;" // hangul jongseong tikeut
|
||||
"c <> \u11be;" // hangul jongseong chieuch
|
||||
"bs <> \u11b9;" // hangul jongseong pieup-sios
|
||||
"b <> \u11b8;" // hangul jongseong pieup
|
||||
"ae <> \u1162;" // hangul jungseong ae
|
||||
"a <> \u1161;" // hangul jungseong a
|
||||
|
||||
// extra English letters
|
||||
// {moved to bottom - aliu}
// Latin letters with no jamo rule of their own are rewritten to a
// letter (or letter pair) that does have one. These rules only have a
// left-to-right direction ('>'); the '|' on the right-hand side is the
// same marker the casing rules use so the rewritten text is revisited.
|
||||
|
||||
"z > |s;"
|
||||
//{ "Z > |s;" } masked
// (masked: the earlier casing rule "Z > |z;" already consumes every 'Z')
|
||||
"x > |ks;"
|
||||
"X > |ks;"
|
||||
"v > |b;"
|
||||
"V > |b;"
|
||||
"r > |l;"
|
||||
"R > |l;"
|
||||
"q > |k;"
|
||||
"Q > |k;"
|
||||
"f > |p;"
|
||||
"F > |p;"
|
||||
//{ "c > |k;" } masked
// (masked: the earlier "c ({vowel}) <> \u110e;" and "c <> \u11be;" rules already consume every 'c')
|
||||
// NOTE(review): uppercase 'C' becomes 'k' here while lowercase 'c' maps to
// chieuch above, so the two cases of the same letter diverge - confirm intended.
"C > |k;"
|
||||
|
||||
// ====================================
|
||||
// Normal final rule: remove '
|
||||
// ====================================
|
||||
|
||||
"''>;"
|
||||
}
|
||||
}
|
@ -1,307 +1,2 @@
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (C) 1999, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date Name Description
|
||||
// 11/17/99 aliu Creation.
|
||||
// 12/10/99 aliu Fix case handling.
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
lrussian {
|
||||
Rule {
|
||||
//* /* This class is designed to be a general Latin-Cyrillic
|
||||
//* transliteration. The standard Russian transliterations
|
||||
//* are generally used for the letters from Russian,
|
||||
//* with additional Cyrillic characters given consistent
|
||||
//* mappings.
|
||||
//* */
|
||||
|
||||
"S-hacek=\u0160;"
|
||||
"s-hacek=\u0161;"
|
||||
|
||||
"YO=\u0401;"
|
||||
"J=\u0408;"
|
||||
"A=\u0410;"
|
||||
"B=\u0411;"
|
||||
"V=\u0412;"
|
||||
"G=\u0413;"
|
||||
"D=\u0414;"
|
||||
"YE=\u0415;"
|
||||
"ZH=\u0416;"
|
||||
"Z=\u0417;"
|
||||
"YI=\u0418;"
|
||||
"Y=\u0419;"
|
||||
"K=\u041A;"
|
||||
"L=\u041B;"
|
||||
"M=\u041C;"
|
||||
"N=\u041D;"
|
||||
"O=\u041E;"
|
||||
"P=\u041F;"
|
||||
"R=\u0420;"
|
||||
"S=\u0421;"
|
||||
"T=\u0422;"
|
||||
"U=\u0423;"
|
||||
"F=\u0424;"
|
||||
"KH=\u0425;"
|
||||
"TS=\u0426;"
|
||||
"CH=\u0427;"
|
||||
"SH=\u0428;"
|
||||
"SHCH=\u0429;"
|
||||
"HARD=\u042A;"
|
||||
"I=\u042B;"
|
||||
"SOFT=\u042C;"
|
||||
"E=\u042D;"
|
||||
"YU=\u042E;"
|
||||
"YA=\u042F;"
|
||||
|
||||
// Lowercase
|
||||
|
||||
"a=\u0430;"
|
||||
"b=\u0431;"
|
||||
"v=\u0432;"
|
||||
"g=\u0433;"
|
||||
"d=\u0434;"
|
||||
"ye=\u0435;"
|
||||
"zh=\u0436;"
|
||||
"z=\u0437;"
|
||||
"yi=\u0438;"
|
||||
"y=\u0439;"
|
||||
"k=\u043a;"
|
||||
"l=\u043b;"
|
||||
"m=\u043c;"
|
||||
"n=\u043d;"
|
||||
"o=\u043e;"
|
||||
"p=\u043f;"
|
||||
"r=\u0440;"
|
||||
"s=\u0441;"
|
||||
"t=\u0442;"
|
||||
"u=\u0443;"
|
||||
"f=\u0444;"
|
||||
"kh=\u0445;"
|
||||
"ts=\u0446;"
|
||||
"ch=\u0447;"
|
||||
"sh=\u0448;"
|
||||
"shch=\u0449;"
|
||||
"hard=\u044a;"
|
||||
"i=\u044b;"
|
||||
"soft=\u044c;"
|
||||
"e=\u044d;"
|
||||
"yu=\u044e;"
|
||||
"ya=\u044f;"
|
||||
|
||||
"yo=\u0451;"
|
||||
"j=\u0458;"
|
||||
|
||||
// variables
|
||||
// some are duplicated so lowercasing works
|
||||
|
||||
"csoft=[eiyEIY];"
|
||||
"CSOFT=[eiyEIY];"
|
||||
|
||||
"BECOMES_H=[{HARD}{hard}];"
|
||||
"becomes_h=[{HARD}{hard}];"
|
||||
|
||||
"BECOMES_S=[{S}{s}];"
|
||||
"becomes_s=[{S}{s}];"
|
||||
|
||||
"BECOMES_C=[{CH}{ch}];"
|
||||
"becomes_c=[{CH}{ch}];"
|
||||
|
||||
"BECOMES_VOWEL=[{A}{E}{I}{O}{U}{a}{e}{i}{o}{u}];"
|
||||
"becomes_vowel=[{A}{E}{I}{O}{U}{a}{e}{i}{o}{u}];"
|
||||
|
||||
"letter=[[:Lu:][:Ll:]];"
|
||||
"lower=[[:Ll:]];"
|
||||
|
||||
//* /*
|
||||
//* Modified to combine display transliterator and typing transliterator.
|
||||
//* The display mapping uses accents for the "soft" vowels.
|
||||
//* It does not, although it could, use characters like \u0161 instead of digraphs
|
||||
//* like sh.
|
||||
//* */
|
||||
|
||||
// #############################################
|
||||
// Special titlecase forms, not duplicated
|
||||
// #############################################
|
||||
|
||||
"Ch>{CH};" "Ch<{CH}({lower};"
|
||||
"Kh>{KH};" "Kh<{KH}({lower};"
|
||||
"Shch>{SHCH};" "Shch<{SHCH}({lower};"
|
||||
"Sh>{SH};" "Sh<{SH}({lower};"
|
||||
"Ts>{TS};" "Ts<{TS}({lower};"
|
||||
"Zh>{ZH};" "Zh<{ZH}({lower};"
|
||||
"Yi>{YI};" //+ "Yi<{YI}({lower};"
|
||||
"Ye>{YE};" //+ "Ye<{YE}({lower};"
|
||||
"Yo>{YO};" //+ "Yo<{YO}({lower};"
|
||||
"Yu>{YU};" //+ "Yu<{YU}({lower};"
|
||||
"Ya>{YA};" //+ "Ya<{YA}({lower};"
|
||||
|
||||
// #############################################
|
||||
// Rules to Duplicate
|
||||
// To get the lowercase versions, copy these and lowercase
|
||||
// #############################################
|
||||
|
||||
// variant spellings in English
|
||||
|
||||
"SHTCH>{SHCH};"
|
||||
"TCH>{CH};"
|
||||
"TH>{Z};"
|
||||
"Q>{K};"
|
||||
"WH>{V};"
|
||||
"W>{V};"
|
||||
"X>{K}{S};" //+ "X<{K}{S};"
|
||||
|
||||
// Separate letters that would otherwise join
|
||||
|
||||
"SH''<{SH}({BECOMES_C};"
|
||||
"T''<{T}({BECOMES_S};"
|
||||
|
||||
"K''<{K}({BECOMES_H};"
|
||||
"S''<{S}({BECOMES_H};"
|
||||
"T''<{T}({BECOMES_H};"
|
||||
"Z''<{Z}({BECOMES_H};"
|
||||
|
||||
"Y''<{Y}({BECOMES_VOWEL};"
|
||||
|
||||
// Main letters
|
||||
|
||||
"A<>{A};"
|
||||
"B<>{B};"
|
||||
"CH<>{CH};"
|
||||
"D<>{D};"
|
||||
"E<>{E};"
|
||||
"F<>{F};"
|
||||
"G<>{G};"
|
||||
"\u00cc<>{YI};"
|
||||
"I<>{I};"
|
||||
"KH<>{KH};"
|
||||
"K<>{K};"
|
||||
"L<>{L};"
|
||||
"M<>{M};"
|
||||
"N<>{N};"
|
||||
"O<>{O};"
|
||||
"P<>{P};"
|
||||
"R<>{R};"
|
||||
"SHCH<>{SHCH};"
|
||||
"SH>{SH};" //+ "SH<{SH};"
|
||||
"{S-hacek}<>{SH};"
|
||||
"S<>{S};"
|
||||
"TS<>{TS};"
|
||||
"T<>{T};"
|
||||
"U<>{U};"
|
||||
"V<>{V};"
|
||||
//\u00cc\u00c0\u00c8\u00d2\u00d9
|
||||
"YE>{YE};" //+ "YE<{YE};"
|
||||
"\u00c8<>{YE};"
|
||||
"YO>{YO};" //+ "YO<{YO};"
|
||||
"\u00d2<>{YO};"
|
||||
"YU>{YU};" //+ "YU<{YU};"
|
||||
"\u00d9<>{YU};"
|
||||
"YA>{YA};" //+ "YA<{YA};"
|
||||
"\u00c0<>{YA};"
|
||||
"Y<>{Y};"
|
||||
"ZH<>{ZH};"
|
||||
"Z<>{Z};"
|
||||
|
||||
"H<>{HARD};"
|
||||
"\u0178<>{SOFT};"
|
||||
|
||||
// Non-russian
|
||||
|
||||
"J<>{J};"
|
||||
|
||||
// variant spellings in English
|
||||
|
||||
"C({csoft}>{S};"
|
||||
"C>{K};"
|
||||
|
||||
// #############################################
|
||||
// Duplicated Rules
|
||||
// Copy and lowercase the above rules
|
||||
// #############################################
|
||||
|
||||
// variant spellings in english
|
||||
|
||||
"shtch>{shch};"
|
||||
"tch>{ch};"
|
||||
"th>{z};"
|
||||
"q>{k};"
|
||||
"wh>{v};"
|
||||
"w>{v};"
|
||||
"x>{k}{s};" //+ "x<{k}{s};"
|
||||
|
||||
// separate letters that would otherwise join
|
||||
|
||||
"sh''<{sh}({becomes_c};"
|
||||
"t''<{t}({becomes_s};"
|
||||
|
||||
"k''<{k}({becomes_h};"
|
||||
"s''<{s}({becomes_h};"
|
||||
"t''<{t}({becomes_h};"
|
||||
"z''<{z}({becomes_h};"
|
||||
|
||||
"y''<{y}({becomes_vowel};"
|
||||
|
||||
// main letters
|
||||
|
||||
"a<>{a};"
|
||||
"b<>{b};"
|
||||
"ch<>{ch};"
|
||||
"d<>{d};"
|
||||
"e<>{e};"
|
||||
"f<>{f};"
|
||||
"g<>{g};"
|
||||
"\u00ec<>{yi};"
|
||||
"i<>{i};"
|
||||
"kh<>{kh};"
|
||||
"k<>{k};"
|
||||
"l<>{l};"
|
||||
"m<>{m};"
|
||||
"n<>{n};"
|
||||
"o<>{o};"
|
||||
"p<>{p};"
|
||||
"r<>{r};"
|
||||
"shch<>{shch};"
|
||||
"sh>{sh};" //+ "sh<{sh};"
|
||||
"{s-hacek}<>{sh};"
|
||||
"s<>{s};"
|
||||
"ts<>{ts};"
|
||||
"t<>{t};"
|
||||
"u<>{u};"
|
||||
"v<>{v};"
|
||||
//\u00ec\u00e0\u00e8\u00f2\u00f9
|
||||
"ye>{ye};" //+ "ye<{ye};"
|
||||
"\u00e8<>{ye};"
|
||||
"yo>{yo};" //+ "yo<{yo};"
|
||||
"\u00f2<>{yo};"
|
||||
"yu>{yu};" //+ "yu<{yu};"
|
||||
"\u00f9<>{yu};"
|
||||
"ya>{ya};" //+ "ya<{ya};"
|
||||
"\u00e0<>{ya};"
|
||||
"y<>{y};"
|
||||
"zh<>{zh};"
|
||||
"z<>{z};"
|
||||
|
||||
"h<>{hard};"
|
||||
"\u00ff<>{soft};"
|
||||
|
||||
// non-russian
|
||||
|
||||
"j<>{j};"
|
||||
|
||||
// variant spellings in english
|
||||
|
||||
"c({csoft}>{s};"
|
||||
"c>{k};"
|
||||
|
||||
|
||||
|
||||
// #############################################
|
||||
// End of Duplicated Rules
|
||||
// #############################################
|
||||
|
||||
//generally the last rule
|
||||
"''>;"
|
||||
//the end
|
||||
}
|
||||
}
|
||||
// This file is obsolete
|
||||
// See lcyril
|
||||
|
@ -53,8 +53,8 @@ sl_SI.txt sq.txt sq_AL.txt sr.txt sr_YU.txt \
|
||||
sv.txt sv_SE.txt th.txt th_TH.txt tr.txt \
|
||||
tr_TR.txt uk.txt uk_UA.txt vi.txt vi_VN.txt \
|
||||
zh.txt zh_CN.txt zh_HK.txt zh_TW.txt \
|
||||
$(TRANS)expcon.txt $(TRANS)kbdescl1.txt $(TRANS)larabic.txt \
|
||||
$(TRANS)ldevan.txt $(TRANS)lgreek.txt $(TRANS)lhalfwid.txt \
|
||||
$(TRANS)lhebrew.txt $(TRANS)lkana.txt $(TRANS)lrussian.txt \
|
||||
$(TRANS)quotes.txt $(TRANS)ucname.txt \
|
||||
$(TEST)default.txt $(TEST)te.txt $(TEST)te_IN.txt
|
||||
$(TRANS)fullhalf.txt $(TRANS)index.txt $(TRANS)kbdescl1.txt \
|
||||
$(TRANS)larabic.txt $(TRANS)lcyril.txt $(TRANS)ldevan.txt \
|
||||
$(TRANS)lgreek.txt $(TRANS)lhebrew.txt $(TRANS)ljamo.txt \
|
||||
$(TRANS)lkana.txt $(TRANS)quotes.txt $(TRANS)ucname.txt \
|
||||
$(TEST)default.txt $(TEST)te.txt $(TEST)te_IN.txt
|
||||
|
Loading…
Reference in New Issue
Block a user