ICU-184 .res files go to mem map files - build issue
X-SVN-Rev: 1407
This commit is contained in:
parent
9e868b81f1
commit
032a18718e
@ -1 +0,0 @@
|
||||
// This file is obsolete
|
@ -1,270 +0,0 @@
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 2000, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date Name Description
|
||||
// 01/13/2000 aliu Creation.
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
fullhalf {
|
||||
Rule {
|
||||
// Mechanically generated from the Unicode Character Database
|
||||
|
||||
// multicharacter
|
||||
|
||||
"\u30AC<>\uFF76\uFF9E;" // to KATAKANA LETTER GA
|
||||
"\u30AE<>\uFF77\uFF9E;" // to KATAKANA LETTER GI
|
||||
"\u30B0<>\uFF78\uFF9E;" // to KATAKANA LETTER GU
|
||||
"\u30B2<>\uFF79\uFF9E;" // to KATAKANA LETTER GE
|
||||
"\u30B4<>\uFF7A\uFF9E;" // to KATAKANA LETTER GO
|
||||
"\u30B6<>\uFF7B\uFF9E;" // to KATAKANA LETTER ZA
|
||||
"\u30B8<>\uFF7C\uFF9E;" // to KATAKANA LETTER ZI
|
||||
"\u30BA<>\uFF7D\uFF9E;" // to KATAKANA LETTER ZU
|
||||
"\u30BC<>\uFF7E\uFF9E;" // to KATAKANA LETTER ZE
|
||||
"\u30BE<>\uFF7F\uFF9E;" // to KATAKANA LETTER ZO
|
||||
"\u30C0<>\uFF80\uFF9E;" // to KATAKANA LETTER DA
|
||||
"\u30C2<>\uFF81\uFF9E;" // to KATAKANA LETTER DI
|
||||
"\u30C5<>\uFF82\uFF9E;" // to KATAKANA LETTER DU
|
||||
"\u30C7<>\uFF83\uFF9E;" // to KATAKANA LETTER DE
|
||||
"\u30C9<>\uFF84\uFF9E;" // to KATAKANA LETTER DO
|
||||
"\u30D0<>\uFF8A\uFF9E;" // to KATAKANA LETTER BA
|
||||
"\u30D1<>\uFF8A\uFF9F;" // to KATAKANA LETTER PA
|
||||
"\u30D3<>\uFF8B\uFF9E;" // to KATAKANA LETTER BI
|
||||
"\u30D4<>\uFF8B\uFF9F;" // to KATAKANA LETTER PI
|
||||
"\u30D6<>\uFF8C\uFF9E;" // to KATAKANA LETTER BU
|
||||
"\u30D7<>\uFF8C\uFF9F;" // to KATAKANA LETTER PU
|
||||
"\u30D9<>\uFF8D\uFF9E;" // to KATAKANA LETTER BE
|
||||
"\u30DA<>\uFF8D\uFF9F;" // to KATAKANA LETTER PE
|
||||
"\u30DC<>\uFF8E\uFF9E;" // to KATAKANA LETTER BO
|
||||
"\u30DD<>\uFF8E\uFF9F;" // to KATAKANA LETTER PO
|
||||
"\u30F4<>\uFF73\uFF9E;" // to KATAKANA LETTER VU
|
||||
"\u30F7<>\uFF9C\uFF9E;" // to KATAKANA LETTER VA
|
||||
"\u30FA<>\uFF66\uFF9E;" // to KATAKANA LETTER VO
|
||||
|
||||
// single character
|
||||
|
||||
"\uFF01<>'!';" // from FULLWIDTH EXCLAMATION MARK
|
||||
"\uFF02<>'\"';" // from FULLWIDTH QUOTATION MARK
|
||||
"\uFF03<>'#';" // from FULLWIDTH NUMBER SIGN
|
||||
"\uFF04<>'$';" // from FULLWIDTH DOLLAR SIGN
|
||||
"\uFF05<>'%';" // from FULLWIDTH PERCENT SIGN
|
||||
"\uFF06<>'&';" // from FULLWIDTH AMPERSAND
|
||||
"\uFF07<>'';" // from FULLWIDTH APOSTROPHE
|
||||
"\uFF08<>'(';" // from FULLWIDTH LEFT PARENTHESIS
|
||||
"\uFF09<>')';" // from FULLWIDTH RIGHT PARENTHESIS
|
||||
"\uFF0A<>'*';" // from FULLWIDTH ASTERISK
|
||||
"\uFF0B<>'+';" // from FULLWIDTH PLUS SIGN
|
||||
"\uFF0C<>',';" // from FULLWIDTH COMMA
|
||||
"\uFF0D<>'-';" // from FULLWIDTH HYPHEN-MINUS
|
||||
"\uFF0E<>'.';" // from FULLWIDTH FULL STOP
|
||||
"\uFF0F<>'/';" // from FULLWIDTH SOLIDUS
|
||||
"\uFF10<>'0';" // from FULLWIDTH DIGIT ZERO
|
||||
"\uFF11<>'1';" // from FULLWIDTH DIGIT ONE
|
||||
"\uFF12<>'2';" // from FULLWIDTH DIGIT TWO
|
||||
"\uFF13<>'3';" // from FULLWIDTH DIGIT THREE
|
||||
"\uFF14<>'4';" // from FULLWIDTH DIGIT FOUR
|
||||
"\uFF15<>'5';" // from FULLWIDTH DIGIT FIVE
|
||||
"\uFF16<>'6';" // from FULLWIDTH DIGIT SIX
|
||||
"\uFF17<>'7';" // from FULLWIDTH DIGIT SEVEN
|
||||
"\uFF18<>'8';" // from FULLWIDTH DIGIT EIGHT
|
||||
"\uFF19<>'9';" // from FULLWIDTH DIGIT NINE
|
||||
"\uFF1A<>':';" // from FULLWIDTH COLON
|
||||
"\uFF1B<>';';" // from FULLWIDTH SEMICOLON
|
||||
"\uFF1C<>'<';" // from FULLWIDTH LESS-THAN SIGN
|
||||
"\uFF1D<>'=';" // from FULLWIDTH EQUALS SIGN
|
||||
"\uFF1E<>'>';" // from FULLWIDTH GREATER-THAN SIGN
|
||||
"\uFF1F<>'?';" // from FULLWIDTH QUESTION MARK
|
||||
"\uFF20<>'@';" // from FULLWIDTH COMMERCIAL AT
|
||||
"\uFF21<>A;" // from FULLWIDTH LATIN CAPITAL LETTER A
|
||||
"\uFF22<>B;" // from FULLWIDTH LATIN CAPITAL LETTER B
|
||||
"\uFF23<>C;" // from FULLWIDTH LATIN CAPITAL LETTER C
|
||||
"\uFF24<>D;" // from FULLWIDTH LATIN CAPITAL LETTER D
|
||||
"\uFF25<>E;" // from FULLWIDTH LATIN CAPITAL LETTER E
|
||||
"\uFF26<>F;" // from FULLWIDTH LATIN CAPITAL LETTER F
|
||||
"\uFF27<>G;" // from FULLWIDTH LATIN CAPITAL LETTER G
|
||||
"\uFF28<>H;" // from FULLWIDTH LATIN CAPITAL LETTER H
|
||||
"\uFF29<>I;" // from FULLWIDTH LATIN CAPITAL LETTER I
|
||||
"\uFF2A<>J;" // from FULLWIDTH LATIN CAPITAL LETTER J
|
||||
"\uFF2B<>K;" // from FULLWIDTH LATIN CAPITAL LETTER K
|
||||
"\uFF2C<>L;" // from FULLWIDTH LATIN CAPITAL LETTER L
|
||||
"\uFF2D<>M;" // from FULLWIDTH LATIN CAPITAL LETTER M
|
||||
"\uFF2E<>N;" // from FULLWIDTH LATIN CAPITAL LETTER N
|
||||
"\uFF2F<>O;" // from FULLWIDTH LATIN CAPITAL LETTER O
|
||||
"\uFF30<>P;" // from FULLWIDTH LATIN CAPITAL LETTER P
|
||||
"\uFF31<>Q;" // from FULLWIDTH LATIN CAPITAL LETTER Q
|
||||
"\uFF32<>R;" // from FULLWIDTH LATIN CAPITAL LETTER R
|
||||
"\uFF33<>S;" // from FULLWIDTH LATIN CAPITAL LETTER S
|
||||
"\uFF34<>T;" // from FULLWIDTH LATIN CAPITAL LETTER T
|
||||
"\uFF35<>U;" // from FULLWIDTH LATIN CAPITAL LETTER U
|
||||
"\uFF36<>V;" // from FULLWIDTH LATIN CAPITAL LETTER V
|
||||
"\uFF37<>W;" // from FULLWIDTH LATIN CAPITAL LETTER W
|
||||
"\uFF38<>X;" // from FULLWIDTH LATIN CAPITAL LETTER X
|
||||
"\uFF39<>Y;" // from FULLWIDTH LATIN CAPITAL LETTER Y
|
||||
"\uFF3A<>Z;" // from FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
"\uFF3B<>'[';" // from FULLWIDTH LEFT SQUARE BRACKET
|
||||
"\uFF3C<>'\\';" // from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
|
||||
"\uFF3D<>']';" // from FULLWIDTH RIGHT SQUARE BRACKET
|
||||
"\uFF3E<>'^';" // from FULLWIDTH CIRCUMFLEX ACCENT
|
||||
"\uFF3F<>'_';" // from FULLWIDTH LOW LINE
|
||||
"\uFF40<>'`';" // from FULLWIDTH GRAVE ACCENT
|
||||
"\uFF41<>a;" // from FULLWIDTH LATIN SMALL LETTER A
|
||||
"\uFF42<>b;" // from FULLWIDTH LATIN SMALL LETTER B
|
||||
"\uFF43<>c;" // from FULLWIDTH LATIN SMALL LETTER C
|
||||
"\uFF44<>d;" // from FULLWIDTH LATIN SMALL LETTER D
|
||||
"\uFF45<>e;" // from FULLWIDTH LATIN SMALL LETTER E
|
||||
"\uFF46<>f;" // from FULLWIDTH LATIN SMALL LETTER F
|
||||
"\uFF47<>g;" // from FULLWIDTH LATIN SMALL LETTER G
|
||||
"\uFF48<>h;" // from FULLWIDTH LATIN SMALL LETTER H
|
||||
"\uFF49<>i;" // from FULLWIDTH LATIN SMALL LETTER I
|
||||
"\uFF4A<>j;" // from FULLWIDTH LATIN SMALL LETTER J
|
||||
"\uFF4B<>k;" // from FULLWIDTH LATIN SMALL LETTER K
|
||||
"\uFF4C<>l;" // from FULLWIDTH LATIN SMALL LETTER L
|
||||
"\uFF4D<>m;" // from FULLWIDTH LATIN SMALL LETTER M
|
||||
"\uFF4E<>n;" // from FULLWIDTH LATIN SMALL LETTER N
|
||||
"\uFF4F<>o;" // from FULLWIDTH LATIN SMALL LETTER O
|
||||
"\uFF50<>p;" // from FULLWIDTH LATIN SMALL LETTER P
|
||||
"\uFF51<>q;" // from FULLWIDTH LATIN SMALL LETTER Q
|
||||
"\uFF52<>r;" // from FULLWIDTH LATIN SMALL LETTER R
|
||||
"\uFF53<>s;" // from FULLWIDTH LATIN SMALL LETTER S
|
||||
"\uFF54<>t;" // from FULLWIDTH LATIN SMALL LETTER T
|
||||
"\uFF55<>u;" // from FULLWIDTH LATIN SMALL LETTER U
|
||||
"\uFF56<>v;" // from FULLWIDTH LATIN SMALL LETTER V
|
||||
"\uFF57<>w;" // from FULLWIDTH LATIN SMALL LETTER W
|
||||
"\uFF58<>x;" // from FULLWIDTH LATIN SMALL LETTER X
|
||||
"\uFF59<>y;" // from FULLWIDTH LATIN SMALL LETTER Y
|
||||
"\uFF5A<>z;" // from FULLWIDTH LATIN SMALL LETTER Z
|
||||
"\uFF5B<>'{';" // from FULLWIDTH LEFT CURLY BRACKET
|
||||
"\uFF5C<>'|';" // from FULLWIDTH VERTICAL LINE
|
||||
"\uFF5D<>'}';" // from FULLWIDTH RIGHT CURLY BRACKET
|
||||
"\uFF5E<>'~';" // from FULLWIDTH TILDE
|
||||
"\u3002<>\uFF61;" // to HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
"\u300C<>\uFF62;" // to HALFWIDTH LEFT CORNER BRACKET
|
||||
"\u300D<>\uFF63;" // to HALFWIDTH RIGHT CORNER BRACKET
|
||||
"\u3001<>\uFF64;" // to HALFWIDTH IDEOGRAPHIC COMMA
|
||||
"\u30FB<>\uFF65;" // to HALFWIDTH KATAKANA MIDDLE DOT
|
||||
"\u30F2<>\uFF66;" // to HALFWIDTH KATAKANA LETTER WO
|
||||
"\u30A1<>\uFF67;" // to HALFWIDTH KATAKANA LETTER SMALL A
|
||||
"\u30A3<>\uFF68;" // to HALFWIDTH KATAKANA LETTER SMALL I
|
||||
"\u30A5<>\uFF69;" // to HALFWIDTH KATAKANA LETTER SMALL U
|
||||
"\u30A7<>\uFF6A;" // to HALFWIDTH KATAKANA LETTER SMALL E
|
||||
"\u30A9<>\uFF6B;" // to HALFWIDTH KATAKANA LETTER SMALL O
|
||||
"\u30E3<>\uFF6C;" // to HALFWIDTH KATAKANA LETTER SMALL YA
|
||||
"\u30E5<>\uFF6D;" // to HALFWIDTH KATAKANA LETTER SMALL YU
|
||||
"\u30E7<>\uFF6E;" // to HALFWIDTH KATAKANA LETTER SMALL YO
|
||||
"\u30C3<>\uFF6F;" // to HALFWIDTH KATAKANA LETTER SMALL TU
|
||||
"\u30FC<>\uFF70;" // to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
"\u30A2<>\uFF71;" // to HALFWIDTH KATAKANA LETTER A
|
||||
"\u30A4<>\uFF72;" // to HALFWIDTH KATAKANA LETTER I
|
||||
"\u30A6<>\uFF73;" // to HALFWIDTH KATAKANA LETTER U
|
||||
"\u30A8<>\uFF74;" // to HALFWIDTH KATAKANA LETTER E
|
||||
"\u30AA<>\uFF75;" // to HALFWIDTH KATAKANA LETTER O
|
||||
"\u30AB<>\uFF76;" // to HALFWIDTH KATAKANA LETTER KA
|
||||
"\u30AD<>\uFF77;" // to HALFWIDTH KATAKANA LETTER KI
|
||||
"\u30AF<>\uFF78;" // to HALFWIDTH KATAKANA LETTER KU
|
||||
"\u30B1<>\uFF79;" // to HALFWIDTH KATAKANA LETTER KE
|
||||
"\u30B3<>\uFF7A;" // to HALFWIDTH KATAKANA LETTER KO
|
||||
"\u30B5<>\uFF7B;" // to HALFWIDTH KATAKANA LETTER SA
|
||||
"\u30B7<>\uFF7C;" // to HALFWIDTH KATAKANA LETTER SI
|
||||
"\u30B9<>\uFF7D;" // to HALFWIDTH KATAKANA LETTER SU
|
||||
"\u30BB<>\uFF7E;" // to HALFWIDTH KATAKANA LETTER SE
|
||||
"\u30BD<>\uFF7F;" // to HALFWIDTH KATAKANA LETTER SO
|
||||
"\u30BF<>\uFF80;" // to HALFWIDTH KATAKANA LETTER TA
|
||||
"\u30C1<>\uFF81;" // to HALFWIDTH KATAKANA LETTER TI
|
||||
"\u30C4<>\uFF82;" // to HALFWIDTH KATAKANA LETTER TU
|
||||
"\u30C6<>\uFF83;" // to HALFWIDTH KATAKANA LETTER TE
|
||||
"\u30C8<>\uFF84;" // to HALFWIDTH KATAKANA LETTER TO
|
||||
"\u30CA<>\uFF85;" // to HALFWIDTH KATAKANA LETTER NA
|
||||
"\u30CB<>\uFF86;" // to HALFWIDTH KATAKANA LETTER NI
|
||||
"\u30CC<>\uFF87;" // to HALFWIDTH KATAKANA LETTER NU
|
||||
"\u30CD<>\uFF88;" // to HALFWIDTH KATAKANA LETTER NE
|
||||
"\u30CE<>\uFF89;" // to HALFWIDTH KATAKANA LETTER NO
|
||||
"\u30CF<>\uFF8A;" // to HALFWIDTH KATAKANA LETTER HA
|
||||
"\u30D2<>\uFF8B;" // to HALFWIDTH KATAKANA LETTER HI
|
||||
"\u30D5<>\uFF8C;" // to HALFWIDTH KATAKANA LETTER HU
|
||||
"\u30D8<>\uFF8D;" // to HALFWIDTH KATAKANA LETTER HE
|
||||
"\u30DB<>\uFF8E;" // to HALFWIDTH KATAKANA LETTER HO
|
||||
"\u30DE<>\uFF8F;" // to HALFWIDTH KATAKANA LETTER MA
|
||||
"\u30DF<>\uFF90;" // to HALFWIDTH KATAKANA LETTER MI
|
||||
"\u30E0<>\uFF91;" // to HALFWIDTH KATAKANA LETTER MU
|
||||
"\u30E1<>\uFF92;" // to HALFWIDTH KATAKANA LETTER ME
|
||||
"\u30E2<>\uFF93;" // to HALFWIDTH KATAKANA LETTER MO
|
||||
"\u30E4<>\uFF94;" // to HALFWIDTH KATAKANA LETTER YA
|
||||
"\u30E6<>\uFF95;" // to HALFWIDTH KATAKANA LETTER YU
|
||||
"\u30E8<>\uFF96;" // to HALFWIDTH KATAKANA LETTER YO
|
||||
"\u30E9<>\uFF97;" // to HALFWIDTH KATAKANA LETTER RA
|
||||
"\u30EA<>\uFF98;" // to HALFWIDTH KATAKANA LETTER RI
|
||||
"\u30EB<>\uFF99;" // to HALFWIDTH KATAKANA LETTER RU
|
||||
"\u30EC<>\uFF9A;" // to HALFWIDTH KATAKANA LETTER RE
|
||||
"\u30ED<>\uFF9B;" // to HALFWIDTH KATAKANA LETTER RO
|
||||
"\u30EF<>\uFF9C;" // to HALFWIDTH KATAKANA LETTER WA
|
||||
"\u30F3<>\uFF9D;" // to HALFWIDTH KATAKANA LETTER N
|
||||
"\u3099<>\uFF9E;" // to HALFWIDTH KATAKANA VOICED SOUND MARK
|
||||
"\u309A<>\uFF9F;" // to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
"\u1160<>\uFFA0;" // to HALFWIDTH HANGUL FILLER
|
||||
"\u1100<>\uFFA1;" // to HALFWIDTH HANGUL LETTER KIYEOK
|
||||
"\u1101<>\uFFA2;" // to HALFWIDTH HANGUL LETTER SSANGKIYEOK
|
||||
"\u11AA<>\uFFA3;" // to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
|
||||
"\u1102<>\uFFA4;" // to HALFWIDTH HANGUL LETTER NIEUN
|
||||
"\u11AC<>\uFFA5;" // to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
|
||||
"\u11AD<>\uFFA6;" // to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
|
||||
"\u1103<>\uFFA7;" // to HALFWIDTH HANGUL LETTER TIKEUT
|
||||
"\u1104<>\uFFA8;" // to HALFWIDTH HANGUL LETTER SSANGTIKEUT
|
||||
"\u1105<>\uFFA9;" // to HALFWIDTH HANGUL LETTER RIEUL
|
||||
"\u11B0<>\uFFAA;" // to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
|
||||
"\u11B1<>\uFFAB;" // to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
|
||||
"\u11B2<>\uFFAC;" // to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
|
||||
"\u11B3<>\uFFAD;" // to HALFWIDTH HANGUL LETTER RIEUL-SIOS
|
||||
"\u11B4<>\uFFAE;" // to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
|
||||
"\u11B5<>\uFFAF;" // to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
|
||||
"\u111A<>\uFFB0;" // to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
|
||||
"\u1106<>\uFFB1;" // to HALFWIDTH HANGUL LETTER MIEUM
|
||||
"\u1107<>\uFFB2;" // to HALFWIDTH HANGUL LETTER PIEUP
|
||||
"\u1108<>\uFFB3;" // to HALFWIDTH HANGUL LETTER SSANGPIEUP
|
||||
"\u1121<>\uFFB4;" // to HALFWIDTH HANGUL LETTER PIEUP-SIOS
|
||||
"\u1109<>\uFFB5;" // to HALFWIDTH HANGUL LETTER SIOS
|
||||
"\u110A<>\uFFB6;" // to HALFWIDTH HANGUL LETTER SSANGSIOS
|
||||
"\u110B<>\uFFB7;" // to HALFWIDTH HANGUL LETTER IEUNG
|
||||
"\u110C<>\uFFB8;" // to HALFWIDTH HANGUL LETTER CIEUC
|
||||
"\u110D<>\uFFB9;" // to HALFWIDTH HANGUL LETTER SSANGCIEUC
|
||||
"\u110E<>\uFFBA;" // to HALFWIDTH HANGUL LETTER CHIEUCH
|
||||
"\u110F<>\uFFBB;" // to HALFWIDTH HANGUL LETTER KHIEUKH
|
||||
"\u1110<>\uFFBC;" // to HALFWIDTH HANGUL LETTER THIEUTH
|
||||
"\u1111<>\uFFBD;" // to HALFWIDTH HANGUL LETTER PHIEUPH
|
||||
"\u1112<>\uFFBE;" // to HALFWIDTH HANGUL LETTER HIEUH
|
||||
"\u1161<>\uFFC2;" // to HALFWIDTH HANGUL LETTER A
|
||||
"\u1162<>\uFFC3;" // to HALFWIDTH HANGUL LETTER AE
|
||||
"\u1163<>\uFFC4;" // to HALFWIDTH HANGUL LETTER YA
|
||||
"\u1164<>\uFFC5;" // to HALFWIDTH HANGUL LETTER YAE
|
||||
"\u1165<>\uFFC6;" // to HALFWIDTH HANGUL LETTER EO
|
||||
"\u1166<>\uFFC7;" // to HALFWIDTH HANGUL LETTER E
|
||||
"\u1167<>\uFFCA;" // to HALFWIDTH HANGUL LETTER YEO
|
||||
"\u1168<>\uFFCB;" // to HALFWIDTH HANGUL LETTER YE
|
||||
"\u1169<>\uFFCC;" // to HALFWIDTH HANGUL LETTER O
|
||||
"\u116A<>\uFFCD;" // to HALFWIDTH HANGUL LETTER WA
|
||||
"\u116B<>\uFFCE;" // to HALFWIDTH HANGUL LETTER WAE
|
||||
"\u116C<>\uFFCF;" // to HALFWIDTH HANGUL LETTER OE
|
||||
"\u116D<>\uFFD2;" // to HALFWIDTH HANGUL LETTER YO
|
||||
"\u116E<>\uFFD3;" // to HALFWIDTH HANGUL LETTER U
|
||||
"\u116F<>\uFFD4;" // to HALFWIDTH HANGUL LETTER WEO
|
||||
"\u1170<>\uFFD5;" // to HALFWIDTH HANGUL LETTER WE
|
||||
"\u1171<>\uFFD6;" // to HALFWIDTH HANGUL LETTER WI
|
||||
"\u1172<>\uFFD7;" // to HALFWIDTH HANGUL LETTER YU
|
||||
"\u1173<>\uFFDA;" // to HALFWIDTH HANGUL LETTER EU
|
||||
"\u1174<>\uFFDB;" // to HALFWIDTH HANGUL LETTER YI
|
||||
"\u1175<>\uFFDC;" // to HALFWIDTH HANGUL LETTER I
|
||||
"\uFFE0<>'\u00a2';" // from FULLWIDTH CENT SIGN
|
||||
"\uFFE1<>'\u00a3';" // from FULLWIDTH POUND SIGN
|
||||
"\uFFE2<>'\u00ac';" // from FULLWIDTH NOT SIGN
|
||||
"\uFFE3<>' '\u0304;" // from FULLWIDTH MACRON
|
||||
"\uFFE4<>'\u00a6';" // from FULLWIDTH BROKEN BAR
|
||||
"\uFFE5<>'\u00a5';" // from FULLWIDTH YEN SIGN
|
||||
"\uFFE6<>\u20A9;" // from FULLWIDTH WON SIGN
|
||||
"\u2502<>\uFFE8;" // to HALFWIDTH FORMS LIGHT VERTICAL
|
||||
"\u2190<>\uFFE9;" // to HALFWIDTH LEFTWARDS ARROW
|
||||
"\u2191<>\uFFEA;" // to HALFWIDTH UPWARDS ARROW
|
||||
"\u2192<>\uFFEB;" // to HALFWIDTH RIGHTWARDS ARROW
|
||||
"\u2193<>\uFFEC;" // to HALFWIDTH DOWNWARDS ARROW
|
||||
"\u25A0<>\uFFED;" // to HALFWIDTH BLACK SQUARE
|
||||
"\u25CB<>\uFFEE;" // to HALFWIDTH WHITE CIRCLE
|
||||
}
|
||||
}
|
@ -1,46 +0,0 @@
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 2000, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date Name Description
|
||||
// 01/13/2000 aliu Creation.
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
index {
|
||||
// Map transliterator names to resource file names. This
|
||||
// mapping is required because transliterator names typically
|
||||
// exceed the limits of 8.3 file names.
|
||||
|
||||
RuleBasedTransliteratorIDs {
|
||||
// This is an n x 3 array of strings that serves as an index
|
||||
// to the system rule-based transliterator resource bundle
|
||||
// data files. If RBT files are installed or removed from the
|
||||
// system this table must be updated. For each of the n rows,
|
||||
// the first item is the ID of the forward transliterator for
|
||||
// the rule. The second item is the ID of the reverse
|
||||
// transliterator for the rule. Some rule files only contain
|
||||
// forward direction rules; for those, the second item is a
|
||||
// zero-length string. The third item is the name of the
|
||||
// resource bundle file, a string of 8 or fewer lowercase
|
||||
// letters or digits. This file will be sought in the data
|
||||
// directory within the subdirectory "translit". IDs must
|
||||
// have the form "From-To" or "SingleName" to work properly
|
||||
// with the Transliterator code.
|
||||
|
||||
// Basic language rules
|
||||
{ "Fullwidth-Halfwidth", "Halfwidth-Fullwidth", "fullhalf" }
|
||||
{ "Latin-Arabic", "Arabic-Latin", "larabic" }
|
||||
{ "Latin-Cyrillic", "Cyrillic-Latin", "lcyril" }
|
||||
{ "Latin-Devanagari", "Devanagari-Latin", "ldevan" }
|
||||
{ "Latin-Greek", "Greek-Latin", "lgreek" }
|
||||
{ "Latin-Hebrew", "Hebrew-Latin", "lhebrew" }
|
||||
{ "Latin-Jamo", "Jamo-Latin", "ljamo" }
|
||||
{ "Latin-Kana", "Kana-Latin", "lkana" }
|
||||
|
||||
// Other miscellaneous rules
|
||||
{ "StraightQuotes-CurlyQuotes", "CurlyQuotes-StraightQuotes",
|
||||
"quotes" }
|
||||
{ "KeyboardEscape-Latin1", "", "kbdescl1" }
|
||||
{ "UnicodeName-UnicodeChar", "", "ucname" }
|
||||
}
|
||||
}
|
@ -1,128 +0,0 @@
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (C) 1999, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date Name Description
|
||||
// 11/17/99 aliu Creation.
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// KeyboardEscape-Latin1
|
||||
|
||||
kbdescl1 {
|
||||
Rule {
|
||||
"esc='';"
|
||||
"grave=`;"
|
||||
"acute='';"
|
||||
"hat=^;"
|
||||
"tilde=~;"
|
||||
"umlaut=:;"
|
||||
"ring=.;"
|
||||
"cedilla=,;"
|
||||
"slash=/;"
|
||||
"super=^;"
|
||||
|
||||
// Make keyboard entry of {esc} possible
|
||||
// and of backslash
|
||||
"'\\'{esc}>{esc};"
|
||||
"'\\\\'>'\\';"
|
||||
|
||||
// Long keys
|
||||
"cur{esc}>\u00A4;"
|
||||
"sec{esc}>\u00A7;"
|
||||
"not{esc}>\u00AC;"
|
||||
"mul{esc}>\u00D7;"
|
||||
"div{esc}>\u00F7;"
|
||||
|
||||
"\\ {esc}>\u00A0;" // non-breaking space
|
||||
"!{esc}>\u00A1;" // inverted exclamation
|
||||
"c/{esc}>\u00A2;" // cent sign
|
||||
"lb{esc}>\u00A3;" // pound sign
|
||||
"'|'{esc}>\u00A6;" // broken vertical bar
|
||||
":{esc}>\u00A8;" // umlaut
|
||||
"{super}a{esc}>\u00AA;" // feminine ordinal
|
||||
"'<<'{esc}>\u00AB;"
|
||||
"r{esc}>\u00AE;"
|
||||
"--{esc}>\u00AF;"
|
||||
"-{esc}>\u00AD;"
|
||||
"+-{esc}>\u00B1;"
|
||||
"{super}2{esc}>\u00B2;"
|
||||
"{super}3{esc}>\u00B3;"
|
||||
"{acute}{esc}>\u00B4;"
|
||||
"m{esc}>\u00B5;"
|
||||
"para{esc}>\u00B6;"
|
||||
"dot{esc}>\u00B7;"
|
||||
"{cedilla}{esc}>\u00B8;"
|
||||
"{super}1{esc}>\u00B9;"
|
||||
"{super}o{esc}>\u00BA;" // masculine ordinal
|
||||
"'>>'{esc}>\u00BB;"
|
||||
"1/4{esc}>\u00BC;"
|
||||
"1/2{esc}>\u00BD;"
|
||||
"3/4{esc}>\u00BE;"
|
||||
"?{esc}>\u00BF;"
|
||||
"A{grave}{esc}>\u00C0;"
|
||||
"A{acute}{esc}>\u00C1;"
|
||||
"A{hat}{esc}>\u00C2;"
|
||||
"A{tilde}{esc}>\u00C3;"
|
||||
"A{umlaut}{esc}>\u00C4;"
|
||||
"A{ring}{esc}>\u00C5;"
|
||||
"AE{esc}>\u00C6;"
|
||||
"C{cedilla}{esc}>\u00C7;"
|
||||
"E{grave}{esc}>\u00C8;"
|
||||
"E{acute}{esc}>\u00C9;"
|
||||
"E{hat}{esc}>\u00CA;"
|
||||
"E{umlaut}{esc}>\u00CB;"
|
||||
"I{grave}{esc}>\u00CC;"
|
||||
"I{acute}{esc}>\u00CD;"
|
||||
"I{hat}{esc}>\u00CE;"
|
||||
"I{umlaut}{esc}>\u00CF;"
|
||||
"D-{esc}>\u00D0;"
|
||||
"N{tilde}{esc}>\u00D1;"
|
||||
"O{grave}{esc}>\u00D2;"
|
||||
"O{acute}{esc}>\u00D3;"
|
||||
"O{hat}{esc}>\u00D4;"
|
||||
"O{tilde}{esc}>\u00D5;"
|
||||
"O{umlaut}{esc}>\u00D6;"
|
||||
"O{slash}{esc}>\u00D8;"
|
||||
"U{grave}{esc}>\u00D9;"
|
||||
"U{acute}{esc}>\u00DA;"
|
||||
"U{hat}{esc}>\u00DB;"
|
||||
"U{umlaut}{esc}>\u00DC;"
|
||||
"Y{acute}{esc}>\u00DD;"
|
||||
"TH{esc}>\u00DE;"
|
||||
"ss{esc}>\u00DF;"
|
||||
"a{grave}{esc}>\u00E0;"
|
||||
"a{acute}{esc}>\u00E1;"
|
||||
"a{hat}{esc}>\u00E2;"
|
||||
"a{tilde}{esc}>\u00E3;"
|
||||
"a{umlaut}{esc}>\u00E4;"
|
||||
"a{ring}{esc}>\u00E5;"
|
||||
"ae{esc}>\u00E6;"
|
||||
"c{cedilla}{esc}>\u00E7;"
|
||||
"c{esc}>\u00A9;" // copyright - after c{cedilla}
|
||||
"e{grave}{esc}>\u00E8;"
|
||||
"e{acute}{esc}>\u00E9;"
|
||||
"e{hat}{esc}>\u00EA;"
|
||||
"e{umlaut}{esc}>\u00EB;"
|
||||
"i{grave}{esc}>\u00EC;"
|
||||
"i{acute}{esc}>\u00ED;"
|
||||
"i{hat}{esc}>\u00EE;"
|
||||
"i{umlaut}{esc}>\u00EF;"
|
||||
"d-{esc}>\u00F0;"
|
||||
"n{tilde}{esc}>\u00F1;"
|
||||
"o{grave}{esc}>\u00F2;"
|
||||
"o{acute}{esc}>\u00F3;"
|
||||
"o{hat}{esc}>\u00F4;"
|
||||
"o{tilde}{esc}>\u00F5;"
|
||||
"o{umlaut}{esc}>\u00F6;"
|
||||
"o{slash}{esc}>\u00F8;"
|
||||
"o{esc}>\u00B0;"
|
||||
"u{grave}{esc}>\u00F9;"
|
||||
"u{acute}{esc}>\u00FA;"
|
||||
"u{hat}{esc}>\u00FB;"
|
||||
"u{umlaut}{esc}>\u00FC;"
|
||||
"y{acute}{esc}>\u00FD;"
|
||||
"y{esc}>\u00A5;" // yen sign
|
||||
"th{esc}>\u00FE;"
|
||||
// masked by the earlier "ss{esc}>\u00DF;" rule (identical key), so kept commented out: "ss{esc}>\u00FF;"
|
||||
}
|
||||
}
|
@ -1,240 +0,0 @@
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (C) 1999, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date Name Description
|
||||
// 11/17/99 aliu Creation.
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Latin-Arabic
|
||||
|
||||
larabic {
|
||||
Rule {
|
||||
// To Do: finish adding shadda, add sokoon
|
||||
|
||||
"alefmadda=\u0622;"
|
||||
"alefuhamza=\u0623;"
|
||||
"wauuhamza=\u0624;"
|
||||
"alefhamza=\u0625;"
|
||||
"yehuhamza=\u0626;"
|
||||
"alef=\u0627;"
|
||||
"beh=\u0628;"
|
||||
"tehmarbuta=\u0629;"
|
||||
"teh=\u062A;"
|
||||
"theh=\u062B;"
|
||||
"geem=\u062C;"
|
||||
"hah=\u062D;"
|
||||
"kha=\u062E;"
|
||||
"dal=\u062F;"
|
||||
"dhal=\u0630;"
|
||||
"reh=\u0631;"
|
||||
"zain=\u0632;"
|
||||
"seen=\u0633;"
|
||||
"sheen=\u0634;"
|
||||
"sad=\u0635;"
|
||||
"dad=\u0636;"
|
||||
"tah=\u0637;"
|
||||
"zah=\u0638;"
|
||||
"ein=\u0639;"
|
||||
"ghein=\u063A;"
|
||||
"feh=\u0641;"
|
||||
"qaaf=\u0642;"
|
||||
"kaf=\u0643;"
|
||||
"lam=\u0644;"
|
||||
"meem=\u0645;"
|
||||
"noon=\u0646;"
|
||||
"heh=\u0647;"
|
||||
"wau=\u0648;"
|
||||
"yehmaqsura=\u0649;"
|
||||
"yeh=\u064A;"
|
||||
"peh=\u06A4;"
|
||||
|
||||
"hamza=\u0621;"
|
||||
"fathatein=\u064B;"
|
||||
"dammatein=\u064C;"
|
||||
"kasratein=\u064D;"
|
||||
"fatha=\u064E;"
|
||||
"damma=\u064F;"
|
||||
"kasra=\u0650;"
|
||||
"shadda=\u0651;"
|
||||
"sokoon=\u0652;"
|
||||
|
||||
// convert English to Arabic
|
||||
"Arabic>"
|
||||
"\u062a\u062a\u0645\u062a\u0639' '"
|
||||
"\u0627\u0644\u0644\u063a\u0629' '"
|
||||
"\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629' '"
|
||||
"\u0628\u0628\u0646\u0638\u0645' '"
|
||||
"\u0643\u062a\u0627\u0628\u0628\u064a\u0629' '"
|
||||
"\u062c\u0645\u064a\u0644\u0629;"
|
||||
|
||||
"ai>{alefmadda};"
|
||||
"ae>{alefuhamza};"
|
||||
"ao>{alefhamza};"
|
||||
"aa>{alef};"
|
||||
"an>{fathatein};"
|
||||
"a>{fatha};"
|
||||
"b>{beh};"
|
||||
"c>{kaf};"
|
||||
"{dhal})dh>{shadda};"
|
||||
"dh>{dhal};"
|
||||
"{dad})dd>{shadda};"
|
||||
"dd>{dad};"
|
||||
"{dal})d>{shadda};"
|
||||
"d>{dal};"
|
||||
"e>{ein};"
|
||||
"f>{feh};"
|
||||
"gh>{ghein};"
|
||||
"g>{geem};"
|
||||
"hh>{hah};"
|
||||
"h>{heh};"
|
||||
"ii>{kasratein};"
|
||||
"i>{kasra};"
|
||||
"j>{geem};"
|
||||
"kh>{kha};"
|
||||
"k>{kaf};"
|
||||
"l>{lam};"
|
||||
"m>{meem};"
|
||||
"n>{noon};"
|
||||
"o>{hamza};"
|
||||
"p>{peh};"
|
||||
"q>{qaaf};"
|
||||
"r>{reh};"
|
||||
"sh>{sheen};"
|
||||
"ss>{sad};"
|
||||
"s>{seen};"
|
||||
"th>{theh};"
|
||||
"tm>{tehmarbuta};"
|
||||
"tt>{tah};"
|
||||
"t>{teh};"
|
||||
"uu>{dammatein};"
|
||||
"u>{damma};"
|
||||
"v>{beh};"
|
||||
"we>{wauuhamza};"
|
||||
"w>{wau};"
|
||||
"x>{kaf}{shadda}{seen};"
|
||||
"ye>{yehuhamza};"
|
||||
"ym>{yehmaqsura};"
|
||||
"y>{yeh};"
|
||||
"zz>{zah};"
|
||||
"z>{zain};"
|
||||
|
||||
"0>\u0660;"+ // Arabic digit 0
|
||||
"1>\u0661;"+ // Arabic digit 1
|
||||
"2>\u0662;"+ // Arabic digit 2
|
||||
"3>\u0663;"+ // Arabic digit 3
|
||||
"4>\u0664;"+ // Arabic digit 4
|
||||
"5>\u0665;"+ // Arabic digit 5
|
||||
"6>\u0666;"+ // Arabic digit 6
|
||||
"7>\u0667;"+ // Arabic digit 7
|
||||
"8>\u0668;"+ // Arabic digit 8
|
||||
"9>\u0669;"+ // Arabic digit 9
|
||||
"%>\u066A;"+ // Arabic %
|
||||
".>\u066B;"+ // Arabic decimal separator
|
||||
",>\u066C;"+ // Arabic thousands separator
|
||||
"*>\u066D;"+ // Arabic five-pointed star
|
||||
|
||||
"`0>0;"+ // Escaped forms of the above
|
||||
"`1>1;"
|
||||
"`2>2;"
|
||||
"`3>3;"
|
||||
"`4>4;"
|
||||
"`5>5;"
|
||||
"`6>6;"
|
||||
"`7>7;"
|
||||
"`8>8;"
|
||||
"`9>9;"
|
||||
"`%>%;"
|
||||
"`.>.;"
|
||||
"`,>,;"
|
||||
"`*>*;"
|
||||
"``>`;"
|
||||
|
||||
"''>;"
|
||||
|
||||
// now Arabic to English
|
||||
|
||||
"''ai<a){alefmadda};"
|
||||
"ai<{alefmadda};"
|
||||
"''ae<a){alefuhamza};"
|
||||
"ae<{alefuhamza};"
|
||||
"''ao<a){alefhamza};"
|
||||
"ao<{alefhamza};"
|
||||
"''aa<a){alef};"
|
||||
"aa<{alef};"
|
||||
"''an<a){fathatein};"
|
||||
"an<{fathatein};"
|
||||
"''a<a){fatha};"
|
||||
"a<{fatha};"
|
||||
"b<{beh};"
|
||||
"''dh<d){dhal};"
|
||||
"dh<{dhal};"
|
||||
"''dd<d){dad};"
|
||||
"dd<{dad};"
|
||||
"''d<d){dal};"
|
||||
"d<{dal};"
|
||||
"''e<a){ein};"
|
||||
"''e<w){ein};"
|
||||
"''e<y){ein};"
|
||||
"e<{ein};"
|
||||
"f<{feh};"
|
||||
"gh<{ghein};"
|
||||
"''hh<d){hah};"
|
||||
"''hh<t){hah};"
|
||||
"''hh<k){hah};"
|
||||
"''hh<s){hah};"
|
||||
"hh<{hah};"
|
||||
"''h<d){heh};"
|
||||
"''h<t){heh};"
|
||||
"''h<k){heh};"
|
||||
"''h<s){heh};"
|
||||
"h<{heh};"
|
||||
"''ii<i){kasratein};"
|
||||
"ii<{kasratein};"
|
||||
"''i<i){kasra};"
|
||||
"i<{kasra};"
|
||||
"j<{geem};"
|
||||
"kh<{kha};"
|
||||
"x<{kaf}{shadda}{seen};"
|
||||
"k<{kaf};"
|
||||
"l<{lam};"
|
||||
"''m<y){meem};"
|
||||
"''m<t){meem};"
|
||||
"m<{meem};"
|
||||
"n<{noon};"
|
||||
"''o<a){hamza};"
|
||||
"o<{hamza};"
|
||||
"p<{peh};"
|
||||
"q<{qaaf};"
|
||||
"r<{reh};"
|
||||
"sh<{sheen};"
|
||||
"''ss<s){sad};"
|
||||
"ss<{sad};"
|
||||
"''s<s){seen};"
|
||||
"s<{seen};"
|
||||
"th<{theh};"
|
||||
"tm<{tehmarbuta};"
|
||||
"''tt<t){tah};"
|
||||
"tt<{tah};"
|
||||
"''t<t){teh};"
|
||||
"t<{teh};"
|
||||
"''uu<u){dammatein};"
|
||||
"uu<{dammatein};"
|
||||
"''u<u){damma};"
|
||||
"u<{damma};"
|
||||
"we<{wauuhamza};"
|
||||
"w<{wau};"
|
||||
"ye<{yehuhamza};"
|
||||
"ym<{yehmaqsura};"
|
||||
"''y<y){yeh};"
|
||||
"y<{yeh};"
|
||||
"''zz<z){zah};"
|
||||
"zz<{zah};"
|
||||
"''z<z){zain};"
|
||||
"z<{zain};"
|
||||
|
||||
"dh<dh){shadda};"
|
||||
"dd<dd){shadda};"
|
||||
"''d<d){shadda};"
|
||||
}
|
||||
}
|
@ -1,307 +0,0 @@
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (C) 1999, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date Name Description
|
||||
// 11/17/99 aliu Creation.
|
||||
// 12/10/99 aliu Fix case handling.
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
lcyril {
|
||||
Rule {
|
||||
//* /* This rule set is designed to be a general Latin-Cyrillic
|
||||
//* transliteration. The standard Russian transliterations
|
||||
//* are generally used for the letters from Russian,
|
||||
//* with additional Cyrillic characters given consistent
|
||||
//* mappings.
|
||||
//* */
|
||||
|
||||
"S-hacek=\u0160;"
|
||||
"s-hacek=\u0161;"
|
||||
|
||||
"YO=\u0401;"
|
||||
"J=\u0408;"
|
||||
"A=\u0410;"
|
||||
"B=\u0411;"
|
||||
"V=\u0412;"
|
||||
"G=\u0413;"
|
||||
"D=\u0414;"
|
||||
"YE=\u0415;"
|
||||
"ZH=\u0416;"
|
||||
"Z=\u0417;"
|
||||
"YI=\u0418;"
|
||||
"Y=\u0419;"
|
||||
"K=\u041A;"
|
||||
"L=\u041B;"
|
||||
"M=\u041C;"
|
||||
"N=\u041D;"
|
||||
"O=\u041E;"
|
||||
"P=\u041F;"
|
||||
"R=\u0420;"
|
||||
"S=\u0421;"
|
||||
"T=\u0422;"
|
||||
"U=\u0423;"
|
||||
"F=\u0424;"
|
||||
"KH=\u0425;"
|
||||
"TS=\u0426;"
|
||||
"CH=\u0427;"
|
||||
"SH=\u0428;"
|
||||
"SHCH=\u0429;"
|
||||
"HARD=\u042A;"
|
||||
"I=\u042B;"
|
||||
"SOFT=\u042C;"
|
||||
"E=\u042D;"
|
||||
"YU=\u042E;"
|
||||
"YA=\u042F;"
|
||||
|
||||
// Lowercase
|
||||
|
||||
"a=\u0430;"
|
||||
"b=\u0431;"
|
||||
"v=\u0432;"
|
||||
"g=\u0433;"
|
||||
"d=\u0434;"
|
||||
"ye=\u0435;"
|
||||
"zh=\u0436;"
|
||||
"z=\u0437;"
|
||||
"yi=\u0438;"
|
||||
"y=\u0439;"
|
||||
"k=\u043a;"
|
||||
"l=\u043b;"
|
||||
"m=\u043c;"
|
||||
"n=\u043d;"
|
||||
"o=\u043e;"
|
||||
"p=\u043f;"
|
||||
"r=\u0440;"
|
||||
"s=\u0441;"
|
||||
"t=\u0442;"
|
||||
"u=\u0443;"
|
||||
"f=\u0444;"
|
||||
"kh=\u0445;"
|
||||
"ts=\u0446;"
|
||||
"ch=\u0447;"
|
||||
"sh=\u0448;"
|
||||
"shch=\u0449;"
|
||||
"hard=\u044a;"
|
||||
"i=\u044b;"
|
||||
"soft=\u044c;"
|
||||
"e=\u044d;"
|
||||
"yu=\u044e;"
|
||||
"ya=\u044f;"
|
||||
|
||||
"yo=\u0451;"
|
||||
"j=\u0458;"
|
||||
|
||||
// variables
|
||||
// some are duplicated so lowercasing works
|
||||
|
||||
"csoft=[eiyEIY];"
|
||||
"CSOFT=[eiyEIY];"
|
||||
|
||||
"BECOMES_H=[{HARD}{hard}];"
|
||||
"becomes_h=[{HARD}{hard}];"
|
||||
|
||||
"BECOMES_S=[{S}{s}];"
|
||||
"becomes_s=[{S}{s}];"
|
||||
|
||||
"BECOMES_C=[{CH}{ch}];"
|
||||
"becomes_c=[{CH}{ch}];"
|
||||
|
||||
"BECOMES_VOWEL=[{A}{E}{I}{O}{U}{a}{e}{i}{o}{u}];"
|
||||
"becomes_vowel=[{A}{E}{I}{O}{U}{a}{e}{i}{o}{u}];"
|
||||
|
||||
"letter=[[:Lu:][:Ll:]];"
|
||||
"lower=[[:Ll:]];"
|
||||
|
||||
//* /*
|
||||
//* Modified to combine display transliterator and typing transliterator.
|
||||
//* The display mapping uses accents for the "soft" vowels.
|
||||
//* It does not, although it could, use characters like \u0161 instead of digraphs
|
||||
//* like sh.
|
||||
//* */
|
||||
|
||||
// #############################################
|
||||
// Special titlecase forms, not duplicated
|
||||
// #############################################
|
||||
|
||||
"Ch>{CH};" "Ch<{CH}({lower};"
|
||||
"Kh>{KH};" "Kh<{KH}({lower};"
|
||||
"Shch>{SHCH};" "Shch<{SHCH}({lower};"
|
||||
"Sh>{SH};" "Sh<{SH}({lower};"
|
||||
"Ts>{TS};" "Ts<{TS}({lower};"
|
||||
"Zh>{ZH};" "Zh<{ZH}({lower};"
|
||||
"Yi>{YI};" //+ "Yi<{YI}({lower};"
|
||||
"Ye>{YE};" //+ "Ye<{YE}({lower};"
|
||||
"Yo>{YO};" //+ "Yo<{YO}({lower};"
|
||||
"Yu>{YU};" //+ "Yu<{YU}({lower};"
|
||||
"Ya>{YA};" //+ "Ya<{YA}({lower};"
|
||||
|
||||
// #############################################
|
||||
// Rules to Duplicate
|
||||
// To get the lowercase versions, copy these and lowercase
|
||||
// #############################################
|
||||
|
||||
// variant spellings in English
|
||||
|
||||
"SHTCH>{SHCH};"
|
||||
"TCH>{CH};"
|
||||
"TH>{Z};"
|
||||
"Q>{K};"
|
||||
"WH>{V};"
|
||||
"W>{V};"
|
||||
"X>{K}{S};" //+ "X<{K}{S};"
|
||||
|
||||
// Separate letters that would otherwise join
|
||||
|
||||
"SH''<{SH}({BECOMES_C};"
|
||||
"T''<{T}({BECOMES_S};"
|
||||
|
||||
"K''<{K}({BECOMES_H};"
|
||||
"S''<{S}({BECOMES_H};"
|
||||
"T''<{T}({BECOMES_H};"
|
||||
"Z''<{Z}({BECOMES_H};"
|
||||
|
||||
"Y''<{Y}({BECOMES_VOWEL};"
|
||||
|
||||
// Main letters
|
||||
|
||||
"A<>{A};"
|
||||
"B<>{B};"
|
||||
"CH<>{CH};"
|
||||
"D<>{D};"
|
||||
"E<>{E};"
|
||||
"F<>{F};"
|
||||
"G<>{G};"
|
||||
"\u00cc<>{YI};"
|
||||
"I<>{I};"
|
||||
"KH<>{KH};"
|
||||
"K<>{K};"
|
||||
"L<>{L};"
|
||||
"M<>{M};"
|
||||
"N<>{N};"
|
||||
"O<>{O};"
|
||||
"P<>{P};"
|
||||
"R<>{R};"
|
||||
"SHCH<>{SHCH};"
|
||||
"SH>{SH};" //+ "SH<{SH};"
|
||||
"{S-hacek}<>{SH};"
|
||||
"S<>{S};"
|
||||
"TS<>{TS};"
|
||||
"T<>{T};"
|
||||
"U<>{U};"
|
||||
"V<>{V};"
|
||||
//\u00cc\u00c0\u00c8\u00d2\u00d9
|
||||
"YE>{YE};" //+ "YE<{YE};"
|
||||
"\u00c8<>{YE};"
|
||||
"YO>{YO};" //+ "YO<{YO};"
|
||||
"\u00d2<>{YO};"
|
||||
"YU>{YU};" //+ "YU<{YU};"
|
||||
"\u00d9<>{YU};"
|
||||
"YA>{YA};" //+ "YA<{YA};"
|
||||
"\u00c0<>{YA};"
|
||||
"Y<>{Y};"
|
||||
"ZH<>{ZH};"
|
||||
"Z<>{Z};"
|
||||
|
||||
"H<>{HARD};"
|
||||
"\u0178<>{SOFT};"
|
||||
|
||||
// Non-russian
|
||||
|
||||
"J<>{J};"
|
||||
|
||||
// variant spellings in English
|
||||
|
||||
"C({csoft}>{S};"
|
||||
"C>{K};"
|
||||
|
||||
// #############################################
|
||||
// Duplicated Rules
|
||||
// Copy and lowercase the above rules
|
||||
// #############################################
|
||||
|
||||
// variant spellings in english
|
||||
|
||||
"shtch>{shch};"
|
||||
"tch>{ch};"
|
||||
"th>{z};"
|
||||
"q>{k};"
|
||||
"wh>{v};"
|
||||
"w>{v};"
|
||||
"x>{k}{s};" //+ "x<{k}{s};"
|
||||
|
||||
// separate letters that would otherwise join
|
||||
|
||||
"sh''<{sh}({becomes_c};"
|
||||
"t''<{t}({becomes_s};"
|
||||
|
||||
"k''<{k}({becomes_h};"
|
||||
"s''<{s}({becomes_h};"
|
||||
"t''<{t}({becomes_h};"
|
||||
"z''<{z}({becomes_h};"
|
||||
|
||||
"y''<{y}({becomes_vowel};"
|
||||
|
||||
// main letters
|
||||
|
||||
"a<>{a};"
|
||||
"b<>{b};"
|
||||
"ch<>{ch};"
|
||||
"d<>{d};"
|
||||
"e<>{e};"
|
||||
"f<>{f};"
|
||||
"g<>{g};"
|
||||
"\u00ec<>{yi};"
|
||||
"i<>{i};"
|
||||
"kh<>{kh};"
|
||||
"k<>{k};"
|
||||
"l<>{l};"
|
||||
"m<>{m};"
|
||||
"n<>{n};"
|
||||
"o<>{o};"
|
||||
"p<>{p};"
|
||||
"r<>{r};"
|
||||
"shch<>{shch};"
|
||||
"sh>{sh};" //+ "sh<{sh};"
|
||||
"{s-hacek}<>{sh};"
|
||||
"s<>{s};"
|
||||
"ts<>{ts};"
|
||||
"t<>{t};"
|
||||
"u<>{u};"
|
||||
"v<>{v};"
|
||||
//\u00ec\u00e0\u00e8\u00f2\u00f9
|
||||
"ye>{ye};" //+ "ye<{ye};"
|
||||
"\u00e8<>{ye};"
|
||||
"yo>{yo};" //+ "yo<{yo};"
|
||||
"\u00f2<>{yo};"
|
||||
"yu>{yu};" //+ "yu<{yu};"
|
||||
"\u00f9<>{yu};"
|
||||
"ya>{ya};" //+ "ya<{ya};"
|
||||
"\u00e0<>{ya};"
|
||||
"y<>{y};"
|
||||
"zh<>{zh};"
|
||||
"z<>{z};"
|
||||
|
||||
"h<>{hard};"
|
||||
"\u00ff<>{soft};"
|
||||
|
||||
// non-russian
|
||||
|
||||
"j<>{j};"
|
||||
|
||||
// variant spellings in english
|
||||
|
||||
"c({csoft}>{s};"
|
||||
"c>{k};"
|
||||
|
||||
|
||||
|
||||
// #############################################
|
||||
// End of Duplicated Rules
|
||||
// #############################################
|
||||
|
||||
//generally the last rule
|
||||
"''>;"
|
||||
//the end
|
||||
}
|
||||
}
|
@ -1,411 +0,0 @@
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (C) 1999, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date Name Description
|
||||
// 11/17/99 aliu Creation.
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Latin-Devanagari
|
||||
|
||||
ldevan {
|
||||
Rule {
|
||||
//#####################################################################
|
||||
// Keyboard Transliteration Table
|
||||
//#####################################################################
|
||||
// Conversions should be:
|
||||
// 1. complete
|
||||
// * convert every sequence of Latin letters (a to z plus apostrophe)
|
||||
// to a sequence of Native letters
|
||||
// * convert every sequence of Native letters to Latin letters
|
||||
// 2. reversible
|
||||
// * any string of Native converted to Latin and back should be the same
|
||||
// * this is not true for English converted to Native & back, e.g.:
|
||||
// k -> {kaf} -> k
|
||||
// c -> {kaf} -> k
|
||||
//#####################################################################
|
||||
// Sequences of Latin letters may convert to a single Native letter.
|
||||
// When this is the case, an apostrophe can be used to indicate separate
|
||||
// letters.
|
||||
// E.g. sh -> {shin}
|
||||
// s'h -> {sin}{heh}
|
||||
// ss -> {sad}
|
||||
// s's -> {sin}{shadda}
|
||||
//#####################################################################
|
||||
// To Do:
|
||||
// finish adding shadda, add sokoon, fix uppercase
|
||||
// make two transliteration tables: one with vowels, one without
|
||||
//#####################################################################
|
||||
// Modifications
|
||||
// Devanagari Transliterator: broken up with consonants/vowels
|
||||
//#####################################################################
|
||||
// Unicode character name definitions
|
||||
//#####################################################################
|
||||
|
||||
//consonants
|
||||
"candrabindu=\u0901;"
|
||||
"bindu=\u0902;"
|
||||
"visarga=\u0903;"
|
||||
|
||||
// w<vowel> represents the stand-alone form
|
||||
"wa=\u0905;"
|
||||
"waa=\u0906;"
|
||||
"wi=\u0907;"
|
||||
"wii=\u0908;"
|
||||
"wu=\u0909;"
|
||||
"wuu=\u090A;"
|
||||
"wr=\u090B;"
|
||||
"wl=\u090C;"
|
||||
"we=\u090F;"
|
||||
"wai=\u0910;"
|
||||
"wo=\u0913;"
|
||||
"wau=\u0914;"
|
||||
|
||||
"ka=\u0915;"
|
||||
"kha=\u0916;"
|
||||
"ga=\u0917;"
|
||||
"gha=\u0918;"
|
||||
"nga=\u0919;"
|
||||
|
||||
"ca=\u091A;"
|
||||
"cha=\u091B;"
|
||||
"ja=\u091C;"
|
||||
"jha=\u091D;"
|
||||
"nya=\u091E;"
|
||||
|
||||
"tta=\u091F;"
|
||||
"ttha=\u0920;"
|
||||
"dda=\u0921;"
|
||||
"ddha=\u0922;"
|
||||
"nna=\u0923;"
|
||||
|
||||
"ta=\u0924;"
|
||||
"tha=\u0925;"
|
||||
"da=\u0926;"
|
||||
"dha=\u0927;"
|
||||
"na=\u0928;"
|
||||
|
||||
"pa=\u092A;"
|
||||
"pha=\u092B;"
|
||||
"ba=\u092C;"
|
||||
"bha=\u092D;"
|
||||
"ma=\u092E;"
|
||||
|
||||
"ya=\u092F;"
|
||||
"ra=\u0930;"
|
||||
"rra=\u0931;"
|
||||
"la=\u0932;" // U+0932 is DEVANAGARI LETTER LA (U+0933, used previously, is LLA)
|
||||
"va=\u0935;"
|
||||
|
||||
"sha=\u0936;"
|
||||
"ssa=\u0937;"
|
||||
"sa=\u0938;"
|
||||
"ha=\u0939;"
|
||||
|
||||
// <vowel> represents the dependent form (vowel sign attached to a consonant).
// Each name mirrors its stand-alone w<vowel> counterpart defined above:
// rh <-> wr (vocalic R), rrh <-> wrr (vocalic RR), lh <-> wl (vocalic L).
|
||||
"aa=\u093E;"
|
||||
"i=\u093F;"
|
||||
"ii=\u0940;"
|
||||
"u=\u0941;"
|
||||
"uu=\u0942;"
|
||||
"rh=\u0943;"
|
||||
"lh=\u0962;" // VOWEL SIGN VOCALIC L (was U+0944, which is VOCALIC RR)
|
||||
"e=\u0947;"
|
||||
"ai=\u0948;"
|
||||
"o=\u094B;"
|
||||
"au=\u094C;"
|
||||
|
||||
"virama=\u094D;"
|
||||
|
||||
"wrr=\u0960;"
|
||||
"rrh=\u0944;" // VOWEL SIGN VOCALIC RR (was U+0962, which is VOCALIC L)
|
||||
|
||||
"danda=\u0964;"
|
||||
"doubleDanda=\u0965;"
|
||||
"depVowelAbove=[\u093E-\u0940\u0945-\u094C];"
|
||||
"depVowelBelow=[\u0941-\u0944];"
|
||||
// Ech: Double escape U+0000, so UnicodeString doesn't consider it
|
||||
// to be the end of the string. This is only necessary for U+0000
|
||||
// right now. [liu]
|
||||
"endThing=[{danda}{doubleDanda}\\u0000-\u08FF\u0980-\uFFFF];"
|
||||
|
||||
"&=[{virama}{aa}{ai}{au}{ii}{i}{uu}{u}{rrh}{rh}{lh}{e}{o}];"
|
||||
"%=[bcdfghjklmnpqrstvwxyz];"
|
||||
|
||||
//#####################################################################
|
||||
// convert from Latin letters to Native letters
|
||||
//#####################################################################
|
||||
//Hindi>\u092d\u093e\u0930\u0924--\u0020\u0926\u0947\u0936\u0020\u092c\u0928\u094d\u0927\u0941\u002e
|
||||
|
||||
// special forms with no good conversion
|
||||
|
||||
"mm>{bindu};"
|
||||
"x>{visarga};"
|
||||
|
||||
// convert to independent forms at start of word or syllable:
|
||||
// e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
|
||||
// Moved up [LIU]
|
||||
|
||||
"aa>{waa};"
|
||||
"ai>{wai};"
|
||||
"au>{wau};"
|
||||
"ii>{wii};"
|
||||
"i>{wi};"
|
||||
"uu>{wuu};"
|
||||
"u>{wu};"
|
||||
"rrh>{wrr};"
|
||||
"rh>{wr};"
|
||||
"lh>{wl};"
|
||||
"e>{we};"
|
||||
"o>{wo};"
|
||||
"a>{wa};"
|
||||
|
||||
// normal consonants
|
||||
|
||||
"kh>{kha}|{virama};"
|
||||
"k>{ka}|{virama};"
|
||||
"q>{ka}|{virama};"
|
||||
"gh>{gha}|{virama};"
|
||||
"g>{ga}|{virama};"
|
||||
"ng>{nga}|{virama};"
|
||||
"ch>{cha}|{virama};"
|
||||
"c>{ca}|{virama};"
|
||||
"jh>{jha}|{virama};"
|
||||
"j>{ja}|{virama};"
|
||||
"ny>{nya}|{virama};"
|
||||
"tth>{ttha}|{virama};"
|
||||
"tt>{tta}|{virama};"
|
||||
"ddh>{ddha}|{virama};"
|
||||
"dd>{dda}|{virama};"
|
||||
"nn>{nna}|{virama};"
|
||||
"th>{tha}|{virama};"
|
||||
"t>{ta}|{virama};"
|
||||
"dh>{dha}|{virama};"
|
||||
"d>{da}|{virama};"
|
||||
"n>{na}|{virama};"
|
||||
"ph>{pha}|{virama};"
|
||||
"p>{pa}|{virama};"
|
||||
"bh>{bha}|{virama};"
|
||||
"b>{ba}|{virama};"
|
||||
"m>{ma}|{virama};"
|
||||
"y>{ya}|{virama};"
|
||||
"r>{ra}|{virama};"
|
||||
"l>{la}|{virama};"
|
||||
"v>{va}|{virama};"
|
||||
"f>{va}|{virama};"
|
||||
"w>{va}|{virama};"
|
||||
"sh>{sha}|{virama};"
|
||||
"ss>{ssa}|{virama};"
|
||||
"s>{sa}|{virama};"
|
||||
"z>{sa}|{virama};"
|
||||
"h>{ha}|{virama};"
|
||||
|
||||
".>{danda};"
|
||||
"{danda}.>{doubleDanda};"
|
||||
"{depVowelAbove})~>{bindu};"
|
||||
"{depVowelBelow})~>{candrabindu};"
|
||||
|
||||
// convert to dependent forms after consonant with no vowel:
|
||||
// e.g. kai -> {ka}{virama}ai -> {ka}{ai}
|
||||
|
||||
"{virama}aa>{aa};"
|
||||
"{virama}ai>{ai};"
|
||||
"{virama}au>{au};"
|
||||
"{virama}ii>{ii};"
|
||||
"{virama}i>{i};"
|
||||
"{virama}uu>{uu};"
|
||||
"{virama}u>{u};"
|
||||
"{virama}rrh>{rrh};"
|
||||
"{virama}rh>{rh};"
|
||||
"{virama}lh>{lh};"
|
||||
"{virama}e>{e};"
|
||||
"{virama}o>{o};"
|
||||
"{virama}a>;"
|
||||
|
||||
// otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
|
||||
|
||||
"{virama}''aa>{waa};"
|
||||
"{virama}''ai>{wai};"
|
||||
"{virama}''au>{wau};"
|
||||
"{virama}''ii>{wii};"
|
||||
"{virama}''i>{wi};"
|
||||
"{virama}''uu>{wuu};"
|
||||
"{virama}''u>{wu};"
|
||||
"{virama}''rrh>{wrr};"
|
||||
"{virama}''rh>{wr};"
|
||||
"{virama}''lh>{wl};"
|
||||
"{virama}''e>{we};"
|
||||
"{virama}''o>{wo};"
|
||||
"{virama}''a>{wa};"
|
||||
|
||||
"{virama}({endThing}>;"
|
||||
|
||||
// convert any left-over apostrophes used for separation
|
||||
|
||||
"''>;"
|
||||
|
||||
//#####################################################################
|
||||
// convert from Native letters to Latin letters
|
||||
//#####################################################################
|
||||
|
||||
// special forms with no good conversion
|
||||
|
||||
"mm<{bindu};"
|
||||
"x<{visarga};"
|
||||
|
||||
// normal consonants
|
||||
|
||||
"kh<{kha}(&;"
|
||||
"kha<{kha};"
|
||||
"k''<{ka}{virama}({ha};"
|
||||
"k<{ka}(&;"
|
||||
"ka<{ka};"
|
||||
"gh<{gha}(&;"
|
||||
"gha<{gha};"
|
||||
"g''<{ga}{virama}({ha};"
|
||||
"g<{ga}(&;"
|
||||
"ga<{ga};"
|
||||
"ng<{nga}(&;"
|
||||
"nga<{nga};"
|
||||
"ch<{cha}(&;"
|
||||
"cha<{cha};"
|
||||
"c''<{ca}{virama}({ha};"
|
||||
"c<{ca}(&;"
|
||||
"ca<{ca};"
|
||||
"jh<{jha}(&;"
|
||||
"jha<{jha};"
|
||||
"j''<{ja}{virama}({ha};"
|
||||
"j<{ja}(&;"
|
||||
"ja<{ja};"
|
||||
"ny<{nya}(&;"
|
||||
"nya<{nya};"
|
||||
"tth<{ttha}(&;"
|
||||
"ttha<{ttha};"
|
||||
"tt''<{tta}{virama}({ha};"
|
||||
"tt<{tta}(&;"
|
||||
"tta<{tta};"
|
||||
"ddh<{ddha}(&;"
|
||||
"ddha<{ddha};"
|
||||
"dd''<{dda}{virama}({ha};" // consume the virama and separate from a following ha, like the other X''<...{virama}({ha} rules
|
||||
"dd<{dda}(&;"
|
||||
"dda<{dda};"
|
||||
"dh<{dha}(&;"
|
||||
"dha<{dha};"
|
||||
"d''<{da}{virama}({ha};"
|
||||
"d''<{da}{virama}({ddha};"
|
||||
"d''<{da}{virama}({dda};"
|
||||
"d''<{da}{virama}({dha};"
|
||||
"d''<{da}{virama}({da};"
|
||||
"d<{da}(&;"
|
||||
"da<{da};"
|
||||
"th<{tha}(&;"
|
||||
"tha<{tha};"
|
||||
"t''<{ta}{virama}({ha};"
|
||||
"t''<{ta}{virama}({ttha};"
|
||||
"t''<{ta}{virama}({tta};"
|
||||
"t''<{ta}{virama}({tha};"
|
||||
"t''<{ta}{virama}({ta};"
|
||||
"t<{ta}(&;"
|
||||
"ta<{ta};"
|
||||
"n''<{na}{virama}({ga};"
|
||||
"n''<{na}{virama}({ya};"
|
||||
"n<{na}(&;"
|
||||
"na<{na};"
|
||||
"ph<{pha}(&;"
|
||||
"pha<{pha};"
|
||||
"p''<{pa}{virama}({ha};"
|
||||
"p<{pa}(&;"
|
||||
"pa<{pa};"
|
||||
"bh<{bha}(&;"
|
||||
"bha<{bha};"
|
||||
"b''<{ba}{virama}({ha};"
|
||||
"b<{ba}(&;"
|
||||
"ba<{ba};"
|
||||
"m''<{ma}{virama}({ma};"
|
||||
"m''<{ma}{virama}({bindu};"
|
||||
"m<{ma}(&;"
|
||||
"ma<{ma};"
|
||||
"y<{ya}(&;"
|
||||
"ya<{ya};"
|
||||
"r''<{ra}{virama}({ha};"
|
||||
"r<{ra}(&;"
|
||||
"ra<{ra};"
|
||||
"l''<{la}{virama}({ha};"
|
||||
"l<{la}(&;"
|
||||
"la<{la};"
|
||||
"v<{va}(&;"
|
||||
"va<{va};"
|
||||
"sh<{sha}(&;"
|
||||
"sha<{sha};"
|
||||
"ss<{ssa}(&;"
|
||||
"ssa<{ssa};"
|
||||
"s''<{sa}{virama}({ha};"
|
||||
"s''<{sa}{virama}({sha};"
|
||||
"s''<{sa}{virama}({ssa};"
|
||||
"s''<{sa}{virama}({sa};"
|
||||
"s<{sa}(&;"
|
||||
"sa<{sa};"
|
||||
"h<{ha}(&;"
|
||||
"ha<{ha};"
|
||||
|
||||
// dependent vowels (should never occur except following consonants)
|
||||
|
||||
"aa<{aa};"
|
||||
"ai<{ai};"
|
||||
"au<{au};"
|
||||
"ii<{ii};"
|
||||
"i<{i};"
|
||||
"uu<{uu};"
|
||||
"u<{u};"
|
||||
"rrh<{rrh};"
|
||||
"rh<{rh};"
|
||||
"lh<{lh};"
|
||||
"e<{e};"
|
||||
"o<{o};"
|
||||
|
||||
// independent vowels (when following consonants)
|
||||
|
||||
"''aa<a){waa};"
|
||||
"''aa<%){waa};"
|
||||
"''ai<a){wai};"
|
||||
"''ai<%){wai};"
|
||||
"''au<a){wau};"
|
||||
"''au<%){wau};"
|
||||
"''ii<a){wii};"
|
||||
"''ii<%){wii};"
|
||||
"''i<a){wi};"
|
||||
"''i<%){wi};"
|
||||
"''uu<a){wuu};"
|
||||
"''uu<%){wuu};"
|
||||
"''u<a){wu};"
|
||||
"''u<%){wu};"
|
||||
"''rrh<%){wrr};"
|
||||
"''rh<%){wr};"
|
||||
"''lh<%){wl};"
|
||||
"''e<%){we};"
|
||||
"''o<%){wo};"
|
||||
"''a<a){wa};"
|
||||
"''a<%){wa};"
|
||||
|
||||
|
||||
// independent vowels (otherwise)
|
||||
|
||||
"aa<{waa};"
|
||||
"ai<{wai};"
|
||||
"au<{wau};"
|
||||
"ii<{wii};"
|
||||
"i<{wi};"
|
||||
"uu<{wuu};"
|
||||
"u<{wu};"
|
||||
"rrh<{wrr};"
|
||||
"rh<{wr};"
|
||||
"lh<{wl};"
|
||||
"e<{we};"
|
||||
"o<{wo};"
|
||||
"a<{wa};"
|
||||
|
||||
// blow away any remaining viramas
|
||||
|
||||
"<{virama};"
|
||||
}
|
||||
}
|
@ -1,376 +0,0 @@
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (C) 1999, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date Name Description
|
||||
// 11/17/99 aliu Creation.
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Latin-Greek
|
||||
|
||||
lgreek {
|
||||
Rule {
|
||||
// ==============================================
|
||||
// Modern Greek Transliteration Rules
|
||||
//
|
||||
// This transliterates modern Greek characters, but using rules
|
||||
// that are traditional for Ancient Greek, and
|
||||
// thus more resemble Greek words that have become part
|
||||
// of English. It differs from the official Greek
|
||||
// transliteration, which is more phonetic (since
|
||||
// most modern Greek vowels, for example, have
|
||||
// degenerated simply to sound like "ee").
|
||||
//
|
||||
// There are only a few tricky parts.
|
||||
// 1. eta and omega don't map directly to Latin vowels,
|
||||
// so we use a macron on e and o, and some
|
||||
// other combinations if they are accented.
|
||||
// 2. The accented, diaeresis i and y are substituted too.
|
||||
// 3. Some letters use digraphs, like "ph". While typical,
|
||||
// they need some special handling.
|
||||
// 4. A gamma before a gamma or a few other letters is
|
||||
// transliterated as an "n", as in "Anglo"
|
||||
// 5. An ypsilon after a vowel is a "u", as in
|
||||
// "Mouseio". Otherwise it is a "y" as in "Physikon"
|
||||
// 6. The construction of the rules is made simpler by making sure
|
||||
// that most rules for lowercase letters exactly correspond to the
|
||||
// rules for uppercase letters, *except* for the case of the letters
|
||||
// in the rule itself. That way, after modifying the uppercase rules,
|
||||
// you can just copy, paste, and "set to lowercase" to get
|
||||
// the rules for lowercase letters!
|
||||
// ==============================================
|
||||
|
||||
// ==============================================
|
||||
// Variables, used to make the rules more comprehensible
|
||||
// and for conditionals.
|
||||
// ==============================================
|
||||
|
||||
// Latin Letters
|
||||
|
||||
"E-MACRON=\u0112;"
|
||||
"e-macron=\u0113;"
|
||||
"O-MACRON=\u014C;"
|
||||
"o-macron=\u014D;"
|
||||
"Y-UMLAUT=\u0178;"
|
||||
"y-umlaut=\u00FF;"
|
||||
|
||||
//! // with real accents.
|
||||
//! + "E-MACRON-ACUTE=\u0112\u0301;"
|
||||
//! + "e-macron-acute=\u0113\u0301;"
|
||||
//! + "O-MACRON-ACUTE=\u014C\u0301;"
|
||||
//! + "o-macron-acute=\u014D\u0301;"
|
||||
//! + "y-umlaut-acute=\u00FF\u0301;"
|
||||
//! + "\u00ef-acute=\u00ef\u0301;"
|
||||
//! + "\u00fc-acute=\u00fc\u0301;"
|
||||
//! //
|
||||
|
||||
// single letter equivalents
|
||||
|
||||
"E-MACRON-ACUTE=\u00CA;"
|
||||
"e-macron-acute=\u00EA;"
|
||||
"O-MACRON-ACUTE=\u00D4;"
|
||||
"o-macron-acute=\u00F4;"
|
||||
"y-umlaut-acute=\u0177;"
|
||||
"\u00ef-acute=\u00EE;"
|
||||
"\u00fc-acute=\u00FB;"
|
||||
|
||||
// Greek Letters
|
||||
|
||||
"ALPHA=\u0391;"
|
||||
"BETA=\u0392;"
|
||||
"GAMMA=\u0393;"
|
||||
"DELTA=\u0394;"
|
||||
"EPSILON=\u0395;"
|
||||
"ZETA=\u0396;"
|
||||
"ETA=\u0397;"
|
||||
"THETA=\u0398;"
|
||||
"IOTA=\u0399;"
|
||||
"KAPPA=\u039A;"
|
||||
"LAMBDA=\u039B;"
|
||||
"MU=\u039C;"
|
||||
"NU=\u039D;"
|
||||
"XI=\u039E;"
|
||||
"OMICRON=\u039F;"
|
||||
"PI=\u03A0;"
|
||||
"RHO=\u03A1;"
|
||||
"SIGMA=\u03A3;"
|
||||
"TAU=\u03A4;"
|
||||
"YPSILON=\u03A5;"
|
||||
"PHI=\u03A6;"
|
||||
"CHI=\u03A7;"
|
||||
"PSI=\u03A8;"
|
||||
"OMEGA=\u03A9;"
|
||||
|
||||
"ALPHA+=\u0386;"
|
||||
"EPSILON+=\u0388;"
|
||||
"ETA+=\u0389;"
|
||||
"IOTA+=\u038A;"
|
||||
"OMICRON+=\u038C;"
|
||||
"YPSILON+=\u038E;"
|
||||
"OMEGA+=\u038F;"
|
||||
"IOTA_DIAERESIS=\u03AA;"
|
||||
"YPSILON_DIAERESIS=\u03AB;"
|
||||
|
||||
"alpha=\u03B1;"
|
||||
"beta=\u03B2;"
|
||||
"gamma=\u03B3;"
|
||||
"delta=\u03B4;"
|
||||
"epsilon=\u03B5;"
|
||||
"zeta=\u03B6;"
|
||||
"eta=\u03B7;"
|
||||
"theta=\u03B8;"
|
||||
"iota=\u03B9;"
|
||||
"kappa=\u03BA;"
|
||||
"lambda=\u03BB;"
|
||||
"mu=\u03BC;"
|
||||
"nu=\u03BD;"
|
||||
"xi=\u03BE;"
|
||||
"omicron=\u03BF;"
|
||||
"pi=\u03C0;"
|
||||
"rho=\u03C1;"
|
||||
"sigma=\u03C3;"
|
||||
"tau=\u03C4;"
|
||||
"ypsilon=\u03C5;"
|
||||
"phi=\u03C6;"
|
||||
"chi=\u03C7;"
|
||||
"psi=\u03C8;"
|
||||
"omega=\u03C9;"
|
||||
|
||||
//forms
|
||||
|
||||
"alpha+=\u03AC;"
|
||||
"epsilon+=\u03AD;"
|
||||
"eta+=\u03AE;"
|
||||
"iota+=\u03AF;"
|
||||
"omicron+=\u03CC;"
|
||||
"ypsilon+=\u03CD;"
|
||||
"omega+=\u03CE;"
|
||||
"iota_diaeresis=\u03CA;"
|
||||
"ypsilon_diaeresis=\u03CB;"
|
||||
"iota_diaeresis+=\u0390;"
|
||||
"ypsilon_diaeresis+=\u03B0;"
|
||||
"sigma+=\u03C2;"
|
||||
|
||||
// Variables for conditional mappings
|
||||
|
||||
// Use lowercase for all variable names, to allow cut/paste below.
|
||||
|
||||
"letter=[~[:Lu:][:Ll:]];"
|
||||
"lower=[[:Ll:]];"
|
||||
"softener=[eiyEIY];"
|
||||
"vowel=[aeiouAEIOU"
|
||||
"{ALPHA}{EPSILON}{ETA}{IOTA}{OMICRON}{YPSILON}{OMEGA}"
|
||||
"{ALPHA+}{EPSILON+}{ETA+}{IOTA+}{OMICRON+}{YPSILON+}{OMEGA+}"
|
||||
"{IOTA_DIAERESIS}{YPSILON_DIAERESIS}"
|
||||
"{alpha}{epsilon}{eta}{iota}{omicron}{ypsilon}{omega}"
|
||||
"{alpha+}{epsilon+}{eta+}{iota+}{omicron+}{ypsilon+}{omega+}"
|
||||
"{iota_diaeresis}{ypsilon_diaeresis}"
|
||||
"{iota_diaeresis+}{ypsilon_diaeresis+}"
|
||||
"];"
|
||||
"n-gamma=[GKXCgkxc];"
|
||||
"gamma-n=[{GAMMA}{KAPPA}{CHI}{XI}{gamma}{kappa}{chi}{xi}];"
|
||||
"pp=[Pp];"
|
||||
|
||||
// ==============================================
|
||||
// Rules
|
||||
// ==============================================
|
||||
// The following are special titlecases, and should
|
||||
// not be copied when duplicating the lowercase
|
||||
// ==============================================
|
||||
|
||||
"Th <> {THETA}({lower};"
|
||||
"Ph <> {PHI}({lower};"
|
||||
"Ch <> {CHI}({lower};"
|
||||
//masked: + "Ps<{PHI}({lower};"
|
||||
|
||||
// Because there are no uppercase forms for final sigma,
|
||||
// we had to move all the sigma rules up here.
|
||||
|
||||
// Remember to insert ' to preserve round trip, for double letters
|
||||
// don't need to do this for the digraphs with h,
|
||||
// since it is not created when mapping back from greek
|
||||
|
||||
// use special form for s
|
||||
|
||||
"''S <> ({pp}) {SIGMA} ;" // handle PS
|
||||
"S <> {SIGMA};"
|
||||
|
||||
// The following are a bit tricky. 's' takes two forms in greek
|
||||
// final or non final.
|
||||
// We use ~s to represent the abnormal form: final before letter
|
||||
// or non-final before non-letter.
|
||||
// We use 's to separate p and s (otherwise ps is one letter)
|
||||
// so, we break out the following forms:
|
||||
|
||||
"''s < ({pp}) {sigma} ({letter});"
|
||||
"s < {sigma} ({letter});"
|
||||
"~s < {sigma} ;"
|
||||
|
||||
"~s < {sigma+} ({letter});"
|
||||
"''s < ({pp}) {sigma+} ;"
|
||||
"s < {sigma+} ;"
|
||||
|
||||
"~s ({letter}) > {sigma+};"
|
||||
"~s > {sigma};"
|
||||
"''s ({letter}) > {sigma};"
|
||||
"''s > {sigma+};"
|
||||
"s ({letter}) > {sigma};"
|
||||
"s > {sigma+};"
|
||||
|
||||
// because there are no uppercase forms, had to move these up too.
|
||||
|
||||
"i\"`>{iota_diaeresis+};"
|
||||
"y\"`>{ypsilon_diaeresis+};"
|
||||
|
||||
"{\u00ef-acute} <> {iota_diaeresis+};"
|
||||
"{\u00fc-acute} <> {vowel}){ypsilon_diaeresis+};"
|
||||
"{y-umlaut-acute} <> {ypsilon_diaeresis+};"
|
||||
|
||||
// ==============================================
|
||||
// Uppercase Forms.
|
||||
// To make lowercase forms, just copy and lowercase below
|
||||
// ==============================================
|
||||
|
||||
// Typing variants, in case the keyboard doesn't have accents
|
||||
|
||||
"A`>{ALPHA+};"
|
||||
"E`>{EPSILON+};"
|
||||
"EE`>{ETA+};"
|
||||
"EE>{ETA};"
|
||||
"I`>{IOTA+};"
|
||||
"O`>{OMICRON+};"
|
||||
"OO`>{OMEGA+};"
|
||||
"OO>{OMEGA};"
|
||||
"I\">{IOTA_DIAERESIS};"
|
||||
"Y\">{YPSILON_DIAERESIS};"
|
||||
|
||||
// Basic Letters
|
||||
|
||||
"A<>{ALPHA};"
|
||||
"\u00c1<>{ALPHA+};"
|
||||
"B<>{BETA};"
|
||||
"N ({n-gamma}) <> {GAMMA} ({gamma-n});"
|
||||
"G<>{GAMMA};"
|
||||
"D<>{DELTA};"
|
||||
"''E <> ([Ee]){EPSILON};" // handle EE
|
||||
"E<>{EPSILON};"
|
||||
"\u00c9<>{EPSILON+};"
|
||||
"Z<>{ZETA};"
|
||||
"{E-MACRON-ACUTE}<>{ETA+};"
|
||||
"{E-MACRON}<>{ETA};"
|
||||
"TH<>{THETA};"
|
||||
"I<>{IOTA};"
|
||||
"\u00cd<>{IOTA+};"
|
||||
"\u00cf<>{IOTA_DIAERESIS};"
|
||||
"K<>{KAPPA};"
|
||||
"L<>{LAMBDA};"
|
||||
"M<>{MU};"
|
||||
"N'' <> {NU} ({gamma-n});"
|
||||
"N<>{NU};"
|
||||
"X<>{XI};"
|
||||
"''O <> ([Oo]) {OMICRON};" // handle OO
|
||||
"O<>{OMICRON};"
|
||||
"\u00d3<>{OMICRON+};"
|
||||
"PH<>{PHI};" // needs ordering before P
|
||||
"PS<>{PSI};" // needs ordering before P
|
||||
"P<>{PI};"
|
||||
"R<>{RHO};"
|
||||
"T<>{TAU};"
|
||||
"U <> ({vowel}) {YPSILON};"
|
||||
"\u00da <> ({vowel}) {YPSILON+};"
|
||||
"\u00dc <> ({vowel}) {YPSILON_DIAERESIS};"
|
||||
"Y<>{YPSILON};"
|
||||
"\u00dd<>{YPSILON+};"
|
||||
"{Y-UMLAUT}<>{YPSILON_DIAERESIS};"
|
||||
"CH<>{CHI};"
|
||||
"{O-MACRON-ACUTE}<>{OMEGA+};"
|
||||
"{O-MACRON}<>{OMEGA};"
|
||||
|
||||
// Extra English Letters. Mapped for completeness
|
||||
|
||||
"C({softener})>|S;"
|
||||
"C>|K;"
|
||||
"F>|PH;"
|
||||
"H>|CH;"
|
||||
"J>|I;"
|
||||
"Q>|K;"
|
||||
"V>|U;"
|
||||
"W>|U;"
|
||||
|
||||
// ==============================================
|
||||
// Lowercase Forms. Just copy above and lowercase
|
||||
// ==============================================
|
||||
|
||||
// typing variants, in case the keyboard doesn't have accents
|
||||
|
||||
"a`>{alpha+};"
|
||||
"e`>{epsilon+};"
|
||||
"ee`>{eta+};"
|
||||
"ee>{eta};"
|
||||
"i`>{iota+};"
|
||||
"o`>{omicron+};"
|
||||
"oo`>{omega+};"
|
||||
"oo>{omega};"
|
||||
"i\">{iota_diaeresis};"
|
||||
"y\">{ypsilon_diaeresis};"
|
||||
|
||||
// basic letters
|
||||
|
||||
"a<>{alpha};"
|
||||
"\u00e1<>{alpha+};"
|
||||
"b<>{beta};"
|
||||
"n ({n-gamma}) <> {gamma} ({gamma-n});"
|
||||
"g<>{gamma};"
|
||||
"d<>{delta};"
|
||||
"''e <> ([Ee]){epsilon};" // handle EE
|
||||
"e<>{epsilon};"
|
||||
"\u00e9<>{epsilon+};"
|
||||
"z<>{zeta};"
|
||||
"{e-macron-acute}<>{eta+};"
|
||||
"{e-macron}<>{eta};"
|
||||
"th<>{theta};"
|
||||
"i<>{iota};"
|
||||
"\u00ed<>{iota+};"
|
||||
"\u00ef<>{iota_diaeresis};"
|
||||
"k<>{kappa};"
|
||||
"l<>{lambda};"
|
||||
"m<>{mu};"
|
||||
"n'' <> {nu} ({gamma-n});"
|
||||
"n<>{nu};"
|
||||
"x<>{xi};"
|
||||
"''o <> ([Oo]) {omicron};" // handle OO
|
||||
"o<>{omicron};"
|
||||
"\u00f3<>{omicron+};"
|
||||
"ph<>{phi};" // needs ordering before p
|
||||
"ps<>{psi};" // needs ordering before p
|
||||
"p<>{pi};"
|
||||
"r<>{rho};"
|
||||
"t<>{tau};"
|
||||
"u <> ({vowel}){ypsilon};"
|
||||
"\u00fa <> ({vowel}){ypsilon+};"
|
||||
"\u00fc <> ({vowel}){ypsilon_diaeresis};"
|
||||
"y<>{ypsilon};"
|
||||
"\u00fd<>{ypsilon+};"
|
||||
"{y-umlaut}<>{ypsilon_diaeresis};"
|
||||
"ch<>{chi};"
|
||||
"{o-macron-acute}<>{omega+};"
|
||||
"{o-macron}<>{omega};"
|
||||
|
||||
// extra english letters. mapped for completeness
|
||||
|
||||
"c({softener})>|s;"
|
||||
"c>|k;"
|
||||
"f>|ph;"
|
||||
"h>|ch;"
|
||||
"j>|i;"
|
||||
"q>|k;"
|
||||
"v>|u;"
|
||||
"w>|u;"
|
||||
|
||||
// ====================================
|
||||
// Normal final rule: remove '
|
||||
// ====================================
|
||||
|
||||
//+ "''>;"
|
||||
}
|
||||
}
|
@ -1,2 +0,0 @@
|
||||
// This file is obsolete
|
||||
// See fullhalf
|
@ -1,278 +0,0 @@
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (C) 1999, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date Name Description
|
||||
// 11/17/99 aliu Creation.
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Latin-Hebrew
|
||||
|
||||
lhebrew {
|
||||
Rule {
|
||||
//variable names, derived from the Unicode names.
|
||||
|
||||
"POINT_SHEVA=\u05B0;"
|
||||
"POINT_HATAF_SEGOL=\u05B1;"
|
||||
"POINT_HATAF_PATAH=\u05B2;"
|
||||
"POINT_HATAF_QAMATS=\u05B3;"
|
||||
"POINT_HIRIQ=\u05B4;"
|
||||
"POINT_TSERE=\u05B5;"
|
||||
"POINT_SEGOL=\u05B6;"
|
||||
"POINT_PATAH=\u05B7;"
|
||||
"POINT_QAMATS=\u05B8;"
|
||||
"POINT_HOLAM=\u05B9;"
|
||||
"POINT_QUBUTS=\u05BB;"
|
||||
"POINT_DAGESH_OR_MAPIQ=\u05BC;"
|
||||
"POINT_METEG=\u05BD;"
|
||||
"PUNCTUATION_MAQAF=\u05BE;"
|
||||
"POINT_RAFE=\u05BF;"
|
||||
"PUNCTUATION_PASEQ=\u05C0;"
|
||||
"POINT_SHIN_DOT=\u05C1;"
|
||||
"POINT_SIN_DOT=\u05C2;"
|
||||
"PUNCTUATION_SOF_PASUQ=\u05C3;"
|
||||
"ALEF=\u05D0;"
|
||||
"BET=\u05D1;"
|
||||
"GIMEL=\u05D2;"
|
||||
"DALET=\u05D3;"
|
||||
"HE=\u05D4;"
|
||||
"VAV=\u05D5;"
|
||||
"ZAYIN=\u05D6;"
|
||||
"HET=\u05D7;"
|
||||
"TET=\u05D8;"
|
||||
"YOD=\u05D9;"
|
||||
"FINAL_KAF=\u05DA;"
|
||||
"KAF=\u05DB;"
|
||||
"LAMED=\u05DC;"
|
||||
"FINAL_MEM=\u05DD;"
|
||||
"MEM=\u05DE;"
|
||||
"FINAL_NUN=\u05DF;"
|
||||
"NUN=\u05E0;"
|
||||
"SAMEKH=\u05E1;"
|
||||
"AYIN=\u05E2;"
|
||||
"FINAL_PE=\u05E3;"
|
||||
"PE=\u05E4;"
|
||||
"FINAL_TSADI=\u05E5;"
|
||||
"TSADI=\u05E6;"
|
||||
"QOF=\u05E7;"
|
||||
"RESH=\u05E8;"
|
||||
"SHIN=\u05E9;"
|
||||
"TAV=\u05EA;"
|
||||
"YIDDISH_DOUBLE_VAV=\u05F0;"
|
||||
"YIDDISH_VAV_YOD=\u05F1;"
|
||||
"YIDDISH_DOUBLE_YOD=\u05F2;"
|
||||
"PUNCTUATION_GERESH=\u05F3;"
|
||||
"PUNCTUATION_GERSHAYIM=\u05F4;"
|
||||
|
||||
//wildcards
|
||||
//The values can be anything we don't use in this file: start at E000.
|
||||
|
||||
"letter=[abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ];"
|
||||
|
||||
"softvowel=[eiyEIY];"
|
||||
|
||||
"vowellike=[{ALEF}{AYIN}{YOD}{VAV}];"
|
||||
|
||||
//?>{POINT_SHEVA}
|
||||
//?>{POINT_HATAF_SEGOL}
|
||||
//?>{POINT_HATAF_PATAH}
|
||||
//?>{POINT_HATAF_QAMATS}
|
||||
//?>{POINT_HIRIQ}
|
||||
//?>{POINT_TSERE}
|
||||
//?>{POINT_SEGOL}
|
||||
//?>{POINT_PATAH}
|
||||
//?>{POINT_QAMATS}
|
||||
//?>{POINT_HOLAM}
|
||||
//?>{POINT_QUBUTS}
|
||||
//?>{POINT_DAGESH_OR_MAPIQ}
|
||||
//?>{POINT_METEG}
|
||||
//?>{PUNCTUATION_MAQAF}
|
||||
//?>{POINT_RAFE}
|
||||
//?>{PUNCTUATION_PASEQ}
|
||||
//?>{POINT_SHIN_DOT}
|
||||
//?>{POINT_SIN_DOT}
|
||||
//?>{PUNCTUATION_SOF_PASUQ}
|
||||
|
||||
"a>{ALEF};"
|
||||
"A>{ALEF};"
|
||||
|
||||
"b>{BET};"
|
||||
"B>{BET};"
|
||||
|
||||
"c({softvowel}>{SAMEKH};"
|
||||
"C({softvowel}>{SAMEKH};"
|
||||
"c({letter}>{KAF};"
|
||||
"C({letter}>{KAF};"
|
||||
"c>{FINAL_KAF};"
|
||||
"C>{FINAL_KAF};"
|
||||
|
||||
"d>{DALET};"
|
||||
"D>{DALET};"
|
||||
|
||||
"e>{AYIN};"
|
||||
"E>{AYIN};"
|
||||
|
||||
"f({letter}>{PE};"
|
||||
"f>{FINAL_PE};"
|
||||
"F({letter}>{PE};"
|
||||
"F>{FINAL_PE};"
|
||||
|
||||
"g>{GIMEL};"
|
||||
"G>{GIMEL};"
|
||||
|
||||
"h>{HE};"
|
||||
"H>{HE};"
|
||||
|
||||
"i>{YOD};"
|
||||
"I>{YOD};"
|
||||
|
||||
"j>{DALET}{SHIN};"
|
||||
"J>{DALET}{SHIN};"
|
||||
|
||||
"kH>{HET};"
|
||||
"kh>{HET};"
|
||||
"KH>{HET};"
|
||||
"Kh>{HET};"
|
||||
"k({letter}>{KAF};"
|
||||
"K({letter}>{KAF};"
|
||||
"k>{FINAL_KAF};"
|
||||
"K>{FINAL_KAF};"
|
||||
|
||||
"l>{LAMED};"
|
||||
"L>{LAMED};"
|
||||
|
||||
"m({letter}>{MEM};"
|
||||
"m>{FINAL_MEM};"
|
||||
"M({letter}>{MEM};"
|
||||
"M>{FINAL_MEM};"
|
||||
|
||||
"n({letter}>{NUN};"
|
||||
"n>{FINAL_NUN};"
|
||||
"N({letter}>{NUN};"
|
||||
"N>{FINAL_NUN};"
|
||||
|
||||
"o>{VAV};"
|
||||
"O>{VAV};"
|
||||
|
||||
"p({letter}>{PE};"
|
||||
"p>{FINAL_PE};"
|
||||
"P({letter}>{PE};"
|
||||
"P>{FINAL_PE};"
|
||||
|
||||
"q>{QOF};"
|
||||
"Q>{QOF};"
|
||||
|
||||
"r>{RESH};"
|
||||
"R>{RESH};"
|
||||
|
||||
"sH>{SHIN};"
|
||||
"sh>{SHIN};"
|
||||
"SH>{SHIN};"
|
||||
"Sh>{SHIN};"
|
||||
"s>{SAMEKH};"
|
||||
"S>{SAMEKH};"
|
||||
|
||||
"th>{TAV};"
|
||||
"tH>{TAV};"
|
||||
"TH>{TAV};"
|
||||
"Th>{TAV};"
|
||||
"tS({letter}>{TSADI};"
|
||||
"ts({letter}>{TSADI};"
|
||||
"Ts({letter}>{TSADI};"
|
||||
"TS({letter}>{TSADI};"
|
||||
"tS>{FINAL_TSADI};"
|
||||
"ts>{FINAL_TSADI};"
|
||||
"Ts>{FINAL_TSADI};"
|
||||
"TS>{FINAL_TSADI};"
|
||||
"t>{TET};"
|
||||
"T>{TET};"
|
||||
|
||||
"u>{VAV};"
|
||||
"U>{VAV};"
|
||||
|
||||
"v>{VAV};"
|
||||
"V>{VAV};"
|
||||
|
||||
"w>{VAV};"
|
||||
"W>{VAV};"
|
||||
|
||||
"x>{KAF}{SAMEKH};"
|
||||
"X>{KAF}{SAMEKH};"
|
||||
|
||||
"y>{YOD};"
|
||||
"Y>{YOD};"
|
||||
|
||||
"z>{ZAYIN};"
|
||||
"Z>{ZAYIN};"
|
||||
|
||||
//#?>{YIDDISH_DOUBLE_VAV}
|
||||
//?>{YIDDISH_VAV_YOD}
|
||||
//?>{YIDDISH_DOUBLE_YOD}
|
||||
//?>{PUNCTUATION_GERESH}
|
||||
//?>{PUNCTUATION_GERSHAYIM}
|
||||
|
||||
"''>;"
|
||||
|
||||
//{POINT_SHEVA}>@
|
||||
//{POINT_HATAF_SEGOL}>@
|
||||
//{POINT_HATAF_PATAH}>@
|
||||
//{POINT_HATAF_QAMATS}>@
|
||||
//{POINT_HIRIQ}>@
|
||||
//{POINT_TSERE}>@
|
||||
//{POINT_SEGOL}>@
|
||||
//{POINT_PATAH}>@
|
||||
//{POINT_QAMATS}>@
|
||||
//{POINT_HOLAM}>@
|
||||
//{POINT_QUBUTS}>@
|
||||
//{POINT_DAGESH_OR_MAPIQ}>@
|
||||
//{POINT_METEG}>@
|
||||
//{PUNCTUATION_MAQAF}>@
|
||||
//{POINT_RAFE}>@
|
||||
//{PUNCTUATION_PASEQ}>@
|
||||
//{POINT_SHIN_DOT}>@
|
||||
//{POINT_SIN_DOT}>@
|
||||
//{PUNCTUATION_SOF_PASUQ}>@
|
||||
|
||||
"a<{ALEF};"
|
||||
"e<{AYIN};"
|
||||
"b<{BET};"
|
||||
"d<{DALET};"
|
||||
"k<{FINAL_KAF};"
|
||||
"m<{FINAL_MEM};"
|
||||
"n<{FINAL_NUN};"
|
||||
"p<{FINAL_PE};"
|
||||
"ts<{FINAL_TSADI};"
|
||||
"g<{GIMEL};"
|
||||
"kh<{HET};"
|
||||
"h<{HE};"
|
||||
"k''<{KAF}({HE};"
|
||||
"k<{KAF};"
|
||||
"l<{LAMED};"
|
||||
"m<{MEM};"
|
||||
"n<{NUN};"
|
||||
"p<{PE};"
|
||||
"q<{QOF};"
|
||||
"r<{RESH};"
|
||||
"s''<{SAMEKH}({HE};"
|
||||
"s<{SAMEKH};"
|
||||
"sh<{SHIN};"
|
||||
"th<{TAV};"
|
||||
"t''<{TET}({HE};"
|
||||
"t''<{TET}({SAMEKH};"
|
||||
"t''<{TET}({SHIN};"
|
||||
"t<{TET};"
|
||||
"ts<{TSADI};"
|
||||
"v<{VAV}({vowellike};"
|
||||
"u<{VAV};"
|
||||
"y<{YOD};"
|
||||
"z<{ZAYIN};"
|
||||
|
||||
//{YIDDISH_DOUBLE_VAV}>@
|
||||
//{YIDDISH_VAV_YOD}>@
|
||||
//{YIDDISH_DOUBLE_YOD}>@
|
||||
//{PUNCTUATION_GERESH}>@
|
||||
//{PUNCTUATION_GERSHAYIM}>@
|
||||
|
||||
"<'';"
|
||||
}
|
||||
}
|
@ -1,322 +0,0 @@
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 2000, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date Name Description
|
||||
// 01/13/2000 aliu Creation.
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
ljamo {
|
||||
Rule {
|
||||
// VARIABLES
|
||||
|
||||
"initial=[\u1100-\u115F];"
|
||||
"medial=[\u1160-\u11A7];"
|
||||
"final=[\u11A8-\u11F9];" // added - aliu
|
||||
"vowel=[aeiouwyAEIOUWY\u1160-\u11A7];"
|
||||
"consonant=[bcdfghjklmnpqrstvxzBCDFGHJKLMNPQRSTVXZ{medial}{final}];"
|
||||
"ye=[yeYE];"
|
||||
"ywe=[yweYWE];"
|
||||
"yw=[ywYW];"
|
||||
"nl=[nlNL];"
|
||||
"gnl=[gnlGNL];"
|
||||
"lsgb=[lsgbLSGB];"
|
||||
"ywao=[ywaoYWAO];"
|
||||
"bl=[blBL];"
|
||||
|
||||
// RULES
|
||||
|
||||
// Hangul structure is IMF or IM
|
||||
// So you can have, because of adjacent sequences
|
||||
// IM, but not II or IF
|
||||
// MF or MI, but not MM
|
||||
// FI, but not FF or FM
|
||||
|
||||
// For English, we just have C or V.
|
||||
// To generate valid Hangul:
|
||||
// Vowels:
|
||||
// We insert IEUNG between VV, and otherwise map V to M
|
||||
// We also insert IEUNG in front of a vowel if there is no initial in front of it
|
||||
// Consonants:
|
||||
// We don't break doubles
|
||||
// Cases like lmgg, we have to break at lm
|
||||
// So to guess whether a consonant is I or F
|
||||
// we map all C's to F, except when followed by a vowel, e.g.
|
||||
// X[{vowel}>CHOSEONG (initial)
|
||||
// X>JONGSEONG (final)
|
||||
|
||||
// special insertion for funny sequences of vowels, and for empty consonant
|
||||
|
||||
"'' < ({consonant}) \u110B;" // insert a break between any consonant and the empty consonant.
|
||||
"({medial}) ({vowel}) <> \u110B;" // HANGUL CHOSEONG IEUNG
|
||||
|
||||
// Below, insert an empty consonant in front of a vowel, if there is no Initial in front.
|
||||
|
||||
// Fix casing.
|
||||
// Because Korean is caseless, we just want to treat everything as
|
||||
// lowercase.
|
||||
// we could do this by always preceding this transliterator with
|
||||
// an upper-lowercase transformation, but that wouldn't invert nicely.
|
||||
// We use the "revisit" syntax to just convert latin to latin
|
||||
// so that we can avoid
|
||||
// having to restate all the Latin=>Jamo rules, with the I/F handling.
|
||||
|
||||
// We don't have to add titlecase, since that will be picked up
|
||||
// since the first letter is converted, then revisited. E.g.
|
||||
// |Gg => |gg => {sang kiyeok}
|
||||
// We do have to have all caps, since otherwise we could get:
|
||||
// |GG => |gG => {kiyeok}|G => {kiyeok}|g => {kiyeok}{kiyeok}
|
||||
|
||||
"Z > |z;"
|
||||
"YU > |yu;"
|
||||
"YO > |yo;"
|
||||
"YI > |yi;"
|
||||
"YEO > |yeo;"
|
||||
"YE > |ye;"
|
||||
"YAE > |yae;"
|
||||
"YA > |ya;"
|
||||
"Y > |y;"
|
||||
"WI > |wi;"
|
||||
"WEO > |weo;"
|
||||
"WE > |we;"
|
||||
"WAE > |wae;"
|
||||
"WA > |wa;"
|
||||
"W > |w;"
|
||||
"U > |u;"
|
||||
"T > |t;"
|
||||
"SS > |ss;"
|
||||
"S > |s;"
|
||||
"P > |p;"
|
||||
"OE > |oe;"
|
||||
"O > |o;"
|
||||
"NJ > |nj;"
|
||||
"NH > |nh;"
|
||||
"NG > |ng;"
|
||||
"N > |n;"
|
||||
"M > |m;"
|
||||
"LT > |lt;"
|
||||
"LS > |ls;"
|
||||
"LP > |lp;"
|
||||
"LM > |lm;"
|
||||
"LH > |lh;"
|
||||
"LG > |lg;"
|
||||
"LB > |lb;"
|
||||
"L > |l;"
|
||||
"K > |k;"
|
||||
"JJ > |jj;"
|
||||
"J > |j;"
|
||||
"I > |i;"
|
||||
"H > |h;"
|
||||
"GS > |gs;"
|
||||
"GG > |gg;"
|
||||
"G > |g;"
|
||||
"EU > |eu;"
|
||||
"EO > |eo;"
|
||||
"E > |e;"
|
||||
"DD > |dd;"
|
||||
"D > |d;"
|
||||
"BS > |bs;"
|
||||
"BB > |bb;"
|
||||
"B > |b;"
|
||||
"AE > |ae;"
|
||||
"A > |a;"
|
||||
|
||||
// APOSTROPHE
|
||||
|
||||
// As always, an apostrophe is used to separate digraphs into
|
||||
// singles. That is, if you really wanted [KAN][GGAN], instead
|
||||
// of [KANG][GAN] you would write "kan'ggan".
|
||||
|
||||
// Rules for inserting ' when mapping separated digraphs back
|
||||
// from Hangul to Latin. Catch every letter that can be the
|
||||
// LAST of a digraph (or multigraph) AND first of an initial
|
||||
|
||||
"'' < (l) (\u11c0;" // hangul jongseong thieuth
|
||||
"'' < ({lsgb}) (\u11ba;" // hangul jongseong sios
|
||||
"'' < (l) (\u11c1;" // hangul jongseong phieuph
|
||||
"'' < (l) (\u11b7;" // hangul jongseong mieum
|
||||
"'' < (n) (\u11bd;" // hangul jongseong cieuc
|
||||
"'' < ({nl}) (\u11c2;" // hangul jongseong hieuh
|
||||
"'' < ({gnl}) (\u11a9;" // hangul jongseong ssangkiyeok
|
||||
"'' < ({bl}) (\u11b8;" // hangul jongseong pieup
|
||||
"'' < (d) (\u11ae;" // hangul jongseong tikeut
|
||||
|
||||
"'' < ({ye}) (\u116e;" // hangul jungseong u
|
||||
"'' < ({ywe}) (\u1169;" // hangul jungseong o
|
||||
"'' < ({yw}) (\u1175;" // hangul jungseong i
|
||||
"'' < ({ywao}) (\u1166;" // hangul jungseong e
|
||||
"'' < ({yw}) (\u1161;" // hangul jungseong a
|
||||
|
||||
"'' < (l) (\u1110;" // hangul choseong thieuth
|
||||
"'' < ({lsgb}) (\u110a;" // hangul choseong ssangsios
|
||||
"'' < ({lsgb}) (\u1109;" // hangul choseong sios
|
||||
"'' < (l) (\u1111;" // hangul choseong phieuph
|
||||
"'' < (l) (\u1106;" // hangul choseong mieum
|
||||
"'' < (n) (\u110c;" // hangul choseong cieuc
|
||||
"'' < (n) (\u110d;" // hangul choseong ssangcieuc
|
||||
"'' < ({nl}) (\u1112;" // hangul choseong hieuh
|
||||
"'' < ({gnl}) (\u1101;" // hangul choseong ssangkiyeok
|
||||
"'' < ({gnl}) (\u1100;" // hangul choseong kiyeok
|
||||
"'' < (d) (\u1103;" // hangul choseong tikeut
|
||||
"'' < (d) (\u1104;" // hangul choseong ssangtikeut
|
||||
"'' < ({bl}) (\u1107;" // hangul choseong pieup
|
||||
"'' < ({bl}) (\u1108;" // hangul choseong ssangpieup
|
||||
|
||||
// INITIALS
|
||||
|
||||
"t ({vowel}) <> \u1110;" // hangul choseong thieuth
|
||||
"ss ({vowel}) <> \u110a;" // hangul choseong ssangsios
|
||||
"s ({vowel}) <> \u1109;" // hangul choseong sios
|
||||
"p ({vowel}) <> \u1111;" // hangul choseong phieuph
|
||||
"n ({vowel}) <> \u1102;" // hangul choseong nieun
|
||||
"m ({vowel}) <> \u1106;" // hangul choseong mieum
|
||||
"l ({vowel}) <> \u1105;" // hangul choseong rieul
|
||||
"k ({vowel}) <> \u110f;" // hangul choseong khieukh
|
||||
"j ({vowel}) <> \u110c;" // hangul choseong cieuc
|
||||
"h ({vowel}) <> \u1112;" // hangul choseong hieuh
|
||||
"gg ({vowel}) <> \u1101;" // hangul choseong ssangkiyeok
|
||||
"g ({vowel}) <> \u1100;" // hangul choseong kiyeok
|
||||
"d ({vowel}) <> \u1103;" // hangul choseong tikeut
|
||||
"c ({vowel}) <> \u110e;" // hangul choseong chieuch
|
||||
"b ({vowel}) <> \u1107;" // hangul choseong pieup
|
||||
"bb ({vowel}) <> \u1108;" // hangul choseong ssangpieup
|
||||
"jj ({vowel}) <> \u110d;" // hangul choseong ssangcieuc
|
||||
"dd ({vowel}) <> \u1104;" // hangul choseong ssangtikeut
|
||||
|
||||
// If we have gotten through to these rules, and we start with
|
||||
// a consonant, then the remaining mappings would be to F,
|
||||
// because we must have CC (or C<non-letter>), not CV.
|
||||
// If we have F before us, then
|
||||
// we would end up with FF, which is wrong. The simplest fix is
|
||||
// to still make it an initial, but also insert an "u",
|
||||
// so we end up with F, I, u, and then continue with the C
|
||||
|
||||
// special, only initial
|
||||
// bb, jj and dd have no entry in the FINALS rules, so when no vowel
|
||||
// follows they are always emitted as their initial (choseong) jamo
|
||||
// followed by the filler vowel u (\u116e), giving a valid I+M pair.
|
||||
"bb > \u1108\u116e;" // hangul choseong ssangpieup
|
||||
"jj > \u110d\u116e;" // hangul choseong ssangcieuc
|
||||
"dd > \u1104\u116e;" // hangul choseong ssangtikeut
|
||||
|
||||
"({final}) t > \u1110\u116e;" // hangul choseong thieuth
|
||||
"({final}) ss > \u110a\u116e;" // hangul choseong ssangsios
|
||||
"({final}) s > \u1109\u116e;" // hangul choseong sios
|
||||
"({final}) p > \u1111\u116e;" // hangul choseong phieuph
|
||||
"({final}) n > \u1102\u116e;" // hangul choseong nieun
|
||||
"({final}) m > \u1106\u116e;" // hangul choseong mieum
|
||||
"({final}) l > \u1105\u116e;" // hangul choseong rieul
|
||||
"({final}) k > \u110f\u116e;" // hangul choseong khieukh
|
||||
"({final}) j > \u110c\u116e;" // hangul choseong cieuc
|
||||
"({final}) h > \u1112\u116e;" // hangul choseong hieuh
|
||||
"({final}) gg > \u1101\u116e;" // hangul choseong ssangkiyeok
|
||||
"({final}) g > \u1100\u116e;" // hangul choseong kiyeok
|
||||
"({final}) d > \u1103\u116e;" // hangul choseong tikeut
|
||||
"({final}) c > \u110e\u116e;" // hangul choseong chieuch
|
||||
"({final}) b > \u1107\u116e;" // hangul choseong pieup
|
||||
|
||||
// MEDIALS after INITIALS
|
||||
|
||||
"({initial}) yu <> \u1172;" // hangul jungseong yu
|
||||
"({initial}) yo <> \u116d;" // hangul jungseong yo
|
||||
"({initial}) yi <> \u1174;" // hangul jungseong yi
|
||||
"({initial}) yeo <> \u1167;" // hangul jungseong yeo
|
||||
"({initial}) ye <> \u1168;" // hangul jungseong ye
|
||||
"({initial}) yae <> \u1164;" // hangul jungseong yae
|
||||
"({initial}) ya <> \u1163;" // hangul jungseong ya
|
||||
"({initial}) wi <> \u1171;" // hangul jungseong wi
|
||||
"({initial}) weo <> \u116f;" // hangul jungseong weo
|
||||
"({initial}) we <> \u1170;" // hangul jungseong we
|
||||
"({initial}) wae <> \u116b;" // hangul jungseong wae
|
||||
"({initial}) wa <> \u116a;" // hangul jungseong wa
|
||||
"({initial}) u <> \u116e;" // hangul jungseong u
|
||||
"({initial}) oe <> \u116c;" // hangul jungseong oe
|
||||
"({initial}) o <> \u1169;" // hangul jungseong o
|
||||
"({initial}) i <> \u1175;" // hangul jungseong i
|
||||
"({initial}) eu <> \u1173;" // hangul jungseong eu
|
||||
"({initial}) eo <> \u1165;" // hangul jungseong eo
|
||||
"({initial}) e <> \u1166;" // hangul jungseong e
|
||||
"({initial}) ae <> \u1162;" // hangul jungseong ae
|
||||
"({initial}) a <> \u1161;" // hangul jungseong a
|
||||
|
||||
// MEDIALS (vowels) not after INITIALs
|
||||
|
||||
"yu > \u110B\u1172;" // hangul jungseong yu
|
||||
"yo > \u110B\u116d;" // hangul jungseong yo
|
||||
"yi > \u110B\u1174;" // hangul jungseong yi
|
||||
"yeo > \u110B\u1167;" // hangul jungseong yeo
|
||||
"ye > \u110B\u1168;" // hangul jungseong ye
|
||||
"yae > \u110B\u1164;" // hangul jungseong yae
|
||||
"ya > \u110B\u1163;" // hangul jungseong ya
|
||||
"wi > \u110B\u1171;" // hangul jungseong wi
|
||||
"weo > \u110B\u116f;" // hangul jungseong weo
|
||||
"we > \u110B\u1170;" // hangul jungseong we
|
||||
"wae > \u110B\u116b;" // hangul jungseong wae
|
||||
"wa > \u110B\u116a;" // hangul jungseong wa
|
||||
"u > \u110B\u116e;" // hangul jungseong u
|
||||
"oe > \u110B\u116c;" // hangul jungseong oe
|
||||
"o > \u110B\u1169;" // hangul jungseong o
|
||||
"i > \u110B\u1175;" // hangul jungseong i
|
||||
"eu > \u110B\u1173;" // hangul jungseong eu
|
||||
"eo > \u110B\u1165;" // hangul jungseong eo
|
||||
"e > \u110B\u1166;" // hangul jungseong e
|
||||
"ae > \u110B\u1162;" // hangul jungseong ae
|
||||
"a > \u110B\u1161;" // hangul jungseong a
|
||||
|
||||
|
||||
// FINALS
|
||||
|
||||
"t <> \u11c0;" // hangul jongseong thieuth
|
||||
"ss <> \u11bb;" // hangul jongseong ssangsios
|
||||
"s <> \u11ba;" // hangul jongseong sios
|
||||
"p <> \u11c1;" // hangul jongseong phieuph
|
||||
"nj <> \u11ac;" // hangul jongseong nieun-cieuc
|
||||
"nh <> \u11ad;" // hangul jongseong nieun-hieuh
|
||||
"ng <> \u11bc;" // hangul jongseong ieung
|
||||
"n <> \u11ab;" // hangul jongseong nieun
|
||||
"m <> \u11b7;" // hangul jongseong mieum
|
||||
"lt <> \u11b4;" // hangul jongseong rieul-thieuth
|
||||
"ls <> \u11b3;" // hangul jongseong rieul-sios
|
||||
"lp <> \u11b5;" // hangul jongseong rieul-phieuph
|
||||
"lm <> \u11b1;" // hangul jongseong rieul-mieum
|
||||
"lh <> \u11b6;" // hangul jongseong rieul-hieuh
|
||||
"lg <> \u11b0;" // hangul jongseong rieul-kiyeok
|
||||
"lb <> \u11b2;" // hangul jongseong rieul-pieup
|
||||
"l <> \u11af;" // hangul jongseong rieul
|
||||
"k <> \u11bf;" // hangul jongseong khieukh
|
||||
"j <> \u11bd;" // hangul jongseong cieuc
|
||||
"h <> \u11c2;" // hangul jongseong hieuh
|
||||
"gs <> \u11aa;" // hangul jongseong kiyeok-sios
|
||||
"gg <> \u11a9;" // hangul jongseong ssangkiyeok
|
||||
"g <> \u11a8;" // hangul jongseong kiyeok
|
||||
"d <> \u11ae;" // hangul jongseong tikeut
|
||||
"c <> \u11be;" // hangul jongseong chieuch
|
||||
"bs <> \u11b9;" // hangul jongseong pieup-sios
|
||||
"b <> \u11b8;" // hangul jongseong pieup
|
||||
|
||||
// extra English letters
|
||||
// {moved to bottom - aliu}
|
||||
|
||||
"z > |s;"
|
||||
//{ + "Z > |s;" } masked
|
||||
"x > |ks;"
|
||||
"X > |ks;"
|
||||
"v > |b;"
|
||||
"V > |b;"
|
||||
"r > |l;"
|
||||
"R > |l;"
|
||||
"q > |k;"
|
||||
"Q > |k;"
|
||||
"f > |p;"
|
||||
"F > |p;"
|
||||
//{ + "c > |k;" } masked
|
||||
"C > |k;"
|
||||
|
||||
"y > \u1172;" // hangul jungseong yu
|
||||
"w > \u1171;" // hangul jungseong wi
|
||||
|
||||
|
||||
// ====================================
|
||||
// Normal final rule: remove '
|
||||
// ====================================
|
||||
|
||||
"''>;"
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -1,2 +0,0 @@
|
||||
// This file is obsolete
|
||||
// See lcyril
|
@ -1,83 +0,0 @@
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (C) 1999, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// Date Name Description
|
||||
// 11/17/99 aliu Creation.
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// StraightQuotes-CurlyQuotes
|
||||
|
||||
quotes {
|
||||
Rule {
|
||||
// Rewritten using character codes [LIU]
|
||||
"white=[[:Zs:][:Zl:][:Zp:]];"
|
||||
"black=[^{white}];"
|
||||
"open=[:Ps:];"
|
||||
"dquote=\";"
|
||||
|
||||
"lAng=\u3008;"
|
||||
"ldAng=\u300A;"
|
||||
"lBrk='[';"
|
||||
"lBrc='{';"
|
||||
|
||||
"lquote=\u2018;"
|
||||
"rquote=\u2019;"
|
||||
"ldquote=\u201C;"
|
||||
"rdquote=\u201D;"
|
||||
|
||||
"ldguill=\u00AB;"
|
||||
"rdguill=\u00BB;"
|
||||
"lguill=\u2039;"
|
||||
"rguill=\u203A;"
|
||||
|
||||
"mdash=\u2014;"
|
||||
|
||||
//#######################################
|
||||
// Conversions from input
|
||||
//#######################################
|
||||
|
||||
// join single quotes
|
||||
"{lquote}''>{ldquote};"
|
||||
"{lquote}{lquote}>{ldquote};"
|
||||
"{rquote}''>{rdquote};"
|
||||
"{rquote}{rquote}>{rdquote};"
|
||||
|
||||
//smart single quotes
|
||||
"{white})''>{lquote};"
|
||||
"{open})''>{lquote};"
|
||||
"{black})''>{rquote};"
|
||||
"''>{lquote};"
|
||||
|
||||
//smart doubles
|
||||
"{white}){dquote}>{ldquote};"
|
||||
"{open}){dquote}>{ldquote};"
|
||||
"{black}){dquote}>{rdquote};"
|
||||
"{dquote}>{ldquote};"
|
||||
|
||||
// join single guillemets
|
||||
"{rguill}{rguill}>{rdguill};"
|
||||
"'>>'>{rdguill};"
|
||||
"{lguill}{lguill}>{ldguill};"
|
||||
"'<<'>{ldguill};"
|
||||
|
||||
// prevent double spaces
|
||||
"\\ )\\ >;"
|
||||
|
||||
// join hyphens into dash
|
||||
"-->{mdash};"
|
||||
|
||||
//#######################################
|
||||
// Conversions back to input
|
||||
//#######################################
|
||||
|
||||
//smart quotes
|
||||
"''<{lquote};"
|
||||
"''<{rquote};"
|
||||
"{dquote}<{ldquote};"
|
||||
"{dquote}<{rdquote};"
|
||||
|
||||
//hyphens
|
||||
"--<{mdash};"
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user