no message
X-SVN-Rev: 791
This commit is contained in:
parent
485377d2bf
commit
49f2784a30
@ -1,276 +0,0 @@
|
||||
package com.ibm.text.resources;
|
||||
|
||||
import java.util.ListResourceBundle;
|
||||
|
||||
public class TransliterationRule$Fullwidth$Halfwidth extends ListResourceBundle {
|
||||
/**
|
||||
* Overrides ListResourceBundle
|
||||
*/
|
||||
public Object[][] getContents() {
|
||||
return new Object[][] {
|
||||
{ "Rule", ""
|
||||
|
||||
/* Mechanically generated from Unicode Character Database
|
||||
*/
|
||||
|
||||
// multicharacter
|
||||
|
||||
+ "\u30AC<>\uFF76\uFF9E;" // to KATAKANA LETTER GA
|
||||
+ "\u30AE<>\uFF77\uFF9E;" // to KATAKANA LETTER GI
|
||||
+ "\u30B0<>\uFF78\uFF9E;" // to KATAKANA LETTER GU
|
||||
+ "\u30B2<>\uFF79\uFF9E;" // to KATAKANA LETTER GE
|
||||
+ "\u30B4<>\uFF7A\uFF9E;" // to KATAKANA LETTER GO
|
||||
+ "\u30B6<>\uFF7B\uFF9E;" // to KATAKANA LETTER ZA
|
||||
+ "\u30B8<>\uFF7C\uFF9E;" // to KATAKANA LETTER ZI
|
||||
+ "\u30BA<>\uFF7D\uFF9E;" // to KATAKANA LETTER ZU
|
||||
+ "\u30BC<>\uFF7E\uFF9E;" // to KATAKANA LETTER ZE
|
||||
+ "\u30BE<>\uFF7F\uFF9E;" // to KATAKANA LETTER ZO
|
||||
+ "\u30C0<>\uFF80\uFF9E;" // to KATAKANA LETTER DA
|
||||
+ "\u30C2<>\uFF81\uFF9E;" // to KATAKANA LETTER DI
|
||||
+ "\u30C5<>\uFF82\uFF9E;" // to KATAKANA LETTER DU
|
||||
+ "\u30C7<>\uFF83\uFF9E;" // to KATAKANA LETTER DE
|
||||
+ "\u30C9<>\uFF84\uFF9E;" // to KATAKANA LETTER DO
|
||||
+ "\u30D0<>\uFF8A\uFF9E;" // to KATAKANA LETTER BA
|
||||
+ "\u30D1<>\uFF8A\uFF9F;" // to KATAKANA LETTER PA
|
||||
+ "\u30D3<>\uFF8B\uFF9E;" // to KATAKANA LETTER BI
|
||||
+ "\u30D4<>\uFF8B\uFF9F;" // to KATAKANA LETTER PI
|
||||
+ "\u30D6<>\uFF8C\uFF9E;" // to KATAKANA LETTER BU
|
||||
+ "\u30D7<>\uFF8C\uFF9F;" // to KATAKANA LETTER PU
|
||||
+ "\u30D9<>\uFF8D\uFF9E;" // to KATAKANA LETTER BE
|
||||
+ "\u30DA<>\uFF8D\uFF9F;" // to KATAKANA LETTER PE
|
||||
+ "\u30DC<>\uFF8E\uFF9E;" // to KATAKANA LETTER BO
|
||||
+ "\u30DD<>\uFF8E\uFF9F;" // to KATAKANA LETTER PO
|
||||
+ "\u30F4<>\uFF73\uFF9E;" // to KATAKANA LETTER VU
|
||||
+ "\u30F7<>\uFF9C\uFF9E;" // to KATAKANA LETTER VA
|
||||
+ "\u30FA<>\uFF66\uFF9E;" // to KATAKANA LETTER VO
|
||||
|
||||
// single character
|
||||
|
||||
+ "\uFF01<>'!';" // from FULLWIDTH EXCLAMATION MARK
|
||||
+ "\uFF02<>'\"';" // from FULLWIDTH QUOTATION MARK
|
||||
+ "\uFF03<>'#';" // from FULLWIDTH NUMBER SIGN
|
||||
+ "\uFF04<>'$';" // from FULLWIDTH DOLLAR SIGN
|
||||
+ "\uFF05<>'%';" // from FULLWIDTH PERCENT SIGN
|
||||
+ "\uFF06<>'&';" // from FULLWIDTH AMPERSAND
|
||||
+ "\uFF07<>'';" // from FULLWIDTH APOSTROPHE
|
||||
+ "\uFF08<>'(';" // from FULLWIDTH LEFT PARENTHESIS
|
||||
+ "\uFF09<>')';" // from FULLWIDTH RIGHT PARENTHESIS
|
||||
+ "\uFF0A<>'*';" // from FULLWIDTH ASTERISK
|
||||
+ "\uFF0B<>'+';" // from FULLWIDTH PLUS SIGN
|
||||
+ "\uFF0C<>',';" // from FULLWIDTH COMMA
|
||||
+ "\uFF0D<>'-';" // from FULLWIDTH HYPHEN-MINUS
|
||||
+ "\uFF0E<>'.';" // from FULLWIDTH FULL STOP
|
||||
+ "\uFF0F<>'/';" // from FULLWIDTH SOLIDUS
|
||||
+ "\uFF10<>'0';" // from FULLWIDTH DIGIT ZERO
|
||||
+ "\uFF11<>'1';" // from FULLWIDTH DIGIT ONE
|
||||
+ "\uFF12<>'2';" // from FULLWIDTH DIGIT TWO
|
||||
+ "\uFF13<>'3';" // from FULLWIDTH DIGIT THREE
|
||||
+ "\uFF14<>'4';" // from FULLWIDTH DIGIT FOUR
|
||||
+ "\uFF15<>'5';" // from FULLWIDTH DIGIT FIVE
|
||||
+ "\uFF16<>'6';" // from FULLWIDTH DIGIT SIX
|
||||
+ "\uFF17<>'7';" // from FULLWIDTH DIGIT SEVEN
|
||||
+ "\uFF18<>'8';" // from FULLWIDTH DIGIT EIGHT
|
||||
+ "\uFF19<>'9';" // from FULLWIDTH DIGIT NINE
|
||||
+ "\uFF1A<>':';" // from FULLWIDTH COLON
|
||||
+ "\uFF1B<>';';" // from FULLWIDTH SEMICOLON
|
||||
+ "\uFF1C<>'<';" // from FULLWIDTH LESS-THAN SIGN
|
||||
+ "\uFF1D<>'=';" // from FULLWIDTH EQUALS SIGN
|
||||
+ "\uFF1E<>'>';" // from FULLWIDTH GREATER-THAN SIGN
|
||||
+ "\uFF1F<>'?';" // from FULLWIDTH QUESTION MARK
|
||||
+ "\uFF20<>'@';" // from FULLWIDTH COMMERCIAL AT
|
||||
+ "\uFF21<>A;" // from FULLWIDTH LATIN CAPITAL LETTER A
|
||||
+ "\uFF22<>B;" // from FULLWIDTH LATIN CAPITAL LETTER B
|
||||
+ "\uFF23<>C;" // from FULLWIDTH LATIN CAPITAL LETTER C
|
||||
+ "\uFF24<>D;" // from FULLWIDTH LATIN CAPITAL LETTER D
|
||||
+ "\uFF25<>E;" // from FULLWIDTH LATIN CAPITAL LETTER E
|
||||
+ "\uFF26<>F;" // from FULLWIDTH LATIN CAPITAL LETTER F
|
||||
+ "\uFF27<>G;" // from FULLWIDTH LATIN CAPITAL LETTER G
|
||||
+ "\uFF28<>H;" // from FULLWIDTH LATIN CAPITAL LETTER H
|
||||
+ "\uFF29<>I;" // from FULLWIDTH LATIN CAPITAL LETTER I
|
||||
+ "\uFF2A<>J;" // from FULLWIDTH LATIN CAPITAL LETTER J
|
||||
+ "\uFF2B<>K;" // from FULLWIDTH LATIN CAPITAL LETTER K
|
||||
+ "\uFF2C<>L;" // from FULLWIDTH LATIN CAPITAL LETTER L
|
||||
+ "\uFF2D<>M;" // from FULLWIDTH LATIN CAPITAL LETTER M
|
||||
+ "\uFF2E<>N;" // from FULLWIDTH LATIN CAPITAL LETTER N
|
||||
+ "\uFF2F<>O;" // from FULLWIDTH LATIN CAPITAL LETTER O
|
||||
+ "\uFF30<>P;" // from FULLWIDTH LATIN CAPITAL LETTER P
|
||||
+ "\uFF31<>Q;" // from FULLWIDTH LATIN CAPITAL LETTER Q
|
||||
+ "\uFF32<>R;" // from FULLWIDTH LATIN CAPITAL LETTER R
|
||||
+ "\uFF33<>S;" // from FULLWIDTH LATIN CAPITAL LETTER S
|
||||
+ "\uFF34<>T;" // from FULLWIDTH LATIN CAPITAL LETTER T
|
||||
+ "\uFF35<>U;" // from FULLWIDTH LATIN CAPITAL LETTER U
|
||||
+ "\uFF36<>V;" // from FULLWIDTH LATIN CAPITAL LETTER V
|
||||
+ "\uFF37<>W;" // from FULLWIDTH LATIN CAPITAL LETTER W
|
||||
+ "\uFF38<>X;" // from FULLWIDTH LATIN CAPITAL LETTER X
|
||||
+ "\uFF39<>Y;" // from FULLWIDTH LATIN CAPITAL LETTER Y
|
||||
+ "\uFF3A<>Z;" // from FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
+ "\uFF3B<>'[';" // from FULLWIDTH LEFT SQUARE BRACKET
|
||||
+ "\uFF3C<>'\\';" // from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
|
||||
+ "\uFF3D<>']';" // from FULLWIDTH RIGHT SQUARE BRACKET
|
||||
+ "\uFF3E<>'^';" // from FULLWIDTH CIRCUMFLEX ACCENT
|
||||
+ "\uFF3F<>'_';" // from FULLWIDTH LOW LINE
|
||||
+ "\uFF40<>'`';" // from FULLWIDTH GRAVE ACCENT
|
||||
+ "\uFF41<>a;" // from FULLWIDTH LATIN SMALL LETTER A
|
||||
+ "\uFF42<>b;" // from FULLWIDTH LATIN SMALL LETTER B
|
||||
+ "\uFF43<>c;" // from FULLWIDTH LATIN SMALL LETTER C
|
||||
+ "\uFF44<>d;" // from FULLWIDTH LATIN SMALL LETTER D
|
||||
+ "\uFF45<>e;" // from FULLWIDTH LATIN SMALL LETTER E
|
||||
+ "\uFF46<>f;" // from FULLWIDTH LATIN SMALL LETTER F
|
||||
+ "\uFF47<>g;" // from FULLWIDTH LATIN SMALL LETTER G
|
||||
+ "\uFF48<>h;" // from FULLWIDTH LATIN SMALL LETTER H
|
||||
+ "\uFF49<>i;" // from FULLWIDTH LATIN SMALL LETTER I
|
||||
+ "\uFF4A<>j;" // from FULLWIDTH LATIN SMALL LETTER J
|
||||
+ "\uFF4B<>k;" // from FULLWIDTH LATIN SMALL LETTER K
|
||||
+ "\uFF4C<>l;" // from FULLWIDTH LATIN SMALL LETTER L
|
||||
+ "\uFF4D<>m;" // from FULLWIDTH LATIN SMALL LETTER M
|
||||
+ "\uFF4E<>n;" // from FULLWIDTH LATIN SMALL LETTER N
|
||||
+ "\uFF4F<>o;" // from FULLWIDTH LATIN SMALL LETTER O
|
||||
+ "\uFF50<>p;" // from FULLWIDTH LATIN SMALL LETTER P
|
||||
+ "\uFF51<>q;" // from FULLWIDTH LATIN SMALL LETTER Q
|
||||
+ "\uFF52<>r;" // from FULLWIDTH LATIN SMALL LETTER R
|
||||
+ "\uFF53<>s;" // from FULLWIDTH LATIN SMALL LETTER S
|
||||
+ "\uFF54<>t;" // from FULLWIDTH LATIN SMALL LETTER T
|
||||
+ "\uFF55<>u;" // from FULLWIDTH LATIN SMALL LETTER U
|
||||
+ "\uFF56<>v;" // from FULLWIDTH LATIN SMALL LETTER V
|
||||
+ "\uFF57<>w;" // from FULLWIDTH LATIN SMALL LETTER W
|
||||
+ "\uFF58<>x;" // from FULLWIDTH LATIN SMALL LETTER X
|
||||
+ "\uFF59<>y;" // from FULLWIDTH LATIN SMALL LETTER Y
|
||||
+ "\uFF5A<>z;" // from FULLWIDTH LATIN SMALL LETTER Z
|
||||
+ "\uFF5B<>'{';" // from FULLWIDTH LEFT CURLY BRACKET
|
||||
+ "\uFF5C<>'|';" // from FULLWIDTH VERTICAL LINE
|
||||
+ "\uFF5D<>'}';" // from FULLWIDTH RIGHT CURLY BRACKET
|
||||
+ "\uFF5E<>'~';" // from FULLWIDTH TILDE
|
||||
+ "\u3002<>\uFF61;" // to HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
+ "\u300C<>\uFF62;" // to HALFWIDTH LEFT CORNER BRACKET
|
||||
+ "\u300D<>\uFF63;" // to HALFWIDTH RIGHT CORNER BRACKET
|
||||
+ "\u3001<>\uFF64;" // to HALFWIDTH IDEOGRAPHIC COMMA
|
||||
+ "\u30FB<>\uFF65;" // to HALFWIDTH KATAKANA MIDDLE DOT
|
||||
+ "\u30F2<>\uFF66;" // to HALFWIDTH KATAKANA LETTER WO
|
||||
+ "\u30A1<>\uFF67;" // to HALFWIDTH KATAKANA LETTER SMALL A
|
||||
+ "\u30A3<>\uFF68;" // to HALFWIDTH KATAKANA LETTER SMALL I
|
||||
+ "\u30A5<>\uFF69;" // to HALFWIDTH KATAKANA LETTER SMALL U
|
||||
+ "\u30A7<>\uFF6A;" // to HALFWIDTH KATAKANA LETTER SMALL E
|
||||
+ "\u30A9<>\uFF6B;" // to HALFWIDTH KATAKANA LETTER SMALL O
|
||||
+ "\u30E3<>\uFF6C;" // to HALFWIDTH KATAKANA LETTER SMALL YA
|
||||
+ "\u30E5<>\uFF6D;" // to HALFWIDTH KATAKANA LETTER SMALL YU
|
||||
+ "\u30E7<>\uFF6E;" // to HALFWIDTH KATAKANA LETTER SMALL YO
|
||||
+ "\u30C3<>\uFF6F;" // to HALFWIDTH KATAKANA LETTER SMALL TU
|
||||
+ "\u30FC<>\uFF70;" // to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
+ "\u30A2<>\uFF71;" // to HALFWIDTH KATAKANA LETTER A
|
||||
+ "\u30A4<>\uFF72;" // to HALFWIDTH KATAKANA LETTER I
|
||||
+ "\u30A6<>\uFF73;" // to HALFWIDTH KATAKANA LETTER U
|
||||
+ "\u30A8<>\uFF74;" // to HALFWIDTH KATAKANA LETTER E
|
||||
+ "\u30AA<>\uFF75;" // to HALFWIDTH KATAKANA LETTER O
|
||||
+ "\u30AB<>\uFF76;" // to HALFWIDTH KATAKANA LETTER KA
|
||||
+ "\u30AD<>\uFF77;" // to HALFWIDTH KATAKANA LETTER KI
|
||||
+ "\u30AF<>\uFF78;" // to HALFWIDTH KATAKANA LETTER KU
|
||||
+ "\u30B1<>\uFF79;" // to HALFWIDTH KATAKANA LETTER KE
|
||||
+ "\u30B3<>\uFF7A;" // to HALFWIDTH KATAKANA LETTER KO
|
||||
+ "\u30B5<>\uFF7B;" // to HALFWIDTH KATAKANA LETTER SA
|
||||
+ "\u30B7<>\uFF7C;" // to HALFWIDTH KATAKANA LETTER SI
|
||||
+ "\u30B9<>\uFF7D;" // to HALFWIDTH KATAKANA LETTER SU
|
||||
+ "\u30BB<>\uFF7E;" // to HALFWIDTH KATAKANA LETTER SE
|
||||
+ "\u30BD<>\uFF7F;" // to HALFWIDTH KATAKANA LETTER SO
|
||||
+ "\u30BF<>\uFF80;" // to HALFWIDTH KATAKANA LETTER TA
|
||||
+ "\u30C1<>\uFF81;" // to HALFWIDTH KATAKANA LETTER TI
|
||||
+ "\u30C4<>\uFF82;" // to HALFWIDTH KATAKANA LETTER TU
|
||||
+ "\u30C6<>\uFF83;" // to HALFWIDTH KATAKANA LETTER TE
|
||||
+ "\u30C8<>\uFF84;" // to HALFWIDTH KATAKANA LETTER TO
|
||||
+ "\u30CA<>\uFF85;" // to HALFWIDTH KATAKANA LETTER NA
|
||||
+ "\u30CB<>\uFF86;" // to HALFWIDTH KATAKANA LETTER NI
|
||||
+ "\u30CC<>\uFF87;" // to HALFWIDTH KATAKANA LETTER NU
|
||||
+ "\u30CD<>\uFF88;" // to HALFWIDTH KATAKANA LETTER NE
|
||||
+ "\u30CE<>\uFF89;" // to HALFWIDTH KATAKANA LETTER NO
|
||||
+ "\u30CF<>\uFF8A;" // to HALFWIDTH KATAKANA LETTER HA
|
||||
+ "\u30D2<>\uFF8B;" // to HALFWIDTH KATAKANA LETTER HI
|
||||
+ "\u30D5<>\uFF8C;" // to HALFWIDTH KATAKANA LETTER HU
|
||||
+ "\u30D8<>\uFF8D;" // to HALFWIDTH KATAKANA LETTER HE
|
||||
+ "\u30DB<>\uFF8E;" // to HALFWIDTH KATAKANA LETTER HO
|
||||
+ "\u30DE<>\uFF8F;" // to HALFWIDTH KATAKANA LETTER MA
|
||||
+ "\u30DF<>\uFF90;" // to HALFWIDTH KATAKANA LETTER MI
|
||||
+ "\u30E0<>\uFF91;" // to HALFWIDTH KATAKANA LETTER MU
|
||||
+ "\u30E1<>\uFF92;" // to HALFWIDTH KATAKANA LETTER ME
|
||||
+ "\u30E2<>\uFF93;" // to HALFWIDTH KATAKANA LETTER MO
|
||||
+ "\u30E4<>\uFF94;" // to HALFWIDTH KATAKANA LETTER YA
|
||||
+ "\u30E6<>\uFF95;" // to HALFWIDTH KATAKANA LETTER YU
|
||||
+ "\u30E8<>\uFF96;" // to HALFWIDTH KATAKANA LETTER YO
|
||||
+ "\u30E9<>\uFF97;" // to HALFWIDTH KATAKANA LETTER RA
|
||||
+ "\u30EA<>\uFF98;" // to HALFWIDTH KATAKANA LETTER RI
|
||||
+ "\u30EB<>\uFF99;" // to HALFWIDTH KATAKANA LETTER RU
|
||||
+ "\u30EC<>\uFF9A;" // to HALFWIDTH KATAKANA LETTER RE
|
||||
+ "\u30ED<>\uFF9B;" // to HALFWIDTH KATAKANA LETTER RO
|
||||
+ "\u30EF<>\uFF9C;" // to HALFWIDTH KATAKANA LETTER WA
|
||||
+ "\u30F3<>\uFF9D;" // to HALFWIDTH KATAKANA LETTER N
|
||||
+ "\u3099<>\uFF9E;" // to HALFWIDTH KATAKANA VOICED SOUND MARK
|
||||
+ "\u309A<>\uFF9F;" // to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
+ "\u1160<>\uFFA0;" // to HALFWIDTH HANGUL FILLER
|
||||
+ "\u1100<>\uFFA1;" // to HALFWIDTH HANGUL LETTER KIYEOK
|
||||
+ "\u1101<>\uFFA2;" // to HALFWIDTH HANGUL LETTER SSANGKIYEOK
|
||||
+ "\u11AA<>\uFFA3;" // to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
|
||||
+ "\u1102<>\uFFA4;" // to HALFWIDTH HANGUL LETTER NIEUN
|
||||
+ "\u11AC<>\uFFA5;" // to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
|
||||
+ "\u11AD<>\uFFA6;" // to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
|
||||
+ "\u1103<>\uFFA7;" // to HALFWIDTH HANGUL LETTER TIKEUT
|
||||
+ "\u1104<>\uFFA8;" // to HALFWIDTH HANGUL LETTER SSANGTIKEUT
|
||||
+ "\u1105<>\uFFA9;" // to HALFWIDTH HANGUL LETTER RIEUL
|
||||
+ "\u11B0<>\uFFAA;" // to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
|
||||
+ "\u11B1<>\uFFAB;" // to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
|
||||
+ "\u11B2<>\uFFAC;" // to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
|
||||
+ "\u11B3<>\uFFAD;" // to HALFWIDTH HANGUL LETTER RIEUL-SIOS
|
||||
+ "\u11B4<>\uFFAE;" // to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
|
||||
+ "\u11B5<>\uFFAF;" // to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
|
||||
+ "\u111A<>\uFFB0;" // to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
|
||||
+ "\u1106<>\uFFB1;" // to HALFWIDTH HANGUL LETTER MIEUM
|
||||
+ "\u1107<>\uFFB2;" // to HALFWIDTH HANGUL LETTER PIEUP
|
||||
+ "\u1108<>\uFFB3;" // to HALFWIDTH HANGUL LETTER SSANGPIEUP
|
||||
+ "\u1121<>\uFFB4;" // to HALFWIDTH HANGUL LETTER PIEUP-SIOS
|
||||
+ "\u1109<>\uFFB5;" // to HALFWIDTH HANGUL LETTER SIOS
|
||||
+ "\u110A<>\uFFB6;" // to HALFWIDTH HANGUL LETTER SSANGSIOS
|
||||
+ "\u110B<>\uFFB7;" // to HALFWIDTH HANGUL LETTER IEUNG
|
||||
+ "\u110C<>\uFFB8;" // to HALFWIDTH HANGUL LETTER CIEUC
|
||||
+ "\u110D<>\uFFB9;" // to HALFWIDTH HANGUL LETTER SSANGCIEUC
|
||||
+ "\u110E<>\uFFBA;" // to HALFWIDTH HANGUL LETTER CHIEUCH
|
||||
+ "\u110F<>\uFFBB;" // to HALFWIDTH HANGUL LETTER KHIEUKH
|
||||
+ "\u1110<>\uFFBC;" // to HALFWIDTH HANGUL LETTER THIEUTH
|
||||
+ "\u1111<>\uFFBD;" // to HALFWIDTH HANGUL LETTER PHIEUPH
|
||||
+ "\u1112<>\uFFBE;" // to HALFWIDTH HANGUL LETTER HIEUH
|
||||
+ "\u1161<>\uFFC2;" // to HALFWIDTH HANGUL LETTER A
|
||||
+ "\u1162<>\uFFC3;" // to HALFWIDTH HANGUL LETTER AE
|
||||
+ "\u1163<>\uFFC4;" // to HALFWIDTH HANGUL LETTER YA
|
||||
+ "\u1164<>\uFFC5;" // to HALFWIDTH HANGUL LETTER YAE
|
||||
+ "\u1165<>\uFFC6;" // to HALFWIDTH HANGUL LETTER EO
|
||||
+ "\u1166<>\uFFC7;" // to HALFWIDTH HANGUL LETTER E
|
||||
+ "\u1167<>\uFFCA;" // to HALFWIDTH HANGUL LETTER YEO
|
||||
+ "\u1168<>\uFFCB;" // to HALFWIDTH HANGUL LETTER YE
|
||||
+ "\u1169<>\uFFCC;" // to HALFWIDTH HANGUL LETTER O
|
||||
+ "\u116A<>\uFFCD;" // to HALFWIDTH HANGUL LETTER WA
|
||||
+ "\u116B<>\uFFCE;" // to HALFWIDTH HANGUL LETTER WAE
|
||||
+ "\u116C<>\uFFCF;" // to HALFWIDTH HANGUL LETTER OE
|
||||
+ "\u116D<>\uFFD2;" // to HALFWIDTH HANGUL LETTER YO
|
||||
+ "\u116E<>\uFFD3;" // to HALFWIDTH HANGUL LETTER U
|
||||
+ "\u116F<>\uFFD4;" // to HALFWIDTH HANGUL LETTER WEO
|
||||
+ "\u1170<>\uFFD5;" // to HALFWIDTH HANGUL LETTER WE
|
||||
+ "\u1171<>\uFFD6;" // to HALFWIDTH HANGUL LETTER WI
|
||||
+ "\u1172<>\uFFD7;" // to HALFWIDTH HANGUL LETTER YU
|
||||
+ "\u1173<>\uFFDA;" // to HALFWIDTH HANGUL LETTER EU
|
||||
+ "\u1174<>\uFFDB;" // to HALFWIDTH HANGUL LETTER YI
|
||||
+ "\u1175<>\uFFDC;" // to HALFWIDTH HANGUL LETTER I
|
||||
+ "\uFFE0<>'\u00a2';" // from FULLWIDTH CENT SIGN
|
||||
+ "\uFFE1<>'\u00a3';" // from FULLWIDTH POUND SIGN
|
||||
+ "\uFFE2<>'\u00ac';" // from FULLWIDTH NOT SIGN
|
||||
+ "\uFFE3<>' '\u0304;" // from FULLWIDTH MACRON
|
||||
+ "\uFFE4<>'\u00a6';" // from FULLWIDTH BROKEN BAR
|
||||
+ "\uFFE5<>'\u00a5';" // from FULLWIDTH YEN SIGN
|
||||
+ "\uFFE6<>\u20A9;" // from FULLWIDTH WON SIGN
|
||||
+ "\u2502<>\uFFE8;" // to HALFWIDTH FORMS LIGHT VERTICAL
|
||||
+ "\u2190<>\uFFE9;" // to HALFWIDTH LEFTWARDS ARROW
|
||||
+ "\u2191<>\uFFEA;" // to HALFWIDTH UPWARDS ARROW
|
||||
+ "\u2192<>\uFFEB;" // to HALFWIDTH RIGHTWARDS ARROW
|
||||
+ "\u2193<>\uFFEC;" // to HALFWIDTH DOWNWARDS ARROW
|
||||
+ "\u25A0<>\uFFED;" // to HALFWIDTH BLACK SQUARE
|
||||
+ "\u25CB<>\uFFEE;" // to HALFWIDTH WHITE CIRCLE
|
||||
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,129 +0,0 @@
|
||||
package com.ibm.text.resources;
|
||||
|
||||
import java.util.ListResourceBundle;
|
||||
|
||||
public class TransliterationRule$KeyboardEscape$Latin1 extends ListResourceBundle {
|
||||
/**
|
||||
* Overrides ListResourceBundle
|
||||
*/
|
||||
public Object[][] getContents() {
|
||||
return new Object[][] {
|
||||
{ "Rule",
|
||||
"esc='';"
|
||||
+ "grave=`;"
|
||||
+ "acute='';"
|
||||
+ "hat=^;"
|
||||
+ "tilde=~;"
|
||||
+ "umlaut=:;"
|
||||
+ "ring=.;"
|
||||
+ "cedilla=,;"
|
||||
+ "slash=/;"
|
||||
+ "super=^;"
|
||||
|
||||
// Make keyboard entry of {esc} possible
|
||||
// and of backslash
|
||||
+ "'\\'{esc}>{esc};"
|
||||
+ "'\\\\'>'\\';"
|
||||
|
||||
// Long keys
|
||||
+ "cur{esc}>\u00A4;"
|
||||
+ "sec{esc}>\u00A7;"
|
||||
+ "not{esc}>\u00AC;"
|
||||
+ "mul{esc}>\u00D7;"
|
||||
+ "div{esc}>\u00F7;"
|
||||
|
||||
+ "\\ {esc}>\u00A0;" // non-breaking space
|
||||
+ "!{esc}>\u00A1;" // inverted exclamation
|
||||
+ "c/{esc}>\u00A2;" // cent sign
|
||||
+ "lb{esc}>\u00A3;" // pound sign
|
||||
+ "'|'{esc}>\u00A6;" // broken vertical bar
|
||||
+ ":{esc}>\u00A8;" // umlaut
|
||||
+ "{super}a{esc}>\u00AA;" // feminine ordinal
|
||||
+ "'<<'{esc}>\u00AB;"
|
||||
+ "r{esc}>\u00AE;"
|
||||
+ "--{esc}>\u00AF;"
|
||||
+ "-{esc}>\u00AD;"
|
||||
+ "+-{esc}>\u00B1;"
|
||||
+ "{super}2{esc}>\u00B2;"
|
||||
+ "{super}3{esc}>\u00B3;"
|
||||
+ "{acute}{esc}>\u00B4;"
|
||||
+ "m{esc}>\u00B5;"
|
||||
+ "para{esc}>\u00B6;"
|
||||
+ "dot{esc}>\u00B7;"
|
||||
+ "{cedilla}{esc}>\u00B8;"
|
||||
+ "{super}1{esc}>\u00B9;"
|
||||
+ "{super}o{esc}>\u00BA;" // masculine ordinal
|
||||
+ "'>>'{esc}>\u00BB;"
|
||||
+ "1/4{esc}>\u00BC;"
|
||||
+ "1/2{esc}>\u00BD;"
|
||||
+ "3/4{esc}>\u00BE;"
|
||||
+ "?{esc}>\u00BF;"
|
||||
+ "A{grave}{esc}>\u00C0;"
|
||||
+ "A{acute}{esc}>\u00C1;"
|
||||
+ "A{hat}{esc}>\u00C2;"
|
||||
+ "A{tilde}{esc}>\u00C3;"
|
||||
+ "A{umlaut}{esc}>\u00C4;"
|
||||
+ "A{ring}{esc}>\u00C5;"
|
||||
+ "AE{esc}>\u00C6;"
|
||||
+ "C{cedilla}{esc}>\u00C7;"
|
||||
+ "E{grave}{esc}>\u00C8;"
|
||||
+ "E{acute}{esc}>\u00C9;"
|
||||
+ "E{hat}{esc}>\u00CA;"
|
||||
+ "E{umlaut}{esc}>\u00CB;"
|
||||
+ "I{grave}{esc}>\u00CC;"
|
||||
+ "I{acute}{esc}>\u00CD;"
|
||||
+ "I{hat}{esc}>\u00CE;"
|
||||
+ "I{umlaut}{esc}>\u00CF;"
|
||||
+ "D-{esc}>\u00D0;"
|
||||
+ "N{tilde}{esc}>\u00D1;"
|
||||
+ "O{grave}{esc}>\u00D2;"
|
||||
+ "O{acute}{esc}>\u00D3;"
|
||||
+ "O{hat}{esc}>\u00D4;"
|
||||
+ "O{tilde}{esc}>\u00D5;"
|
||||
+ "O{umlaut}{esc}>\u00D6;"
|
||||
+ "O{slash}{esc}>\u00D8;"
|
||||
+ "U{grave}{esc}>\u00D9;"
|
||||
+ "U{acute}{esc}>\u00DA;"
|
||||
+ "U{hat}{esc}>\u00DB;"
|
||||
+ "U{umlaut}{esc}>\u00DC;"
|
||||
+ "Y{acute}{esc}>\u00DD;"
|
||||
+ "TH{esc}>\u00DE;"
|
||||
+ "ss{esc}>\u00DF;"
|
||||
+ "a{grave}{esc}>\u00E0;"
|
||||
+ "a{acute}{esc}>\u00E1;"
|
||||
+ "a{hat}{esc}>\u00E2;"
|
||||
+ "a{tilde}{esc}>\u00E3;"
|
||||
+ "a{umlaut}{esc}>\u00E4;"
|
||||
+ "a{ring}{esc}>\u00E5;"
|
||||
+ "ae{esc}>\u00E6;"
|
||||
+ "c{cedilla}{esc}>\u00E7;"
|
||||
+ "c{esc}>\u00A9;" // copyright - after c{cedilla}
|
||||
+ "e{grave}{esc}>\u00E8;"
|
||||
+ "e{acute}{esc}>\u00E9;"
|
||||
+ "e{hat}{esc}>\u00EA;"
|
||||
+ "e{umlaut}{esc}>\u00EB;"
|
||||
+ "i{grave}{esc}>\u00EC;"
|
||||
+ "i{acute}{esc}>\u00ED;"
|
||||
+ "i{hat}{esc}>\u00EE;"
|
||||
+ "i{umlaut}{esc}>\u00EF;"
|
||||
+ "d-{esc}>\u00F0;"
|
||||
+ "n{tilde}{esc}>\u00F1;"
|
||||
+ "o{grave}{esc}>\u00F2;"
|
||||
+ "o{acute}{esc}>\u00F3;"
|
||||
+ "o{hat}{esc}>\u00F4;"
|
||||
+ "o{tilde}{esc}>\u00F5;"
|
||||
+ "o{umlaut}{esc}>\u00F6;"
|
||||
+ "o{slash}{esc}>\u00F8;"
|
||||
+ "o{esc}>\u00B0;"
|
||||
+ "u{grave}{esc}>\u00F9;"
|
||||
+ "u{acute}{esc}>\u00FA;"
|
||||
+ "u{hat}{esc}>\u00FB;"
|
||||
+ "u{umlaut}{esc}>\u00FC;"
|
||||
+ "y{acute}{esc}>\u00FD;"
|
||||
+ "y{esc}>\u00A5;" // yen sign
|
||||
+ "th{esc}>\u00FE;"
|
||||
//masked: + "ss{esc}>\u00FF;"
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
@ -1,241 +0,0 @@
|
||||
package com.ibm.text.resources;
|
||||
|
||||
import java.util.ListResourceBundle;
|
||||
|
||||
public class TransliterationRule$Latin$Arabic extends ListResourceBundle {
|
||||
/**
|
||||
* Overrides ListResourceBundle
|
||||
*/
|
||||
public Object[][] getContents() {
|
||||
return new Object[][] {
|
||||
{ "Rule",
|
||||
// To Do: finish adding shadda, add sokoon
|
||||
|
||||
"alefmadda=\u0622;"+
|
||||
"alefuhamza=\u0623;"+
|
||||
"wauuhamza=\u0624;"+
|
||||
"alefhamza=\u0625;"+
|
||||
"yehuhamza=\u0626;"+
|
||||
"alef=\u0627;"+
|
||||
"beh=\u0628;"+
|
||||
"tehmarbuta=\u0629;"+
|
||||
"teh=\u062A;"+
|
||||
"theh=\u062B;"+
|
||||
"geem=\u062C;"+
|
||||
"hah=\u062D;"+
|
||||
"kha=\u062E;"+
|
||||
"dal=\u062F;"+
|
||||
"dhal=\u0630;"+
|
||||
"reh=\u0631;"+
|
||||
"zain=\u0632;"+
|
||||
"seen=\u0633;"+
|
||||
"sheen=\u0634;"+
|
||||
"sad=\u0635;"+
|
||||
"dad=\u0636;"+
|
||||
"tah=\u0637;"+
|
||||
"zah=\u0638;"+
|
||||
"ein=\u0639;"+
|
||||
"ghein=\u063A;"+
|
||||
"feh=\u0641;"+
|
||||
"qaaf=\u0642;"+
|
||||
"kaf=\u0643;"+
|
||||
"lam=\u0644;"+
|
||||
"meem=\u0645;"+
|
||||
"noon=\u0646;"+
|
||||
"heh=\u0647;"+
|
||||
"wau=\u0648;"+
|
||||
"yehmaqsura=\u0649;"+
|
||||
"yeh=\u064A;"+
|
||||
"peh=\u06A4;"+
|
||||
|
||||
"hamza=\u0621;"+
|
||||
"fathatein=\u064B;"+
|
||||
"dammatein=\u064C;"+
|
||||
"kasratein=\u064D;"+
|
||||
"fatha=\u064E;"+
|
||||
"damma=\u064F;"+
|
||||
"kasra=\u0650;"+
|
||||
"shadda=\u0651;"+
|
||||
"sokoon=\u0652;"+
|
||||
|
||||
// convert English to Arabic
|
||||
"Arabic>"+
|
||||
"\u062a\u062a\u0645\u062a\u0639' '"+
|
||||
"\u0627\u0644\u0644\u063a\u0629' '"+
|
||||
"\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629' '"+
|
||||
"\u0628\u0628\u0646\u0638\u0645' '"+
|
||||
"\u0643\u062a\u0627\u0628\u0628\u064a\u0629' '"+
|
||||
"\u062c\u0645\u064a\u0644\u0629;"+
|
||||
|
||||
"ai>{alefmadda};"+
|
||||
"ae>{alefuhamza};"+
|
||||
"ao>{alefhamza};"+
|
||||
"aa>{alef};"+
|
||||
"an>{fathatein};"+
|
||||
"a>{fatha};"+
|
||||
"b>{beh};"+
|
||||
"c>{kaf};"+
|
||||
"{dhal})dh>{shadda};"+
|
||||
"dh>{dhal};"+
|
||||
"{dad})dd>{shadda};"+
|
||||
"dd>{dad};"+
|
||||
"{dal})d>{shadda};"+
|
||||
"d>{dal};"+
|
||||
"e>{ein};"+
|
||||
"f>{feh};"+
|
||||
"gh>{ghein};"+
|
||||
"g>{geem};"+
|
||||
"hh>{hah};"+
|
||||
"h>{heh};"+
|
||||
"ii>{kasratein};"+
|
||||
"i>{kasra};"+
|
||||
"j>{geem};"+
|
||||
"kh>{kha};"+
|
||||
"k>{kaf};"+
|
||||
"l>{lam};"+
|
||||
"m>{meem};"+
|
||||
"n>{noon};"+
|
||||
"o>{hamza};"+
|
||||
"p>{peh};"+
|
||||
"q>{qaaf};"+
|
||||
"r>{reh};"+
|
||||
"sh>{sheen};"+
|
||||
"ss>{sad};"+
|
||||
"s>{seen};"+
|
||||
"th>{theh};"+
|
||||
"tm>{tehmarbuta};"+
|
||||
"tt>{tah};"+
|
||||
"t>{teh};"+
|
||||
"uu>{dammatein};"+
|
||||
"u>{damma};"+
|
||||
"v>{beh};"+
|
||||
"we>{wauuhamza};"+
|
||||
"w>{wau};"+
|
||||
"x>{kaf}{shadda}{seen};"+
|
||||
"ye>{yehuhamza};"+
|
||||
"ym>{yehmaqsura};"+
|
||||
"y>{yeh};"+
|
||||
"zz>{zah};"+
|
||||
"z>{zain};"+
|
||||
|
||||
"0>\u0660;"+ // Arabic digit 0
|
||||
"1>\u0661;"+ // Arabic digit 1
|
||||
"2>\u0662;"+ // Arabic digit 2
|
||||
"3>\u0663;"+ // Arabic digit 3
|
||||
"4>\u0664;"+ // Arabic digit 4
|
||||
"5>\u0665;"+ // Arabic digit 5
|
||||
"6>\u0666;"+ // Arabic digit 6
|
||||
"7>\u0667;"+ // Arabic digit 7
|
||||
"8>\u0668;"+ // Arabic digit 8
|
||||
"9>\u0669;"+ // Arabic digit 9
|
||||
"%>\u066A;"+ // Arabic %
|
||||
".>\u066B;"+ // Arabic decimal separator
|
||||
",>\u066C;"+ // Arabic thousands separator
|
||||
"*>\u066D;"+ // Arabic five-pointed star
|
||||
|
||||
"`0>0;"+ // Escaped forms of the above
|
||||
"`1>1;"+
|
||||
"`2>2;"+
|
||||
"`3>3;"+
|
||||
"`4>4;"+
|
||||
"`5>5;"+
|
||||
"`6>6;"+
|
||||
"`7>7;"+
|
||||
"`8>8;"+
|
||||
"`9>9;"+
|
||||
"`%>%;"+
|
||||
"`.>.;"+
|
||||
"`,>,;"+
|
||||
"`*>*;"+
|
||||
"``>`;"+
|
||||
|
||||
"''>;"+
|
||||
|
||||
// now Arabic to English
|
||||
|
||||
"''ai<a){alefmadda};"+
|
||||
"ai<{alefmadda};"+
|
||||
"''ae<a){alefuhamza};"+
|
||||
"ae<{alefuhamza};"+
|
||||
"''ao<a){alefhamza};"+
|
||||
"ao<{alefhamza};"+
|
||||
"''aa<a){alef};"+
|
||||
"aa<{alef};"+
|
||||
"''an<a){fathatein};"+
|
||||
"an<{fathatein};"+
|
||||
"''a<a){fatha};"+
|
||||
"a<{fatha};"+
|
||||
"b<{beh};"+
|
||||
"''dh<d){dhal};"+
|
||||
"dh<{dhal};"+
|
||||
"''dd<d){dad};"+
|
||||
"dd<{dad};"+
|
||||
"''d<d){dal};"+
|
||||
"d<{dal};"+
|
||||
"''e<a){ein};"+
|
||||
"''e<w){ein};"+
|
||||
"''e<y){ein};"+
|
||||
"e<{ein};"+
|
||||
"f<{feh};"+
|
||||
"gh<{ghein};"+
|
||||
"''hh<d){hah};"+
|
||||
"''hh<t){hah};"+
|
||||
"''hh<k){hah};"+
|
||||
"''hh<s){hah};"+
|
||||
"hh<{hah};"+
|
||||
"''h<d){heh};"+
|
||||
"''h<t){heh};"+
|
||||
"''h<k){heh};"+
|
||||
"''h<s){heh};"+
|
||||
"h<{heh};"+
|
||||
"''ii<i){kasratein};"+
|
||||
"ii<{kasratein};"+
|
||||
"''i<i){kasra};"+
|
||||
"i<{kasra};"+
|
||||
"j<{geem};"+
|
||||
"kh<{kha};"+
|
||||
"x<{kaf}{shadda}{seen};"+
|
||||
"k<{kaf};"+
|
||||
"l<{lam};"+
|
||||
"''m<y){meem};"+
|
||||
"''m<t){meem};"+
|
||||
"m<{meem};"+
|
||||
"n<{noon};"+
|
||||
"''o<a){hamza};"+
|
||||
"o<{hamza};"+
|
||||
"p<{peh};"+
|
||||
"q<{qaaf};"+
|
||||
"r<{reh};"+
|
||||
"sh<{sheen};"+
|
||||
"''ss<s){sad};"+
|
||||
"ss<{sad};"+
|
||||
"''s<s){seen};"+
|
||||
"s<{seen};"+
|
||||
"th<{theh};"+
|
||||
"tm<{tehmarbuta};"+
|
||||
"''tt<t){tah};"+
|
||||
"tt<{tah};"+
|
||||
"''t<t){teh};"+
|
||||
"t<{teh};"+
|
||||
"''uu<u){dammatein};"+
|
||||
"uu<{dammatein};"+
|
||||
"''u<u){damma};"+
|
||||
"u<{damma};"+
|
||||
"we<{wauuhamza};"+
|
||||
"w<{wau};"+
|
||||
"ye<{yehuhamza};"+
|
||||
"ym<{yehmaqsura};"+
|
||||
"''y<y){yeh};"+
|
||||
"y<{yeh};"+
|
||||
"''zz<z){zah};"+
|
||||
"zz<{zah};"+
|
||||
"''z<z){zain};"+
|
||||
"z<{zain};"+
|
||||
|
||||
"dh<dh){shadda};"+
|
||||
"dd<dd){shadda};"+
|
||||
"''d<d){shadda};"
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
@ -1,310 +0,0 @@
|
||||
package com.ibm.text.resources;
|
||||
|
||||
import java.util.ListResourceBundle;
|
||||
|
||||
public class TransliterationRule$Latin$Cyrillic extends ListResourceBundle {
|
||||
/**
|
||||
* Overrides ListResourceBundle
|
||||
*/
|
||||
public Object[][] getContents() {
|
||||
return new Object[][] {
|
||||
{ "Rule", ""
|
||||
|
||||
/* This class is designed to be a general Latin-Cyrillic
|
||||
transliteration. The standard Russian transliterations
|
||||
are generally used for the letters from Russian,
|
||||
with additional Cyrillic characters given consistent
|
||||
mappings.
|
||||
*/
|
||||
|
||||
+ "S-hacek=\u0160;"
|
||||
+ "s-hacek=\u0161;"
|
||||
|
||||
+ "YO=\u0401;"
|
||||
+ "J=\u0408;"
|
||||
+ "A=\u0410;"
|
||||
+ "B=\u0411;"
|
||||
+ "V=\u0412;"
|
||||
+ "G=\u0413;"
|
||||
+ "D=\u0414;"
|
||||
+ "YE=\u0415;"
|
||||
+ "ZH=\u0416;"
|
||||
+ "Z=\u0417;"
|
||||
+ "YI=\u0418;"
|
||||
+ "Y=\u0419;"
|
||||
+ "K=\u041A;"
|
||||
+ "L=\u041B;"
|
||||
+ "M=\u041C;"
|
||||
+ "N=\u041D;"
|
||||
+ "O=\u041E;"
|
||||
+ "P=\u041F;"
|
||||
+ "R=\u0420;"
|
||||
+ "S=\u0421;"
|
||||
+ "T=\u0422;"
|
||||
+ "U=\u0423;"
|
||||
+ "F=\u0424;"
|
||||
+ "KH=\u0425;"
|
||||
+ "TS=\u0426;"
|
||||
+ "CH=\u0427;"
|
||||
+ "SH=\u0428;"
|
||||
+ "SHCH=\u0429;"
|
||||
+ "HARD=\u042A;"
|
||||
+ "I=\u042B;"
|
||||
+ "SOFT=\u042C;"
|
||||
+ "E=\u042D;"
|
||||
+ "YU=\u042E;"
|
||||
+ "YA=\u042F;"
|
||||
|
||||
// Lowercase
|
||||
|
||||
+ "a=\u0430;"
|
||||
+ "b=\u0431;"
|
||||
+ "v=\u0432;"
|
||||
+ "g=\u0433;"
|
||||
+ "d=\u0434;"
|
||||
+ "ye=\u0435;"
|
||||
+ "zh=\u0436;"
|
||||
+ "z=\u0437;"
|
||||
+ "yi=\u0438;"
|
||||
+ "y=\u0439;"
|
||||
+ "k=\u043a;"
|
||||
+ "l=\u043b;"
|
||||
+ "m=\u043c;"
|
||||
+ "n=\u043d;"
|
||||
+ "o=\u043e;"
|
||||
+ "p=\u043f;"
|
||||
+ "r=\u0440;"
|
||||
+ "s=\u0441;"
|
||||
+ "t=\u0442;"
|
||||
+ "u=\u0443;"
|
||||
+ "f=\u0444;"
|
||||
+ "kh=\u0445;"
|
||||
+ "ts=\u0446;"
|
||||
+ "ch=\u0447;"
|
||||
+ "sh=\u0448;"
|
||||
+ "shch=\u0449;"
|
||||
+ "hard=\u044a;"
|
||||
+ "i=\u044b;"
|
||||
+ "soft=\u044c;"
|
||||
+ "e=\u044d;"
|
||||
+ "yu=\u044e;"
|
||||
+ "ya=\u044f;"
|
||||
|
||||
+ "yo=\u0451;"
|
||||
+ "j=\u0458;"
|
||||
|
||||
// variables
|
||||
// some are duplicated so lowercasing works
|
||||
|
||||
+ "csoft=[eiyEIY];"
|
||||
+ "CSOFT=[eiyEIY];"
|
||||
|
||||
+ "BECOMES_H=[{HARD}{hard}];"
|
||||
+ "becomes_h=[{HARD}{hard}];"
|
||||
|
||||
+ "BECOMES_S=[{S}{s}];"
|
||||
+ "becomes_s=[{S}{s}];"
|
||||
|
||||
+ "BECOMES_C=[{CH}{ch}];"
|
||||
+ "becomes_c=[{CH}{ch}];"
|
||||
|
||||
+ "BECOMES_VOWEL=[{A}{E}{I}{O}{U}{a}{e}{i}{o}{u}];"
|
||||
+ "becomes_vowel=[{A}{E}{I}{O}{U}{a}{e}{i}{o}{u}];"
|
||||
|
||||
+ "letter=[[:Lu:][:Ll:]];"
|
||||
+ "lower=[[:Ll:]];"
|
||||
|
||||
/*
|
||||
Modified to combine display transliterator and typing transliterator.
|
||||
The display mapping uses accents for the "soft" vowels.
|
||||
It does not, although it could, use characters like \u0161 instead of digraphs
|
||||
like sh.
|
||||
*/
|
||||
|
||||
// #############################################
|
||||
// Special titlecase forms, not duplicated
|
||||
// #############################################
|
||||
|
||||
+ "Ch>{CH};" + "Ch<{CH}({lower};"
|
||||
+ "Kh>{KH};" + "Kh<{KH}({lower};"
|
||||
+ "Shch>{SHCH};" + "Shch<{SHCH}({lower};"
|
||||
+ "Sh>{SH};" + "Sh<{SH}({lower};"
|
||||
+ "Ts>{TS};" + "Ts<{TS}({lower};"
|
||||
+ "Zh>{ZH};" + "Zh<{ZH}({lower};"
|
||||
+ "Yi>{YI};" //+ "Yi<{YI}({lower};"
|
||||
+ "Ye>{YE};" //+ "Ye<{YE}({lower};"
|
||||
+ "Yo>{YO};" //+ "Yo<{YO}({lower};"
|
||||
+ "Yu>{YU};" //+ "Yu<{YU}({lower};"
|
||||
+ "Ya>{YA};" //+ "Ya<{YA}({lower};"
|
||||
|
||||
// #############################################
|
||||
// Rules to Duplicate
|
||||
// To get the lowercase versions, copy these and lowercase
|
||||
// #############################################
|
||||
|
||||
// variant spellings in English
|
||||
|
||||
+ "SHTCH>{SHCH};"
|
||||
+ "TCH>{CH};"
|
||||
+ "TH>{Z};"
|
||||
+ "Q>{K};"
|
||||
+ "WH>{V};"
|
||||
+ "W>{V};"
|
||||
+ "X>{K}{S};" //+ "X<{K}{S};"
|
||||
|
||||
// Separate letters that would otherwise join
|
||||
|
||||
+ "SH''<{SH}({BECOMES_C};"
|
||||
+ "T''<{T}({BECOMES_S};"
|
||||
|
||||
+ "K''<{K}({BECOMES_H};"
|
||||
+ "S''<{S}({BECOMES_H};"
|
||||
+ "T''<{T}({BECOMES_H};"
|
||||
+ "Z''<{Z}({BECOMES_H};"
|
||||
|
||||
+ "Y''<{Y}({BECOMES_VOWEL};"
|
||||
|
||||
// Main letters
|
||||
|
||||
+ "A<>{A};"
|
||||
+ "B<>{B};"
|
||||
+ "CH<>{CH};"
|
||||
+ "D<>{D};"
|
||||
+ "E<>{E};"
|
||||
+ "F<>{F};"
|
||||
+ "G<>{G};"
|
||||
+ "\u00cc<>{YI};"
|
||||
+ "I<>{I};"
|
||||
+ "KH<>{KH};"
|
||||
+ "K<>{K};"
|
||||
+ "L<>{L};"
|
||||
+ "M<>{M};"
|
||||
+ "N<>{N};"
|
||||
+ "O<>{O};"
|
||||
+ "P<>{P};"
|
||||
+ "R<>{R};"
|
||||
+ "SHCH<>{SHCH};"
|
||||
+ "SH>{SH};" //+ "SH<{SH};"
|
||||
+ "{S-hacek}<>{SH};"
|
||||
+ "S<>{S};"
|
||||
+ "TS<>{TS};"
|
||||
+ "T<>{T};"
|
||||
+ "U<>{U};"
|
||||
+ "V<>{V};"
|
||||
//\u00cc\u00c0\u00c8\u00d2\u00d9
|
||||
+ "YE>{YE};" //+ "YE<{YE};"
|
||||
+ "\u00c8<>{YE};"
|
||||
+ "YO>{YO};" //+ "YO<{YO};"
|
||||
+ "\u00d2<>{YO};"
|
||||
+ "YU>{YU};" //+ "YU<{YU};"
|
||||
+ "\u00d9<>{YU};"
|
||||
+ "YA>{YA};" //+ "YA<{YA};"
|
||||
+ "\u00c0<>{YA};"
|
||||
+ "Y<>{Y};"
|
||||
+ "ZH<>{ZH};"
|
||||
+ "Z<>{Z};"
|
||||
|
||||
+ "H<>{HARD};"
|
||||
+ "\u0178<>{SOFT};"
|
||||
|
||||
// Non-russian
|
||||
|
||||
+ "J<>{J};"
|
||||
|
||||
// variant spellings in English
|
||||
|
||||
+ "C({csoft}>{S};"
|
||||
+ "C>{K};"
|
||||
|
||||
// #############################################
|
||||
// Duplicated Rules
|
||||
// Copy and lowercase the above rules
|
||||
// #############################################
|
||||
|
||||
// variant spellings in english
|
||||
|
||||
+ "shtch>{shch};"
|
||||
+ "tch>{ch};"
|
||||
+ "th>{z};"
|
||||
+ "q>{k};"
|
||||
+ "wh>{v};"
|
||||
+ "w>{v};"
|
||||
+ "x>{k}{s};" //+ "x<{k}{s};"
|
||||
|
||||
// separate letters that would otherwise join
|
||||
|
||||
+ "sh''<{sh}({becomes_c};"
|
||||
+ "t''<{t}({becomes_s};"
|
||||
|
||||
+ "k''<{k}({becomes_h};"
|
||||
+ "s''<{s}({becomes_h};"
|
||||
+ "t''<{t}({becomes_h};"
|
||||
+ "z''<{z}({becomes_h};"
|
||||
|
||||
+ "y''<{y}({becomes_vowel};"
|
||||
|
||||
// main letters
|
||||
|
||||
+ "a<>{a};"
|
||||
+ "b<>{b};"
|
||||
+ "ch<>{ch};"
|
||||
+ "d<>{d};"
|
||||
+ "e<>{e};"
|
||||
+ "f<>{f};"
|
||||
+ "g<>{g};"
|
||||
+ "\u00ec<>{yi};"
|
||||
+ "i<>{i};"
|
||||
+ "kh<>{kh};"
|
||||
+ "k<>{k};"
|
||||
+ "l<>{l};"
|
||||
+ "m<>{m};"
|
||||
+ "n<>{n};"
|
||||
+ "o<>{o};"
|
||||
+ "p<>{p};"
|
||||
+ "r<>{r};"
|
||||
+ "shch<>{shch};"
|
||||
+ "sh>{sh};" //+ "sh<{sh};"
|
||||
+ "{s-hacek}<>{sh};"
|
||||
+ "s<>{s};"
|
||||
+ "ts<>{ts};"
|
||||
+ "t<>{t};"
|
||||
+ "u<>{u};"
|
||||
+ "v<>{v};"
|
||||
//\u00ec\u00e0\u00e8\u00f2\u00f9
|
||||
+ "ye>{ye};" //+ "ye<{ye};"
|
||||
+ "\u00e8<>{ye};"
|
||||
+ "yo>{yo};" //+ "yo<{yo};"
|
||||
+ "\u00f2<>{yo};"
|
||||
+ "yu>{yu};" //+ "yu<{yu};"
|
||||
+ "\u00f9<>{yu};"
|
||||
+ "ya>{ya};" //+ "ya<{ya};"
|
||||
+ "\u00e0<>{ya};"
|
||||
+ "y<>{y};"
|
||||
+ "zh<>{zh};"
|
||||
+ "z<>{z};"
|
||||
|
||||
+ "h<>{hard};"
|
||||
+ "\u00ff<>{soft};"
|
||||
|
||||
// non-russian
|
||||
|
||||
+ "j<>{j};"
|
||||
|
||||
// variant spellings in english
|
||||
|
||||
+ "c({csoft}>{s};"
|
||||
+ "c>{k};"
|
||||
|
||||
|
||||
|
||||
// #############################################
|
||||
// End of Duplicated Rules
|
||||
// #############################################
|
||||
|
||||
//generally the last rule
|
||||
+ "''>;"
|
||||
//the end
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
@ -1,409 +0,0 @@
|
||||
package com.ibm.text.resources;
|
||||
|
||||
import java.util.ListResourceBundle;
|
||||
|
||||
public class TransliterationRule$Latin$Devanagari extends ListResourceBundle {
|
||||
/**
|
||||
* Overrides ListResourceBundle
|
||||
*/
|
||||
public Object[][] getContents() {
|
||||
return new Object[][] {
|
||||
{ "Rule",
|
||||
//#####################################################################
|
||||
// Keyboard Transliteration Table
|
||||
//#####################################################################
|
||||
// Conversions should be:
|
||||
// 1. complete
|
||||
// * convert every sequence of Latin letters (a to z plus apostrophe)
|
||||
// to a sequence of Native letters
|
||||
// * convert every sequence of Native letters to Latin letters
|
||||
// 2. reversable
|
||||
// * any string of Native converted to Latin and back should be the same
|
||||
// * this is not true for English converted to Native & back, e.g.:
|
||||
// k -> {kaf} -> k
|
||||
// c -> {kaf} -> k
|
||||
//#####################################################################
|
||||
// Sequences of Latin letters may convert to a single Native letter.
|
||||
// When this is the case, an apostrophe can be used to indicate separate
|
||||
// letters.$
|
||||
// E.g. sh -> {shin}
|
||||
// s'h -> {sin}{heh}
|
||||
// ss -> {sad}
|
||||
// s's -> {sin}{shadda}
|
||||
//#####################################################################
|
||||
// To Do:
|
||||
// finish adding shadda, add sokoon, fix uppercase
|
||||
// make two transliteration tables: one with vowels, one without
|
||||
//#####################################################################
|
||||
// Modifications
|
||||
// Devanagari Transliterator: broken up with consonsants/vowels
|
||||
//#####################################################################
|
||||
// Unicode character name definitions
|
||||
//#####################################################################
|
||||
|
||||
//consonants
|
||||
"candrabindu=\u0901;"
|
||||
+ "bindu=\u0902;"
|
||||
+ "visarga=\u0903;"
|
||||
|
||||
// w<vowel> represents the stand-alone form
|
||||
+ "wa=\u0905;"
|
||||
+ "waa=\u0906;"
|
||||
+ "wi=\u0907;"
|
||||
+ "wii=\u0908;"
|
||||
+ "wu=\u0909;"
|
||||
+ "wuu=\u090A;"
|
||||
+ "wr=\u090B;"
|
||||
+ "wl=\u090C;"
|
||||
+ "we=\u090F;"
|
||||
+ "wai=\u0910;"
|
||||
+ "wo=\u0913;"
|
||||
+ "wau=\u0914;"
|
||||
|
||||
+ "ka=\u0915;"
|
||||
+ "kha=\u0916;"
|
||||
+ "ga=\u0917;"
|
||||
+ "gha=\u0918;"
|
||||
+ "nga=\u0919;"
|
||||
|
||||
+ "ca=\u091A;"
|
||||
+ "cha=\u091B;"
|
||||
+ "ja=\u091C;"
|
||||
+ "jha=\u091D;"
|
||||
+ "nya=\u091E;"
|
||||
|
||||
+ "tta=\u091F;"
|
||||
+ "ttha=\u0920;"
|
||||
+ "dda=\u0921;"
|
||||
+ "ddha=\u0922;"
|
||||
+ "nna=\u0923;"
|
||||
|
||||
+ "ta=\u0924;"
|
||||
+ "tha=\u0925;"
|
||||
+ "da=\u0926;"
|
||||
+ "dha=\u0927;"
|
||||
+ "na=\u0928;"
|
||||
|
||||
+ "pa=\u092A;"
|
||||
+ "pha=\u092B;"
|
||||
+ "ba=\u092C;"
|
||||
+ "bha=\u092D;"
|
||||
+ "ma=\u092E;"
|
||||
|
||||
+ "ya=\u092F;"
|
||||
+ "ra=\u0930;"
|
||||
+ "rra=\u0931;"
|
||||
+ "la=\u0933;"
|
||||
+ "va=\u0935;"
|
||||
|
||||
+ "sha=\u0936;"
|
||||
+ "ssa=\u0937;"
|
||||
+ "sa=\u0938;"
|
||||
+ "ha=\u0939;"
|
||||
|
||||
// <vowel> represents the dependent form
|
||||
+ "aa=\u093E;"
|
||||
+ "i=\u093F;"
|
||||
+ "ii=\u0940;"
|
||||
+ "u=\u0941;"
|
||||
+ "uu=\u0942;"
|
||||
+ "rh=\u0943;"
|
||||
+ "lh=\u0944;"
|
||||
+ "e=\u0947;"
|
||||
+ "ai=\u0948;"
|
||||
+ "o=\u094B;"
|
||||
+ "au=\u094C;"
|
||||
|
||||
+ "virama=\u094D;"
|
||||
|
||||
+ "wrr=\u0960;"
|
||||
+ "rrh=\u0962;"
|
||||
|
||||
+ "danda=\u0964;"
|
||||
+ "doubleDanda=\u0965;"
|
||||
+ "depVowelAbove=[\u093E-\u0940\u0945-\u094C];"
|
||||
+ "depVowelBelow=[\u0941-\u0944];"
|
||||
+ "endThing=[{danda}{doubleDanda}\u0000-\u08FF\u0980-\uFFFF];"
|
||||
|
||||
+ "&=[{virama}{aa}{ai}{au}{ii}{i}{uu}{u}{rrh}{rh}{lh}{e}{o}];"
|
||||
+ "%=[bcdfghjklmnpqrstvwxyz];"
|
||||
|
||||
//#####################################################################
|
||||
// convert from Latin letters to Native letters
|
||||
//#####################################################################
|
||||
//Hindi>\u092d\u093e\u0930\u0924--\u0020\u0926\u0947\u0936\u0020\u092c\u0928\u094d\u0927\u0941\u002e
|
||||
|
||||
// special forms with no good conversion
|
||||
|
||||
+ "mm>{bindu};"
|
||||
+ "x>{visarga};"
|
||||
|
||||
// convert to independent forms at start of word or syllable:
|
||||
// e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
|
||||
// Moved up [LIU]
|
||||
|
||||
+ "aa>{waa};"
|
||||
+ "ai>{wai};"
|
||||
+ "au>{wau};"
|
||||
+ "ii>{wii};"
|
||||
+ "i>{wi};"
|
||||
+ "uu>{wuu};"
|
||||
+ "u>{wu};"
|
||||
+ "rrh>{wrr};"
|
||||
+ "rh>{wr};"
|
||||
+ "lh>{wl};"
|
||||
+ "e>{we};"
|
||||
+ "o>{wo};"
|
||||
+ "a>{wa};"
|
||||
|
||||
// normal consonants
|
||||
|
||||
+ "kh>{kha}|{virama};"
|
||||
+ "k>{ka}|{virama};"
|
||||
+ "q>{ka}|{virama};"
|
||||
+ "gh>{gha}|{virama};"
|
||||
+ "g>{ga}|{virama};"
|
||||
+ "ng>{nga}|{virama};"
|
||||
+ "ch>{cha}|{virama};"
|
||||
+ "c>{ca}|{virama};"
|
||||
+ "jh>{jha}|{virama};"
|
||||
+ "j>{ja}|{virama};"
|
||||
+ "ny>{nya}|{virama};"
|
||||
+ "tth>{ttha}|{virama};"
|
||||
+ "tt>{tta}|{virama};"
|
||||
+ "ddh>{ddha}|{virama};"
|
||||
+ "dd>{dda}|{virama};"
|
||||
+ "nn>{nna}|{virama};"
|
||||
+ "th>{tha}|{virama};"
|
||||
+ "t>{ta}|{virama};"
|
||||
+ "dh>{dha}|{virama};"
|
||||
+ "d>{da}|{virama};"
|
||||
+ "n>{na}|{virama};"
|
||||
+ "ph>{pha}|{virama};"
|
||||
+ "p>{pa}|{virama};"
|
||||
+ "bh>{bha}|{virama};"
|
||||
+ "b>{ba}|{virama};"
|
||||
+ "m>{ma}|{virama};"
|
||||
+ "y>{ya}|{virama};"
|
||||
+ "r>{ra}|{virama};"
|
||||
+ "l>{la}|{virama};"
|
||||
+ "v>{va}|{virama};"
|
||||
+ "f>{va}|{virama};"
|
||||
+ "w>{va}|{virama};"
|
||||
+ "sh>{sha}|{virama};"
|
||||
+ "ss>{ssa}|{virama};"
|
||||
+ "s>{sa}|{virama};"
|
||||
+ "z>{sa}|{virama};"
|
||||
+ "h>{ha}|{virama};"
|
||||
|
||||
+ ".>{danda};"
|
||||
+ "{danda}.>{doubleDanda};"
|
||||
+ "{depVowelAbove})~>{bindu};"
|
||||
+ "{depVowelBelow})~>{candrabindu};"
|
||||
|
||||
// convert to dependent forms after consonant with no vowel:
|
||||
// e.g. kai -> {ka}{virama}ai -> {ka}{ai}
|
||||
|
||||
+ "{virama}aa>{aa};"
|
||||
+ "{virama}ai>{ai};"
|
||||
+ "{virama}au>{au};"
|
||||
+ "{virama}ii>{ii};"
|
||||
+ "{virama}i>{i};"
|
||||
+ "{virama}uu>{uu};"
|
||||
+ "{virama}u>{u};"
|
||||
+ "{virama}rrh>{rrh};"
|
||||
+ "{virama}rh>{rh};"
|
||||
+ "{virama}lh>{lh};"
|
||||
+ "{virama}e>{e};"
|
||||
+ "{virama}o>{o};"
|
||||
+ "{virama}a>;"
|
||||
|
||||
// otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
|
||||
|
||||
+ "{virama}''aa>{waa};"
|
||||
+ "{virama}''ai>{wai};"
|
||||
+ "{virama}''au>{wau};"
|
||||
+ "{virama}''ii>{wii};"
|
||||
+ "{virama}''i>{wi};"
|
||||
+ "{virama}''uu>{wuu};"
|
||||
+ "{virama}''u>{wu};"
|
||||
+ "{virama}''rrh>{wrr};"
|
||||
+ "{virama}''rh>{wr};"
|
||||
+ "{virama}''lh>{wl};"
|
||||
+ "{virama}''e>{we};"
|
||||
+ "{virama}''o>{wo};"
|
||||
+ "{virama}''a>{wa};"
|
||||
|
||||
+ "{virama}({endThing}>;"
|
||||
|
||||
// convert any left-over apostrophes used for separation
|
||||
|
||||
+ "''>;"
|
||||
|
||||
//#####################################################################
|
||||
// convert from Native letters to Latin letters
|
||||
//#####################################################################
|
||||
|
||||
// special forms with no good conversion
|
||||
|
||||
+ "mm<{bindu};"
|
||||
+ "x<{visarga};"
|
||||
|
||||
// normal consonants
|
||||
|
||||
+ "kh<{kha}(&;"
|
||||
+ "kha<{kha};"
|
||||
+ "k''<{ka}{virama}({ha};"
|
||||
+ "k<{ka}(&;"
|
||||
+ "ka<{ka};"
|
||||
+ "gh<{gha}(&;"
|
||||
+ "gha<{gha};"
|
||||
+ "g''<{ga}{virama}({ha};"
|
||||
+ "g<{ga}(&;"
|
||||
+ "ga<{ga};"
|
||||
+ "ng<{nga}(&;"
|
||||
+ "nga<{nga};"
|
||||
+ "ch<{cha}(&;"
|
||||
+ "cha<{cha};"
|
||||
+ "c''<{ca}{virama}({ha};"
|
||||
+ "c<{ca}(&;"
|
||||
+ "ca<{ca};"
|
||||
+ "jh<{jha}(&;"
|
||||
+ "jha<{jha};"
|
||||
+ "j''<{ja}{virama}({ha};"
|
||||
+ "j<{ja}(&;"
|
||||
+ "ja<{ja};"
|
||||
+ "ny<{nya}(&;"
|
||||
+ "nya<{nya};"
|
||||
+ "tth<{ttha}(&;"
|
||||
+ "ttha<{ttha};"
|
||||
+ "tt''<{tta}{virama}({ha};"
|
||||
+ "tt<{tta}(&;"
|
||||
+ "tta<{tta};"
|
||||
+ "ddh<{ddha}(&;"
|
||||
+ "ddha<{ddha};"
|
||||
+ "dd''<{dda}(&{ha};"
|
||||
+ "dd<{dda}(&;"
|
||||
+ "dda<{dda};"
|
||||
+ "dh<{dha}(&;"
|
||||
+ "dha<{dha};"
|
||||
+ "d''<{da}{virama}({ha};"
|
||||
+ "d''<{da}{virama}({ddha};"
|
||||
+ "d''<{da}{virama}({dda};"
|
||||
+ "d''<{da}{virama}({dha};"
|
||||
+ "d''<{da}{virama}({da};"
|
||||
+ "d<{da}(&;"
|
||||
+ "da<{da};"
|
||||
+ "th<{tha}(&;"
|
||||
+ "tha<{tha};"
|
||||
+ "t''<{ta}{virama}({ha};"
|
||||
+ "t''<{ta}{virama}({ttha};"
|
||||
+ "t''<{ta}{virama}({tta};"
|
||||
+ "t''<{ta}{virama}({tha};"
|
||||
+ "t''<{ta}{virama}({ta};"
|
||||
+ "t<{ta}(&;"
|
||||
+ "ta<{ta};"
|
||||
+ "n''<{na}{virama}({ga};"
|
||||
+ "n''<{na}{virama}({ya};"
|
||||
+ "n<{na}(&;"
|
||||
+ "na<{na};"
|
||||
+ "ph<{pha}(&;"
|
||||
+ "pha<{pha};"
|
||||
+ "p''<{pa}{virama}({ha};"
|
||||
+ "p<{pa}(&;"
|
||||
+ "pa<{pa};"
|
||||
+ "bh<{bha}(&;"
|
||||
+ "bha<{bha};"
|
||||
+ "b''<{ba}{virama}({ha};"
|
||||
+ "b<{ba}(&;"
|
||||
+ "ba<{ba};"
|
||||
+ "m''<{ma}{virama}({ma};"
|
||||
+ "m''<{ma}{virama}({bindu};"
|
||||
+ "m<{ma}(&;"
|
||||
+ "ma<{ma};"
|
||||
+ "y<{ya}(&;"
|
||||
+ "ya<{ya};"
|
||||
+ "r''<{ra}{virama}({ha};"
|
||||
+ "r<{ra}(&;"
|
||||
+ "ra<{ra};"
|
||||
+ "l''<{la}{virama}({ha};"
|
||||
+ "l<{la}(&;"
|
||||
+ "la<{la};"
|
||||
+ "v<{va}(&;"
|
||||
+ "va<{va};"
|
||||
+ "sh<{sha}(&;"
|
||||
+ "sha<{sha};"
|
||||
+ "ss<{ssa}(&;"
|
||||
+ "ssa<{ssa};"
|
||||
+ "s''<{sa}{virama}({ha};"
|
||||
+ "s''<{sa}{virama}({sha};"
|
||||
+ "s''<{sa}{virama}({ssa};"
|
||||
+ "s''<{sa}{virama}({sa};"
|
||||
+ "s<{sa}(&;"
|
||||
+ "sa<{sa};"
|
||||
+ "h<{ha}(&;"
|
||||
+ "ha<{ha};"
|
||||
|
||||
// dependent vowels (should never occur except following consonants)
|
||||
|
||||
+ "aa<{aa};"
|
||||
+ "ai<{ai};"
|
||||
+ "au<{au};"
|
||||
+ "ii<{ii};"
|
||||
+ "i<{i};"
|
||||
+ "uu<{uu};"
|
||||
+ "u<{u};"
|
||||
+ "rrh<{rrh};"
|
||||
+ "rh<{rh};"
|
||||
+ "lh<{lh};"
|
||||
+ "e<{e};"
|
||||
+ "o<{o};"
|
||||
|
||||
// independent vowels (when following consonants)
|
||||
|
||||
+ "''aa<a){waa};"
|
||||
+ "''aa<%){waa};"
|
||||
+ "''ai<a){wai};"
|
||||
+ "''ai<%){wai};"
|
||||
+ "''au<a){wau};"
|
||||
+ "''au<%){wau};"
|
||||
+ "''ii<a){wii};"
|
||||
+ "''ii<%){wii};"
|
||||
+ "''i<a){wi};"
|
||||
+ "''i<%){wi};"
|
||||
+ "''uu<a){wuu};"
|
||||
+ "''uu<%){wuu};"
|
||||
+ "''u<a){wu};"
|
||||
+ "''u<%){wu};"
|
||||
+ "''rrh<%){wrr};"
|
||||
+ "''rh<%){wr};"
|
||||
+ "''lh<%){wl};"
|
||||
+ "''e<%){we};"
|
||||
+ "''o<%){wo};"
|
||||
+ "''a<a){wa};"
|
||||
+ "''a<%){wa};"
|
||||
|
||||
|
||||
// independent vowels (otherwise)
|
||||
|
||||
+ "aa<{waa};"
|
||||
+ "ai<{wai};"
|
||||
+ "au<{wau};"
|
||||
+ "ii<{wii};"
|
||||
+ "i<{wi};"
|
||||
+ "uu<{wuu};"
|
||||
+ "u<{wu};"
|
||||
+ "rrh<{wrr};"
|
||||
+ "rh<{wr};"
|
||||
+ "lh<{wl};"
|
||||
+ "e<{we};"
|
||||
+ "o<{wo};"
|
||||
+ "a<{wa};"
|
||||
|
||||
// blow away any remaining viramas
|
||||
|
||||
+ "<{virama};"
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
@ -1,377 +0,0 @@
|
||||
package com.ibm.text.resources;
|
||||
|
||||
import java.util.ListResourceBundle;
|
||||
|
||||
public class TransliterationRule$Latin$Greek extends ListResourceBundle {
|
||||
/**
|
||||
* Overrides ListResourceBundle
|
||||
*/
|
||||
public Object[][] getContents() {
|
||||
return new Object[][] {
|
||||
{ "Rule", ""
|
||||
// ==============================================
|
||||
// Modern Greek Transliteration Rules
|
||||
//
|
||||
// This transliterates modern Greek characters, but using rules
|
||||
// that are traditional for Ancient Greek, and
|
||||
// thus more resemble Greek words that have become part
|
||||
// of English. It differs from the official Greek
|
||||
// transliteration, which is more phonetic (since
|
||||
// most modern Greek vowels, for example, have
|
||||
// degenerated simply to sound like "ee").
|
||||
//
|
||||
// There are only a few tricky parts.
|
||||
// 1. eta and omega don't map directly to Latin vowels,
|
||||
// so we use a macron on e and o, and some
|
||||
// other combinations if they are accented.
|
||||
// 2. The accented, diaeresis i and y are substituted too.
|
||||
// 3. Some letters use digraphs, like "ph". While typical,
|
||||
// they need some special handling.
|
||||
// 4. A gamma before a gamma or a few other letters is
|
||||
// transliterated as an "n", as in "Anglo"
|
||||
// 5. An ypsilon after a vowel is a "u", as in
|
||||
// "Mouseio". Otherwise it is a "y" as in "Physikon"
|
||||
// 6. The construction of the rules is made simpler by making sure
|
||||
// that most rules for lowercase letters exactly correspond to the
|
||||
// rules for uppercase letters, *except* for the case of the letters
|
||||
// in the rule itself. That way, after modifying the uppercase rules,
|
||||
// you can just copy, paste, and "set to lowercase" to get
|
||||
// the rules for lowercase letters!
|
||||
// ==============================================
|
||||
|
||||
// ==============================================
|
||||
// Variables, used to make the rules more comprehensible
|
||||
// and for conditionals.
|
||||
// ==============================================
|
||||
|
||||
// Latin Letters
|
||||
|
||||
+ "E-MACRON=\u0112;"
|
||||
+ "e-macron=\u0113;"
|
||||
+ "O-MACRON=\u014C;"
|
||||
+ "o-macron=\u014D;"
|
||||
+ "Y-UMLAUT=\u0178;"
|
||||
+ "y-umlaut=\u00FF;"
|
||||
|
||||
//! // with real accents.
|
||||
//! + "E-MACRON-ACUTE=\u0112\u0301;"
|
||||
//! + "e-macron-acute=\u0113\u0301;"
|
||||
//! + "O-MACRON-ACUTE=\u014C\u0301;"
|
||||
//! + "o-macron-acute=\u014D\u0301;"
|
||||
//! + "y-umlaut-acute=\u00FF\u0301;"
|
||||
//! + "\u00ef-acute=\u00ef\u0301;"
|
||||
//! + "\u00fc-acute=\u00fc\u0301;"
|
||||
//! //
|
||||
|
||||
// single letter equivalents
|
||||
|
||||
+ "E-MACRON-ACUTE=\u00CA;"
|
||||
+ "e-macron-acute=\u00EA;"
|
||||
+ "O-MACRON-ACUTE=\u00D4;"
|
||||
+ "o-macron-acute=\u00F4;"
|
||||
+ "y-umlaut-acute=\u0177;"
|
||||
+ "\u00ef-acute=\u00EE;"
|
||||
+ "\u00fc-acute=\u00FB;"
|
||||
|
||||
// Greek Letters
|
||||
|
||||
+ "ALPHA=\u0391;"
|
||||
+ "BETA=\u0392;"
|
||||
+ "GAMMA=\u0393;"
|
||||
+ "DELTA=\u0394;"
|
||||
+ "EPSILON=\u0395;"
|
||||
+ "ZETA=\u0396;"
|
||||
+ "ETA=\u0397;"
|
||||
+ "THETA=\u0398;"
|
||||
+ "IOTA=\u0399;"
|
||||
+ "KAPPA=\u039A;"
|
||||
+ "LAMBDA=\u039B;"
|
||||
+ "MU=\u039C;"
|
||||
+ "NU=\u039D;"
|
||||
+ "XI=\u039E;"
|
||||
+ "OMICRON=\u039F;"
|
||||
+ "PI=\u03A0;"
|
||||
+ "RHO=\u03A1;"
|
||||
+ "SIGMA=\u03A3;"
|
||||
+ "TAU=\u03A4;"
|
||||
+ "YPSILON=\u03A5;"
|
||||
+ "PHI=\u03A6;"
|
||||
+ "CHI=\u03A7;"
|
||||
+ "PSI=\u03A8;"
|
||||
+ "OMEGA=\u03A9;"
|
||||
|
||||
+ "ALPHA+=\u0386;"
|
||||
+ "EPSILON+=\u0388;"
|
||||
+ "ETA+=\u0389;"
|
||||
+ "IOTA+=\u038A;"
|
||||
+ "OMICRON+=\u038C;"
|
||||
+ "YPSILON+=\u038E;"
|
||||
+ "OMEGA+=\u038F;"
|
||||
+ "IOTA_DIAERESIS=\u03AA;"
|
||||
+ "YPSILON_DIAERESIS=\u03AB;"
|
||||
|
||||
+ "alpha=\u03B1;"
|
||||
+ "beta=\u03B2;"
|
||||
+ "gamma=\u03B3;"
|
||||
+ "delta=\u03B4;"
|
||||
+ "epsilon=\u03B5;"
|
||||
+ "zeta=\u03B6;"
|
||||
+ "eta=\u03B7;"
|
||||
+ "theta=\u03B8;"
|
||||
+ "iota=\u03B9;"
|
||||
+ "kappa=\u03BA;"
|
||||
+ "lambda=\u03BB;"
|
||||
+ "mu=\u03BC;"
|
||||
+ "nu=\u03BD;"
|
||||
+ "xi=\u03BE;"
|
||||
+ "omicron=\u03BF;"
|
||||
+ "pi=\u03C0;"
|
||||
+ "rho=\u03C1;"
|
||||
+ "sigma=\u03C3;"
|
||||
+ "tau=\u03C4;"
|
||||
+ "ypsilon=\u03C5;"
|
||||
+ "phi=\u03C6;"
|
||||
+ "chi=\u03C7;"
|
||||
+ "psi=\u03C8;"
|
||||
+ "omega=\u03C9;"
|
||||
|
||||
//forms
|
||||
|
||||
+ "alpha+=\u03AC;"
|
||||
+ "epsilon+=\u03AD;"
|
||||
+ "eta+=\u03AE;"
|
||||
+ "iota+=\u03AF;"
|
||||
+ "omicron+=\u03CC;"
|
||||
+ "ypsilon+=\u03CD;"
|
||||
+ "omega+=\u03CE;"
|
||||
+ "iota_diaeresis=\u03CA;"
|
||||
+ "ypsilon_diaeresis=\u03CB;"
|
||||
+ "iota_diaeresis+=\u0390;"
|
||||
+ "ypsilon_diaeresis+=\u03B0;"
|
||||
+ "sigma+=\u03C2;"
|
||||
|
||||
// Variables for conditional mappings
|
||||
|
||||
// Use lowercase for all variable names, to allow cut/paste below.
|
||||
|
||||
+ "letter=[~[:Lu:][:Ll:]];"
|
||||
+ "lower=[[:Ll:]];"
|
||||
+ "softener=[eiyEIY];"
|
||||
+ "vowel=[aeiouAEIOU"
|
||||
+ "{ALPHA}{EPSILON}{ETA}{IOTA}{OMICRON}{YPSILON}{OMEGA}"
|
||||
+ "{ALPHA+}{EPSILON+}{ETA+}{IOTA+}{OMICRON+}{YPSILON+}{OMEGA+}"
|
||||
+ "{IOTA_DIAERESIS}{YPSILON_DIAERESIS}"
|
||||
+ "{alpha}{epsilon}{eta}{iota}{omicron}{ypsilon}{omega}"
|
||||
+ "{alpha+}{epsilon+}{eta+}{iota+}{omicron+}{ypsilon+}{omega+}"
|
||||
+ "{iota_diaeresis}{ypsilon_diaeresis}"
|
||||
+ "{iota_diaeresis+}{ypsilon_diaeresis+}"
|
||||
+ "];"
|
||||
+ "n-gamma=[GKXCgkxc];"
|
||||
+ "gamma-n=[{GAMMA}{KAPPA}{CHI}{XI}{gamma}{kappa}{chi}{xi}];"
|
||||
+ "pp=[Pp];"
|
||||
|
||||
// ==============================================
|
||||
// Rules
|
||||
// ==============================================
|
||||
// The following are special titlecases, and should
|
||||
// not be copied when duplicating the lowercase
|
||||
// ==============================================
|
||||
|
||||
+ "Th <> {THETA}({lower};"
|
||||
+ "Ph <> {PHI}({lower};"
|
||||
+ "Ch <> {CHI}({lower};"
|
||||
//masked: + "Ps<{PHI}({lower};"
|
||||
|
||||
// Because there is no uppercase forms for final sigma,
|
||||
// we had to move all the sigma rules up here.
|
||||
|
||||
// Remember to insert ' to preserve round trip, for double letters
|
||||
// don't need to do this for the digraphs with h,
|
||||
// since it is not created when mapping back from greek
|
||||
|
||||
// use special form for s
|
||||
|
||||
+ "''S <> ({pp}) {SIGMA} ;" // handle PS
|
||||
+ "S <> {SIGMA};"
|
||||
|
||||
// The following are a bit tricky. 's' takes two forms in greek
|
||||
// final or non final.
|
||||
// We use ~s to represent the abnormal form: final before letter
|
||||
// or non-final before non-letter.
|
||||
// We use 's to separate p and s (otherwise ps is one letter)
|
||||
// so, we break out the following forms:
|
||||
|
||||
+ "''s < ({pp}) {sigma} ({letter});"
|
||||
+ "s < {sigma} ({letter});"
|
||||
+ "~s < {sigma} ;"
|
||||
|
||||
+ "~s < {sigma+} ({letter});"
|
||||
+ "''s < ({pp}) {sigma+} ;"
|
||||
+ "s < {sigma+} ;"
|
||||
|
||||
+ "~s ({letter}) > {sigma+};"
|
||||
+ "~s > {sigma};"
|
||||
+ "''s ({letter}) > {sigma};"
|
||||
+ "''s > {sigma+};"
|
||||
+ "s ({letter}) > {sigma};"
|
||||
+ "s > {sigma+};"
|
||||
|
||||
// because there are no uppercase forms, had to move these up too.
|
||||
|
||||
+ "i\"`>{iota_diaeresis+};"
|
||||
+ "y\"`>{ypsilon_diaeresis+};"
|
||||
|
||||
+ "{\u00ef-acute} <> {iota_diaeresis+};"
|
||||
+ "{\u00fc-acute} <> {vowel}){ypsilon_diaeresis+};"
|
||||
+ "{y-umlaut-acute} <> {ypsilon_diaeresis+};"
|
||||
|
||||
// ==============================================
|
||||
// Uppercase Forms.
|
||||
// To make lowercase forms, just copy and lowercase below
|
||||
// ==============================================
|
||||
|
||||
// Typing variants, in case the keyboard doesn't have accents
|
||||
|
||||
+ "A`>{ALPHA+};"
|
||||
+ "E`>{EPSILON+};"
|
||||
+ "EE`>{ETA+};"
|
||||
+ "EE>{ETA};"
|
||||
+ "I`>{IOTA+};"
|
||||
+ "O`>{OMICRON+};"
|
||||
+ "OO`>{OMEGA+};"
|
||||
+ "OO>{OMEGA};"
|
||||
+ "I\">{IOTA_DIAERESIS};"
|
||||
+ "Y\">{YPSILON_DIAERESIS};"
|
||||
|
||||
// Basic Letters
|
||||
|
||||
+ "A<>{ALPHA};"
|
||||
+ "\u00c1<>{ALPHA+};"
|
||||
+ "B<>{BETA};"
|
||||
+ "N ({n-gamma}) <> {GAMMA} ({gamma-n});"
|
||||
+ "G<>{GAMMA};"
|
||||
+ "D<>{DELTA};"
|
||||
+ "''E <> ([Ee]){EPSILON};" // handle EE
|
||||
+ "E<>{EPSILON};"
|
||||
+ "\u00c9<>{EPSILON+};"
|
||||
+ "Z<>{ZETA};"
|
||||
+ "{E-MACRON-ACUTE}<>{ETA+};"
|
||||
+ "{E-MACRON}<>{ETA};"
|
||||
+ "TH<>{THETA};"
|
||||
+ "I<>{IOTA};"
|
||||
+ "\u00cd<>{IOTA+};"
|
||||
+ "\u00cf<>{IOTA_DIAERESIS};"
|
||||
+ "K<>{KAPPA};"
|
||||
+ "L<>{LAMBDA};"
|
||||
+ "M<>{MU};"
|
||||
+ "N'' <> {NU} ({gamma-n});"
|
||||
+ "N<>{NU};"
|
||||
+ "X<>{XI};"
|
||||
+ "''O <> ([Oo]) {OMICRON};" // handle OO
|
||||
+ "O<>{OMICRON};"
|
||||
+ "\u00d3<>{OMICRON+};"
|
||||
+ "PH<>{PHI};" // needs ordering before P
|
||||
+ "PS<>{PSI};" // needs ordering before P
|
||||
+ "P<>{PI};"
|
||||
+ "R<>{RHO};"
|
||||
+ "T<>{TAU};"
|
||||
+ "U <> ({vowel}) {YPSILON};"
|
||||
+ "\u00da <> ({vowel}) {YPSILON+};"
|
||||
+ "\u00dc <> ({vowel}) {YPSILON_DIAERESIS};"
|
||||
+ "Y<>{YPSILON};"
|
||||
+ "\u00dd<>{YPSILON+};"
|
||||
+ "{Y-UMLAUT}<>{YPSILON_DIAERESIS};"
|
||||
+ "CH<>{CHI};"
|
||||
+ "{O-MACRON-ACUTE}<>{OMEGA+};"
|
||||
+ "{O-MACRON}<>{OMEGA};"
|
||||
|
||||
// Extra English Letters. Mapped for completeness
|
||||
|
||||
+ "C({softener})>|S;"
|
||||
+ "C>|K;"
|
||||
+ "F>|PH;"
|
||||
+ "H>|CH;"
|
||||
+ "J>|I;"
|
||||
+ "Q>|K;"
|
||||
+ "V>|U;"
|
||||
+ "W>|U;"
|
||||
|
||||
// ==============================================
|
||||
// Lowercase Forms. Just copy above and lowercase
|
||||
// ==============================================
|
||||
|
||||
// typing variants, in case the keyboard doesn't have accents
|
||||
|
||||
+ "a`>{alpha+};"
|
||||
+ "e`>{epsilon+};"
|
||||
+ "ee`>{eta+};"
|
||||
+ "ee>{eta};"
|
||||
+ "i`>{iota+};"
|
||||
+ "o`>{omicron+};"
|
||||
+ "oo`>{omega+};"
|
||||
+ "oo>{omega};"
|
||||
+ "i\">{iota_diaeresis};"
|
||||
+ "y\">{ypsilon_diaeresis};"
|
||||
|
||||
// basic letters
|
||||
|
||||
+ "a<>{alpha};"
|
||||
+ "\u00e1<>{alpha+};"
|
||||
+ "b<>{beta};"
|
||||
+ "n ({n-gamma}) <> {gamma} ({gamma-n});"
|
||||
+ "g<>{gamma};"
|
||||
+ "d<>{delta};"
|
||||
+ "''e <> ([Ee]){epsilon};" // handle EE
|
||||
+ "e<>{epsilon};"
|
||||
+ "\u00e9<>{epsilon+};"
|
||||
+ "z<>{zeta};"
|
||||
+ "{e-macron-acute}<>{eta+};"
|
||||
+ "{e-macron}<>{eta};"
|
||||
+ "th<>{theta};"
|
||||
+ "i<>{iota};"
|
||||
+ "\u00ed<>{iota+};"
|
||||
+ "\u00ef<>{iota_diaeresis};"
|
||||
+ "k<>{kappa};"
|
||||
+ "l<>{lambda};"
|
||||
+ "m<>{mu};"
|
||||
+ "n'' <> {nu} ({gamma-n});"
|
||||
+ "n<>{nu};"
|
||||
+ "x<>{xi};"
|
||||
+ "''o <> ([Oo]) {omicron};" // handle OO
|
||||
+ "o<>{omicron};"
|
||||
+ "\u00f3<>{omicron+};"
|
||||
+ "ph<>{phi};" // needs ordering before p
|
||||
+ "ps<>{psi};" // needs ordering before p
|
||||
+ "p<>{pi};"
|
||||
+ "r<>{rho};"
|
||||
+ "t<>{tau};"
|
||||
+ "u <> ({vowel}){ypsilon};"
|
||||
+ "\u00fa <> ({vowel}){ypsilon+};"
|
||||
+ "\u00fc <> ({vowel}){ypsilon_diaeresis};"
|
||||
+ "y<>{ypsilon};"
|
||||
+ "\u00fd<>{ypsilon+};"
|
||||
+ "{y-umlaut}<>{ypsilon_diaeresis};"
|
||||
+ "ch<>{chi};"
|
||||
+ "{o-macron-acute}<>{omega+};"
|
||||
+ "{o-macron}<>{omega};"
|
||||
|
||||
// extra english letters. mapped for completeness
|
||||
|
||||
+ "c({softener})>|s;"
|
||||
+ "c>|k;"
|
||||
+ "f>|ph;"
|
||||
+ "h>|ch;"
|
||||
+ "j>|i;"
|
||||
+ "q>|k;"
|
||||
+ "v>|u;"
|
||||
+ "w>|u;"
|
||||
|
||||
// ====================================
|
||||
// Normal final rule: remove '
|
||||
// ====================================
|
||||
|
||||
//+ "''>;"
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
@ -1,279 +0,0 @@
|
||||
package com.ibm.text.resources;
|
||||
|
||||
import java.util.ListResourceBundle;
|
||||
|
||||
public class TransliterationRule$Latin$Hebrew extends ListResourceBundle {
|
||||
/**
|
||||
* Overrides ListResourceBundle
|
||||
*/
|
||||
public Object[][] getContents() {
|
||||
return new Object[][] {
|
||||
{ "Rule",
|
||||
//variable names, derived from the Unicode names.
|
||||
|
||||
"POINT_SHEVA=\u05B0;"
|
||||
+ "POINT_HATAF_SEGOL=\u05B1;"
|
||||
+ "POINT_HATAF_PATAH=\u05B2;"
|
||||
+ "POINT_HATAF_QAMATS=\u05B3;"
|
||||
+ "POINT_HIRIQ=\u05B4;"
|
||||
+ "POINT_TSERE=\u05B5;"
|
||||
+ "POINT_SEGOL=\u05B6;"
|
||||
+ "POINT_PATAH=\u05B7;"
|
||||
+ "POINT_QAMATS=\u05B8;"
|
||||
+ "POINT_HOLAM=\u05B9;"
|
||||
+ "POINT_QUBUTS=\u05BB;"
|
||||
+ "POINT_DAGESH_OR_MAPIQ=\u05BC;"
|
||||
+ "POINT_METEG=\u05BD;"
|
||||
+ "PUNCTUATION_MAQAF=\u05BE;"
|
||||
+ "POINT_RAFE=\u05BF;"
|
||||
+ "PUNCTUATION_PASEQ=\u05C0;"
|
||||
+ "POINT_SHIN_DOT=\u05C1;"
|
||||
+ "POINT_SIN_DOT=\u05C2;"
|
||||
+ "PUNCTUATION_SOF_PASUQ=\u05C3;"
|
||||
+ "ALEF=\u05D0;"
|
||||
+ "BET=\u05D1;"
|
||||
+ "GIMEL=\u05D2;"
|
||||
+ "DALET=\u05D3;"
|
||||
+ "HE=\u05D4;"
|
||||
+ "VAV=\u05D5;"
|
||||
+ "ZAYIN=\u05D6;"
|
||||
+ "HET=\u05D7;"
|
||||
+ "TET=\u05D8;"
|
||||
+ "YOD=\u05D9;"
|
||||
+ "FINAL_KAF=\u05DA;"
|
||||
+ "KAF=\u05DB;"
|
||||
+ "LAMED=\u05DC;"
|
||||
+ "FINAL_MEM=\u05DD;"
|
||||
+ "MEM=\u05DE;"
|
||||
+ "FINAL_NUN=\u05DF;"
|
||||
+ "NUN=\u05E0;"
|
||||
+ "SAMEKH=\u05E1;"
|
||||
+ "AYIN=\u05E2;"
|
||||
+ "FINAL_PE=\u05E3;"
|
||||
+ "PE=\u05E4;"
|
||||
+ "FINAL_TSADI=\u05E5;"
|
||||
+ "TSADI=\u05E6;"
|
||||
+ "QOF=\u05E7;"
|
||||
+ "RESH=\u05E8;"
|
||||
+ "SHIN=\u05E9;"
|
||||
+ "TAV=\u05EA;"
|
||||
+ "YIDDISH_DOUBLE_VAV=\u05F0;"
|
||||
+ "YIDDISH_VAV_YOD=\u05F1;"
|
||||
+ "YIDDISH_DOUBLE_YOD=\u05F2;"
|
||||
+ "PUNCTUATION_GERESH=\u05F3;"
|
||||
+ "PUNCTUATION_GERSHAYIM=\u05F4;"
|
||||
|
||||
//wildcards
|
||||
//The values can be anything we don't use in this file: start at E000.
|
||||
|
||||
+ "letter=[abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ];"
|
||||
|
||||
+ "softvowel=[eiyEIY];"
|
||||
|
||||
+ "vowellike=[{ALEF}{AYIN}{YOD}{VAV}];"
|
||||
|
||||
//?>{POINT_SHEVA}
|
||||
//?>{POINT_HATAF_SEGOL}
|
||||
//?>{POINT_HATAF_PATAH}
|
||||
//?>{POINT_HATAF_QAMATS}
|
||||
//?>{POINT_HIRIQ}
|
||||
//?>{POINT_TSERE}
|
||||
//?>{POINT_SEGOL}
|
||||
//?>{POINT_PATAH}
|
||||
//?>{POINT_QAMATS}
|
||||
//?>{POINT_HOLAM}
|
||||
//?>{POINT_QUBUTS}
|
||||
//?>{POINT_DAGESH_OR_MAPIQ}
|
||||
//?>{POINT_METEG}
|
||||
//?>{PUNCTUATION_MAQAF}
|
||||
//?>{POINT_RAFE}
|
||||
//?>{PUNCTUATION_PASEQ}
|
||||
//?>{POINT_SHIN_DOT}
|
||||
//?>{POINT_SIN_DOT}
|
||||
//?>{PUNCTUATION_SOF_PASUQ}
|
||||
|
||||
+ "a>{ALEF};"
|
||||
+ "A>{ALEF};"
|
||||
|
||||
+ "b>{BET};"
|
||||
+ "B>{BET};"
|
||||
|
||||
+ "c({softvowel}>{SAMEKH};"
|
||||
+ "C({softvowel}>{SAMEKH};"
|
||||
+ "c({letter}>{KAF};"
|
||||
+ "C({letter}>{KAF};"
|
||||
+ "c>{FINAL_KAF};"
|
||||
+ "C>{FINAL_KAF};"
|
||||
|
||||
+ "d>{DALET};"
|
||||
+ "D>{DALET};"
|
||||
|
||||
+ "e>{AYIN};"
|
||||
+ "E>{AYIN};"
|
||||
|
||||
+ "f({letter}>{PE};"
|
||||
+ "f>{FINAL_PE};"
|
||||
+ "F({letter}>{PE};"
|
||||
+ "F>{FINAL_PE};"
|
||||
|
||||
+ "g>{GIMEL};"
|
||||
+ "G>{GIMEL};"
|
||||
|
||||
+ "h>{HE};"
|
||||
+ "H>{HE};"
|
||||
|
||||
+ "i>{YOD};"
|
||||
+ "I>{YOD};"
|
||||
|
||||
+ "j>{DALET}{SHIN};"
|
||||
+ "J>{DALET}{SHIN};"
|
||||
|
||||
+ "kH>{HET};"
|
||||
+ "kh>{HET};"
|
||||
+ "KH>{HET};"
|
||||
+ "Kh>{HET};"
|
||||
+ "k({letter}>{KAF};"
|
||||
+ "K({letter}>{KAF};"
|
||||
+ "k>{FINAL_KAF};"
|
||||
+ "K>{FINAL_KAF};"
|
||||
|
||||
+ "l>{LAMED};"
|
||||
+ "L>{LAMED};"
|
||||
|
||||
+ "m({letter}>{MEM};"
|
||||
+ "m>{FINAL_MEM};"
|
||||
+ "M({letter}>{MEM};"
|
||||
+ "M>{FINAL_MEM};"
|
||||
|
||||
+ "n({letter}>{NUN};"
|
||||
+ "n>{FINAL_NUN};"
|
||||
+ "N({letter}>{NUN};"
|
||||
+ "N>{FINAL_NUN};"
|
||||
|
||||
+ "o>{VAV};"
|
||||
+ "O>{VAV};"
|
||||
|
||||
+ "p({letter}>{PE};"
|
||||
+ "p>{FINAL_PE};"
|
||||
+ "P({letter}>{PE};"
|
||||
+ "P>{FINAL_PE};"
|
||||
|
||||
+ "q>{QOF};"
|
||||
+ "Q>{QOF};"
|
||||
|
||||
+ "r>{RESH};"
|
||||
+ "R>{RESH};"
|
||||
|
||||
+ "sH>{SHIN};"
|
||||
+ "sh>{SHIN};"
|
||||
+ "SH>{SHIN};"
|
||||
+ "Sh>{SHIN};"
|
||||
+ "s>{SAMEKH};"
|
||||
+ "S>{SAMEKH};"
|
||||
|
||||
+ "th>{TAV};"
|
||||
+ "tH>{TAV};"
|
||||
+ "TH>{TAV};"
|
||||
+ "Th>{TAV};"
|
||||
+ "tS({letter}>{TSADI};"
|
||||
+ "ts({letter}>{TSADI};"
|
||||
+ "Ts({letter}>{TSADI};"
|
||||
+ "TS({letter}>{TSADI};"
|
||||
+ "tS>{FINAL_TSADI};"
|
||||
+ "ts>{FINAL_TSADI};"
|
||||
+ "Ts>{FINAL_TSADI};"
|
||||
+ "TS>{FINAL_TSADI};"
|
||||
+ "t>{TET};"
|
||||
+ "T>{TET};"
|
||||
|
||||
+ "u>{VAV};"
|
||||
+ "U>{VAV};"
|
||||
|
||||
+ "v>{VAV};"
|
||||
+ "V>{VAV};"
|
||||
|
||||
+ "w>{VAV};"
|
||||
+ "W>{VAV};"
|
||||
|
||||
+ "x>{KAF}{SAMEKH};"
|
||||
+ "X>{KAF}{SAMEKH};"
|
||||
|
||||
+ "y>{YOD};"
|
||||
+ "Y>{YOD};"
|
||||
|
||||
+ "z>{ZAYIN};"
|
||||
+ "Z>{ZAYIN};"
|
||||
|
||||
//#?>{YIDDISH_DOUBLE_VAV}
|
||||
//?>{YIDDISH_VAV_YOD}
|
||||
//?>{YIDDISH_DOUBLE_YOD}
|
||||
//?>{PUNCTUATION_GERESH}
|
||||
//?>{PUNCTUATION_GERSHAYIM}
|
||||
|
||||
+ "''>;"
|
||||
|
||||
//{POINT_SHEVA}>@
|
||||
//{POINT_HATAF_SEGOL}>@
|
||||
//{POINT_HATAF_PATAH}>@
|
||||
//{POINT_HATAF_QAMATS}>@
|
||||
//{POINT_HIRIQ}>@
|
||||
//{POINT_TSERE}>@
|
||||
//{POINT_SEGOL}>@
|
||||
//{POINT_PATAH}>@
|
||||
//{POINT_QAMATS}>@
|
||||
//{POINT_HOLAM}>@
|
||||
//{POINT_QUBUTS}>@
|
||||
//{POINT_DAGESH_OR_MAPIQ}>@
|
||||
//{POINT_METEG}>@
|
||||
//{PUNCTUATION_MAQAF}>@
|
||||
//{POINT_RAFE}>@
|
||||
//{PUNCTUATION_PASEQ}>@
|
||||
//{POINT_SHIN_DOT}>@
|
||||
//{POINT_SIN_DOT}>@
|
||||
//{PUNCTUATION_SOF_PASUQ}>@
|
||||
|
||||
+ "a<{ALEF};"
|
||||
+ "e<{AYIN};"
|
||||
+ "b<{BET};"
|
||||
+ "d<{DALET};"
|
||||
+ "k<{FINAL_KAF};"
|
||||
+ "m<{FINAL_MEM};"
|
||||
+ "n<{FINAL_NUN};"
|
||||
+ "p<{FINAL_PE};"
|
||||
+ "ts<{FINAL_TSADI};"
|
||||
+ "g<{GIMEL};"
|
||||
+ "kh<{HET};"
|
||||
+ "h<{HE};"
|
||||
+ "k''<{KAF}({HE};"
|
||||
+ "k<{KAF};"
|
||||
+ "l<{LAMED};"
|
||||
+ "m<{MEM};"
|
||||
+ "n<{NUN};"
|
||||
+ "p<{PE};"
|
||||
+ "q<{QOF};"
|
||||
+ "r<{RESH};"
|
||||
+ "s''<{SAMEKH}({HE};"
|
||||
+ "s<{SAMEKH};"
|
||||
+ "sh<{SHIN};"
|
||||
+ "th<{TAV};"
|
||||
+ "t''<{TET}({HE};"
|
||||
+ "t''<{TET}({SAMEKH};"
|
||||
+ "t''<{TET}({SHIN};"
|
||||
+ "t<{TET};"
|
||||
+ "ts<{TSADI};"
|
||||
+ "v<{VAV}({vowellike};"
|
||||
+ "u<{VAV};"
|
||||
+ "y<{YOD};"
|
||||
+ "z<{ZAYIN};"
|
||||
|
||||
//{YIDDISH_DOUBLE_VAV}>@
|
||||
//{YIDDISH_VAV_YOD}>@
|
||||
//{YIDDISH_DOUBLE_YOD}>@
|
||||
//{PUNCTUATION_GERESH}>@
|
||||
//{PUNCTUATION_GERSHAYIM}>@
|
||||
|
||||
+ "<'';"
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
@ -1,325 +0,0 @@
|
||||
package com.ibm.text.resources;
|
||||
import java.util.ListResourceBundle;
|
||||
|
||||
public class TransliterationRule$Latin$Jamo extends ListResourceBundle {
|
||||
/**
|
||||
* Overrides ListResourceBundle
|
||||
*/
|
||||
public Object[][] getContents() {
|
||||
return new Object[][] {
|
||||
{ "Rule", ""
|
||||
|
||||
// VARIABLES
|
||||
|
||||
+ "initial=[\u1100-\u115F];"
|
||||
+ "medial=[\u1160-\u11A7];"
|
||||
+ "final=[\u11A8-\u11F9];" // added - aliu
|
||||
+ "vowel=[aeiouwyAEIOUWY\u1160-\u11A7];"
|
||||
+ "consonant=[bcdfghjklmnpqrstvxzBCDFGHJKLMNPQRSTVXZ{medial}{final}];"
|
||||
+ "ye=[yeYE];"
|
||||
+ "ywe=[yweYWE];"
|
||||
+ "yw=[ywYW];"
|
||||
+ "nl=[nlNL];"
|
||||
+ "gnl=[gnlGNL];"
|
||||
+ "lsgb=[lsgbLSGB];"
|
||||
+ "ywao=[ywaoYWAO];"
|
||||
+ "bl=[blBL];"
|
||||
|
||||
// RULES
|
||||
|
||||
// Hangul structure is IMF or IM
|
||||
// So you can have, because of adjacent sequences
|
||||
// IM, but not II or IF
|
||||
// MF or MI, but not MM
|
||||
// FI, but not FF or FM
|
||||
|
||||
// For English, we just have C or V.
|
||||
// To generate valid Hangul:
|
||||
// Vowels:
|
||||
// We insert IEUNG between VV, and otherwise map V to M
|
||||
// We also insert IEUNG if there is no
|
||||
// Consonants:
|
||||
// We don't break doubles
|
||||
// Cases like lmgg, we have to break at lm
|
||||
// So to guess whether a consonant is I or F
|
||||
// we map all C's to F, except when followed by a vowel, e.g.
|
||||
// X[{vowel}>CHOSEONG (initial)
|
||||
// X>JONGSEONG (final)
|
||||
|
||||
// special insertion for funny sequences of vowels, and for empty consonant
|
||||
|
||||
+ "'' < ({consonant}) \u110B;" // insert a break between any consonant and the empty consonant.
|
||||
+ "({medial}) ({vowel}) <> \u110B;" // HANGUL CHOSEONG IEUNG
|
||||
|
||||
// Below, insert an empty consonant in front of a vowel, if there is no Initial in front.
|
||||
|
||||
// Fix casing.
|
||||
// Because Korean is caseless, we just want to treat everything as
|
||||
// lowercase.
|
||||
// we could do this by always preceeding this transliterator with
|
||||
// an upper-lowercase transformation, but that wouldn't invert nicely.
|
||||
// We use the "revisit" syntax to just convert latin to latin
|
||||
// so that we can avoid
|
||||
// having to restate all the Latin=>Jamo rules, with the I/F handling.
|
||||
|
||||
// We don't have to add titlecase, since that will be picked up
|
||||
// since the first letter is converted, then revisited. E.g.
|
||||
// |Gg => |gg => {sang kiyeok}
|
||||
// We do have to have all caps, since otherwise we could get:
|
||||
// |GG => |gG => {kiyeok}|G => {kiyeok}|g => {kiyeok}{kiyeok}
|
||||
|
||||
+ "Z > |z;"
|
||||
+ "YU > |yu;"
|
||||
+ "YO > |yo;"
|
||||
+ "YI > |yi;"
|
||||
+ "YEO > |yeo;"
|
||||
+ "YE > |ye;"
|
||||
+ "YAE > |yae;"
|
||||
+ "YA > |ya;"
|
||||
+ "Y > |y;"
|
||||
+ "WI > |wi;"
|
||||
+ "WEO > |weo;"
|
||||
+ "WE > |we;"
|
||||
+ "WAE > |wae;"
|
||||
+ "WA > |wa;"
|
||||
+ "W > |w;"
|
||||
+ "U > |u;"
|
||||
+ "T > |t;"
|
||||
+ "SS > |ss;"
|
||||
+ "S > |s;"
|
||||
+ "P > |p;"
|
||||
+ "OE > |oe;"
|
||||
+ "O > |o;"
|
||||
+ "NJ > |nj;"
|
||||
+ "NH > |nh;"
|
||||
+ "NG > |ng;"
|
||||
+ "N > |n;"
|
||||
+ "M > |m;"
|
||||
+ "LT > |lt;"
|
||||
+ "LS > |ls;"
|
||||
+ "LP > |lp;"
|
||||
+ "LM > |lm;"
|
||||
+ "LH > |lh;"
|
||||
+ "LG > |lg;"
|
||||
+ "LB > |lb;"
|
||||
+ "L > |l;"
|
||||
+ "K > |k;"
|
||||
+ "JJ > |jj;"
|
||||
+ "J > |j;"
|
||||
+ "I > |i;"
|
||||
+ "H > |h;"
|
||||
+ "GS > |gs;"
|
||||
+ "GG > |gg;"
|
||||
+ "G > |g;"
|
||||
+ "EU > |eu;"
|
||||
+ "EO > |eo;"
|
||||
+ "E > |e;"
|
||||
+ "DD > |dd;"
|
||||
+ "D > |d;"
|
||||
+ "BS > |bs;"
|
||||
+ "BB > |bb;"
|
||||
+ "B > |b;"
|
||||
+ "AE > |ae;"
|
||||
+ "A > |a;"
|
||||
|
||||
// APOSTROPHE
|
||||
|
||||
// As always, an apostrophe is used to separate digraphs into
|
||||
// singles. That is, if you really wanted [KAN][GGAN], instead
|
||||
// of [KANG][GAN] you would write "kan'ggan".
|
||||
|
||||
// Rules for inserting ' when mapping separated digraphs back
|
||||
// from Hangul to Latin. Catch every letter that can be the
|
||||
// LAST of a digraph (or multigraph) AND first of an initial
|
||||
|
||||
+ "'' < (l) (\u11c0;" // hangul jongseong thieuth
|
||||
+ "'' < ({lsgb}) (\u11ba;" // hangul jongseong sios
|
||||
+ "'' < (l) (\u11c1;" // hangul jongseong phieuph
|
||||
+ "'' < (l) (\u11b7;" // hangul jongseong mieum
|
||||
+ "'' < (n) (\u11bd;" // hangul jongseong cieuc
|
||||
+ "'' < ({nl}) (\u11c2;" // hangul jongseong hieuh
|
||||
+ "'' < ({gnl}) (\u11a9;" // hangul jongseong ssangkiyeok
|
||||
+ "'' < ({bl}) (\u11b8;" // hangul jongseong pieup
|
||||
+ "'' < (d) (\u11ae;" // hangul jongseong tikeut
|
||||
|
||||
+ "'' < ({ye}) (\u116e;" // hangul jungseong u
|
||||
+ "'' < ({ywe}) (\u1169;" // hangul jungseong o
|
||||
+ "'' < ({yw}) (\u1175;" // hangul jungseong i
|
||||
+ "'' < ({ywao}) (\u1166;" // hangul jungseong e
|
||||
+ "'' < ({yw}) (\u1161;" // hangul jungseong a
|
||||
|
||||
+ "'' < (l) (\u1110;" // hangul choseong thieuth
|
||||
+ "'' < ({lsgb}) (\u110a;" // hangul choseong ssangsios
|
||||
+ "'' < ({lsgb}) (\u1109;" // hangul choseong sios
|
||||
+ "'' < (l) (\u1111;" // hangul choseong phieuph
|
||||
+ "'' < (l) (\u1106;" // hangul choseong mieum
|
||||
+ "'' < (n) (\u110c;" // hangul choseong cieuc
|
||||
+ "'' < (n) (\u110d;"
|
||||
+ "'' < ({nl}) (\u1112;" // hangul choseong hieuh
|
||||
+ "'' < ({gnl}) (\u1101;" // hangul choseong ssangkiyeok
|
||||
+ "'' < ({gnl}) (\u1100;" // hangul choseong kiyeok
|
||||
+ "'' < (d) (\u1103;" // hangul choseong tikeut
|
||||
+ "'' < (d) (\u1104;"
|
||||
+ "'' < ({bl}) (\u1107;" // hangul choseong pieup
|
||||
+ "'' < ({bl}) (\u1108;"
|
||||
|
||||
// INITIALS
|
||||
|
||||
+ "t ({vowel}) <> \u1110;" // hangul choseong thieuth
|
||||
+ "ss ({vowel}) <> \u110a;" // hangul choseong ssangsios
|
||||
+ "s ({vowel}) <> \u1109;" // hangul choseong sios
|
||||
+ "p ({vowel}) <> \u1111;" // hangul choseong phieuph
|
||||
+ "n ({vowel}) <> \u1102;" // hangul choseong nieun
|
||||
+ "m ({vowel}) <> \u1106;" // hangul choseong mieum
|
||||
+ "l ({vowel}) <> \u1105;" // hangul choseong rieul
|
||||
+ "k ({vowel}) <> \u110f;" // hangul choseong khieukh
|
||||
+ "j ({vowel}) <> \u110c;" // hangul choseong cieuc
|
||||
+ "h ({vowel}) <> \u1112;" // hangul choseong hieuh
|
||||
+ "gg ({vowel}) <> \u1101;" // hangul choseong ssangkiyeok
|
||||
+ "g ({vowel}) <> \u1100;" // hangul choseong kiyeok
|
||||
+ "d ({vowel}) <> \u1103;" // hangul choseong tikeut
|
||||
+ "c ({vowel}) <> \u110e;" // hangul choseong chieuch
|
||||
+ "b ({vowel}) <> \u1107;" // hangul choseong pieup
|
||||
+ "bb ({vowel}) <> \u1108;"
|
||||
+ "jj ({vowel}) <> \u110d;"
|
||||
+ "dd ({vowel}) <> \u1104;"
|
||||
|
||||
// If we have gotten through to these rules, and we start with
|
||||
// a consonant, then the remaining mappings would be to F,
|
||||
// because must have CC (or C<non-letter>), not CV.
|
||||
// If we have F before us, then
|
||||
// we would end up with FF, which is wrong. The simplest fix is
|
||||
// to still make it an initial, but also insert an "u",
|
||||
// so we end up with F, I, u, and then continue with the C
|
||||
|
||||
// special, only initial
|
||||
+ "bb > \u1108\u116e;" // hangul choseong ssangpieup
|
||||
+ "jj > \u1108\u110d;" // hangul choseong ssangcieuc
|
||||
+ "dd > \u1108\u1104;" // hangul choseong ssangtikeut
|
||||
|
||||
+ "({final}) t > \u1110\u116e;" // hangul choseong thieuth
|
||||
+ "({final}) ss > \u110a\u116e;" // hangul choseong ssangsios
|
||||
+ "({final}) s > \u1109\u116e;" // hangul choseong sios
|
||||
+ "({final}) p > \u1111\u116e;" // hangul choseong phieuph
|
||||
+ "({final}) n > \u1102\u116e;" // hangul choseong nieun
|
||||
+ "({final}) m > \u1106\u116e;" // hangul choseong mieum
|
||||
+ "({final}) l > \u1105\u116e;" // hangul choseong rieul
|
||||
+ "({final}) k > \u110f\u116e;" // hangul choseong khieukh
|
||||
+ "({final}) j > \u110c\u116e;" // hangul choseong cieuc
|
||||
+ "({final}) h > \u1112\u116e;" // hangul choseong hieuh
|
||||
+ "({final}) gg > \u1101\u116e;" // hangul choseong ssangkiyeok
|
||||
+ "({final}) g > \u1100\u116e;" // hangul choseong kiyeok
|
||||
+ "({final}) d > \u1103\u116e;" // hangul choseong tikeut
|
||||
+ "({final}) c > \u110e\u116e;" // hangul choseong chieuch
|
||||
+ "({final}) b > \u1107\u116e;" // hangul choseong pieup
|
||||
|
||||
// MEDIALS after INITIALS
|
||||
|
||||
+ "({initial}) yu <> \u1172;" // hangul jungseong yu
|
||||
+ "({initial}) yo <> \u116d;" // hangul jungseong yo
|
||||
+ "({initial}) yi <> \u1174;" // hangul jungseong yi
|
||||
+ "({initial}) yeo <> \u1167;" // hangul jungseong yeo
|
||||
+ "({initial}) ye <> \u1168;" // hangul jungseong ye
|
||||
+ "({initial}) yae <> \u1164;" // hangul jungseong yae
|
||||
+ "({initial}) ya <> \u1163;" // hangul jungseong ya
|
||||
+ "({initial}) wi <> \u1171;" // hangul jungseong wi
|
||||
+ "({initial}) weo <> \u116f;" // hangul jungseong weo
|
||||
+ "({initial}) we <> \u1170;" // hangul jungseong we
|
||||
+ "({initial}) wae <> \u116b;" // hangul jungseong wae
|
||||
+ "({initial}) wa <> \u116a;" // hangul jungseong wa
|
||||
+ "({initial}) u <> \u116e;" // hangul jungseong u
|
||||
+ "({initial}) oe <> \u116c;" // hangul jungseong oe
|
||||
+ "({initial}) o <> \u1169;" // hangul jungseong o
|
||||
+ "({initial}) i <> \u1175;" // hangul jungseong i
|
||||
+ "({initial}) eu <> \u1173;" // hangul jungseong eu
|
||||
+ "({initial}) eo <> \u1165;" // hangul jungseong eo
|
||||
+ "({initial}) e <> \u1166;" // hangul jungseong e
|
||||
+ "({initial}) ae <> \u1162;" // hangul jungseong ae
|
||||
+ "({initial}) a <> \u1161;" // hangul jungseong a
|
||||
|
||||
// MEDIALS (vowels) not after INITIALs
|
||||
|
||||
+ "yu > \u110B\u1172;" // hangul jungseong yu
|
||||
+ "yo > \u110B\u116d;" // hangul jungseong yo
|
||||
+ "yi > \u110B\u1174;" // hangul jungseong yi
|
||||
+ "yeo > \u110B\u1167;" // hangul jungseong yeo
|
||||
+ "ye > \u110B\u1168;" // hangul jungseong ye
|
||||
+ "yae > \u110B\u1164;" // hangul jungseong yae
|
||||
+ "ya > \u110B\u1163;" // hangul jungseong ya
|
||||
+ "wi > \u110B\u1171;" // hangul jungseong wi
|
||||
+ "weo > \u110B\u116f;" // hangul jungseong weo
|
||||
+ "we > \u110B\u1170;" // hangul jungseong we
|
||||
+ "wae > \u110B\u116b;" // hangul jungseong wae
|
||||
+ "wa > \u110B\u116a;" // hangul jungseong wa
|
||||
+ "u > \u110B\u116e;" // hangul jungseong u
|
||||
+ "oe > \u110B\u116c;" // hangul jungseong oe
|
||||
+ "o > \u110B\u1169;" // hangul jungseong o
|
||||
+ "i > \u110B\u1175;" // hangul jungseong i
|
||||
+ "eu > \u110B\u1173;" // hangul jungseong eu
|
||||
+ "eo > \u110B\u1165;" // hangul jungseong eo
|
||||
+ "e > \u110B\u1166;" // hangul jungseong e
|
||||
+ "ae > \u110B\u1162;" // hangul jungseong ae
|
||||
+ "a > \u110B\u1161;" // hangul jungseong a
|
||||
|
||||
|
||||
// FINALS
|
||||
|
||||
+ "t <> \u11c0;" // hangul jongseong thieuth
|
||||
+ "ss <> \u11bb;" // hangul jongseong ssangsios
|
||||
+ "s <> \u11ba;" // hangul jongseong sios
|
||||
+ "p <> \u11c1;" // hangul jongseong phieuph
|
||||
+ "nj <> \u11ac;" // hangul jongseong nieun-cieuc
|
||||
+ "nh <> \u11ad;" // hangul jongseong nieun-hieuh
|
||||
+ "ng <> \u11bc;" // hangul jongseong ieung
|
||||
+ "n <> \u11ab;" // hangul jongseong nieun
|
||||
+ "m <> \u11b7;" // hangul jongseong mieum
|
||||
+ "lt <> \u11b4;" // hangul jongseong rieul-thieuth
|
||||
+ "ls <> \u11b3;" // hangul jongseong rieul-sios
|
||||
+ "lp <> \u11b5;" // hangul jongseong rieul-phieuph
|
||||
+ "lm <> \u11b1;" // hangul jongseong rieul-mieum
|
||||
+ "lh <> \u11b6;" // hangul jongseong rieul-hieuh
|
||||
+ "lg <> \u11b0;" // hangul jongseong rieul-kiyeok
|
||||
+ "lb <> \u11b2;" // hangul jongseong rieul-pieup
|
||||
+ "l <> \u11af;" // hangul jongseong rieul
|
||||
+ "k <> \u11bf;" // hangul jongseong khieukh
|
||||
+ "j <> \u11bd;" // hangul jongseong cieuc
|
||||
+ "h <> \u11c2;" // hangul jongseong hieuh
|
||||
+ "gs <> \u11aa;" // hangul jongseong kiyeok-sios
|
||||
+ "gg <> \u11a9;" // hangul jongseong ssangkiyeok
|
||||
+ "g <> \u11a8;" // hangul jongseong kiyeok
|
||||
+ "d <> \u11ae;" // hangul jongseong tikeut
|
||||
+ "c <> \u11be;" // hangul jongseong chieuch
|
||||
+ "bs <> \u11b9;" // hangul jongseong pieup-sios
|
||||
+ "b <> \u11b8;" // hangul jongseong pieup
|
||||
|
||||
// extra English letters
|
||||
// {moved to bottom - aliu}
|
||||
|
||||
+ "z > |s;"
|
||||
//{ + "Z > |s;" } masked
|
||||
+ "x > |ks;"
|
||||
+ "X > |ks;"
|
||||
+ "v > |b;"
|
||||
+ "V > |b;"
|
||||
+ "r > |l;"
|
||||
+ "R > |l;"
|
||||
+ "q > |k;"
|
||||
+ "Q > |k;"
|
||||
+ "f > |p;"
|
||||
+ "F > |p;"
|
||||
//{ + "c > |k;" } masked
|
||||
+ "C > |k;"
|
||||
|
||||
+ "y > \u1172;" // hangul jungseong yu
|
||||
+ "w > \u1171;" // hangul jungseong wi
|
||||
|
||||
|
||||
// ====================================
|
||||
// Normal final rule: remove '
|
||||
// ====================================
|
||||
|
||||
+ "''>;"
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -1,84 +0,0 @@
|
||||
package com.ibm.text.resources;
|
||||
|
||||
import java.util.ListResourceBundle;
|
||||
|
||||
public class TransliterationRule$StraightQuotes$CurlyQuotes extends ListResourceBundle {
|
||||
/**
|
||||
* Overrides ListResourceBundle
|
||||
*/
|
||||
public Object[][] getContents() {
|
||||
return new Object[][] {
|
||||
{ "Rule",
|
||||
// Rewritten using character codes [LIU]
|
||||
"white=[[:Zs:][:Zl:][:Zp:]];"
|
||||
+ "black=[^{white}];"
|
||||
+ "open=[:Ps:];"
|
||||
+ "dquote=\";"
|
||||
|
||||
+ "lAng=\u3008;"
|
||||
+ "ldAng=\u300A;"
|
||||
+ "lBrk='[';"
|
||||
+ "lBrc='{';"
|
||||
|
||||
+ "lquote=\u2018;"
|
||||
+ "rquote=\u2019;"
|
||||
+ "ldquote=\u201C;"
|
||||
+ "rdquote=\u201D;"
|
||||
|
||||
+ "ldguill=\u00AB;"
|
||||
+ "rdguill=\u00BB;"
|
||||
+ "lguill=\u2039;"
|
||||
+ "rguill=\u203A;"
|
||||
|
||||
+ "mdash=\u2014;"
|
||||
|
||||
//#######################################
|
||||
// Conversions from input
|
||||
//#######################################
|
||||
|
||||
// join single quotes
|
||||
+ "{lquote}''>{ldquote};"
|
||||
+ "{lquote}{lquote}>{ldquote};"
|
||||
+ "{rquote}''>{rdquote};"
|
||||
+ "{rquote}{rquote}>{rdquote};"
|
||||
|
||||
//smart single quotes
|
||||
+ "{white})''>{lquote};"
|
||||
+ "{open})''>{lquote};"
|
||||
+ "{black})''>{rquote};"
|
||||
+ "''>{lquote};"
|
||||
|
||||
//smart doubles
|
||||
+ "{white}){dquote}>{ldquote};"
|
||||
+ "{open}){dquote}>{ldquote};"
|
||||
+ "{black}){dquote}>{rdquote};"
|
||||
+ "{dquote}>{ldquote};"
|
||||
|
||||
// join single guillemets
|
||||
+ "{rguill}{rguill}>{rdguill};"
|
||||
+ "'>>'>{rdguill};"
|
||||
+ "{lguill}{lguill}>{ldguill};"
|
||||
+ "'<<'>{ldguill};"
|
||||
|
||||
// prevent double spaces
|
||||
+ "\\ )\\ >;"
|
||||
|
||||
// join hyphens into dash
|
||||
+ "-->{mdash};"
|
||||
|
||||
//#######################################
|
||||
// Conversions back to input
|
||||
//#######################################
|
||||
|
||||
//smart quotes
|
||||
+ "''<{lquote};"
|
||||
+ "''<{rquote};"
|
||||
+ "{dquote}<{ldquote};"
|
||||
+ "{dquote}<{rdquote};"
|
||||
|
||||
//hyphens
|
||||
+ "--<{mdash};"
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user