no message

X-SVN-Rev: 791
This commit is contained in:
John Fitzpatrick 2000-02-18 18:02:55 +00:00
parent 485377d2bf
commit 49f2784a30
14 changed files with 0 additions and 38209 deletions

View File

@ -1,276 +0,0 @@
package com.ibm.text.resources;
import java.util.ListResourceBundle;
/**
 * Resource bundle that carries the Fullwidth-Halfwidth transliteration
 * rule text. The rule text was mechanically generated from the Unicode
 * Character Database.
 */
public class TransliterationRule$Fullwidth$Halfwidth extends ListResourceBundle {

    /**
     * Overrides ListResourceBundle.
     *
     * @return a table with a single row: the key "Rule" paired with the
     *         complete rule text. Every rule has the shape
     *         left&lt;&gt;right and ends with ';'. The rules appear in the
     *         text in exactly the order they are listed below.
     */
    public Object[][] getContents() {
        final String rules =
            // multicharacter: one katakana letter paired with a halfwidth
            // letter followed by a halfwidth (semi-)voiced sound mark
            "\u30AC<>\uFF76\uFF9E;" + // to KATAKANA LETTER GA
            "\u30AE<>\uFF77\uFF9E;" + // to KATAKANA LETTER GI
            "\u30B0<>\uFF78\uFF9E;" + // to KATAKANA LETTER GU
            "\u30B2<>\uFF79\uFF9E;" + // to KATAKANA LETTER GE
            "\u30B4<>\uFF7A\uFF9E;" + // to KATAKANA LETTER GO
            "\u30B6<>\uFF7B\uFF9E;" + // to KATAKANA LETTER ZA
            "\u30B8<>\uFF7C\uFF9E;" + // to KATAKANA LETTER ZI
            "\u30BA<>\uFF7D\uFF9E;" + // to KATAKANA LETTER ZU
            "\u30BC<>\uFF7E\uFF9E;" + // to KATAKANA LETTER ZE
            "\u30BE<>\uFF7F\uFF9E;" + // to KATAKANA LETTER ZO
            "\u30C0<>\uFF80\uFF9E;" + // to KATAKANA LETTER DA
            "\u30C2<>\uFF81\uFF9E;" + // to KATAKANA LETTER DI
            "\u30C5<>\uFF82\uFF9E;" + // to KATAKANA LETTER DU
            "\u30C7<>\uFF83\uFF9E;" + // to KATAKANA LETTER DE
            "\u30C9<>\uFF84\uFF9E;" + // to KATAKANA LETTER DO
            "\u30D0<>\uFF8A\uFF9E;" + // to KATAKANA LETTER BA
            "\u30D1<>\uFF8A\uFF9F;" + // to KATAKANA LETTER PA
            "\u30D3<>\uFF8B\uFF9E;" + // to KATAKANA LETTER BI
            "\u30D4<>\uFF8B\uFF9F;" + // to KATAKANA LETTER PI
            "\u30D6<>\uFF8C\uFF9E;" + // to KATAKANA LETTER BU
            "\u30D7<>\uFF8C\uFF9F;" + // to KATAKANA LETTER PU
            "\u30D9<>\uFF8D\uFF9E;" + // to KATAKANA LETTER BE
            "\u30DA<>\uFF8D\uFF9F;" + // to KATAKANA LETTER PE
            "\u30DC<>\uFF8E\uFF9E;" + // to KATAKANA LETTER BO
            "\u30DD<>\uFF8E\uFF9F;" + // to KATAKANA LETTER PO
            "\u30F4<>\uFF73\uFF9E;" + // to KATAKANA LETTER VU
            "\u30F7<>\uFF9C\uFF9E;" + // to KATAKANA LETTER VA
            "\u30FA<>\uFF66\uFF9E;" + // to KATAKANA LETTER VO
            // single character
            "\uFF01<>'!';" + // from FULLWIDTH EXCLAMATION MARK
            "\uFF02<>'\"';" + // from FULLWIDTH QUOTATION MARK
            "\uFF03<>'#';" + // from FULLWIDTH NUMBER SIGN
            "\uFF04<>'$';" + // from FULLWIDTH DOLLAR SIGN
            "\uFF05<>'%';" + // from FULLWIDTH PERCENT SIGN
            "\uFF06<>'&';" + // from FULLWIDTH AMPERSAND
            "\uFF07<>'';" + // from FULLWIDTH APOSTROPHE
            "\uFF08<>'(';" + // from FULLWIDTH LEFT PARENTHESIS
            "\uFF09<>')';" + // from FULLWIDTH RIGHT PARENTHESIS
            "\uFF0A<>'*';" + // from FULLWIDTH ASTERISK
            "\uFF0B<>'+';" + // from FULLWIDTH PLUS SIGN
            "\uFF0C<>',';" + // from FULLWIDTH COMMA
            "\uFF0D<>'-';" + // from FULLWIDTH HYPHEN-MINUS
            "\uFF0E<>'.';" + // from FULLWIDTH FULL STOP
            "\uFF0F<>'/';" + // from FULLWIDTH SOLIDUS
            "\uFF10<>'0';" + // from FULLWIDTH DIGIT ZERO
            "\uFF11<>'1';" + // from FULLWIDTH DIGIT ONE
            "\uFF12<>'2';" + // from FULLWIDTH DIGIT TWO
            "\uFF13<>'3';" + // from FULLWIDTH DIGIT THREE
            "\uFF14<>'4';" + // from FULLWIDTH DIGIT FOUR
            "\uFF15<>'5';" + // from FULLWIDTH DIGIT FIVE
            "\uFF16<>'6';" + // from FULLWIDTH DIGIT SIX
            "\uFF17<>'7';" + // from FULLWIDTH DIGIT SEVEN
            "\uFF18<>'8';" + // from FULLWIDTH DIGIT EIGHT
            "\uFF19<>'9';" + // from FULLWIDTH DIGIT NINE
            "\uFF1A<>':';" + // from FULLWIDTH COLON
            "\uFF1B<>';';" + // from FULLWIDTH SEMICOLON
            "\uFF1C<>'<';" + // from FULLWIDTH LESS-THAN SIGN
            "\uFF1D<>'=';" + // from FULLWIDTH EQUALS SIGN
            "\uFF1E<>'>';" + // from FULLWIDTH GREATER-THAN SIGN
            "\uFF1F<>'?';" + // from FULLWIDTH QUESTION MARK
            "\uFF20<>'@';" + // from FULLWIDTH COMMERCIAL AT
            "\uFF21<>A;" + // from FULLWIDTH LATIN CAPITAL LETTER A
            "\uFF22<>B;" + // from FULLWIDTH LATIN CAPITAL LETTER B
            "\uFF23<>C;" + // from FULLWIDTH LATIN CAPITAL LETTER C
            "\uFF24<>D;" + // from FULLWIDTH LATIN CAPITAL LETTER D
            "\uFF25<>E;" + // from FULLWIDTH LATIN CAPITAL LETTER E
            "\uFF26<>F;" + // from FULLWIDTH LATIN CAPITAL LETTER F
            "\uFF27<>G;" + // from FULLWIDTH LATIN CAPITAL LETTER G
            "\uFF28<>H;" + // from FULLWIDTH LATIN CAPITAL LETTER H
            "\uFF29<>I;" + // from FULLWIDTH LATIN CAPITAL LETTER I
            "\uFF2A<>J;" + // from FULLWIDTH LATIN CAPITAL LETTER J
            "\uFF2B<>K;" + // from FULLWIDTH LATIN CAPITAL LETTER K
            "\uFF2C<>L;" + // from FULLWIDTH LATIN CAPITAL LETTER L
            "\uFF2D<>M;" + // from FULLWIDTH LATIN CAPITAL LETTER M
            "\uFF2E<>N;" + // from FULLWIDTH LATIN CAPITAL LETTER N
            "\uFF2F<>O;" + // from FULLWIDTH LATIN CAPITAL LETTER O
            "\uFF30<>P;" + // from FULLWIDTH LATIN CAPITAL LETTER P
            "\uFF31<>Q;" + // from FULLWIDTH LATIN CAPITAL LETTER Q
            "\uFF32<>R;" + // from FULLWIDTH LATIN CAPITAL LETTER R
            "\uFF33<>S;" + // from FULLWIDTH LATIN CAPITAL LETTER S
            "\uFF34<>T;" + // from FULLWIDTH LATIN CAPITAL LETTER T
            "\uFF35<>U;" + // from FULLWIDTH LATIN CAPITAL LETTER U
            "\uFF36<>V;" + // from FULLWIDTH LATIN CAPITAL LETTER V
            "\uFF37<>W;" + // from FULLWIDTH LATIN CAPITAL LETTER W
            "\uFF38<>X;" + // from FULLWIDTH LATIN CAPITAL LETTER X
            "\uFF39<>Y;" + // from FULLWIDTH LATIN CAPITAL LETTER Y
            "\uFF3A<>Z;" + // from FULLWIDTH LATIN CAPITAL LETTER Z
            "\uFF3B<>'[';" + // from FULLWIDTH LEFT SQUARE BRACKET
            "\uFF3C<>'\\';" + // from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
            "\uFF3D<>']';" + // from FULLWIDTH RIGHT SQUARE BRACKET
            "\uFF3E<>'^';" + // from FULLWIDTH CIRCUMFLEX ACCENT
            "\uFF3F<>'_';" + // from FULLWIDTH LOW LINE
            "\uFF40<>'`';" + // from FULLWIDTH GRAVE ACCENT
            "\uFF41<>a;" + // from FULLWIDTH LATIN SMALL LETTER A
            "\uFF42<>b;" + // from FULLWIDTH LATIN SMALL LETTER B
            "\uFF43<>c;" + // from FULLWIDTH LATIN SMALL LETTER C
            "\uFF44<>d;" + // from FULLWIDTH LATIN SMALL LETTER D
            "\uFF45<>e;" + // from FULLWIDTH LATIN SMALL LETTER E
            "\uFF46<>f;" + // from FULLWIDTH LATIN SMALL LETTER F
            "\uFF47<>g;" + // from FULLWIDTH LATIN SMALL LETTER G
            "\uFF48<>h;" + // from FULLWIDTH LATIN SMALL LETTER H
            "\uFF49<>i;" + // from FULLWIDTH LATIN SMALL LETTER I
            "\uFF4A<>j;" + // from FULLWIDTH LATIN SMALL LETTER J
            "\uFF4B<>k;" + // from FULLWIDTH LATIN SMALL LETTER K
            "\uFF4C<>l;" + // from FULLWIDTH LATIN SMALL LETTER L
            "\uFF4D<>m;" + // from FULLWIDTH LATIN SMALL LETTER M
            "\uFF4E<>n;" + // from FULLWIDTH LATIN SMALL LETTER N
            "\uFF4F<>o;" + // from FULLWIDTH LATIN SMALL LETTER O
            "\uFF50<>p;" + // from FULLWIDTH LATIN SMALL LETTER P
            "\uFF51<>q;" + // from FULLWIDTH LATIN SMALL LETTER Q
            "\uFF52<>r;" + // from FULLWIDTH LATIN SMALL LETTER R
            "\uFF53<>s;" + // from FULLWIDTH LATIN SMALL LETTER S
            "\uFF54<>t;" + // from FULLWIDTH LATIN SMALL LETTER T
            "\uFF55<>u;" + // from FULLWIDTH LATIN SMALL LETTER U
            "\uFF56<>v;" + // from FULLWIDTH LATIN SMALL LETTER V
            "\uFF57<>w;" + // from FULLWIDTH LATIN SMALL LETTER W
            "\uFF58<>x;" + // from FULLWIDTH LATIN SMALL LETTER X
            "\uFF59<>y;" + // from FULLWIDTH LATIN SMALL LETTER Y
            "\uFF5A<>z;" + // from FULLWIDTH LATIN SMALL LETTER Z
            "\uFF5B<>'{';" + // from FULLWIDTH LEFT CURLY BRACKET
            "\uFF5C<>'|';" + // from FULLWIDTH VERTICAL LINE
            "\uFF5D<>'}';" + // from FULLWIDTH RIGHT CURLY BRACKET
            "\uFF5E<>'~';" + // from FULLWIDTH TILDE
            "\u3002<>\uFF61;" + // to HALFWIDTH IDEOGRAPHIC FULL STOP
            "\u300C<>\uFF62;" + // to HALFWIDTH LEFT CORNER BRACKET
            "\u300D<>\uFF63;" + // to HALFWIDTH RIGHT CORNER BRACKET
            "\u3001<>\uFF64;" + // to HALFWIDTH IDEOGRAPHIC COMMA
            "\u30FB<>\uFF65;" + // to HALFWIDTH KATAKANA MIDDLE DOT
            "\u30F2<>\uFF66;" + // to HALFWIDTH KATAKANA LETTER WO
            "\u30A1<>\uFF67;" + // to HALFWIDTH KATAKANA LETTER SMALL A
            "\u30A3<>\uFF68;" + // to HALFWIDTH KATAKANA LETTER SMALL I
            "\u30A5<>\uFF69;" + // to HALFWIDTH KATAKANA LETTER SMALL U
            "\u30A7<>\uFF6A;" + // to HALFWIDTH KATAKANA LETTER SMALL E
            "\u30A9<>\uFF6B;" + // to HALFWIDTH KATAKANA LETTER SMALL O
            "\u30E3<>\uFF6C;" + // to HALFWIDTH KATAKANA LETTER SMALL YA
            "\u30E5<>\uFF6D;" + // to HALFWIDTH KATAKANA LETTER SMALL YU
            "\u30E7<>\uFF6E;" + // to HALFWIDTH KATAKANA LETTER SMALL YO
            "\u30C3<>\uFF6F;" + // to HALFWIDTH KATAKANA LETTER SMALL TU
            "\u30FC<>\uFF70;" + // to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
            "\u30A2<>\uFF71;" + // to HALFWIDTH KATAKANA LETTER A
            "\u30A4<>\uFF72;" + // to HALFWIDTH KATAKANA LETTER I
            "\u30A6<>\uFF73;" + // to HALFWIDTH KATAKANA LETTER U
            "\u30A8<>\uFF74;" + // to HALFWIDTH KATAKANA LETTER E
            "\u30AA<>\uFF75;" + // to HALFWIDTH KATAKANA LETTER O
            "\u30AB<>\uFF76;" + // to HALFWIDTH KATAKANA LETTER KA
            "\u30AD<>\uFF77;" + // to HALFWIDTH KATAKANA LETTER KI
            "\u30AF<>\uFF78;" + // to HALFWIDTH KATAKANA LETTER KU
            "\u30B1<>\uFF79;" + // to HALFWIDTH KATAKANA LETTER KE
            "\u30B3<>\uFF7A;" + // to HALFWIDTH KATAKANA LETTER KO
            "\u30B5<>\uFF7B;" + // to HALFWIDTH KATAKANA LETTER SA
            "\u30B7<>\uFF7C;" + // to HALFWIDTH KATAKANA LETTER SI
            "\u30B9<>\uFF7D;" + // to HALFWIDTH KATAKANA LETTER SU
            "\u30BB<>\uFF7E;" + // to HALFWIDTH KATAKANA LETTER SE
            "\u30BD<>\uFF7F;" + // to HALFWIDTH KATAKANA LETTER SO
            "\u30BF<>\uFF80;" + // to HALFWIDTH KATAKANA LETTER TA
            "\u30C1<>\uFF81;" + // to HALFWIDTH KATAKANA LETTER TI
            "\u30C4<>\uFF82;" + // to HALFWIDTH KATAKANA LETTER TU
            "\u30C6<>\uFF83;" + // to HALFWIDTH KATAKANA LETTER TE
            "\u30C8<>\uFF84;" + // to HALFWIDTH KATAKANA LETTER TO
            "\u30CA<>\uFF85;" + // to HALFWIDTH KATAKANA LETTER NA
            "\u30CB<>\uFF86;" + // to HALFWIDTH KATAKANA LETTER NI
            "\u30CC<>\uFF87;" + // to HALFWIDTH KATAKANA LETTER NU
            "\u30CD<>\uFF88;" + // to HALFWIDTH KATAKANA LETTER NE
            "\u30CE<>\uFF89;" + // to HALFWIDTH KATAKANA LETTER NO
            "\u30CF<>\uFF8A;" + // to HALFWIDTH KATAKANA LETTER HA
            "\u30D2<>\uFF8B;" + // to HALFWIDTH KATAKANA LETTER HI
            "\u30D5<>\uFF8C;" + // to HALFWIDTH KATAKANA LETTER HU
            "\u30D8<>\uFF8D;" + // to HALFWIDTH KATAKANA LETTER HE
            "\u30DB<>\uFF8E;" + // to HALFWIDTH KATAKANA LETTER HO
            "\u30DE<>\uFF8F;" + // to HALFWIDTH KATAKANA LETTER MA
            "\u30DF<>\uFF90;" + // to HALFWIDTH KATAKANA LETTER MI
            "\u30E0<>\uFF91;" + // to HALFWIDTH KATAKANA LETTER MU
            "\u30E1<>\uFF92;" + // to HALFWIDTH KATAKANA LETTER ME
            "\u30E2<>\uFF93;" + // to HALFWIDTH KATAKANA LETTER MO
            "\u30E4<>\uFF94;" + // to HALFWIDTH KATAKANA LETTER YA
            "\u30E6<>\uFF95;" + // to HALFWIDTH KATAKANA LETTER YU
            "\u30E8<>\uFF96;" + // to HALFWIDTH KATAKANA LETTER YO
            "\u30E9<>\uFF97;" + // to HALFWIDTH KATAKANA LETTER RA
            "\u30EA<>\uFF98;" + // to HALFWIDTH KATAKANA LETTER RI
            "\u30EB<>\uFF99;" + // to HALFWIDTH KATAKANA LETTER RU
            "\u30EC<>\uFF9A;" + // to HALFWIDTH KATAKANA LETTER RE
            "\u30ED<>\uFF9B;" + // to HALFWIDTH KATAKANA LETTER RO
            "\u30EF<>\uFF9C;" + // to HALFWIDTH KATAKANA LETTER WA
            "\u30F3<>\uFF9D;" + // to HALFWIDTH KATAKANA LETTER N
            "\u3099<>\uFF9E;" + // to HALFWIDTH KATAKANA VOICED SOUND MARK
            "\u309A<>\uFF9F;" + // to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
            "\u1160<>\uFFA0;" + // to HALFWIDTH HANGUL FILLER
            "\u1100<>\uFFA1;" + // to HALFWIDTH HANGUL LETTER KIYEOK
            "\u1101<>\uFFA2;" + // to HALFWIDTH HANGUL LETTER SSANGKIYEOK
            "\u11AA<>\uFFA3;" + // to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
            "\u1102<>\uFFA4;" + // to HALFWIDTH HANGUL LETTER NIEUN
            "\u11AC<>\uFFA5;" + // to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
            "\u11AD<>\uFFA6;" + // to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
            "\u1103<>\uFFA7;" + // to HALFWIDTH HANGUL LETTER TIKEUT
            "\u1104<>\uFFA8;" + // to HALFWIDTH HANGUL LETTER SSANGTIKEUT
            "\u1105<>\uFFA9;" + // to HALFWIDTH HANGUL LETTER RIEUL
            "\u11B0<>\uFFAA;" + // to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
            "\u11B1<>\uFFAB;" + // to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
            "\u11B2<>\uFFAC;" + // to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
            "\u11B3<>\uFFAD;" + // to HALFWIDTH HANGUL LETTER RIEUL-SIOS
            "\u11B4<>\uFFAE;" + // to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
            "\u11B5<>\uFFAF;" + // to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
            "\u111A<>\uFFB0;" + // to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
            "\u1106<>\uFFB1;" + // to HALFWIDTH HANGUL LETTER MIEUM
            "\u1107<>\uFFB2;" + // to HALFWIDTH HANGUL LETTER PIEUP
            "\u1108<>\uFFB3;" + // to HALFWIDTH HANGUL LETTER SSANGPIEUP
            "\u1121<>\uFFB4;" + // to HALFWIDTH HANGUL LETTER PIEUP-SIOS
            "\u1109<>\uFFB5;" + // to HALFWIDTH HANGUL LETTER SIOS
            "\u110A<>\uFFB6;" + // to HALFWIDTH HANGUL LETTER SSANGSIOS
            "\u110B<>\uFFB7;" + // to HALFWIDTH HANGUL LETTER IEUNG
            "\u110C<>\uFFB8;" + // to HALFWIDTH HANGUL LETTER CIEUC
            "\u110D<>\uFFB9;" + // to HALFWIDTH HANGUL LETTER SSANGCIEUC
            "\u110E<>\uFFBA;" + // to HALFWIDTH HANGUL LETTER CHIEUCH
            "\u110F<>\uFFBB;" + // to HALFWIDTH HANGUL LETTER KHIEUKH
            "\u1110<>\uFFBC;" + // to HALFWIDTH HANGUL LETTER THIEUTH
            "\u1111<>\uFFBD;" + // to HALFWIDTH HANGUL LETTER PHIEUPH
            "\u1112<>\uFFBE;" + // to HALFWIDTH HANGUL LETTER HIEUH
            "\u1161<>\uFFC2;" + // to HALFWIDTH HANGUL LETTER A
            "\u1162<>\uFFC3;" + // to HALFWIDTH HANGUL LETTER AE
            "\u1163<>\uFFC4;" + // to HALFWIDTH HANGUL LETTER YA
            "\u1164<>\uFFC5;" + // to HALFWIDTH HANGUL LETTER YAE
            "\u1165<>\uFFC6;" + // to HALFWIDTH HANGUL LETTER EO
            "\u1166<>\uFFC7;" + // to HALFWIDTH HANGUL LETTER E
            "\u1167<>\uFFCA;" + // to HALFWIDTH HANGUL LETTER YEO
            "\u1168<>\uFFCB;" + // to HALFWIDTH HANGUL LETTER YE
            "\u1169<>\uFFCC;" + // to HALFWIDTH HANGUL LETTER O
            "\u116A<>\uFFCD;" + // to HALFWIDTH HANGUL LETTER WA
            "\u116B<>\uFFCE;" + // to HALFWIDTH HANGUL LETTER WAE
            "\u116C<>\uFFCF;" + // to HALFWIDTH HANGUL LETTER OE
            "\u116D<>\uFFD2;" + // to HALFWIDTH HANGUL LETTER YO
            "\u116E<>\uFFD3;" + // to HALFWIDTH HANGUL LETTER U
            "\u116F<>\uFFD4;" + // to HALFWIDTH HANGUL LETTER WEO
            "\u1170<>\uFFD5;" + // to HALFWIDTH HANGUL LETTER WE
            "\u1171<>\uFFD6;" + // to HALFWIDTH HANGUL LETTER WI
            "\u1172<>\uFFD7;" + // to HALFWIDTH HANGUL LETTER YU
            "\u1173<>\uFFDA;" + // to HALFWIDTH HANGUL LETTER EU
            "\u1174<>\uFFDB;" + // to HALFWIDTH HANGUL LETTER YI
            "\u1175<>\uFFDC;" + // to HALFWIDTH HANGUL LETTER I
            "\uFFE0<>'\u00a2';" + // from FULLWIDTH CENT SIGN
            "\uFFE1<>'\u00a3';" + // from FULLWIDTH POUND SIGN
            "\uFFE2<>'\u00ac';" + // from FULLWIDTH NOT SIGN
            "\uFFE3<>' '\u0304;" + // from FULLWIDTH MACRON
            "\uFFE4<>'\u00a6';" + // from FULLWIDTH BROKEN BAR
            "\uFFE5<>'\u00a5';" + // from FULLWIDTH YEN SIGN
            "\uFFE6<>\u20A9;" + // from FULLWIDTH WON SIGN
            "\u2502<>\uFFE8;" + // to HALFWIDTH FORMS LIGHT VERTICAL
            "\u2190<>\uFFE9;" + // to HALFWIDTH LEFTWARDS ARROW
            "\u2191<>\uFFEA;" + // to HALFWIDTH UPWARDS ARROW
            "\u2192<>\uFFEB;" + // to HALFWIDTH RIGHTWARDS ARROW
            "\u2193<>\uFFEC;" + // to HALFWIDTH DOWNWARDS ARROW
            "\u25A0<>\uFFED;" + // to HALFWIDTH BLACK SQUARE
            "\u25CB<>\uFFEE;"; // to HALFWIDTH WHITE CIRCLE

        return new Object[][] {
            { "Rule", rules }
        };
    }
}

View File

@ -1,129 +0,0 @@
package com.ibm.text.resources;
import java.util.ListResourceBundle;
/**
 * Resource bundle carrying the KeyboardEscape-Latin1 rule text: ASCII key
 * sequences, each terminated by the {esc} variable, that are replaced by a
 * single character in the range U+00A0..U+00FF.
 */
public class TransliterationRule$KeyboardEscape$Latin1 extends ListResourceBundle {
    /**
     * Overrides ListResourceBundle
     *
     * @return a table with a single row: the key "Rule" paired with the
     *         complete rule text. Rules are separated by ';' and are kept
     *         in the order listed below; where two key sequences begin with
     *         the same letter, the longer one is listed first.
     */
    public Object[][] getContents() {
        return new Object[][] {
            { "Rule",
                // Variable definitions. Note that esc and acute are both
                // defined as the apostrophe, and hat and super are both '^'.
                "esc='';"
                + "grave=`;"
                + "acute='';"
                + "hat=^;"
                + "tilde=~;"
                + "umlaut=:;"
                + "ring=.;"
                + "cedilla=,;"
                + "slash=/;"
                + "super=^;"
                // Make keyboard entry of {esc} possible
                // and of backslash
                + "'\\'{esc}>{esc};"
                + "'\\\\'>'\\';"
                // Long keys
                + "cur{esc}>\u00A4;"
                + "sec{esc}>\u00A7;"
                + "not{esc}>\u00AC;"
                + "mul{esc}>\u00D7;"
                + "div{esc}>\u00F7;"
                + "\\ {esc}>\u00A0;" // non-breaking space
                + "!{esc}>\u00A1;" // inverted exclamation
                + "c/{esc}>\u00A2;" // cent sign
                + "lb{esc}>\u00A3;" // pound sign
                + "'|'{esc}>\u00A6;" // broken vertical bar
                + ":{esc}>\u00A8;" // umlaut
                + "{super}a{esc}>\u00AA;" // feminine ordinal
                + "'<<'{esc}>\u00AB;"
                + "r{esc}>\u00AE;"
                + "--{esc}>\u00AF;"
                + "-{esc}>\u00AD;"
                + "+-{esc}>\u00B1;"
                + "{super}2{esc}>\u00B2;"
                + "{super}3{esc}>\u00B3;"
                + "{acute}{esc}>\u00B4;"
                + "m{esc}>\u00B5;"
                + "para{esc}>\u00B6;"
                + "dot{esc}>\u00B7;"
                + "{cedilla}{esc}>\u00B8;"
                + "{super}1{esc}>\u00B9;"
                + "{super}o{esc}>\u00BA;" // masculine ordinal
                + "'>>'{esc}>\u00BB;"
                + "1/4{esc}>\u00BC;"
                + "1/2{esc}>\u00BD;"
                + "3/4{esc}>\u00BE;"
                + "?{esc}>\u00BF;"
                + "A{grave}{esc}>\u00C0;"
                + "A{acute}{esc}>\u00C1;"
                + "A{hat}{esc}>\u00C2;"
                + "A{tilde}{esc}>\u00C3;"
                + "A{umlaut}{esc}>\u00C4;"
                + "A{ring}{esc}>\u00C5;"
                + "AE{esc}>\u00C6;"
                + "C{cedilla}{esc}>\u00C7;"
                + "E{grave}{esc}>\u00C8;"
                + "E{acute}{esc}>\u00C9;"
                + "E{hat}{esc}>\u00CA;"
                + "E{umlaut}{esc}>\u00CB;"
                + "I{grave}{esc}>\u00CC;"
                + "I{acute}{esc}>\u00CD;"
                + "I{hat}{esc}>\u00CE;"
                + "I{umlaut}{esc}>\u00CF;"
                + "D-{esc}>\u00D0;"
                + "N{tilde}{esc}>\u00D1;"
                + "O{grave}{esc}>\u00D2;"
                + "O{acute}{esc}>\u00D3;"
                + "O{hat}{esc}>\u00D4;"
                + "O{tilde}{esc}>\u00D5;"
                + "O{umlaut}{esc}>\u00D6;"
                + "O{slash}{esc}>\u00D8;"
                + "U{grave}{esc}>\u00D9;"
                + "U{acute}{esc}>\u00DA;"
                + "U{hat}{esc}>\u00DB;"
                + "U{umlaut}{esc}>\u00DC;"
                + "Y{acute}{esc}>\u00DD;"
                + "TH{esc}>\u00DE;"
                + "ss{esc}>\u00DF;"
                + "a{grave}{esc}>\u00E0;"
                + "a{acute}{esc}>\u00E1;"
                + "a{hat}{esc}>\u00E2;"
                + "a{tilde}{esc}>\u00E3;"
                + "a{umlaut}{esc}>\u00E4;"
                + "a{ring}{esc}>\u00E5;"
                + "ae{esc}>\u00E6;"
                + "c{cedilla}{esc}>\u00E7;"
                + "c{esc}>\u00A9;" // copyright - after c{cedilla}
                + "e{grave}{esc}>\u00E8;"
                + "e{acute}{esc}>\u00E9;"
                + "e{hat}{esc}>\u00EA;"
                + "e{umlaut}{esc}>\u00EB;"
                + "i{grave}{esc}>\u00EC;"
                + "i{acute}{esc}>\u00ED;"
                + "i{hat}{esc}>\u00EE;"
                + "i{umlaut}{esc}>\u00EF;"
                + "d-{esc}>\u00F0;"
                + "n{tilde}{esc}>\u00F1;"
                + "o{grave}{esc}>\u00F2;"
                + "o{acute}{esc}>\u00F3;"
                + "o{hat}{esc}>\u00F4;"
                + "o{tilde}{esc}>\u00F5;"
                + "o{umlaut}{esc}>\u00F6;"
                + "o{slash}{esc}>\u00F8;"
                + "o{esc}>\u00B0;"
                + "u{grave}{esc}>\u00F9;"
                + "u{acute}{esc}>\u00FA;"
                + "u{hat}{esc}>\u00FB;"
                + "u{umlaut}{esc}>\u00FC;"
                + "y{acute}{esc}>\u00FD;"
                // U+00FF (y with diaeresis) previously had no key sequence:
                // the only rule mentioning it was a disabled one that reused
                // the sharp-s keys "ss{esc}". It follows the same
                // letter-plus-{umlaut} pattern as the other vowels and is
                // listed before the shorter y{esc} rule.
                + "y{umlaut}{esc}>\u00FF;"
                + "y{esc}>\u00A5;" // yen sign
                + "th{esc}>\u00FE;"
            }
        };
    }
}

View File

@ -1,241 +0,0 @@
package com.ibm.text.resources;
import java.util.ListResourceBundle;
/**
 * Resource bundle that carries the Latin-Arabic transliteration rule text.
 */
public class TransliterationRule$Latin$Arabic extends ListResourceBundle {

    /**
     * Overrides ListResourceBundle.
     *
     * @return a table with a single row: the key "Rule" paired with the
     *         complete rule text. The text holds, in this order: the named
     *         letter definitions (name=character), the rules written with
     *         '&gt;' (introduced in the original as "convert English to
     *         Arabic"), and the rules written with '&lt;' ("now Arabic to
     *         English").
     */
    public Object[][] getContents() {
        final String rules = ""
            // To Do: finish adding shadda, add sokoon
            // ---- named letter definitions ----
            + "alefmadda=\u0622;"
            + "alefuhamza=\u0623;"
            + "wauuhamza=\u0624;"
            + "alefhamza=\u0625;"
            + "yehuhamza=\u0626;"
            + "alef=\u0627;"
            + "beh=\u0628;"
            + "tehmarbuta=\u0629;"
            + "teh=\u062A;"
            + "theh=\u062B;"
            + "geem=\u062C;"
            + "hah=\u062D;"
            + "kha=\u062E;"
            + "dal=\u062F;"
            + "dhal=\u0630;"
            + "reh=\u0631;"
            + "zain=\u0632;"
            + "seen=\u0633;"
            + "sheen=\u0634;"
            + "sad=\u0635;"
            + "dad=\u0636;"
            + "tah=\u0637;"
            + "zah=\u0638;"
            + "ein=\u0639;"
            + "ghein=\u063A;"
            + "feh=\u0641;"
            + "qaaf=\u0642;"
            + "kaf=\u0643;"
            + "lam=\u0644;"
            + "meem=\u0645;"
            + "noon=\u0646;"
            + "heh=\u0647;"
            + "wau=\u0648;"
            + "yehmaqsura=\u0649;"
            + "yeh=\u064A;"
            + "peh=\u06A4;"
            + "hamza=\u0621;"
            + "fathatein=\u064B;"
            + "dammatein=\u064C;"
            + "kasratein=\u064D;"
            + "fatha=\u064E;"
            + "damma=\u064F;"
            + "kasra=\u0650;"
            + "shadda=\u0651;"
            + "sokoon=\u0652;"
            // ---- convert English to Arabic ----
            // The word "Arabic" expands to a fixed six-word phrase; this is
            // one rule spread over six literals.
            + "Arabic>"
            + "\u062a\u062a\u0645\u062a\u0639' '"
            + "\u0627\u0644\u0644\u063a\u0629' '"
            + "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629' '"
            + "\u0628\u0628\u0646\u0638\u0645' '"
            + "\u0643\u062a\u0627\u0628\u0628\u064a\u0629' '"
            + "\u062c\u0645\u064a\u0644\u0629;"
            + "ai>{alefmadda};"
            + "ae>{alefuhamza};"
            + "ao>{alefhamza};"
            + "aa>{alef};"
            + "an>{fathatein};"
            + "a>{fatha};"
            + "b>{beh};"
            + "c>{kaf};"
            + "{dhal})dh>{shadda};"
            + "dh>{dhal};"
            + "{dad})dd>{shadda};"
            + "dd>{dad};"
            + "{dal})d>{shadda};"
            + "d>{dal};"
            + "e>{ein};"
            + "f>{feh};"
            + "gh>{ghein};"
            + "g>{geem};"
            + "hh>{hah};"
            + "h>{heh};"
            + "ii>{kasratein};"
            + "i>{kasra};"
            + "j>{geem};"
            + "kh>{kha};"
            + "k>{kaf};"
            + "l>{lam};"
            + "m>{meem};"
            + "n>{noon};"
            + "o>{hamza};"
            + "p>{peh};"
            + "q>{qaaf};"
            + "r>{reh};"
            + "sh>{sheen};"
            + "ss>{sad};"
            + "s>{seen};"
            + "th>{theh};"
            + "tm>{tehmarbuta};"
            + "tt>{tah};"
            + "t>{teh};"
            + "uu>{dammatein};"
            + "u>{damma};"
            + "v>{beh};"
            + "we>{wauuhamza};"
            + "w>{wau};"
            + "x>{kaf}{shadda}{seen};"
            + "ye>{yehuhamza};"
            + "ym>{yehmaqsura};"
            + "y>{yeh};"
            + "zz>{zah};"
            + "z>{zain};"
            + "0>\u0660;" // Arabic digit 0
            + "1>\u0661;" // Arabic digit 1
            + "2>\u0662;" // Arabic digit 2
            + "3>\u0663;" // Arabic digit 3
            + "4>\u0664;" // Arabic digit 4
            + "5>\u0665;" // Arabic digit 5
            + "6>\u0666;" // Arabic digit 6
            + "7>\u0667;" // Arabic digit 7
            + "8>\u0668;" // Arabic digit 8
            + "9>\u0669;" // Arabic digit 9
            + "%>\u066A;" // Arabic %
            + ".>\u066B;" // Arabic decimal separator
            + ",>\u066C;" // Arabic thousands separator
            + "*>\u066D;" // Arabic five-pointed star
            + "`0>0;" // Escaped forms of the above
            + "`1>1;"
            + "`2>2;"
            + "`3>3;"
            + "`4>4;"
            + "`5>5;"
            + "`6>6;"
            + "`7>7;"
            + "`8>8;"
            + "`9>9;"
            + "`%>%;"
            + "`.>.;"
            + "`,>,;"
            + "`*>*;"
            + "``>`;"
            + "''>;"
            // ---- now Arabic to English ----
            + "''ai<a){alefmadda};"
            + "ai<{alefmadda};"
            + "''ae<a){alefuhamza};"
            + "ae<{alefuhamza};"
            + "''ao<a){alefhamza};"
            + "ao<{alefhamza};"
            + "''aa<a){alef};"
            + "aa<{alef};"
            + "''an<a){fathatein};"
            + "an<{fathatein};"
            + "''a<a){fatha};"
            + "a<{fatha};"
            + "b<{beh};"
            + "''dh<d){dhal};"
            + "dh<{dhal};"
            + "''dd<d){dad};"
            + "dd<{dad};"
            + "''d<d){dal};"
            + "d<{dal};"
            + "''e<a){ein};"
            + "''e<w){ein};"
            + "''e<y){ein};"
            + "e<{ein};"
            + "f<{feh};"
            + "gh<{ghein};"
            + "''hh<d){hah};"
            + "''hh<t){hah};"
            + "''hh<k){hah};"
            + "''hh<s){hah};"
            + "hh<{hah};"
            + "''h<d){heh};"
            + "''h<t){heh};"
            + "''h<k){heh};"
            + "''h<s){heh};"
            + "h<{heh};"
            + "''ii<i){kasratein};"
            + "ii<{kasratein};"
            + "''i<i){kasra};"
            + "i<{kasra};"
            + "j<{geem};"
            + "kh<{kha};"
            + "x<{kaf}{shadda}{seen};"
            + "k<{kaf};"
            + "l<{lam};"
            + "''m<y){meem};"
            + "''m<t){meem};"
            + "m<{meem};"
            + "n<{noon};"
            + "''o<a){hamza};"
            + "o<{hamza};"
            + "p<{peh};"
            + "q<{qaaf};"
            + "r<{reh};"
            + "sh<{sheen};"
            + "''ss<s){sad};"
            + "ss<{sad};"
            + "''s<s){seen};"
            + "s<{seen};"
            + "th<{theh};"
            + "tm<{tehmarbuta};"
            + "''tt<t){tah};"
            + "tt<{tah};"
            + "''t<t){teh};"
            + "t<{teh};"
            + "''uu<u){dammatein};"
            + "uu<{dammatein};"
            + "''u<u){damma};"
            + "u<{damma};"
            + "we<{wauuhamza};"
            + "w<{wau};"
            + "ye<{yehuhamza};"
            + "ym<{yehmaqsura};"
            + "''y<y){yeh};"
            + "y<{yeh};"
            + "''zz<z){zah};"
            + "zz<{zah};"
            + "''z<z){zain};"
            + "z<{zain};"
            + "dh<dh){shadda};"
            + "dd<dd){shadda};"
            + "''d<d){shadda};";

        return new Object[][] {
            { "Rule", rules }
        };
    }
}

View File

@ -1,310 +0,0 @@
package com.ibm.text.resources;
import java.util.ListResourceBundle;
/**
 * Resource bundle that carries the Latin-Cyrillic transliteration rule text.
 *
 * This class is designed to be a general Latin-Cyrillic transliteration.
 * The standard Russian transliterations are generally used for the letters
 * from Russian, with additional Cyrillic characters given consistent
 * mappings.
 */
public class TransliterationRule$Latin$Cyrillic extends ListResourceBundle {

    /**
     * Overrides ListResourceBundle.
     *
     * @return a table with a single row: the key "Rule" paired with the
     *         complete rule text. The text holds the named character
     *         definitions, then the set variables, then the uppercase rules,
     *         and finally a lowercased copy of those rules.
     */
    public Object[][] getContents() {
        final String rules =
            // ---- named characters: two Latin letters with hacek ----
            "S-hacek=\u0160;" +
            "s-hacek=\u0161;" +
            // ---- named characters: uppercase Cyrillic ----
            "YO=\u0401;" +
            "J=\u0408;" +
            "A=\u0410;" +
            "B=\u0411;" +
            "V=\u0412;" +
            "G=\u0413;" +
            "D=\u0414;" +
            "YE=\u0415;" +
            "ZH=\u0416;" +
            "Z=\u0417;" +
            "YI=\u0418;" +
            "Y=\u0419;" +
            "K=\u041A;" +
            "L=\u041B;" +
            "M=\u041C;" +
            "N=\u041D;" +
            "O=\u041E;" +
            "P=\u041F;" +
            "R=\u0420;" +
            "S=\u0421;" +
            "T=\u0422;" +
            "U=\u0423;" +
            "F=\u0424;" +
            "KH=\u0425;" +
            "TS=\u0426;" +
            "CH=\u0427;" +
            "SH=\u0428;" +
            "SHCH=\u0429;" +
            "HARD=\u042A;" +
            "I=\u042B;" +
            "SOFT=\u042C;" +
            "E=\u042D;" +
            "YU=\u042E;" +
            "YA=\u042F;" +
            // ---- named characters: lowercase Cyrillic ----
            "a=\u0430;" +
            "b=\u0431;" +
            "v=\u0432;" +
            "g=\u0433;" +
            "d=\u0434;" +
            "ye=\u0435;" +
            "zh=\u0436;" +
            "z=\u0437;" +
            "yi=\u0438;" +
            "y=\u0439;" +
            "k=\u043a;" +
            "l=\u043b;" +
            "m=\u043c;" +
            "n=\u043d;" +
            "o=\u043e;" +
            "p=\u043f;" +
            "r=\u0440;" +
            "s=\u0441;" +
            "t=\u0442;" +
            "u=\u0443;" +
            "f=\u0444;" +
            "kh=\u0445;" +
            "ts=\u0446;" +
            "ch=\u0447;" +
            "sh=\u0448;" +
            "shch=\u0449;" +
            "hard=\u044a;" +
            "i=\u044b;" +
            "soft=\u044c;" +
            "e=\u044d;" +
            "yu=\u044e;" +
            "ya=\u044f;" +
            "yo=\u0451;" +
            "j=\u0458;" +
            // ---- set variables ----
            // Each set is defined under an uppercase and a lowercase name so
            // that the lowercased copy of the rules can refer to it too.
            "csoft=[eiyEIY];" +
            "CSOFT=[eiyEIY];" +
            "BECOMES_H=[{HARD}{hard}];" +
            "becomes_h=[{HARD}{hard}];" +
            "BECOMES_S=[{S}{s}];" +
            "becomes_s=[{S}{s}];" +
            "BECOMES_C=[{CH}{ch}];" +
            "becomes_c=[{CH}{ch}];" +
            "BECOMES_VOWEL=[{A}{E}{I}{O}{U}{a}{e}{i}{o}{u}];" +
            "becomes_vowel=[{A}{E}{I}{O}{U}{a}{e}{i}{o}{u}];" +
            "letter=[[:Lu:][:Ll:]];" +
            "lower=[[:Ll:]];" +
            /*
             * Modified to combine display transliterator and typing
             * transliterator. The display mapping uses accents for the
             * "soft" vowels. It does not, although it could, use characters
             * like s-hacek instead of digraphs like sh.
             */
            // =============================================
            // Special titlecase forms, not duplicated
            // =============================================
            "Ch>{CH};" + "Ch<{CH}({lower};" +
            "Kh>{KH};" + "Kh<{KH}({lower};" +
            "Shch>{SHCH};" + "Shch<{SHCH}({lower};" +
            "Sh>{SH};" + "Sh<{SH}({lower};" +
            "Ts>{TS};" + "Ts<{TS}({lower};" +
            "Zh>{ZH};" + "Zh<{ZH}({lower};" +
            // The Y-digraphs below have only the '>' rule; the matching
            // '<' rules are disabled in the original.
            "Yi>{YI};" +
            "Ye>{YE};" +
            "Yo>{YO};" +
            "Yu>{YU};" +
            "Ya>{YA};" +
            // =============================================
            // Rules to Duplicate
            // To get the lowercase versions, copy these and lowercase
            // =============================================
            // variant spellings in English
            "SHTCH>{SHCH};" +
            "TCH>{CH};" +
            "TH>{Z};" +
            "Q>{K};" +
            "WH>{V};" +
            "W>{V};" +
            "X>{K}{S};" + // '<' counterpart disabled in the original
            // Separate letters that would otherwise join
            "SH''<{SH}({BECOMES_C};" +
            "T''<{T}({BECOMES_S};" +
            "K''<{K}({BECOMES_H};" +
            "S''<{S}({BECOMES_H};" +
            "T''<{T}({BECOMES_H};" +
            "Z''<{Z}({BECOMES_H};" +
            "Y''<{Y}({BECOMES_VOWEL};" +
            // Main letters
            "A<>{A};" +
            "B<>{B};" +
            "CH<>{CH};" +
            "D<>{D};" +
            "E<>{E};" +
            "F<>{F};" +
            "G<>{G};" +
            "\u00cc<>{YI};" +
            "I<>{I};" +
            "KH<>{KH};" +
            "K<>{K};" +
            "L<>{L};" +
            "M<>{M};" +
            "N<>{N};" +
            "O<>{O};" +
            "P<>{P};" +
            "R<>{R};" +
            "SHCH<>{SHCH};" +
            "SH>{SH};" + // '<' counterpart disabled in the original
            "{S-hacek}<>{SH};" +
            "S<>{S};" +
            "TS<>{TS};" +
            "T<>{T};" +
            "U<>{U};" +
            "V<>{V};" +
            // For YE, YO, YU and YA the digraph has only a '>' rule; the
            // two-way rule uses a grave-accented Latin capital instead.
            "YE>{YE};" +
            "\u00c8<>{YE};" +
            "YO>{YO};" +
            "\u00d2<>{YO};" +
            "YU>{YU};" +
            "\u00d9<>{YU};" +
            "YA>{YA};" +
            "\u00c0<>{YA};" +
            "Y<>{Y};" +
            "ZH<>{ZH};" +
            "Z<>{Z};" +
            "H<>{HARD};" +
            "\u0178<>{SOFT};" +
            // Non-russian
            "J<>{J};" +
            // variant spellings in English
            "C({csoft}>{S};" +
            "C>{K};" +
            // =============================================
            // Duplicated Rules
            // Copy and lowercase the above rules
            // =============================================
            // variant spellings in english
            "shtch>{shch};" +
            "tch>{ch};" +
            "th>{z};" +
            "q>{k};" +
            "wh>{v};" +
            "w>{v};" +
            "x>{k}{s};" + // '<' counterpart disabled in the original
            // separate letters that would otherwise join
            "sh''<{sh}({becomes_c};" +
            "t''<{t}({becomes_s};" +
            "k''<{k}({becomes_h};" +
            "s''<{s}({becomes_h};" +
            "t''<{t}({becomes_h};" +
            "z''<{z}({becomes_h};" +
            "y''<{y}({becomes_vowel};" +
            // main letters
            "a<>{a};" +
            "b<>{b};" +
            "ch<>{ch};" +
            "d<>{d};" +
            "e<>{e};" +
            "f<>{f};" +
            "g<>{g};" +
            "\u00ec<>{yi};" +
            "i<>{i};" +
            "kh<>{kh};" +
            "k<>{k};" +
            "l<>{l};" +
            "m<>{m};" +
            "n<>{n};" +
            "o<>{o};" +
            "p<>{p};" +
            "r<>{r};" +
            "shch<>{shch};" +
            "sh>{sh};" + // '<' counterpart disabled in the original
            "{s-hacek}<>{sh};" +
            "s<>{s};" +
            "ts<>{ts};" +
            "t<>{t};" +
            "u<>{u};" +
            "v<>{v};" +
            // Same pattern as the uppercase block, with grave-accented
            // Latin small letters.
            "ye>{ye};" +
            "\u00e8<>{ye};" +
            "yo>{yo};" +
            "\u00f2<>{yo};" +
            "yu>{yu};" +
            "\u00f9<>{yu};" +
            "ya>{ya};" +
            "\u00e0<>{ya};" +
            "y<>{y};" +
            "zh<>{zh};" +
            "z<>{z};" +
            "h<>{hard};" +
            "\u00ff<>{soft};" +
            // non-russian
            "j<>{j};" +
            // variant spellings in english
            "c({csoft}>{s};" +
            "c>{k};" +
            // =============================================
            // End of Duplicated Rules
            // =============================================
            // generally the last rule
            "''>;";

        return new Object[][] {
            { "Rule", rules }
        };
    }
}

View File

@ -1,409 +0,0 @@
package com.ibm.text.resources;
import java.util.ListResourceBundle;
public class TransliterationRule$Latin$Devanagari extends ListResourceBundle {
/**
* Overrides ListResourceBundle
*/
public Object[][] getContents() {
return new Object[][] {
{ "Rule",
//#####################################################################
// Keyboard Transliteration Table
//#####################################################################
// Conversions should be:
// 1. complete
// * convert every sequence of Latin letters (a to z plus apostrophe)
// to a sequence of Native letters
// * convert every sequence of Native letters to Latin letters
// 2. reversible
// * any string of Native converted to Latin and back should be the same
// * this is not true for English converted to Native & back, e.g.:
// k -> {kaf} -> k
// c -> {kaf} -> k
//#####################################################################
// Sequences of Latin letters may convert to a single Native letter.
// When this is the case, an apostrophe can be used to indicate separate
// letters.
// E.g. sh -> {shin}
// s'h -> {sin}{heh}
// ss -> {sad}
// s's -> {sin}{shadda}
//#####################################################################
// To Do:
// finish adding shadda, add sokoon, fix uppercase
// make two transliteration tables: one with vowels, one without
//#####################################################################
// Modifications
// Devanagari Transliterator: broken up with consonants/vowels
//#####################################################################
// Unicode character name definitions
//#####################################################################
//consonants
"candrabindu=\u0901;"
+ "bindu=\u0902;"
+ "visarga=\u0903;"
// w<vowel> represents the stand-alone form
+ "wa=\u0905;"
+ "waa=\u0906;"
+ "wi=\u0907;"
+ "wii=\u0908;"
+ "wu=\u0909;"
+ "wuu=\u090A;"
+ "wr=\u090B;"
+ "wl=\u090C;"
+ "we=\u090F;"
+ "wai=\u0910;"
+ "wo=\u0913;"
+ "wau=\u0914;"
+ "ka=\u0915;"
+ "kha=\u0916;"
+ "ga=\u0917;"
+ "gha=\u0918;"
+ "nga=\u0919;"
+ "ca=\u091A;"
+ "cha=\u091B;"
+ "ja=\u091C;"
+ "jha=\u091D;"
+ "nya=\u091E;"
+ "tta=\u091F;"
+ "ttha=\u0920;"
+ "dda=\u0921;"
+ "ddha=\u0922;"
+ "nna=\u0923;"
+ "ta=\u0924;"
+ "tha=\u0925;"
+ "da=\u0926;"
+ "dha=\u0927;"
+ "na=\u0928;"
+ "pa=\u092A;"
+ "pha=\u092B;"
+ "ba=\u092C;"
+ "bha=\u092D;"
+ "ma=\u092E;"
+ "ya=\u092F;"
+ "ra=\u0930;"
+ "rra=\u0931;"
+ "la=\u0933;"
+ "va=\u0935;"
+ "sha=\u0936;"
+ "ssa=\u0937;"
+ "sa=\u0938;"
+ "ha=\u0939;"
// <vowel> represents the dependent form
+ "aa=\u093E;"
+ "i=\u093F;"
+ "ii=\u0940;"
+ "u=\u0941;"
+ "uu=\u0942;"
+ "rh=\u0943;"
+ "lh=\u0944;"
+ "e=\u0947;"
+ "ai=\u0948;"
+ "o=\u094B;"
+ "au=\u094C;"
+ "virama=\u094D;"
+ "wrr=\u0960;"
+ "rrh=\u0962;"
+ "danda=\u0964;"
+ "doubleDanda=\u0965;"
+ "depVowelAbove=[\u093E-\u0940\u0945-\u094C];"
+ "depVowelBelow=[\u0941-\u0944];"
+ "endThing=[{danda}{doubleDanda}\u0000-\u08FF\u0980-\uFFFF];"
+ "&=[{virama}{aa}{ai}{au}{ii}{i}{uu}{u}{rrh}{rh}{lh}{e}{o}];"
+ "%=[bcdfghjklmnpqrstvwxyz];"
//#####################################################################
// convert from Latin letters to Native letters
//#####################################################################
//Hindi>\u092d\u093e\u0930\u0924--\u0020\u0926\u0947\u0936\u0020\u092c\u0928\u094d\u0927\u0941\u002e
// special forms with no good conversion
+ "mm>{bindu};"
+ "x>{visarga};"
// convert to independent forms at start of word or syllable:
// e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
// Moved up [LIU]
+ "aa>{waa};"
+ "ai>{wai};"
+ "au>{wau};"
+ "ii>{wii};"
+ "i>{wi};"
+ "uu>{wuu};"
+ "u>{wu};"
+ "rrh>{wrr};"
+ "rh>{wr};"
+ "lh>{wl};"
+ "e>{we};"
+ "o>{wo};"
+ "a>{wa};"
// normal consonants
+ "kh>{kha}|{virama};"
+ "k>{ka}|{virama};"
+ "q>{ka}|{virama};"
+ "gh>{gha}|{virama};"
+ "g>{ga}|{virama};"
+ "ng>{nga}|{virama};"
+ "ch>{cha}|{virama};"
+ "c>{ca}|{virama};"
+ "jh>{jha}|{virama};"
+ "j>{ja}|{virama};"
+ "ny>{nya}|{virama};"
+ "tth>{ttha}|{virama};"
+ "tt>{tta}|{virama};"
+ "ddh>{ddha}|{virama};"
+ "dd>{dda}|{virama};"
+ "nn>{nna}|{virama};"
+ "th>{tha}|{virama};"
+ "t>{ta}|{virama};"
+ "dh>{dha}|{virama};"
+ "d>{da}|{virama};"
+ "n>{na}|{virama};"
+ "ph>{pha}|{virama};"
+ "p>{pa}|{virama};"
+ "bh>{bha}|{virama};"
+ "b>{ba}|{virama};"
+ "m>{ma}|{virama};"
+ "y>{ya}|{virama};"
+ "r>{ra}|{virama};"
+ "l>{la}|{virama};"
+ "v>{va}|{virama};"
+ "f>{va}|{virama};"
+ "w>{va}|{virama};"
+ "sh>{sha}|{virama};"
+ "ss>{ssa}|{virama};"
+ "s>{sa}|{virama};"
+ "z>{sa}|{virama};"
+ "h>{ha}|{virama};"
+ ".>{danda};"
+ "{danda}.>{doubleDanda};"
+ "{depVowelAbove})~>{bindu};"
+ "{depVowelBelow})~>{candrabindu};"
// convert to dependent forms after consonant with no vowel:
// e.g. kai -> {ka}{virama}ai -> {ka}{ai}
+ "{virama}aa>{aa};"
+ "{virama}ai>{ai};"
+ "{virama}au>{au};"
+ "{virama}ii>{ii};"
+ "{virama}i>{i};"
+ "{virama}uu>{uu};"
+ "{virama}u>{u};"
+ "{virama}rrh>{rrh};"
+ "{virama}rh>{rh};"
+ "{virama}lh>{lh};"
+ "{virama}e>{e};"
+ "{virama}o>{o};"
+ "{virama}a>;"
// otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
+ "{virama}''aa>{waa};"
+ "{virama}''ai>{wai};"
+ "{virama}''au>{wau};"
+ "{virama}''ii>{wii};"
+ "{virama}''i>{wi};"
+ "{virama}''uu>{wuu};"
+ "{virama}''u>{wu};"
+ "{virama}''rrh>{wrr};"
+ "{virama}''rh>{wr};"
+ "{virama}''lh>{wl};"
+ "{virama}''e>{we};"
+ "{virama}''o>{wo};"
+ "{virama}''a>{wa};"
+ "{virama}({endThing}>;"
// convert any left-over apostrophes used for separation
+ "''>;"
//#####################################################################
// convert from Native letters to Latin letters
//#####################################################################
// special forms with no good conversion
+ "mm<{bindu};"
+ "x<{visarga};"
// normal consonants
+ "kh<{kha}(&;"
+ "kha<{kha};"
+ "k''<{ka}{virama}({ha};"
+ "k<{ka}(&;"
+ "ka<{ka};"
+ "gh<{gha}(&;"
+ "gha<{gha};"
+ "g''<{ga}{virama}({ha};"
+ "g<{ga}(&;"
+ "ga<{ga};"
+ "ng<{nga}(&;"
+ "nga<{nga};"
+ "ch<{cha}(&;"
+ "cha<{cha};"
+ "c''<{ca}{virama}({ha};"
+ "c<{ca}(&;"
+ "ca<{ca};"
+ "jh<{jha}(&;"
+ "jha<{jha};"
+ "j''<{ja}{virama}({ha};"
+ "j<{ja}(&;"
+ "ja<{ja};"
+ "ny<{nya}(&;"
+ "nya<{nya};"
+ "tth<{ttha}(&;"
+ "ttha<{ttha};"
+ "tt''<{tta}{virama}({ha};"
+ "tt<{tta}(&;"
+ "tta<{tta};"
+ "ddh<{ddha}(&;"
+ "ddha<{ddha};"
+ "dd''<{dda}(&{ha};"
+ "dd<{dda}(&;"
+ "dda<{dda};"
+ "dh<{dha}(&;"
+ "dha<{dha};"
+ "d''<{da}{virama}({ha};"
+ "d''<{da}{virama}({ddha};"
+ "d''<{da}{virama}({dda};"
+ "d''<{da}{virama}({dha};"
+ "d''<{da}{virama}({da};"
+ "d<{da}(&;"
+ "da<{da};"
+ "th<{tha}(&;"
+ "tha<{tha};"
+ "t''<{ta}{virama}({ha};"
+ "t''<{ta}{virama}({ttha};"
+ "t''<{ta}{virama}({tta};"
+ "t''<{ta}{virama}({tha};"
+ "t''<{ta}{virama}({ta};"
+ "t<{ta}(&;"
+ "ta<{ta};"
+ "n''<{na}{virama}({ga};"
+ "n''<{na}{virama}({ya};"
+ "n<{na}(&;"
+ "na<{na};"
+ "ph<{pha}(&;"
+ "pha<{pha};"
+ "p''<{pa}{virama}({ha};"
+ "p<{pa}(&;"
+ "pa<{pa};"
+ "bh<{bha}(&;"
+ "bha<{bha};"
+ "b''<{ba}{virama}({ha};"
+ "b<{ba}(&;"
+ "ba<{ba};"
+ "m''<{ma}{virama}({ma};"
+ "m''<{ma}{virama}({bindu};"
+ "m<{ma}(&;"
+ "ma<{ma};"
+ "y<{ya}(&;"
+ "ya<{ya};"
+ "r''<{ra}{virama}({ha};"
+ "r<{ra}(&;"
+ "ra<{ra};"
+ "l''<{la}{virama}({ha};"
+ "l<{la}(&;"
+ "la<{la};"
+ "v<{va}(&;"
+ "va<{va};"
+ "sh<{sha}(&;"
+ "sha<{sha};"
+ "ss<{ssa}(&;"
+ "ssa<{ssa};"
+ "s''<{sa}{virama}({ha};"
+ "s''<{sa}{virama}({sha};"
+ "s''<{sa}{virama}({ssa};"
+ "s''<{sa}{virama}({sa};"
+ "s<{sa}(&;"
+ "sa<{sa};"
+ "h<{ha}(&;"
+ "ha<{ha};"
// dependent vowels (should never occur except following consonants)
+ "aa<{aa};"
+ "ai<{ai};"
+ "au<{au};"
+ "ii<{ii};"
+ "i<{i};"
+ "uu<{uu};"
+ "u<{u};"
+ "rrh<{rrh};"
+ "rh<{rh};"
+ "lh<{lh};"
+ "e<{e};"
+ "o<{o};"
// independent vowels (when following consonants)
+ "''aa<a){waa};"
+ "''aa<%){waa};"
+ "''ai<a){wai};"
+ "''ai<%){wai};"
+ "''au<a){wau};"
+ "''au<%){wau};"
+ "''ii<a){wii};"
+ "''ii<%){wii};"
+ "''i<a){wi};"
+ "''i<%){wi};"
+ "''uu<a){wuu};"
+ "''uu<%){wuu};"
+ "''u<a){wu};"
+ "''u<%){wu};"
+ "''rrh<%){wrr};"
+ "''rh<%){wr};"
+ "''lh<%){wl};"
+ "''e<%){we};"
+ "''o<%){wo};"
+ "''a<a){wa};"
+ "''a<%){wa};"
// independent vowels (otherwise)
+ "aa<{waa};"
+ "ai<{wai};"
+ "au<{wau};"
+ "ii<{wii};"
+ "i<{wi};"
+ "uu<{wuu};"
+ "u<{wu};"
+ "rrh<{wrr};"
+ "rh<{wr};"
+ "lh<{wl};"
+ "e<{we};"
+ "o<{wo};"
+ "a<{wa};"
// blow away any remaining viramas
+ "<{virama};"
}
};
}
}

View File

@ -1,377 +0,0 @@
package com.ibm.text.resources;
import java.util.ListResourceBundle;
public class TransliterationRule$Latin$Greek extends ListResourceBundle {

    /**
     * Rule text for the Latin/Greek transliterator (modern Greek letters,
     * romanized in the style traditionally used for Ancient Greek, so that
     * results resemble Greek loan words in English rather than the more
     * phonetic official Greek romanization).
     *
     * Points worth knowing before editing:
     *   1. Eta and omega have no direct Latin vowel, so e/o with a macron
     *      is used, with other single letters standing in for the accented
     *      forms.
     *   2. Accented and diaeresis forms of i and y get substitutes as well.
     *   3. Digraphs such as "ph" need special handling.
     *   4. A gamma in front of gamma (and a few other letters) becomes "n",
     *      as in "Anglo".
     *   5. Ypsilon after a vowel is "u" ("Mouseio"); otherwise "y"
     *      ("Physikon").
     *   6. The lowercase rules are a lowercased copy of the uppercase rules
     *      wherever possible: edit the uppercase section, then copy, paste
     *      and lowercase it to regenerate the lowercase section.
     *
     * The order of the rules is significant; do not reorder them.
     */
    private static final String RULES =
        // ------------------------------------------------------------
        // Variables: names that keep the rules readable and that are
        // used as conditions.
        // ------------------------------------------------------------
        // Latin letters
        "E-MACRON=\u0112;" +
        "e-macron=\u0113;" +
        "O-MACRON=\u014C;" +
        "o-macron=\u014D;" +
        "Y-UMLAUT=\u0178;" +
        "y-umlaut=\u00FF;" +
        // Disabled alternative: the "-acute" names below could instead be
        // defined with a real combining acute (base letter followed by
        // U+0301): U+0112, U+0113, U+014C, U+014D, U+00FF, U+00EF and
        // U+00FC, each followed by U+0301.
        // Active definitions: single-letter equivalents.
        "E-MACRON-ACUTE=\u00CA;" +
        "e-macron-acute=\u00EA;" +
        "O-MACRON-ACUTE=\u00D4;" +
        "o-macron-acute=\u00F4;" +
        "y-umlaut-acute=\u0177;" +
        "\u00ef-acute=\u00EE;" +
        "\u00fc-acute=\u00FB;" +
        // Greek capitals
        "ALPHA=\u0391;" +
        "BETA=\u0392;" +
        "GAMMA=\u0393;" +
        "DELTA=\u0394;" +
        "EPSILON=\u0395;" +
        "ZETA=\u0396;" +
        "ETA=\u0397;" +
        "THETA=\u0398;" +
        "IOTA=\u0399;" +
        "KAPPA=\u039A;" +
        "LAMBDA=\u039B;" +
        "MU=\u039C;" +
        "NU=\u039D;" +
        "XI=\u039E;" +
        "OMICRON=\u039F;" +
        "PI=\u03A0;" +
        "RHO=\u03A1;" +
        "SIGMA=\u03A3;" +
        "TAU=\u03A4;" +
        "YPSILON=\u03A5;" +
        "PHI=\u03A6;" +
        "CHI=\u03A7;" +
        "PSI=\u03A8;" +
        "OMEGA=\u03A9;" +
        // Greek capitals: the "+" variants and the diaeresis forms
        "ALPHA+=\u0386;" +
        "EPSILON+=\u0388;" +
        "ETA+=\u0389;" +
        "IOTA+=\u038A;" +
        "OMICRON+=\u038C;" +
        "YPSILON+=\u038E;" +
        "OMEGA+=\u038F;" +
        "IOTA_DIAERESIS=\u03AA;" +
        "YPSILON_DIAERESIS=\u03AB;" +
        // Greek small letters
        "alpha=\u03B1;" +
        "beta=\u03B2;" +
        "gamma=\u03B3;" +
        "delta=\u03B4;" +
        "epsilon=\u03B5;" +
        "zeta=\u03B6;" +
        "eta=\u03B7;" +
        "theta=\u03B8;" +
        "iota=\u03B9;" +
        "kappa=\u03BA;" +
        "lambda=\u03BB;" +
        "mu=\u03BC;" +
        "nu=\u03BD;" +
        "xi=\u03BE;" +
        "omicron=\u03BF;" +
        "pi=\u03C0;" +
        "rho=\u03C1;" +
        "sigma=\u03C3;" +
        "tau=\u03C4;" +
        "ypsilon=\u03C5;" +
        "phi=\u03C6;" +
        "chi=\u03C7;" +
        "psi=\u03C8;" +
        "omega=\u03C9;" +
        // Greek small letters: "+" variants, diaeresis forms, and the
        // second sigma form (named sigma+)
        "alpha+=\u03AC;" +
        "epsilon+=\u03AD;" +
        "eta+=\u03AE;" +
        "iota+=\u03AF;" +
        "omicron+=\u03CC;" +
        "ypsilon+=\u03CD;" +
        "omega+=\u03CE;" +
        "iota_diaeresis=\u03CA;" +
        "ypsilon_diaeresis=\u03CB;" +
        "iota_diaeresis+=\u0390;" +
        "ypsilon_diaeresis+=\u03B0;" +
        "sigma+=\u03C2;" +
        // Sets used as conditions. Every name is lowercase so that the
        // uppercase rules can be cut, pasted and lowercased further down.
        "letter=[~[:Lu:][:Ll:]];" +
        "lower=[[:Ll:]];" +
        "softener=[eiyEIY];" +
        "vowel=[aeiouAEIOU" +
        "{ALPHA}{EPSILON}{ETA}{IOTA}{OMICRON}{YPSILON}{OMEGA}" +
        "{ALPHA+}{EPSILON+}{ETA+}{IOTA+}{OMICRON+}{YPSILON+}{OMEGA+}" +
        "{IOTA_DIAERESIS}{YPSILON_DIAERESIS}" +
        "{alpha}{epsilon}{eta}{iota}{omicron}{ypsilon}{omega}" +
        "{alpha+}{epsilon+}{eta+}{iota+}{omicron+}{ypsilon+}{omega+}" +
        "{iota_diaeresis}{ypsilon_diaeresis}" +
        "{iota_diaeresis+}{ypsilon_diaeresis+}" +
        "];" +
        "n-gamma=[GKXCgkxc];" +
        "gamma-n=[{GAMMA}{KAPPA}{CHI}{XI}{gamma}{kappa}{chi}{xi}];" +
        "pp=[Pp];" +
        // ------------------------------------------------------------
        // Rules
        // ------------------------------------------------------------
        // Special titlecase digraphs. These have no lowercase twin, so
        // leave them out when regenerating the lowercase section.
        "Th <> {THETA}({lower};" +
        "Ph <> {PHI}({lower};" +
        "Ch <> {CHI}({lower};" +
        //masked: "Ps<{PHI}({lower};"
        // Sigma. There is no uppercase final sigma, so every sigma rule
        // lives here rather than in the mirrored sections. An apostrophe
        // is inserted where needed to keep doubled letters round-trippable;
        // the h-digraphs do not need it because they are never produced
        // when mapping back from Greek.
        "''S <> ({pp}) {SIGMA} ;" + // handle PS
        "S <> {SIGMA};" +
        // Lowercase s has two Greek forms, final and non-final.
        //   ~s : the abnormal form (final before a letter, or non-final
        //        before a non-letter)
        //   's : keeps p and s apart (otherwise ps is a single letter)
        "''s < ({pp}) {sigma} ({letter});" +
        "s < {sigma} ({letter});" +
        "~s < {sigma} ;" +
        "~s < {sigma+} ({letter});" +
        "''s < ({pp}) {sigma+} ;" +
        "s < {sigma+} ;" +
        "~s ({letter}) > {sigma+};" +
        "~s > {sigma};" +
        "''s ({letter}) > {sigma};" +
        "''s > {sigma+};" +
        "s ({letter}) > {sigma};" +
        "s > {sigma+};" +
        // These also lack uppercase forms, so they sit here too.
        "i\"`>{iota_diaeresis+};" +
        "y\"`>{ypsilon_diaeresis+};" +
        "{\u00ef-acute} <> {iota_diaeresis+};" +
        "{\u00fc-acute} <> {vowel}){ypsilon_diaeresis+};" +
        "{y-umlaut-acute} <> {ypsilon_diaeresis+};" +
        // ------------------------------------------------------------
        // Uppercase forms (master copy for the lowercase section)
        // ------------------------------------------------------------
        // Typing variants for keyboards without accented keys
        "A`>{ALPHA+};" +
        "E`>{EPSILON+};" +
        "EE`>{ETA+};" +
        "EE>{ETA};" +
        "I`>{IOTA+};" +
        "O`>{OMICRON+};" +
        "OO`>{OMEGA+};" +
        "OO>{OMEGA};" +
        "I\">{IOTA_DIAERESIS};" +
        "Y\">{YPSILON_DIAERESIS};" +
        // Basic letters
        "A<>{ALPHA};" +
        "\u00c1<>{ALPHA+};" +
        "B<>{BETA};" +
        "N ({n-gamma}) <> {GAMMA} ({gamma-n});" +
        "G<>{GAMMA};" +
        "D<>{DELTA};" +
        "''E <> ([Ee]){EPSILON};" + // handle EE
        "E<>{EPSILON};" +
        "\u00c9<>{EPSILON+};" +
        "Z<>{ZETA};" +
        "{E-MACRON-ACUTE}<>{ETA+};" +
        "{E-MACRON}<>{ETA};" +
        "TH<>{THETA};" +
        "I<>{IOTA};" +
        "\u00cd<>{IOTA+};" +
        "\u00cf<>{IOTA_DIAERESIS};" +
        "K<>{KAPPA};" +
        "L<>{LAMBDA};" +
        "M<>{MU};" +
        "N'' <> {NU} ({gamma-n});" +
        "N<>{NU};" +
        "X<>{XI};" +
        "''O <> ([Oo]) {OMICRON};" + // handle OO
        "O<>{OMICRON};" +
        "\u00d3<>{OMICRON+};" +
        "PH<>{PHI};" + // must come before P
        "PS<>{PSI};" + // must come before P
        "P<>{PI};" +
        "R<>{RHO};" +
        "T<>{TAU};" +
        "U <> ({vowel}) {YPSILON};" +
        "\u00da <> ({vowel}) {YPSILON+};" +
        "\u00dc <> ({vowel}) {YPSILON_DIAERESIS};" +
        "Y<>{YPSILON};" +
        "\u00dd<>{YPSILON+};" +
        "{Y-UMLAUT}<>{YPSILON_DIAERESIS};" +
        "CH<>{CHI};" +
        "{O-MACRON-ACUTE}<>{OMEGA+};" +
        "{O-MACRON}<>{OMEGA};" +
        // Remaining English letters, mapped for completeness
        "C({softener})>|S;" +
        "C>|K;" +
        "F>|PH;" +
        "H>|CH;" +
        "J>|I;" +
        "Q>|K;" +
        "V>|U;" +
        "W>|U;" +
        // ------------------------------------------------------------
        // Lowercase forms (lowercased copy of the section above)
        // ------------------------------------------------------------
        // Typing variants for keyboards without accented keys
        "a`>{alpha+};" +
        "e`>{epsilon+};" +
        "ee`>{eta+};" +
        "ee>{eta};" +
        "i`>{iota+};" +
        "o`>{omicron+};" +
        "oo`>{omega+};" +
        "oo>{omega};" +
        "i\">{iota_diaeresis};" +
        "y\">{ypsilon_diaeresis};" +
        // Basic letters
        "a<>{alpha};" +
        "\u00e1<>{alpha+};" +
        "b<>{beta};" +
        "n ({n-gamma}) <> {gamma} ({gamma-n});" +
        "g<>{gamma};" +
        "d<>{delta};" +
        "''e <> ([Ee]){epsilon};" + // handle ee
        "e<>{epsilon};" +
        "\u00e9<>{epsilon+};" +
        "z<>{zeta};" +
        "{e-macron-acute}<>{eta+};" +
        "{e-macron}<>{eta};" +
        "th<>{theta};" +
        "i<>{iota};" +
        "\u00ed<>{iota+};" +
        "\u00ef<>{iota_diaeresis};" +
        "k<>{kappa};" +
        "l<>{lambda};" +
        "m<>{mu};" +
        "n'' <> {nu} ({gamma-n});" +
        "n<>{nu};" +
        "x<>{xi};" +
        "''o <> ([Oo]) {omicron};" + // handle oo
        "o<>{omicron};" +
        "\u00f3<>{omicron+};" +
        "ph<>{phi};" + // must come before p
        "ps<>{psi};" + // must come before p
        "p<>{pi};" +
        "r<>{rho};" +
        "t<>{tau};" +
        "u <> ({vowel}){ypsilon};" +
        "\u00fa <> ({vowel}){ypsilon+};" +
        "\u00fc <> ({vowel}){ypsilon_diaeresis};" +
        "y<>{ypsilon};" +
        "\u00fd<>{ypsilon+};" +
        "{y-umlaut}<>{ypsilon_diaeresis};" +
        "ch<>{chi};" +
        "{o-macron-acute}<>{omega+};" +
        "{o-macron}<>{omega};" +
        // Remaining English letters, mapped for completeness
        "c({softener})>|s;" +
        "c>|k;" +
        "f>|ph;" +
        "h>|ch;" +
        "j>|i;" +
        "q>|k;" +
        "v>|u;" +
        "w>|u;";
        // The usual closing rule that strips apostrophes ("''>;") is
        // deliberately left disabled for this rule set.

    /**
     * Overrides ListResourceBundle.
     *
     * @return a one-row table whose single entry maps the key "Rule" to the
     *         rule text
     */
    public Object[][] getContents() {
        return new Object[][] {
            { "Rule", RULES }
        };
    }
}

View File

@ -1,279 +0,0 @@
package com.ibm.text.resources;
import java.util.ListResourceBundle;
public class TransliterationRule$Latin$Hebrew extends ListResourceBundle {

    /**
     * Rule text for the Latin/Hebrew transliterator: variable definitions,
     * then the Latin-to-Hebrew rules (">"), then the Hebrew-to-Latin rules
     * ("<"). Rule order is significant; do not reorder.
     */
    private static final String RULES =
        // ------------------------------------------------------------
        // Variables, named after the Unicode character names
        // ------------------------------------------------------------
        "POINT_SHEVA=\u05B0;" +
        "POINT_HATAF_SEGOL=\u05B1;" +
        "POINT_HATAF_PATAH=\u05B2;" +
        "POINT_HATAF_QAMATS=\u05B3;" +
        "POINT_HIRIQ=\u05B4;" +
        "POINT_TSERE=\u05B5;" +
        "POINT_SEGOL=\u05B6;" +
        "POINT_PATAH=\u05B7;" +
        "POINT_QAMATS=\u05B8;" +
        "POINT_HOLAM=\u05B9;" +
        "POINT_QUBUTS=\u05BB;" +
        "POINT_DAGESH_OR_MAPIQ=\u05BC;" +
        "POINT_METEG=\u05BD;" +
        "PUNCTUATION_MAQAF=\u05BE;" +
        "POINT_RAFE=\u05BF;" +
        "PUNCTUATION_PASEQ=\u05C0;" +
        "POINT_SHIN_DOT=\u05C1;" +
        "POINT_SIN_DOT=\u05C2;" +
        "PUNCTUATION_SOF_PASUQ=\u05C3;" +
        "ALEF=\u05D0;" +
        "BET=\u05D1;" +
        "GIMEL=\u05D2;" +
        "DALET=\u05D3;" +
        "HE=\u05D4;" +
        "VAV=\u05D5;" +
        "ZAYIN=\u05D6;" +
        "HET=\u05D7;" +
        "TET=\u05D8;" +
        "YOD=\u05D9;" +
        "FINAL_KAF=\u05DA;" +
        "KAF=\u05DB;" +
        "LAMED=\u05DC;" +
        "FINAL_MEM=\u05DD;" +
        "MEM=\u05DE;" +
        "FINAL_NUN=\u05DF;" +
        "NUN=\u05E0;" +
        "SAMEKH=\u05E1;" +
        "AYIN=\u05E2;" +
        "FINAL_PE=\u05E3;" +
        "PE=\u05E4;" +
        "FINAL_TSADI=\u05E5;" +
        "TSADI=\u05E6;" +
        "QOF=\u05E7;" +
        "RESH=\u05E8;" +
        "SHIN=\u05E9;" +
        "TAV=\u05EA;" +
        "YIDDISH_DOUBLE_VAV=\u05F0;" +
        "YIDDISH_VAV_YOD=\u05F1;" +
        "YIDDISH_DOUBLE_YOD=\u05F2;" +
        "PUNCTUATION_GERESH=\u05F3;" +
        "PUNCTUATION_GERSHAYIM=\u05F4;" +
        // Wildcard sets. (An older note here said the values could be any
        // code points unused in this file, starting at E000; the sets are
        // now spelled out as character classes.)
        "letter=[abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ];" +
        "softvowel=[eiyEIY];" +
        "vowellike=[{ALEF}{AYIN}{YOD}{VAV}];" +
        // ------------------------------------------------------------
        // Latin -> Hebrew
        // ------------------------------------------------------------
        // Placeholders only (no Latin source chosen, rules left disabled):
        // POINT_SHEVA, POINT_HATAF_SEGOL, POINT_HATAF_PATAH,
        // POINT_HATAF_QAMATS, POINT_HIRIQ, POINT_TSERE, POINT_SEGOL,
        // POINT_PATAH, POINT_QAMATS, POINT_HOLAM, POINT_QUBUTS,
        // POINT_DAGESH_OR_MAPIQ, POINT_METEG, PUNCTUATION_MAQAF,
        // POINT_RAFE, PUNCTUATION_PASEQ, POINT_SHIN_DOT, POINT_SIN_DOT,
        // PUNCTUATION_SOF_PASUQ.
        //
        // Where a Hebrew letter has a final form, the rule with a
        // following {letter} yields the regular form and the bare rule
        // yields the final form.
        "a>{ALEF};" +
        "A>{ALEF};" +
        "b>{BET};" +
        "B>{BET};" +
        "c({softvowel}>{SAMEKH};" +
        "C({softvowel}>{SAMEKH};" +
        "c({letter}>{KAF};" +
        "C({letter}>{KAF};" +
        "c>{FINAL_KAF};" +
        "C>{FINAL_KAF};" +
        "d>{DALET};" +
        "D>{DALET};" +
        "e>{AYIN};" +
        "E>{AYIN};" +
        "f({letter}>{PE};" +
        "f>{FINAL_PE};" +
        "F({letter}>{PE};" +
        "F>{FINAL_PE};" +
        "g>{GIMEL};" +
        "G>{GIMEL};" +
        "h>{HE};" +
        "H>{HE};" +
        "i>{YOD};" +
        "I>{YOD};" +
        "j>{DALET}{SHIN};" +
        "J>{DALET}{SHIN};" +
        "kH>{HET};" +
        "kh>{HET};" +
        "KH>{HET};" +
        "Kh>{HET};" +
        "k({letter}>{KAF};" +
        "K({letter}>{KAF};" +
        "k>{FINAL_KAF};" +
        "K>{FINAL_KAF};" +
        "l>{LAMED};" +
        "L>{LAMED};" +
        "m({letter}>{MEM};" +
        "m>{FINAL_MEM};" +
        "M({letter}>{MEM};" +
        "M>{FINAL_MEM};" +
        "n({letter}>{NUN};" +
        "n>{FINAL_NUN};" +
        "N({letter}>{NUN};" +
        "N>{FINAL_NUN};" +
        "o>{VAV};" +
        "O>{VAV};" +
        "p({letter}>{PE};" +
        "p>{FINAL_PE};" +
        "P({letter}>{PE};" +
        "P>{FINAL_PE};" +
        "q>{QOF};" +
        "Q>{QOF};" +
        "r>{RESH};" +
        "R>{RESH};" +
        "sH>{SHIN};" +
        "sh>{SHIN};" +
        "SH>{SHIN};" +
        "Sh>{SHIN};" +
        "s>{SAMEKH};" +
        "S>{SAMEKH};" +
        "th>{TAV};" +
        "tH>{TAV};" +
        "TH>{TAV};" +
        "Th>{TAV};" +
        "tS({letter}>{TSADI};" +
        "ts({letter}>{TSADI};" +
        "Ts({letter}>{TSADI};" +
        "TS({letter}>{TSADI};" +
        "tS>{FINAL_TSADI};" +
        "ts>{FINAL_TSADI};" +
        "Ts>{FINAL_TSADI};" +
        "TS>{FINAL_TSADI};" +
        "t>{TET};" +
        "T>{TET};" +
        "u>{VAV};" +
        "U>{VAV};" +
        "v>{VAV};" +
        "V>{VAV};" +
        "w>{VAV};" +
        "W>{VAV};" +
        "x>{KAF}{SAMEKH};" +
        "X>{KAF}{SAMEKH};" +
        "y>{YOD};" +
        "Y>{YOD};" +
        "z>{ZAYIN};" +
        "Z>{ZAYIN};" +
        // Placeholders only (rules left disabled): YIDDISH_DOUBLE_VAV,
        // YIDDISH_VAV_YOD, YIDDISH_DOUBLE_YOD, PUNCTUATION_GERESH,
        // PUNCTUATION_GERSHAYIM.
        // Drop any apostrophes that were typed as separators.
        "''>;" +
        // ------------------------------------------------------------
        // Hebrew -> Latin
        // ------------------------------------------------------------
        // The points and punctuation listed in the first placeholder
        // comment have no reverse rule either (their reverse rules are
        // likewise disabled).
        // An apostrophe is emitted after k, s or t when the next Hebrew
        // letter would otherwise make the Latin output read as one of the
        // digraphs kh, sh, th or ts.
        "a<{ALEF};" +
        "e<{AYIN};" +
        "b<{BET};" +
        "d<{DALET};" +
        "k<{FINAL_KAF};" +
        "m<{FINAL_MEM};" +
        "n<{FINAL_NUN};" +
        "p<{FINAL_PE};" +
        "ts<{FINAL_TSADI};" +
        "g<{GIMEL};" +
        "kh<{HET};" +
        "h<{HE};" +
        "k''<{KAF}({HE};" +
        "k<{KAF};" +
        "l<{LAMED};" +
        "m<{MEM};" +
        "n<{NUN};" +
        "p<{PE};" +
        "q<{QOF};" +
        "r<{RESH};" +
        "s''<{SAMEKH}({HE};" +
        "s<{SAMEKH};" +
        "sh<{SHIN};" +
        "th<{TAV};" +
        "t''<{TET}({HE};" +
        "t''<{TET}({SAMEKH};" +
        "t''<{TET}({SHIN};" +
        "t<{TET};" +
        "ts<{TSADI};" +
        "v<{VAV}({vowellike};" +
        "u<{VAV};" +
        "y<{YOD};" +
        "z<{ZAYIN};" +
        // The Yiddish ligatures, geresh and gershayim have no reverse
        // rule (disabled placeholders).
        // Drop apostrophes in the reverse direction as well.
        "<'';";

    /**
     * Overrides ListResourceBundle.
     *
     * @return a one-row table whose single entry maps the key "Rule" to the
     *         rule text
     */
    public Object[][] getContents() {
        return new Object[][] {
            { "Rule", RULES }
        };
    }
}

View File

@ -1,325 +0,0 @@
package com.ibm.text.resources;
import java.util.ListResourceBundle;
/**
 * Resource bundle holding the rule text for the Latin/Jamo transliterator
 * (Latin letters to and from conjoining Hangul jamo).
 *
 * The bundle has a single entry, keyed "Rule", whose value is the complete
 * rule string. Rule order is significant: longer Latin sequences are listed
 * ahead of their prefixes, and the context-dependent rules precede the
 * fallbacks.
 */
public class TransliterationRule$Latin$Jamo extends ListResourceBundle {
    /**
     * Overrides ListResourceBundle.
     *
     * @return a one-row table whose single entry maps the key "Rule" to the
     *         rule text
     */
    public Object[][] getContents() {
        return new Object[][] {
            { "Rule", ""
                // VARIABLES
                + "initial=[\u1100-\u115F];"
                + "medial=[\u1160-\u11A7];"
                + "final=[\u11A8-\u11F9];" // added - aliu
                + "vowel=[aeiouwyAEIOUWY\u1160-\u11A7];"
                + "consonant=[bcdfghjklmnpqrstvxzBCDFGHJKLMNPQRSTVXZ{medial}{final}];"
                + "ye=[yeYE];"
                + "ywe=[yweYWE];"
                + "yw=[ywYW];"
                + "nl=[nlNL];"
                + "gnl=[gnlGNL];"
                + "lsgb=[lsgbLSGB];"
                + "ywao=[ywaoYWAO];"
                + "bl=[blBL];"
                // RULES
                // Hangul structure is IMF or IM (I = initial, M = medial,
                // F = final). So, because of adjacent sequences, you can have
                //   IM, but not II or IF
                //   MF or MI, but not MM
                //   FI, but not FF or FM
                // For English, we just have C (consonant) or V (vowel).
                // To generate valid Hangul:
                // Vowels:
                //   We insert IEUNG between VV, and otherwise map V to M.
                //   We also insert IEUNG if there is no initial in front of
                //   the vowel.
                // Consonants:
                //   We don't break doubles.
                //   Cases like lmgg, we have to break at lm.
                //   So to guess whether a consonant is I or F we map all C's
                //   to F, except when followed by a vowel, e.g.
                //     X followed by {vowel} > CHOSEONG (initial)
                //     X                     > JONGSEONG (final)
                // special insertion for funny sequences of vowels, and for empty consonant
                + "'' < ({consonant}) \u110B;" // insert a break between any consonant and the empty consonant.
                + "({medial}) ({vowel}) <> \u110B;" // HANGUL CHOSEONG IEUNG
                // Below, insert an empty consonant in front of a vowel, if
                // there is no Initial in front.
                // Fix casing.
                // Because Korean is caseless, we just want to treat
                // everything as lowercase.
                // We could do this by always preceding this transliterator
                // with an upper-lowercase transformation, but that wouldn't
                // invert nicely.
                // We use the "revisit" syntax to just convert latin to latin
                // so that we can avoid having to restate all the Latin=>Jamo
                // rules, with the I/F handling.
                // We don't have to add titlecase, since that will be picked
                // up since the first letter is converted, then revisited. E.g.
                //   |Gg => |gg => {sang kiyeok}
                // We do have to have all caps, since otherwise we could get:
                //   |GG => |gG => {kiyeok}|G => {kiyeok}|g => {kiyeok}{kiyeok}
                + "Z > |z;"
                + "YU > |yu;"
                + "YO > |yo;"
                + "YI > |yi;"
                + "YEO > |yeo;"
                + "YE > |ye;"
                + "YAE > |yae;"
                + "YA > |ya;"
                + "Y > |y;"
                + "WI > |wi;"
                + "WEO > |weo;"
                + "WE > |we;"
                + "WAE > |wae;"
                + "WA > |wa;"
                + "W > |w;"
                + "U > |u;"
                + "T > |t;"
                + "SS > |ss;"
                + "S > |s;"
                + "P > |p;"
                + "OE > |oe;"
                + "O > |o;"
                + "NJ > |nj;"
                + "NH > |nh;"
                + "NG > |ng;"
                + "N > |n;"
                + "M > |m;"
                + "LT > |lt;"
                + "LS > |ls;"
                + "LP > |lp;"
                + "LM > |lm;"
                + "LH > |lh;"
                + "LG > |lg;"
                + "LB > |lb;"
                + "L > |l;"
                + "K > |k;"
                + "JJ > |jj;"
                + "J > |j;"
                + "I > |i;"
                + "H > |h;"
                + "GS > |gs;"
                + "GG > |gg;"
                + "G > |g;"
                + "EU > |eu;"
                + "EO > |eo;"
                + "E > |e;"
                + "DD > |dd;"
                + "D > |d;"
                + "BS > |bs;"
                + "BB > |bb;"
                + "B > |b;"
                + "AE > |ae;"
                + "A > |a;"
                // APOSTROPHE
                // As always, an apostrophe is used to separate digraphs into
                // singles. That is, if you really wanted [KAN][GGAN], instead
                // of [KANG][GAN] you would write "kan'ggan".
                // Rules for inserting ' when mapping separated digraphs back
                // from Hangul to Latin. Catch every letter that can be the
                // LAST of a digraph (or multigraph) AND first of an initial
                + "'' < (l) (\u11c0;" // hangul jongseong thieuth
                + "'' < ({lsgb}) (\u11ba;" // hangul jongseong sios
                + "'' < (l) (\u11c1;" // hangul jongseong phieuph
                + "'' < (l) (\u11b7;" // hangul jongseong mieum
                + "'' < (n) (\u11bd;" // hangul jongseong cieuc
                + "'' < ({nl}) (\u11c2;" // hangul jongseong hieuh
                + "'' < ({gnl}) (\u11a9;" // hangul jongseong ssangkiyeok
                + "'' < ({bl}) (\u11b8;" // hangul jongseong pieup
                + "'' < (d) (\u11ae;" // hangul jongseong tikeut
                + "'' < ({ye}) (\u116e;" // hangul jungseong u
                + "'' < ({ywe}) (\u1169;" // hangul jungseong o
                + "'' < ({yw}) (\u1175;" // hangul jungseong i
                + "'' < ({ywao}) (\u1166;" // hangul jungseong e
                + "'' < ({yw}) (\u1161;" // hangul jungseong a
                + "'' < (l) (\u1110;" // hangul choseong thieuth
                + "'' < ({lsgb}) (\u110a;" // hangul choseong ssangsios
                + "'' < ({lsgb}) (\u1109;" // hangul choseong sios
                + "'' < (l) (\u1111;" // hangul choseong phieuph
                + "'' < (l) (\u1106;" // hangul choseong mieum
                + "'' < (n) (\u110c;" // hangul choseong cieuc
                + "'' < (n) (\u110d;" // hangul choseong ssangcieuc
                + "'' < ({nl}) (\u1112;" // hangul choseong hieuh
                + "'' < ({gnl}) (\u1101;" // hangul choseong ssangkiyeok
                + "'' < ({gnl}) (\u1100;" // hangul choseong kiyeok
                + "'' < (d) (\u1103;" // hangul choseong tikeut
                + "'' < (d) (\u1104;" // hangul choseong ssangtikeut
                + "'' < ({bl}) (\u1107;" // hangul choseong pieup
                + "'' < ({bl}) (\u1108;" // hangul choseong ssangpieup
                // INITIALS
                + "t ({vowel}) <> \u1110;" // hangul choseong thieuth
                + "ss ({vowel}) <> \u110a;" // hangul choseong ssangsios
                + "s ({vowel}) <> \u1109;" // hangul choseong sios
                + "p ({vowel}) <> \u1111;" // hangul choseong phieuph
                + "n ({vowel}) <> \u1102;" // hangul choseong nieun
                + "m ({vowel}) <> \u1106;" // hangul choseong mieum
                + "l ({vowel}) <> \u1105;" // hangul choseong rieul
                + "k ({vowel}) <> \u110f;" // hangul choseong khieukh
                + "j ({vowel}) <> \u110c;" // hangul choseong cieuc
                + "h ({vowel}) <> \u1112;" // hangul choseong hieuh
                + "gg ({vowel}) <> \u1101;" // hangul choseong ssangkiyeok
                + "g ({vowel}) <> \u1100;" // hangul choseong kiyeok
                + "d ({vowel}) <> \u1103;" // hangul choseong tikeut
                + "c ({vowel}) <> \u110e;" // hangul choseong chieuch
                + "b ({vowel}) <> \u1107;" // hangul choseong pieup
                + "bb ({vowel}) <> \u1108;" // hangul choseong ssangpieup
                + "jj ({vowel}) <> \u110d;" // hangul choseong ssangcieuc
                + "dd ({vowel}) <> \u1104;" // hangul choseong ssangtikeut
                // If we have gotten through to these rules, and we start with
                // a consonant, then the remaining mappings would be to F,
                // because must have CC (or C<non-letter>), not CV.
                // If we have F before us, then
                // we would end up with FF, which is wrong. The simplest fix is
                // to still make it an initial, but also insert an "u",
                // so we end up with F, I, u, and then continue with the C
                // special, only initial: these doubles have no final form, so
                // they always become <initial><u>. The jj and dd rules used to
                // emit ssangpieup (U+1108) followed by a second initial, which
                // is an invalid II sequence and contradicted their own
                // comments; each now emits its own initial plus U+116E, like bb.
                + "bb > \u1108\u116e;" // hangul choseong ssangpieup
                + "jj > \u110d\u116e;" // hangul choseong ssangcieuc
                + "dd > \u1104\u116e;" // hangul choseong ssangtikeut
                + "({final}) t > \u1110\u116e;" // hangul choseong thieuth
                + "({final}) ss > \u110a\u116e;" // hangul choseong ssangsios
                + "({final}) s > \u1109\u116e;" // hangul choseong sios
                + "({final}) p > \u1111\u116e;" // hangul choseong phieuph
                + "({final}) n > \u1102\u116e;" // hangul choseong nieun
                + "({final}) m > \u1106\u116e;" // hangul choseong mieum
                + "({final}) l > \u1105\u116e;" // hangul choseong rieul
                + "({final}) k > \u110f\u116e;" // hangul choseong khieukh
                + "({final}) j > \u110c\u116e;" // hangul choseong cieuc
                + "({final}) h > \u1112\u116e;" // hangul choseong hieuh
                + "({final}) gg > \u1101\u116e;" // hangul choseong ssangkiyeok
                + "({final}) g > \u1100\u116e;" // hangul choseong kiyeok
                + "({final}) d > \u1103\u116e;" // hangul choseong tikeut
                + "({final}) c > \u110e\u116e;" // hangul choseong chieuch
                + "({final}) b > \u1107\u116e;" // hangul choseong pieup
                // MEDIALS after INITIALS
                + "({initial}) yu <> \u1172;" // hangul jungseong yu
                + "({initial}) yo <> \u116d;" // hangul jungseong yo
                + "({initial}) yi <> \u1174;" // hangul jungseong yi
                + "({initial}) yeo <> \u1167;" // hangul jungseong yeo
                + "({initial}) ye <> \u1168;" // hangul jungseong ye
                + "({initial}) yae <> \u1164;" // hangul jungseong yae
                + "({initial}) ya <> \u1163;" // hangul jungseong ya
                + "({initial}) wi <> \u1171;" // hangul jungseong wi
                + "({initial}) weo <> \u116f;" // hangul jungseong weo
                + "({initial}) we <> \u1170;" // hangul jungseong we
                + "({initial}) wae <> \u116b;" // hangul jungseong wae
                + "({initial}) wa <> \u116a;" // hangul jungseong wa
                + "({initial}) u <> \u116e;" // hangul jungseong u
                + "({initial}) oe <> \u116c;" // hangul jungseong oe
                + "({initial}) o <> \u1169;" // hangul jungseong o
                + "({initial}) i <> \u1175;" // hangul jungseong i
                + "({initial}) eu <> \u1173;" // hangul jungseong eu
                + "({initial}) eo <> \u1165;" // hangul jungseong eo
                + "({initial}) e <> \u1166;" // hangul jungseong e
                + "({initial}) ae <> \u1162;" // hangul jungseong ae
                + "({initial}) a <> \u1161;" // hangul jungseong a
                // MEDIALS (vowels) not after INITIALs: prefix the empty
                // initial IEUNG (U+110B)
                + "yu > \u110B\u1172;" // hangul jungseong yu
                + "yo > \u110B\u116d;" // hangul jungseong yo
                + "yi > \u110B\u1174;" // hangul jungseong yi
                + "yeo > \u110B\u1167;" // hangul jungseong yeo
                + "ye > \u110B\u1168;" // hangul jungseong ye
                + "yae > \u110B\u1164;" // hangul jungseong yae
                + "ya > \u110B\u1163;" // hangul jungseong ya
                + "wi > \u110B\u1171;" // hangul jungseong wi
                + "weo > \u110B\u116f;" // hangul jungseong weo
                + "we > \u110B\u1170;" // hangul jungseong we
                + "wae > \u110B\u116b;" // hangul jungseong wae
                + "wa > \u110B\u116a;" // hangul jungseong wa
                + "u > \u110B\u116e;" // hangul jungseong u
                + "oe > \u110B\u116c;" // hangul jungseong oe
                + "o > \u110B\u1169;" // hangul jungseong o
                + "i > \u110B\u1175;" // hangul jungseong i
                + "eu > \u110B\u1173;" // hangul jungseong eu
                + "eo > \u110B\u1165;" // hangul jungseong eo
                + "e > \u110B\u1166;" // hangul jungseong e
                + "ae > \u110B\u1162;" // hangul jungseong ae
                + "a > \u110B\u1161;" // hangul jungseong a
                // FINALS
                + "t <> \u11c0;" // hangul jongseong thieuth
                + "ss <> \u11bb;" // hangul jongseong ssangsios
                + "s <> \u11ba;" // hangul jongseong sios
                + "p <> \u11c1;" // hangul jongseong phieuph
                + "nj <> \u11ac;" // hangul jongseong nieun-cieuc
                + "nh <> \u11ad;" // hangul jongseong nieun-hieuh
                + "ng <> \u11bc;" // hangul jongseong ieung
                + "n <> \u11ab;" // hangul jongseong nieun
                + "m <> \u11b7;" // hangul jongseong mieum
                + "lt <> \u11b4;" // hangul jongseong rieul-thieuth
                + "ls <> \u11b3;" // hangul jongseong rieul-sios
                + "lp <> \u11b5;" // hangul jongseong rieul-phieuph
                + "lm <> \u11b1;" // hangul jongseong rieul-mieum
                + "lh <> \u11b6;" // hangul jongseong rieul-hieuh
                + "lg <> \u11b0;" // hangul jongseong rieul-kiyeok
                + "lb <> \u11b2;" // hangul jongseong rieul-pieup
                + "l <> \u11af;" // hangul jongseong rieul
                + "k <> \u11bf;" // hangul jongseong khieukh
                + "j <> \u11bd;" // hangul jongseong cieuc
                + "h <> \u11c2;" // hangul jongseong hieuh
                + "gs <> \u11aa;" // hangul jongseong kiyeok-sios
                + "gg <> \u11a9;" // hangul jongseong ssangkiyeok
                + "g <> \u11a8;" // hangul jongseong kiyeok
                + "d <> \u11ae;" // hangul jongseong tikeut
                + "c <> \u11be;" // hangul jongseong chieuch
                + "bs <> \u11b9;" // hangul jongseong pieup-sios
                + "b <> \u11b8;" // hangul jongseong pieup
                // extra English letters
                // {moved to bottom - aliu}
                + "z > |s;"
                //{ + "Z > |s;" } masked
                + "x > |ks;"
                + "X > |ks;"
                + "v > |b;"
                + "V > |b;"
                + "r > |l;"
                + "R > |l;"
                + "q > |k;"
                + "Q > |k;"
                + "f > |p;"
                + "F > |p;"
                //{ + "c > |k;" } masked
                // NOTE(review): uppercase C is redirected to k here, while
                // lowercase c is consumed by the chieuch rules above (its
                // redirect is masked) - confirm this asymmetry is intended.
                + "C > |k;"
                + "y > \u1172;" // hangul jungseong yu
                + "w > \u1171;" // hangul jungseong wi
                // ====================================
                // Normal final rule: remove '
                // ====================================
                + "''>;"
            }
        };
    }
}

View File

@ -1,84 +0,0 @@
package com.ibm.text.resources;
import java.util.ListResourceBundle;
public class TransliterationRule$StraightQuotes$CurlyQuotes extends ListResourceBundle {

    /**
     * Rule text for the StraightQuotes/CurlyQuotes transliterator:
     * variable definitions, the forward rules (">") and the reverse
     * rules ("<"). Rule order is significant; do not reorder.
     * Rewritten using character codes [LIU].
     */
    private static final String RULES =
        // ---- character classes and named characters ----
        "white=[[:Zs:][:Zl:][:Zp:]];" +
        "black=[^{white}];" +
        "open=[:Ps:];" +
        "dquote=\";" +
        "lAng=\u3008;" +
        "ldAng=\u300A;" +
        "lBrk='[';" +
        "lBrc='{';" +
        "lquote=\u2018;" +
        "rquote=\u2019;" +
        "ldquote=\u201C;" +
        "rdquote=\u201D;" +
        "ldguill=\u00AB;" +
        "rdguill=\u00BB;" +
        "lguill=\u2039;" +
        "rguill=\u203A;" +
        "mdash=\u2014;" +
        // ---- conversions from input ----
        // a single curly quote followed by another quote joins into a
        // double curly quote
        "{lquote}''>{ldquote};" +
        "{lquote}{lquote}>{ldquote};" +
        "{rquote}''>{rdquote};" +
        "{rquote}{rquote}>{rdquote};" +
        // smart single quotes: opening after white space or an opening
        // bracket, closing after anything else, opening when there is no
        // preceding context
        "{white})''>{lquote};" +
        "{open})''>{lquote};" +
        "{black})''>{rquote};" +
        "''>{lquote};" +
        // smart double quotes, same scheme
        "{white}){dquote}>{ldquote};" +
        "{open}){dquote}>{ldquote};" +
        "{black}){dquote}>{rdquote};" +
        "{dquote}>{ldquote};" +
        // two single guillemets (or two angle brackets) join into a
        // double guillemet
        "{rguill}{rguill}>{rdguill};" +
        "'>>'>{rdguill};" +
        "{lguill}{lguill}>{ldguill};" +
        "'<<'>{ldguill};" +
        // prevent double spaces
        "\\ )\\ >;" +
        // two hyphens join into an em dash
        "-->{mdash};" +
        // ---- conversions back to input ----
        // curly quotes back to straight quotes
        "''<{lquote};" +
        "''<{rquote};" +
        "{dquote}<{ldquote};" +
        "{dquote}<{rdquote};" +
        // em dash back to two hyphens
        "--<{mdash};";

    /**
     * Overrides ListResourceBundle.
     *
     * @return a one-row table whose single entry maps the key "Rule" to the
     *         rule text
     */
    public Object[][] getContents() {
        return new Object[][] {
            { "Rule", RULES }
        };
    }
}