Add Mark's Jamo rules, with compile errors fixed

X-SVN-Rev: 517
This commit is contained in:
Alan Liu 2000-01-11 02:23:17 +00:00
parent 4956a84699
commit 471a29ebeb

View File

@ -0,0 +1,277 @@
package com.ibm.text.resources;
import java.util.ListResourceBundle;
public class TransliterationRule$Latin$Jamo extends ListResourceBundle {
/**
* Overrides ListResourceBundle
*/
public Object[][] getContents() {
return new Object[][] {
{ "Rule", ""
// VARIABLES
+ "medial=[\u1160-\u11A7];"
+ "final=[\u11A8-\u11F9];" // added - aliu
+ "vowel=[aeiouwyAEIOUWY\u1160-\u11A7];"
+ "ye=[yeYE];"
+ "ywe=[yweYWE];"
+ "yw=[ywYW];"
+ "nl=[nlNL];"
+ "gnl=[gnlGNL];"
+ "lsgb=[lsgbLSGB];"
+ "ywao=[ywaoYWAO];"
+ "bl=[blBL];"
// RULES
// Hangul structure is IMF or IM
// So you can have, because of adjacent sequences
// IM, but not II or IF
// MF or MI, but not MM
// FI, but not FF or FM
// For English, we just have C or V.
// To generate valid Hangul:
// Vowels:
// We insert IEUNG between VV, and otherwise map V to M
// We also insert IEUNG if there is no
// Consonants:
// We don't break doubles
// Cases like lmgg, we have to break at lm
// So to guess whether a consonant is I or F
// we map all C's to F, except when followed by a vowel, e.g.
// X[{vowel}>CHOSEONG (initial)
// X>JONGSEONG (final)
// special insertion for funny sequences of vowels
+ "({medial}) ({vowel}) > \u110B;" // HANGUL CHOSEONG IEUNG
// Fix casing.
// Because Korean is caseless, we just want to treat everything as
// lowercase.
// we could do this by always preceeding this transliterator with
// an upper-lowercase transformation, but that wouldn't invert nicely.
// We use the "revisit" syntax to just convert latin to latin
// so that we can avoid
// having to restate all the Latin=>Jamo rules, with the I/F handling.
// We don't have to add titlecase, since that will be picked up
// since the first letter is converted, then revisited. E.g.
// |Gg => |gg => {sang kiyeok}
// We do have to have all caps, since otherwise we could get:
// |GG => |gG => {kiyeok}|G => {kiyeok}|g => {kiyeok}{kiyeok}
+ "Z > |z;"
+ "YU > |yu;"
+ "YO > |yo;"
+ "YI > |yi;"
+ "YEO > |yeo;"
+ "YE > |ye;"
+ "YAE > |yae;"
+ "YA > |ya;"
+ "Y > |y;"
+ "WI > |wi;"
+ "WEO > |weo;"
+ "WE > |we;"
+ "WAE > |wae;"
+ "WA > |wa;"
+ "W > |w;"
+ "U > |u;"
+ "T > |t;"
+ "SS > |ss;"
+ "S > |s;"
+ "P > |p;"
+ "OE > |oe;"
+ "O > |o;"
+ "NJ > |nj;"
+ "NH > |nh;"
+ "NG > |ng;"
+ "N > |n;"
+ "M > |m;"
+ "LT > |lt;"
+ "LS > |ls;"
+ "LP > |lp;"
+ "LM > |lm;"
+ "LH > |lh;"
+ "LG > |lg;"
+ "LB > |lb;"
+ "L > |l;"
+ "K > |k;"
+ "JJ > |jj;"
+ "J > |j;"
+ "I > |i;"
+ "H > |h;"
+ "GS > |gs;"
+ "GG > |gg;"
+ "G > |g;"
+ "EU > |eu;"
+ "EO > |eo;"
+ "E > |e;"
+ "DD > |dd;"
+ "D > |d;"
+ "BS > |bs;"
+ "BB > |bb;"
+ "B > |b;"
+ "AE > |ae;"
+ "A > |a;"
// APOSTROPHE
// As always, an apostrophe is used to separate digraphs into
// singles. That is, if you really wanted [KAN][GGAN], instead
// of [KANG][GAN] you would write "kan'ggan".
// Rules for inserting ' when mapping separated digraphs back
// from Hangul to Latin. Catch every letter that can be the
// LAST of a digraph (or multigraph)
+ "''u < ({ye}) \u116e;" // hangul jungseong u
+ "''t < (l) \u11c0;" // hangul jongseong thieuth
+ "''t < (l) \u1110;" // hangul choseong thieuth
+ "''s < ({lsgb}) \u11ba;" // hangul jongseong sios
+ "''s < ({lsgb}) \u1109;" // hangul choseong sios
+ "''p < (l) \u11c1;" // hangul jongseong phieuph
+ "''p < (l) \u1111;" // hangul choseong phieuph
+ "''o < ({ywe}) \u1169;" // hangul jungseong o
+ "''m < (l) \u11b7;" // hangul jongseong mieum
+ "''m < (l) \u1106;" // hangul choseong mieum
+ "''j < (n) \u11bd;" // hangul jongseong cieuc
+ "''j < (n) \u110c;" // hangul choseong cieuc
+ "''i < ({yw}) \u1175;" // hangul jungseong i
+ "''h < ({nl}) \u11c2;" // hangul jongseong hieuh
+ "''h < ({nl}) \u1112;" // hangul choseong hieuh
+ "''g < ({gnl}) \u11a9;" // hangul jongseong ssangkiyeok
+ "''g < ({gnl}) \u1100;" // hangul choseong kiyeok
+ "''e < ({ywao}) \u1166;" // hangul jungseong e
+ "''d < (d) \u11ae;" // hangul jongseong tikeut
+ "''d < (d) \u1103;" // hangul choseong tikeut
+ "''b < ({bl}) \u11b8;" // hangul jongseong pieup
+ "''b < ({bl}) \u1107;" // hangul choseong pieup
+ "''a < ({yw}) \u1161;" // hangul jungseong a
// INITIALS
+ "t ({vowel}) <> \u1110;" // hangul choseong thieuth
+ "ss ({vowel}) <> \u110a;" // hangul choseong ssangsios
+ "s ({vowel}) <> \u1109;" // hangul choseong sios
+ "p ({vowel}) <> \u1111;" // hangul choseong phieuph
+ "n ({vowel}) <> \u1102;" // hangul choseong nieun
+ "m ({vowel}) <> \u1106;" // hangul choseong mieum
+ "l ({vowel}) <> \u1105;" // hangul choseong rieul
+ "k ({vowel}) <> \u110f;" // hangul choseong khieukh
+ "j ({vowel}) <> \u110c;" // hangul choseong cieuc
+ "h ({vowel}) <> \u1112;" // hangul choseong hieuh
+ "gg ({vowel}) <> \u1101;" // hangul choseong ssangkiyeok
+ "g ({vowel}) <> \u1100;" // hangul choseong kiyeok
+ "d ({vowel}) <> \u1103;" // hangul choseong tikeut
+ "c ({vowel}) <> \u110e;" // hangul choseong chieuch
+ "bb ({vowel}) <> \u1108;" // hangul choseong ssangpieup
+ "b ({vowel}) <> \u1107;" // hangul choseong pieup
// If we have gotten through to these rules, and we start with
// a consonant, then the remaining mappings would be to F,
// because must have CC (or C<non-letter>), not CV.
// If we have F before us, then
// we would end up with FF, which is wrong. The simplest fix is
// to still make it an initial, but also insert an "u",
// so we end up with F, I, u, and then continue with the C
+ "({final}) t > \u1110\u116e;" // hangul choseong thieuth
+ "({final}) ss > \u110a\u116e;" // hangul choseong ssangsios
+ "({final}) s > \u1109\u116e;" // hangul choseong sios
+ "({final}) p > \u1111\u116e;" // hangul choseong phieuph
+ "({final}) n > \u1102\u116e;" // hangul choseong nieun
+ "({final}) m > \u1106\u116e;" // hangul choseong mieum
+ "({final}) l > \u1105\u116e;" // hangul choseong rieul
+ "({final}) k > \u110f\u116e;" // hangul choseong khieukh
+ "({final}) j > \u110c\u116e;" // hangul choseong cieuc
+ "({final}) h > \u1112\u116e;" // hangul choseong hieuh
+ "({final}) gg > \u1101\u116e;" // hangul choseong ssangkiyeok
+ "({final}) g > \u1100\u116e;" // hangul choseong kiyeok
+ "({final}) d > \u1103\u116e;" // hangul choseong tikeut
+ "({final}) c > \u110e\u116e;" // hangul choseong chieuch
+ "({final}) bb > \u1108\u116e;" // hangul choseong ssangpieup
+ "({final}) b > \u1107\u116e;" // hangul choseong pieup
// MEDIALS (vowels) and FINALS
+ "yu <> \u1172;" // hangul jungseong yu
+ "yo <> \u116d;" // hangul jungseong yo
+ "yi <> \u1174;" // hangul jungseong yi
+ "yeo <> \u1167;" // hangul jungseong yeo
+ "ye <> \u1168;" // hangul jungseong ye
+ "yae <> \u1164;" // hangul jungseong yae
+ "ya <> \u1163;" // hangul jungseong ya
+ "wi <> \u1171;" // hangul jungseong wi
+ "weo <> \u116f;" // hangul jungseong weo
+ "we <> \u1170;" // hangul jungseong we
+ "wae <> \u116b;" // hangul jungseong wae
+ "wa <> \u116a;" // hangul jungseong wa
+ "u <> \u116e;" // hangul jungseong u
+ "t <> \u11c0;" // hangul jongseong thieuth
+ "ss <> \u11bb;" // hangul jongseong ssangsios
+ "s <> \u11ba;" // hangul jongseong sios
+ "p <> \u11c1;" // hangul jongseong phieuph
+ "oe <> \u116c;" // hangul jungseong oe
+ "o <> \u1169;" // hangul jungseong o
+ "nj <> \u11ac;" // hangul jongseong nieun-cieuc
+ "nh <> \u11ad;" // hangul jongseong nieun-hieuh
+ "ng <> \u11bc;" // hangul jongseong ieung
+ "n <> \u11ab;" // hangul jongseong nieun
+ "m <> \u11b7;" // hangul jongseong mieum
+ "lt <> \u11b4;" // hangul jongseong rieul-thieuth
+ "ls <> \u11b3;" // hangul jongseong rieul-sios
+ "lp <> \u11b5;" // hangul jongseong rieul-phieuph
+ "lm <> \u11b1;" // hangul jongseong rieul-mieum
+ "lh <> \u11b6;" // hangul jongseong rieul-hieuh
+ "lg <> \u11b0;" // hangul jongseong rieul-kiyeok
+ "lb <> \u11b2;" // hangul jongseong rieul-pieup
+ "l <> \u11af;" // hangul jongseong rieul
+ "k <> \u11bf;" // hangul jongseong khieukh
+ "jj <> \u110d;" // hangul choseong ssangcieuc
+ "j <> \u11bd;" // hangul jongseong cieuc
+ "i <> \u1175;" // hangul jungseong i
+ "h <> \u11c2;" // hangul jongseong hieuh
+ "gs <> \u11aa;" // hangul jongseong kiyeok-sios
+ "gg <> \u11a9;" // hangul jongseong ssangkiyeok
+ "g <> \u11a8;" // hangul jongseong kiyeok
+ "eu <> \u1173;" // hangul jungseong eu
+ "eo <> \u1165;" // hangul jungseong eo
+ "e <> \u1166;" // hangul jungseong e
+ "dd <> \u1104;" // hangul choseong ssangtikeut
+ "d <> \u11ae;" // hangul jongseong tikeut
+ "c <> \u11be;" // hangul jongseong chieuch
+ "bs <> \u11b9;" // hangul jongseong pieup-sios
+ "b <> \u11b8;" // hangul jongseong pieup
+ "ae <> \u1162;" // hangul jungseong ae
+ "a <> \u1161;" // hangul jungseong a
// extra English letters
// {moved to bottom - aliu}
+ "z > |s;"
//{ + "Z > |s;" } masked
+ "x > |ks;"
+ "X > |ks;"
+ "v > |b;"
+ "V > |b;"
+ "r > |l;"
+ "R > |l;"
+ "q > |k;"
+ "Q > |k;"
+ "f > |p;"
+ "F > |p;"
//{ + "c > |k;" } masked
+ "C > |k;"
// ====================================
// Normal final rule: remove '
// ====================================
+ "''>;"
}
};
}
}