Add Mark's Jamo rules, with compile errors fixed
X-SVN-Rev: 517
This commit is contained in:
parent
4956a84699
commit
471a29ebeb
277
icu4j/src/com/ibm/text/resources/TransliterationRule$Latin$Jamo.java
Executable file
277
icu4j/src/com/ibm/text/resources/TransliterationRule$Latin$Jamo.java
Executable file
@ -0,0 +1,277 @@
|
||||
package com.ibm.text.resources;
|
||||
import java.util.ListResourceBundle;
|
||||
|
||||
public class TransliterationRule$Latin$Jamo extends ListResourceBundle {
|
||||
/**
|
||||
* Overrides ListResourceBundle
|
||||
*/
|
||||
public Object[][] getContents() {
|
||||
return new Object[][] {
|
||||
{ "Rule", ""
|
||||
|
||||
// VARIABLES
|
||||
|
||||
+ "medial=[\u1160-\u11A7];"
|
||||
+ "final=[\u11A8-\u11F9];" // added - aliu
|
||||
+ "vowel=[aeiouwyAEIOUWY\u1160-\u11A7];"
|
||||
+ "ye=[yeYE];"
|
||||
+ "ywe=[yweYWE];"
|
||||
+ "yw=[ywYW];"
|
||||
+ "nl=[nlNL];"
|
||||
+ "gnl=[gnlGNL];"
|
||||
+ "lsgb=[lsgbLSGB];"
|
||||
+ "ywao=[ywaoYWAO];"
|
||||
+ "bl=[blBL];"
|
||||
|
||||
// RULES
|
||||
|
||||
// Hangul structure is IMF or IM
|
||||
// So you can have, because of adjacent sequences
|
||||
// IM, but not II or IF
|
||||
// MF or MI, but not MM
|
||||
// FI, but not FF or FM
|
||||
|
||||
// For English, we just have C or V.
|
||||
// To generate valid Hangul:
|
||||
// Vowels:
|
||||
// We insert IEUNG between VV, and otherwise map V to M
|
||||
// We also insert IEUNG if there is no
|
||||
// Consonants:
|
||||
// We don't break doubles
|
||||
// Cases like lmgg, we have to break at lm
|
||||
// So to guess whether a consonant is I or F
|
||||
// we map all C's to F, except when followed by a vowel, e.g.
|
||||
// X[{vowel}>CHOSEONG (initial)
|
||||
// X>JONGSEONG (final)
|
||||
|
||||
// special insertion for funny sequences of vowels
|
||||
|
||||
+ "({medial}) ({vowel}) > \u110B;" // HANGUL CHOSEONG IEUNG
|
||||
|
||||
// Fix casing.
|
||||
// Because Korean is caseless, we just want to treat everything as
|
||||
// lowercase.
|
||||
// we could do this by always preceeding this transliterator with
|
||||
// an upper-lowercase transformation, but that wouldn't invert nicely.
|
||||
// We use the "revisit" syntax to just convert latin to latin
|
||||
// so that we can avoid
|
||||
// having to restate all the Latin=>Jamo rules, with the I/F handling.
|
||||
|
||||
// We don't have to add titlecase, since that will be picked up
|
||||
// since the first letter is converted, then revisited. E.g.
|
||||
// |Gg => |gg => {sang kiyeok}
|
||||
// We do have to have all caps, since otherwise we could get:
|
||||
// |GG => |gG => {kiyeok}|G => {kiyeok}|g => {kiyeok}{kiyeok}
|
||||
|
||||
+ "Z > |z;"
|
||||
+ "YU > |yu;"
|
||||
+ "YO > |yo;"
|
||||
+ "YI > |yi;"
|
||||
+ "YEO > |yeo;"
|
||||
+ "YE > |ye;"
|
||||
+ "YAE > |yae;"
|
||||
+ "YA > |ya;"
|
||||
+ "Y > |y;"
|
||||
+ "WI > |wi;"
|
||||
+ "WEO > |weo;"
|
||||
+ "WE > |we;"
|
||||
+ "WAE > |wae;"
|
||||
+ "WA > |wa;"
|
||||
+ "W > |w;"
|
||||
+ "U > |u;"
|
||||
+ "T > |t;"
|
||||
+ "SS > |ss;"
|
||||
+ "S > |s;"
|
||||
+ "P > |p;"
|
||||
+ "OE > |oe;"
|
||||
+ "O > |o;"
|
||||
+ "NJ > |nj;"
|
||||
+ "NH > |nh;"
|
||||
+ "NG > |ng;"
|
||||
+ "N > |n;"
|
||||
+ "M > |m;"
|
||||
+ "LT > |lt;"
|
||||
+ "LS > |ls;"
|
||||
+ "LP > |lp;"
|
||||
+ "LM > |lm;"
|
||||
+ "LH > |lh;"
|
||||
+ "LG > |lg;"
|
||||
+ "LB > |lb;"
|
||||
+ "L > |l;"
|
||||
+ "K > |k;"
|
||||
+ "JJ > |jj;"
|
||||
+ "J > |j;"
|
||||
+ "I > |i;"
|
||||
+ "H > |h;"
|
||||
+ "GS > |gs;"
|
||||
+ "GG > |gg;"
|
||||
+ "G > |g;"
|
||||
+ "EU > |eu;"
|
||||
+ "EO > |eo;"
|
||||
+ "E > |e;"
|
||||
+ "DD > |dd;"
|
||||
+ "D > |d;"
|
||||
+ "BS > |bs;"
|
||||
+ "BB > |bb;"
|
||||
+ "B > |b;"
|
||||
+ "AE > |ae;"
|
||||
+ "A > |a;"
|
||||
|
||||
// APOSTROPHE
|
||||
|
||||
// As always, an apostrophe is used to separate digraphs into
|
||||
// singles. That is, if you really wanted [KAN][GGAN], instead
|
||||
// of [KANG][GAN] you would write "kan'ggan".
|
||||
|
||||
// Rules for inserting ' when mapping separated digraphs back
|
||||
// from Hangul to Latin. Catch every letter that can be the
|
||||
// LAST of a digraph (or multigraph)
|
||||
|
||||
+ "''u < ({ye}) \u116e;" // hangul jungseong u
|
||||
+ "''t < (l) \u11c0;" // hangul jongseong thieuth
|
||||
+ "''t < (l) \u1110;" // hangul choseong thieuth
|
||||
+ "''s < ({lsgb}) \u11ba;" // hangul jongseong sios
|
||||
+ "''s < ({lsgb}) \u1109;" // hangul choseong sios
|
||||
+ "''p < (l) \u11c1;" // hangul jongseong phieuph
|
||||
+ "''p < (l) \u1111;" // hangul choseong phieuph
|
||||
+ "''o < ({ywe}) \u1169;" // hangul jungseong o
|
||||
+ "''m < (l) \u11b7;" // hangul jongseong mieum
|
||||
+ "''m < (l) \u1106;" // hangul choseong mieum
|
||||
+ "''j < (n) \u11bd;" // hangul jongseong cieuc
|
||||
+ "''j < (n) \u110c;" // hangul choseong cieuc
|
||||
+ "''i < ({yw}) \u1175;" // hangul jungseong i
|
||||
+ "''h < ({nl}) \u11c2;" // hangul jongseong hieuh
|
||||
+ "''h < ({nl}) \u1112;" // hangul choseong hieuh
|
||||
+ "''g < ({gnl}) \u11a9;" // hangul jongseong ssangkiyeok
|
||||
+ "''g < ({gnl}) \u1100;" // hangul choseong kiyeok
|
||||
+ "''e < ({ywao}) \u1166;" // hangul jungseong e
|
||||
+ "''d < (d) \u11ae;" // hangul jongseong tikeut
|
||||
+ "''d < (d) \u1103;" // hangul choseong tikeut
|
||||
+ "''b < ({bl}) \u11b8;" // hangul jongseong pieup
|
||||
+ "''b < ({bl}) \u1107;" // hangul choseong pieup
|
||||
+ "''a < ({yw}) \u1161;" // hangul jungseong a
|
||||
|
||||
// INITIALS
|
||||
|
||||
+ "t ({vowel}) <> \u1110;" // hangul choseong thieuth
|
||||
+ "ss ({vowel}) <> \u110a;" // hangul choseong ssangsios
|
||||
+ "s ({vowel}) <> \u1109;" // hangul choseong sios
|
||||
+ "p ({vowel}) <> \u1111;" // hangul choseong phieuph
|
||||
+ "n ({vowel}) <> \u1102;" // hangul choseong nieun
|
||||
+ "m ({vowel}) <> \u1106;" // hangul choseong mieum
|
||||
+ "l ({vowel}) <> \u1105;" // hangul choseong rieul
|
||||
+ "k ({vowel}) <> \u110f;" // hangul choseong khieukh
|
||||
+ "j ({vowel}) <> \u110c;" // hangul choseong cieuc
|
||||
+ "h ({vowel}) <> \u1112;" // hangul choseong hieuh
|
||||
+ "gg ({vowel}) <> \u1101;" // hangul choseong ssangkiyeok
|
||||
+ "g ({vowel}) <> \u1100;" // hangul choseong kiyeok
|
||||
+ "d ({vowel}) <> \u1103;" // hangul choseong tikeut
|
||||
+ "c ({vowel}) <> \u110e;" // hangul choseong chieuch
|
||||
+ "bb ({vowel}) <> \u1108;" // hangul choseong ssangpieup
|
||||
+ "b ({vowel}) <> \u1107;" // hangul choseong pieup
|
||||
|
||||
// If we have gotten through to these rules, and we start with
|
||||
// a consonant, then the remaining mappings would be to F,
|
||||
// because must have CC (or C<non-letter>), not CV.
|
||||
// If we have F before us, then
|
||||
// we would end up with FF, which is wrong. The simplest fix is
|
||||
// to still make it an initial, but also insert an "u",
|
||||
// so we end up with F, I, u, and then continue with the C
|
||||
|
||||
+ "({final}) t > \u1110\u116e;" // hangul choseong thieuth
|
||||
+ "({final}) ss > \u110a\u116e;" // hangul choseong ssangsios
|
||||
+ "({final}) s > \u1109\u116e;" // hangul choseong sios
|
||||
+ "({final}) p > \u1111\u116e;" // hangul choseong phieuph
|
||||
+ "({final}) n > \u1102\u116e;" // hangul choseong nieun
|
||||
+ "({final}) m > \u1106\u116e;" // hangul choseong mieum
|
||||
+ "({final}) l > \u1105\u116e;" // hangul choseong rieul
|
||||
+ "({final}) k > \u110f\u116e;" // hangul choseong khieukh
|
||||
+ "({final}) j > \u110c\u116e;" // hangul choseong cieuc
|
||||
+ "({final}) h > \u1112\u116e;" // hangul choseong hieuh
|
||||
+ "({final}) gg > \u1101\u116e;" // hangul choseong ssangkiyeok
|
||||
+ "({final}) g > \u1100\u116e;" // hangul choseong kiyeok
|
||||
+ "({final}) d > \u1103\u116e;" // hangul choseong tikeut
|
||||
+ "({final}) c > \u110e\u116e;" // hangul choseong chieuch
|
||||
+ "({final}) bb > \u1108\u116e;" // hangul choseong ssangpieup
|
||||
+ "({final}) b > \u1107\u116e;" // hangul choseong pieup
|
||||
|
||||
// MEDIALS (vowels) and FINALS
|
||||
|
||||
+ "yu <> \u1172;" // hangul jungseong yu
|
||||
+ "yo <> \u116d;" // hangul jungseong yo
|
||||
+ "yi <> \u1174;" // hangul jungseong yi
|
||||
+ "yeo <> \u1167;" // hangul jungseong yeo
|
||||
+ "ye <> \u1168;" // hangul jungseong ye
|
||||
+ "yae <> \u1164;" // hangul jungseong yae
|
||||
+ "ya <> \u1163;" // hangul jungseong ya
|
||||
+ "wi <> \u1171;" // hangul jungseong wi
|
||||
+ "weo <> \u116f;" // hangul jungseong weo
|
||||
+ "we <> \u1170;" // hangul jungseong we
|
||||
+ "wae <> \u116b;" // hangul jungseong wae
|
||||
+ "wa <> \u116a;" // hangul jungseong wa
|
||||
+ "u <> \u116e;" // hangul jungseong u
|
||||
+ "t <> \u11c0;" // hangul jongseong thieuth
|
||||
+ "ss <> \u11bb;" // hangul jongseong ssangsios
|
||||
+ "s <> \u11ba;" // hangul jongseong sios
|
||||
+ "p <> \u11c1;" // hangul jongseong phieuph
|
||||
+ "oe <> \u116c;" // hangul jungseong oe
|
||||
+ "o <> \u1169;" // hangul jungseong o
|
||||
+ "nj <> \u11ac;" // hangul jongseong nieun-cieuc
|
||||
+ "nh <> \u11ad;" // hangul jongseong nieun-hieuh
|
||||
+ "ng <> \u11bc;" // hangul jongseong ieung
|
||||
+ "n <> \u11ab;" // hangul jongseong nieun
|
||||
+ "m <> \u11b7;" // hangul jongseong mieum
|
||||
+ "lt <> \u11b4;" // hangul jongseong rieul-thieuth
|
||||
+ "ls <> \u11b3;" // hangul jongseong rieul-sios
|
||||
+ "lp <> \u11b5;" // hangul jongseong rieul-phieuph
|
||||
+ "lm <> \u11b1;" // hangul jongseong rieul-mieum
|
||||
+ "lh <> \u11b6;" // hangul jongseong rieul-hieuh
|
||||
+ "lg <> \u11b0;" // hangul jongseong rieul-kiyeok
|
||||
+ "lb <> \u11b2;" // hangul jongseong rieul-pieup
|
||||
+ "l <> \u11af;" // hangul jongseong rieul
|
||||
+ "k <> \u11bf;" // hangul jongseong khieukh
|
||||
+ "jj <> \u110d;" // hangul choseong ssangcieuc
|
||||
+ "j <> \u11bd;" // hangul jongseong cieuc
|
||||
+ "i <> \u1175;" // hangul jungseong i
|
||||
+ "h <> \u11c2;" // hangul jongseong hieuh
|
||||
+ "gs <> \u11aa;" // hangul jongseong kiyeok-sios
|
||||
+ "gg <> \u11a9;" // hangul jongseong ssangkiyeok
|
||||
+ "g <> \u11a8;" // hangul jongseong kiyeok
|
||||
+ "eu <> \u1173;" // hangul jungseong eu
|
||||
+ "eo <> \u1165;" // hangul jungseong eo
|
||||
+ "e <> \u1166;" // hangul jungseong e
|
||||
+ "dd <> \u1104;" // hangul choseong ssangtikeut
|
||||
+ "d <> \u11ae;" // hangul jongseong tikeut
|
||||
+ "c <> \u11be;" // hangul jongseong chieuch
|
||||
+ "bs <> \u11b9;" // hangul jongseong pieup-sios
|
||||
+ "b <> \u11b8;" // hangul jongseong pieup
|
||||
+ "ae <> \u1162;" // hangul jungseong ae
|
||||
+ "a <> \u1161;" // hangul jungseong a
|
||||
|
||||
// extra English letters
|
||||
// {moved to bottom - aliu}
|
||||
|
||||
+ "z > |s;"
|
||||
//{ + "Z > |s;" } masked
|
||||
+ "x > |ks;"
|
||||
+ "X > |ks;"
|
||||
+ "v > |b;"
|
||||
+ "V > |b;"
|
||||
+ "r > |l;"
|
||||
+ "R > |l;"
|
||||
+ "q > |k;"
|
||||
+ "Q > |k;"
|
||||
+ "f > |p;"
|
||||
+ "F > |p;"
|
||||
//{ + "c > |k;" } masked
|
||||
+ "C > |k;"
|
||||
|
||||
// ====================================
|
||||
// Normal final rule: remove '
|
||||
// ====================================
|
||||
|
||||
+ "''>;"
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user