//--------------------------------------------------------------------
// Copyright (c) 2000, International Business Machines
// Corporation and others.  All Rights Reserved.
//--------------------------------------------------------------------
// Date        Name        Description
// 01/13/2000  aliu        Creation.
//--------------------------------------------------------------------

// Latin <-> Hangul Jamo transliteration rules.
//
// Reading guide (derived from the rules and comments in this file):
//   name=[...];   defines a character set, referenced later as {name}
//   a > b;        Latin-to-Jamo rule
//   a < b;        Jamo-to-Latin rule
//   a <> b;       rule used in both directions
//   (x)           context that must be adjacent to the key but is not replaced
//   |             "revisit" marker: the text after it is processed again
// Rules are order-sensitive: longer keys are listed before their prefixes
// (e.g. "YEO" before "YE" before "Y") so that the longest match wins.

ljamo {
    Rule {

        // VARIABLES

        "initial=[\u1100-\u115F];"
        "medial=[\u1160-\u11A7];"
        "final=[\u11A8-\u11F9];" // added - aliu
        "vowel=[aeiouwyAEIOUWY\u1160-\u11A7];"
        // NOTE(review): this set lists {medial} (the vowel jamo), not {initial};
        // that looks unintended for a "consonant" set -- confirm before changing.
        "consonant=[bcdfghjklmnpqrstvxzBCDFGHJKLMNPQRSTVXZ{medial}{final}];"
        "ye=[yeYE];"
        "ywe=[yweYWE];"
        "yw=[ywYW];"
        "nl=[nlNL];"
        "gnl=[gnlGNL];"
        "lsgb=[lsgbLSGB];"
        "ywao=[ywaoYWAO];"
        "bl=[blBL];"

        // RULES

        // Hangul structure is IMF or IM
        // (I = initial, M = medial, F = final).
        // So you can have, because of adjacent sequences
        //   IM, but not II or IF
        //   MF or MI, but not MM
        //   FI, but not FF or FM
        // For English, we just have C or V.
        // To generate valid Hangul:
        // Vowels:
        //   We insert IEUNG between VV, and otherwise map V to M.
        //   We also insert IEUNG if there is no initial in front of the vowel.
        // Consonants:
        //   We don't break doubles.
        //   Cases like lmgg, we have to break at lm.
        //   So to guess whether a consonant is I or F
        //   we map all C's to F, except when followed by a vowel, e.g.
        //     X ({vowel}) > CHOSEONG (initial)
        //     X           > JONGSEONG (final)

        // Special insertion for funny sequences of vowels, and for empty consonant.

        "'' < ({consonant}) \u110B;" // insert a break between any consonant and the empty consonant.
        "({medial}) ({vowel}) <> \u110B;" // HANGUL CHOSEONG IEUNG

        // Below, insert an empty consonant in front of a vowel, if there is no Initial in front.

        // Fix casing.
        // Because Korean is caseless, we just want to treat everything as
        // lowercase.
        // We could do this by always preceding this transliterator with
        // an upper-lowercase transformation, but that wouldn't invert nicely.
        // We use the "revisit" syntax to just convert latin to latin
        // so that we can avoid
        // having to restate all the Latin=>Jamo rules, with the I/F handling.
        // We don't have to add titlecase, since that will be picked up
        // since the first letter is converted, then revisited. E.g.
        //   |Gg => |gg => {sang kiyeok}
        // We do have to have all caps, since otherwise we could get:
        //   |GG => |gG => {kiyeok}|G => {kiyeok}|g => {kiyeok}{kiyeok}

        "Z > |z;"
        "YU > |yu;"
        "YO > |yo;"
        "YI > |yi;"
        "YEO > |yeo;"
        "YE > |ye;"
        "YAE > |yae;"
        "YA > |ya;"
        "Y > |y;"
        "WI > |wi;"
        "WEO > |weo;"
        "WE > |we;"
        "WAE > |wae;"
        "WA > |wa;"
        "W > |w;"
        "U > |u;"
        "T > |t;"
        "SS > |ss;"
        "S > |s;"
        "P > |p;"
        "OE > |oe;"
        "O > |o;"
        "NJ > |nj;"
        "NH > |nh;"
        "NG > |ng;"
        "N > |n;"
        "M > |m;"
        "LT > |lt;"
        "LS > |ls;"
        "LP > |lp;"
        "LM > |lm;"
        "LH > |lh;"
        "LG > |lg;"
        "LB > |lb;"
        "L > |l;"
        "K > |k;"
        "JJ > |jj;"
        "J > |j;"
        "I > |i;"
        "H > |h;"
        "GS > |gs;"
        "GG > |gg;"
        "G > |g;"
        "EU > |eu;"
        "EO > |eo;"
        "E > |e;"
        "DD > |dd;"
        "D > |d;"
        "BS > |bs;"
        "BB > |bb;"
        "B > |b;"
        "AE > |ae;"
        "A > |a;"

        // APOSTROPHE

        // As always, an apostrophe is used to separate digraphs into
        // singles. That is, if you really wanted [KAN][GGAN], instead
        // of [KANG][GAN] you would write "kan'ggan".

        // Rules for inserting ' when mapping separated digraphs back
        // from Hangul to Latin. Catch every letter that can be the
        // LAST of a digraph (or multigraph) AND first of an initial.
        //
        // NOTE(review): the trailing context in each rule below is opened with
        // "(" but never closed with ")", unlike "({medial}) ({vowel})" above.
        // The strings are kept exactly as found; confirm that the rule parser
        // accepts the unclosed form before normalizing them.

        "'' < (l) (\u11c0;" // hangul jongseong thieuth
        "'' < ({lsgb}) (\u11ba;" // hangul jongseong sios
        "'' < (l) (\u11c1;" // hangul jongseong phieuph
        "'' < (l) (\u11b7;" // hangul jongseong mieum
        "'' < (n) (\u11bd;" // hangul jongseong cieuc
        "'' < ({nl}) (\u11c2;" // hangul jongseong hieuh
        "'' < ({gnl}) (\u11a9;" // hangul jongseong ssangkiyeok
        "'' < ({bl}) (\u11b8;" // hangul jongseong pieup
        "'' < (d) (\u11ae;" // hangul jongseong tikeut

        "'' < ({ye}) (\u116e;" // hangul jungseong u
        "'' < ({ywe}) (\u1169;" // hangul jungseong o
        "'' < ({yw}) (\u1175;" // hangul jungseong i
        "'' < ({ywao}) (\u1166;" // hangul jungseong e
        "'' < ({yw}) (\u1161;" // hangul jungseong a

        "'' < (l) (\u1110;" // hangul choseong thieuth
        "'' < ({lsgb}) (\u110a;" // hangul choseong ssangsios
        "'' < ({lsgb}) (\u1109;" // hangul choseong sios
        "'' < (l) (\u1111;" // hangul choseong phieuph
        "'' < (l) (\u1106;" // hangul choseong mieum
        "'' < (n) (\u110c;" // hangul choseong cieuc
        "'' < (n) (\u110d;" // hangul choseong ssangcieuc
        "'' < ({nl}) (\u1112;" // hangul choseong hieuh
        "'' < ({gnl}) (\u1101;" // hangul choseong ssangkiyeok
        "'' < ({gnl}) (\u1100;" // hangul choseong kiyeok
        "'' < (d) (\u1103;" // hangul choseong tikeut
        "'' < (d) (\u1104;" // hangul choseong ssangtikeut
        "'' < ({bl}) (\u1107;" // hangul choseong pieup
        "'' < ({bl}) (\u1108;" // hangul choseong ssangpieup

        // INITIALS
        // A consonant directly followed by a vowel becomes an initial.

        "t ({vowel}) <> \u1110;" // hangul choseong thieuth
        "ss ({vowel}) <> \u110a;" // hangul choseong ssangsios
        "s ({vowel}) <> \u1109;" // hangul choseong sios
        "p ({vowel}) <> \u1111;" // hangul choseong phieuph
        "n ({vowel}) <> \u1102;" // hangul choseong nieun
        "m ({vowel}) <> \u1106;" // hangul choseong mieum
        "l ({vowel}) <> \u1105;" // hangul choseong rieul
        "k ({vowel}) <> \u110f;" // hangul choseong khieukh
        "j ({vowel}) <> \u110c;" // hangul choseong cieuc
        "h ({vowel}) <> \u1112;" // hangul choseong hieuh
        "gg ({vowel}) <> \u1101;" // hangul choseong ssangkiyeok
        "g ({vowel}) <> \u1100;" // hangul choseong kiyeok
        "d ({vowel}) <> \u1103;" // hangul choseong tikeut
        "c ({vowel}) <> \u110e;" // hangul choseong chieuch
        "b ({vowel}) <> \u1107;" // hangul choseong pieup
        "bb ({vowel}) <> \u1108;" // hangul choseong ssangpieup
        "jj ({vowel}) <> \u110d;" // hangul choseong ssangcieuc
        "dd ({vowel}) <> \u1104;" // hangul choseong ssangtikeut

        // If we have gotten through to these rules, and we start with
        // a consonant, then the remaining mappings would be to F,
        // because must have CC (or C), not CV.
        // If we have F before us, then
        // we would end up with FF, which is wrong. The simplest fix is
        // to still make it an initial, but also insert an "u",
        // so we end up with F, I, u, and then continue with the C.

        // Special, only initial: these doubles have no final form in the
        // FINALS section, so each maps to its own initial followed by the
        // inserted "u" (\u116e).
        "bb > \u1108\u116e;" // hangul choseong ssangpieup
        "jj > \u110d\u116e;" // hangul choseong ssangcieuc
        "dd > \u1104\u116e;" // hangul choseong ssangtikeut

        "({final}) t > \u1110\u116e;" // hangul choseong thieuth
        "({final}) ss > \u110a\u116e;" // hangul choseong ssangsios
        "({final}) s > \u1109\u116e;" // hangul choseong sios
        "({final}) p > \u1111\u116e;" // hangul choseong phieuph
        "({final}) n > \u1102\u116e;" // hangul choseong nieun
        "({final}) m > \u1106\u116e;" // hangul choseong mieum
        "({final}) l > \u1105\u116e;" // hangul choseong rieul
        "({final}) k > \u110f\u116e;" // hangul choseong khieukh
        "({final}) j > \u110c\u116e;" // hangul choseong cieuc
        "({final}) h > \u1112\u116e;" // hangul choseong hieuh
        "({final}) gg > \u1101\u116e;" // hangul choseong ssangkiyeok
        "({final}) g > \u1100\u116e;" // hangul choseong kiyeok
        "({final}) d > \u1103\u116e;" // hangul choseong tikeut
        "({final}) c > \u110e\u116e;" // hangul choseong chieuch
        "({final}) b > \u1107\u116e;" // hangul choseong pieup

        // MEDIALS after INITIALS

        "({initial}) yu <> \u1172;" // hangul jungseong yu
        "({initial}) yo <> \u116d;" // hangul jungseong yo
        "({initial}) yi <> \u1174;" // hangul jungseong yi
        "({initial}) yeo <> \u1167;" // hangul jungseong yeo
        "({initial}) ye <> \u1168;" // hangul jungseong ye
        "({initial}) yae <> \u1164;" // hangul jungseong yae
        "({initial}) ya <> \u1163;" // hangul jungseong ya
        "({initial}) wi <> \u1171;" // hangul jungseong wi
        "({initial}) weo <> \u116f;" // hangul jungseong weo
        "({initial}) we <> \u1170;" // hangul jungseong we
        "({initial}) wae <> \u116b;" // hangul jungseong wae
        "({initial}) wa <> \u116a;" // hangul jungseong wa
        "({initial}) u <> \u116e;" // hangul jungseong u
        "({initial}) oe <> \u116c;" // hangul jungseong oe
        "({initial}) o <> \u1169;" // hangul jungseong o
        "({initial}) i <> \u1175;" // hangul jungseong i
        "({initial}) eu <> \u1173;" // hangul jungseong eu
        "({initial}) eo <> \u1165;" // hangul jungseong eo
        "({initial}) e <> \u1166;" // hangul jungseong e
        "({initial}) ae <> \u1162;" // hangul jungseong ae
        "({initial}) a <> \u1161;" // hangul jungseong a

        // MEDIALS (vowels) not after INITIALs
        // Each vowel is prefixed with the empty consonant IEUNG (\u110B).

        "yu > \u110B\u1172;" // hangul jungseong yu
        "yo > \u110B\u116d;" // hangul jungseong yo
        "yi > \u110B\u1174;" // hangul jungseong yi
        "yeo > \u110B\u1167;" // hangul jungseong yeo
        "ye > \u110B\u1168;" // hangul jungseong ye
        "yae > \u110B\u1164;" // hangul jungseong yae
        "ya > \u110B\u1163;" // hangul jungseong ya
        "wi > \u110B\u1171;" // hangul jungseong wi
        "weo > \u110B\u116f;" // hangul jungseong weo
        "we > \u110B\u1170;" // hangul jungseong we
        "wae > \u110B\u116b;" // hangul jungseong wae
        "wa > \u110B\u116a;" // hangul jungseong wa
        "u > \u110B\u116e;" // hangul jungseong u
        "oe > \u110B\u116c;" // hangul jungseong oe
        "o > \u110B\u1169;" // hangul jungseong o
        "i > \u110B\u1175;" // hangul jungseong i
        "eu > \u110B\u1173;" // hangul jungseong eu
        "eo > \u110B\u1165;" // hangul jungseong eo
        "e > \u110B\u1166;" // hangul jungseong e
        "ae > \u110B\u1162;" // hangul jungseong ae
        "a > \u110B\u1161;" // hangul jungseong a

        // FINALS

        "t <> \u11c0;" // hangul jongseong thieuth
        "ss <> \u11bb;" // hangul jongseong ssangsios
        "s <> \u11ba;" // hangul jongseong sios
        "p <> \u11c1;" // hangul jongseong phieuph
        "nj <> \u11ac;" // hangul jongseong nieun-cieuc
        "nh <> \u11ad;" // hangul jongseong nieun-hieuh
        "ng <> \u11bc;" // hangul jongseong ieung
        "n <> \u11ab;" // hangul jongseong nieun
        "m <> \u11b7;" // hangul jongseong mieum
        "lt <> \u11b4;" // hangul jongseong rieul-thieuth
        "ls <> \u11b3;" // hangul jongseong rieul-sios
        "lp <> \u11b5;" // hangul jongseong rieul-phieuph
        "lm <> \u11b1;" // hangul jongseong rieul-mieum
        "lh <> \u11b6;" // hangul jongseong rieul-hieuh
        "lg <> \u11b0;" // hangul jongseong rieul-kiyeok
        "lb <> \u11b2;" // hangul jongseong rieul-pieup
        "l <> \u11af;" // hangul jongseong rieul
        "k <> \u11bf;" // hangul jongseong khieukh
        "j <> \u11bd;" // hangul jongseong cieuc
        "h <> \u11c2;" // hangul jongseong hieuh
        "gs <> \u11aa;" // hangul jongseong kiyeok-sios
        "gg <> \u11a9;" // hangul jongseong ssangkiyeok
        "g <> \u11a8;" // hangul jongseong kiyeok
        "d <> \u11ae;" // hangul jongseong tikeut
        "c <> \u11be;" // hangul jongseong chieuch
        "bs <> \u11b9;" // hangul jongseong pieup-sios
        "b <> \u11b8;" // hangul jongseong pieup

        // extra English letters
        // {moved to bottom - aliu}
        // Letters with no jamo of their own are rewritten to a supported
        // letter and revisited.

        "z > |s;"
        //{ + "Z > |s;" } masked
        "x > |ks;"
        "X > |ks;"
        "v > |b;"
        "V > |b;"
        "r > |l;"
        "R > |l;"
        "q > |k;"
        "Q > |k;"
        "f > |p;"
        "F > |p;"
        //{ + "c > |k;" } masked
        // NOTE(review): uppercase C maps to k while lowercase c maps to
        // chieuch via the rules above -- confirm this asymmetry is intended.
        "C > |k;"

        // A stray y or w that matched no vowel rule above.
        "y > \u1172;" // hangul jungseong yu
        "w > \u1171;" // hangul jungseong wi

        // ====================================
        // Normal final rule: remove '
        // ====================================

        "''>;"
    }
}