7845ffb9be
X-SVN-Rev: 586
312 lines
11 KiB
Plaintext
312 lines
11 KiB
Plaintext
//--------------------------------------------------------------------
|
|
// Copyright (c) 2000, International Business Machines
|
|
// Corporation and others. All Rights Reserved.
|
|
//--------------------------------------------------------------------
|
|
// Date Name Description
|
|
// 01/13/2000 aliu Creation.
|
|
//--------------------------------------------------------------------
|
|
|
|
ljamo {
|
|
Rule {
|
|
// VARIABLES
// Named character sets; the rules below refer to them as {name}.
// initial / medial / final are jamo code-point ranges; vowel and consonant
// mix Latin letters (both cases) with jamo ranges. The short Latin sets
// (ye, ywe, yw, nl, gnl, lsgb, ywao, bl) are referenced in this file only
// as the parenthesized context of the apostrophe-insertion rules.
|
|
|
|
"initial=[\u1100-\u115F];"
|
|
"medial=[\u1160-\u11A7];"
|
|
"final=[\u11A8-\u11F9];" // added - aliu
|
|
// Latin vowels plus w and y, and the same range as {medial}.
"vowel=[aeiouwyAEIOUWY\u1160-\u11A7];"
|
|
// NOTE(review): consonant includes {medial}, a range that {vowel} also
// contains -- confirm that this overlap is intended.
"consonant=[bcdfghjklmnpqrstvxzBCDFGHJKLMNPQRSTVXZ{medial}{final}];"
|
|
"ye=[yeYE];"
|
|
"ywe=[yweYWE];"
|
|
"yw=[ywYW];"
|
|
"nl=[nlNL];"
|
|
"gnl=[gnlGNL];"
|
|
"lsgb=[lsgbLSGB];"
|
|
"ywao=[ywaoYWAO];"
|
|
"bl=[blBL];"
|
|
|
|
// RULES
|
|
|
|
// Hangul structure is IMF or IM
|
|
// So you can have, because of adjacent sequences
|
|
// IM, but not II or IF
|
|
// MF or MI, but not MM
|
|
// FI, but not FF or FM
|
|
|
|
// For English, we just have C or V.
|
|
// To generate valid Hangul:
|
|
// Vowels:
|
|
// We insert IEUNG between VV, and otherwise map V to M
|
|
// We also insert IEUNG if there is no initial in front of the vowel
|
|
// Consonants:
|
|
// We don't break doubles
|
|
// Cases like lmgg, we have to break at lm
|
|
// So to guess whether a consonant is I or F
|
|
// we map all C's to F, except when followed by a vowel, e.g.
|
|
// X[{vowel}>CHOSEONG (initial)
|
|
// X>JONGSEONG (final)
|
|
|
|
// special insertion for funny sequences of vowels, and for empty consonant
// Both rules concern \u110B (HANGUL CHOSEONG IEUNG), the "empty consonant".
|
|
|
|
"'' < ({consonant}) \u110B;" // insert a break between any consonant and the empty consonant.
|
|
// NOTE(review): both groups on the Latin side are parenthesized, i.e. they
// look like context only -- presumably IEUNG between a medial and a
// following vowel maps to/from nothing. Confirm against the rule syntax.
"({medial}) ({vowel}) <> \u110B;" // HANGUL CHOSEONG IEUNG
|
|
|
|
// Below, insert an empty consonant in front of a vowel, if there is no Initial in front.
|
|
|
|
// Fix casing.
|
|
// Because Korean is caseless, we just want to treat everything as
|
|
// lowercase.
|
|
// we could do this by always preceding this transliterator with
|
|
// an upper-lowercase transformation, but that wouldn't invert nicely.
|
|
// We use the "revisit" syntax to just convert latin to latin
|
|
// so that we can avoid
|
|
// having to restate all the Latin=>Jamo rules, with the I/F handling.
|
|
|
|
// We don't have to add titlecase, since that will be picked up
|
|
// since the first letter is converted, then revisited. E.g.
|
|
// |Gg => |gg => {sang kiyeok}
|
|
// We do have to have all caps, since otherwise we could get:
|
|
// |GG => |gG => {kiyeok}|G => {kiyeok}|g => {kiyeok}{kiyeok}
|
|
|
|
// Each rule below rewrites an all-caps letter or multigraph to lowercase
// and places the | (revisit) marker in front of the output, so that the
// lowercase rules later in this file handle the result.
// Longer sequences are listed before their own prefixes (e.g. YEO, then
// YE, then Y) -- presumably because the first matching rule wins; keep
// that ordering when adding entries.
"Z > |z;"
|
|
"YU > |yu;"
|
|
"YO > |yo;"
|
|
"YI > |yi;"
|
|
"YEO > |yeo;"
|
|
"YE > |ye;"
|
|
"YAE > |yae;"
|
|
"YA > |ya;"
|
|
"Y > |y;"
|
|
"WI > |wi;"
|
|
"WEO > |weo;"
|
|
"WE > |we;"
|
|
"WAE > |wae;"
|
|
"WA > |wa;"
|
|
"W > |w;"
|
|
"U > |u;"
|
|
"T > |t;"
|
|
"SS > |ss;"
|
|
"S > |s;"
|
|
"P > |p;"
|
|
"OE > |oe;"
|
|
"O > |o;"
|
|
"NJ > |nj;"
|
|
"NH > |nh;"
|
|
"NG > |ng;"
|
|
"N > |n;"
|
|
"M > |m;"
|
|
"LT > |lt;"
|
|
"LS > |ls;"
|
|
"LP > |lp;"
|
|
"LM > |lm;"
|
|
"LH > |lh;"
|
|
"LG > |lg;"
|
|
"LB > |lb;"
|
|
"L > |l;"
|
|
"K > |k;"
|
|
"JJ > |jj;"
|
|
"J > |j;"
|
|
"I > |i;"
|
|
"H > |h;"
|
|
"GS > |gs;"
|
|
"GG > |gg;"
|
|
"G > |g;"
|
|
"EU > |eu;"
|
|
"EO > |eo;"
|
|
"E > |e;"
|
|
"DD > |dd;"
|
|
"D > |d;"
|
|
"BS > |bs;"
|
|
"BB > |bb;"
|
|
"B > |b;"
|
|
"AE > |ae;"
|
|
"A > |a;"
|
|
|
|
// APOSTROPHE
|
|
|
|
// As always, an apostrophe is used to separate digraphs into
|
|
// singles. That is, if you really wanted [KAN][GGAN], instead
|
|
// of [KANG][GAN] you would write "kan'ggan".
|
|
|
|
// Rules for inserting ' when mapping separated digraphs back
|
|
// from Hangul to Latin. Catch every letter that can be the
|
|
// LAST of a digraph (or multigraph)
|
|
|
|
// Layout of this section:
//  - The parenthesized set is the preceding Latin letter that would
//    otherwise combine with the emitted letter into a digraph
//    (e.g. {gnl} for gg / ng / lg).
//  - Every consonant is listed twice, once for its jongseong and once for
//    its choseong form, and both entries must name the SAME single letter.
//  - Vowels are listed once (jungseong only).
// NOTE(review): jj is also a digraph in this file ("jj <> \u110d") but the
// ''j rules only list n as context -- confirm whether j should be added.
"''u < ({ye}) \u116e;" // hangul jungseong u
|
|
"''t < (l) \u11c0;" // hangul jongseong thieuth
|
|
"''t < (l) \u1110;" // hangul choseong thieuth
|
|
"''s < ({lsgb}) \u11ba;" // hangul jongseong sios
|
|
"''s < ({lsgb}) \u1109;" // hangul choseong sios
|
|
"''p < (l) \u11c1;" // hangul jongseong phieuph
|
|
"''p < (l) \u1111;" // hangul choseong phieuph
|
|
"''o < ({ywe}) \u1169;" // hangul jungseong o
|
|
"''m < (l) \u11b7;" // hangul jongseong mieum
|
|
"''m < (l) \u1106;" // hangul choseong mieum
|
|
"''j < (n) \u11bd;" // hangul jongseong cieuc
|
|
"''j < (n) \u110c;" // hangul choseong cieuc
|
|
"''i < ({yw}) \u1175;" // hangul jungseong i
|
|
"''h < ({nl}) \u11c2;" // hangul jongseong hieuh
|
|
"''h < ({nl}) \u1112;" // hangul choseong hieuh
|
|
// Single final g is \u11a8 (see "g <> \u11a8" under FINALS); \u11a9 is
// ssangkiyeok (gg) and must not be emitted as a lone 'g.
"''g < ({gnl}) \u11a8;" // hangul jongseong kiyeok
|
|
"''g < ({gnl}) \u1100;" // hangul choseong kiyeok
|
|
"''e < ({ywao}) \u1166;" // hangul jungseong e
|
|
"''d < (d) \u11ae;" // hangul jongseong tikeut
|
|
"''d < (d) \u1103;" // hangul choseong tikeut
|
|
"''b < ({bl}) \u11b8;" // hangul jongseong pieup
|
|
"''b < ({bl}) \u1107;" // hangul choseong pieup
|
|
"''a < ({yw}) \u1161;" // hangul jungseong a
|
|
|
|
// INITIALS
// A Latin consonant that is followed by a vowel (the parenthesized
// {vowel} context) pairs with the choseong (initial) jamo.
// Doubled letters (ss, gg, bb) are listed before their single-letter
// rules (s, g, b); keep that order when editing.
|
|
|
|
"t ({vowel}) <> \u1110;" // hangul choseong thieuth
|
|
"ss ({vowel}) <> \u110a;" // hangul choseong ssangsios
|
|
"s ({vowel}) <> \u1109;" // hangul choseong sios
|
|
"p ({vowel}) <> \u1111;" // hangul choseong phieuph
|
|
"n ({vowel}) <> \u1102;" // hangul choseong nieun
|
|
"m ({vowel}) <> \u1106;" // hangul choseong mieum
|
|
"l ({vowel}) <> \u1105;" // hangul choseong rieul
|
|
"k ({vowel}) <> \u110f;" // hangul choseong khieukh
|
|
"j ({vowel}) <> \u110c;" // hangul choseong cieuc
|
|
"h ({vowel}) <> \u1112;" // hangul choseong hieuh
|
|
"gg ({vowel}) <> \u1101;" // hangul choseong ssangkiyeok
|
|
"g ({vowel}) <> \u1100;" // hangul choseong kiyeok
|
|
"d ({vowel}) <> \u1103;" // hangul choseong tikeut
|
|
"c ({vowel}) <> \u110e;" // hangul choseong chieuch
|
|
"bb ({vowel}) <> \u1108;" // hangul choseong ssangpieup
|
|
"b ({vowel}) <> \u1107;" // hangul choseong pieup
|
|
|
|
// If we have gotten through to these rules, and we start with
|
|
// a consonant, then the remaining mappings would be to F,
|
|
// because we must have CC (or C<non-letter>), not CV.
|
|
// If we have F before us, then
|
|
// we would end up with FF, which is wrong. The simplest fix is
|
|
// to still make it an initial, but also insert an "u",
|
|
// so we end up with F, I, u, and then continue with the C
|
|
|
|
// These rules use ">" only (no reverse mapping). With a jamo from {final}
// as the preceding context, the consonant is written as its choseong
// followed by \u116e (jungseong u).
"({final}) t > \u1110\u116e;" // hangul choseong thieuth
|
|
"({final}) ss > \u110a\u116e;" // hangul choseong ssangsios
|
|
"({final}) s > \u1109\u116e;" // hangul choseong sios
|
|
"({final}) p > \u1111\u116e;" // hangul choseong phieuph
|
|
"({final}) n > \u1102\u116e;" // hangul choseong nieun
|
|
"({final}) m > \u1106\u116e;" // hangul choseong mieum
|
|
"({final}) l > \u1105\u116e;" // hangul choseong rieul
|
|
"({final}) k > \u110f\u116e;" // hangul choseong khieukh
|
|
"({final}) j > \u110c\u116e;" // hangul choseong cieuc
|
|
"({final}) h > \u1112\u116e;" // hangul choseong hieuh
|
|
"({final}) gg > \u1101\u116e;" // hangul choseong ssangkiyeok
|
|
"({final}) g > \u1100\u116e;" // hangul choseong kiyeok
|
|
"({final}) d > \u1103\u116e;" // hangul choseong tikeut
|
|
"({final}) c > \u110e\u116e;" // hangul choseong chieuch
|
|
"({final}) bb > \u1108\u116e;" // hangul choseong ssangpieup
|
|
"({final}) b > \u1107\u116e;" // hangul choseong pieup
|
|
|
|
// MEDIALS after INITIALS
// With a jamo from {initial} as the preceding context, a Latin vowel
// sequence pairs directly with its jungseong (medial) jamo.
// Longer sequences come before their prefixes (yeo before ye, wae before
// wa, ...); keep that order when editing.
|
|
|
|
"({initial}) yu <> \u1172;" // hangul jungseong yu
|
|
"({initial}) yo <> \u116d;" // hangul jungseong yo
|
|
"({initial}) yi <> \u1174;" // hangul jungseong yi
|
|
"({initial}) yeo <> \u1167;" // hangul jungseong yeo
|
|
"({initial}) ye <> \u1168;" // hangul jungseong ye
|
|
"({initial}) yae <> \u1164;" // hangul jungseong yae
|
|
"({initial}) ya <> \u1163;" // hangul jungseong ya
|
|
"({initial}) wi <> \u1171;" // hangul jungseong wi
|
|
"({initial}) weo <> \u116f;" // hangul jungseong weo
|
|
"({initial}) we <> \u1170;" // hangul jungseong we
|
|
"({initial}) wae <> \u116b;" // hangul jungseong wae
|
|
"({initial}) wa <> \u116a;" // hangul jungseong wa
|
|
"({initial}) u <> \u116e;" // hangul jungseong u
|
|
"({initial}) oe <> \u116c;" // hangul jungseong oe
|
|
"({initial}) o <> \u1169;" // hangul jungseong o
|
|
"({initial}) i <> \u1175;" // hangul jungseong i
|
|
"({initial}) eu <> \u1173;" // hangul jungseong eu
|
|
"({initial}) eo <> \u1165;" // hangul jungseong eo
|
|
"({initial}) e <> \u1166;" // hangul jungseong e
|
|
"({initial}) ae <> \u1162;" // hangul jungseong ae
|
|
"({initial}) a <> \u1161;" // hangul jungseong a
|
|
|
|
// MEDIALS (vowels) not after INITIALs
// These rules use ">" only. Each output is \u110B (choseong IEUNG, the
// empty consonant) followed by the jungseong, so a vowel with no initial
// in front still gets one. Same vowel list and order as the
// "MEDIALS after INITIALS" rules.
|
|
|
|
"yu > \u110B\u1172;" // hangul jungseong yu
|
|
"yo > \u110B\u116d;" // hangul jungseong yo
|
|
"yi > \u110B\u1174;" // hangul jungseong yi
|
|
"yeo > \u110B\u1167;" // hangul jungseong yeo
|
|
"ye > \u110B\u1168;" // hangul jungseong ye
|
|
"yae > \u110B\u1164;" // hangul jungseong yae
|
|
"ya > \u110B\u1163;" // hangul jungseong ya
|
|
"wi > \u110B\u1171;" // hangul jungseong wi
|
|
"weo > \u110B\u116f;" // hangul jungseong weo
|
|
"we > \u110B\u1170;" // hangul jungseong we
|
|
"wae > \u110B\u116b;" // hangul jungseong wae
|
|
"wa > \u110B\u116a;" // hangul jungseong wa
|
|
"u > \u110B\u116e;" // hangul jungseong u
|
|
"oe > \u110B\u116c;" // hangul jungseong oe
|
|
"o > \u110B\u1169;" // hangul jungseong o
|
|
"i > \u110B\u1175;" // hangul jungseong i
|
|
"eu > \u110B\u1173;" // hangul jungseong eu
|
|
"eo > \u110B\u1165;" // hangul jungseong eo
|
|
"e > \u110B\u1166;" // hangul jungseong e
|
|
"ae > \u110B\u1162;" // hangul jungseong ae
|
|
"a > \u110B\u1161;" // hangul jungseong a
|
|
|
|
|
|
// FINALS
// Context-free consonant rules: Latin consonants not consumed by the
// earlier, context-dependent rules pair with jongseong (final) jamo here.
// Exceptions, per their own comments: jj and dd pair with choseong jamo
// (\u110d, \u1104).
// Multi-letter sequences come before their prefixes (nj/nh/ng before n,
// lt..lb before l, gs/gg before g, bs before b); keep that order.
|
|
|
|
"t <> \u11c0;" // hangul jongseong thieuth
|
|
"ss <> \u11bb;" // hangul jongseong ssangsios
|
|
"s <> \u11ba;" // hangul jongseong sios
|
|
"p <> \u11c1;" // hangul jongseong phieuph
|
|
"nj <> \u11ac;" // hangul jongseong nieun-cieuc
|
|
"nh <> \u11ad;" // hangul jongseong nieun-hieuh
|
|
"ng <> \u11bc;" // hangul jongseong ieung
|
|
"n <> \u11ab;" // hangul jongseong nieun
|
|
"m <> \u11b7;" // hangul jongseong mieum
|
|
"lt <> \u11b4;" // hangul jongseong rieul-thieuth
|
|
"ls <> \u11b3;" // hangul jongseong rieul-sios
|
|
"lp <> \u11b5;" // hangul jongseong rieul-phieuph
|
|
"lm <> \u11b1;" // hangul jongseong rieul-mieum
|
|
"lh <> \u11b6;" // hangul jongseong rieul-hieuh
|
|
"lg <> \u11b0;" // hangul jongseong rieul-kiyeok
|
|
"lb <> \u11b2;" // hangul jongseong rieul-pieup
|
|
"l <> \u11af;" // hangul jongseong rieul
|
|
"k <> \u11bf;" // hangul jongseong khieukh
|
|
"jj <> \u110d;" // hangul choseong ssangcieuc
|
|
"j <> \u11bd;" // hangul jongseong cieuc
|
|
"h <> \u11c2;" // hangul jongseong hieuh
|
|
"gs <> \u11aa;" // hangul jongseong kiyeok-sios
|
|
"gg <> \u11a9;" // hangul jongseong ssangkiyeok
|
|
"g <> \u11a8;" // hangul jongseong kiyeok
|
|
"dd <> \u1104;" // hangul choseong ssangtikeut
|
|
"d <> \u11ae;" // hangul jongseong tikeut
|
|
"c <> \u11be;" // hangul jongseong chieuch
|
|
"bs <> \u11b9;" // hangul jongseong pieup-sios
|
|
"b <> \u11b8;" // hangul jongseong pieup
|
|
|
|
// extra English letters
|
|
// {moved to bottom - aliu}
|
|
|
|
|
|
// Latin letters with no jamo rule of their own are rewritten to a letter
// (or pair) that has one, with the | (revisit) marker in front of the
// output so the rules above are applied to the replacement.
// The two "masked" entries are commented out; earlier rules in this file
// already match the same input ("Z > |z;" in the casing rules, and the
// c rules under INITIALS / FINALS).
"z > |s;"
|
|
//{ "Z > |s;" } masked
|
|
"x > |ks;"
|
|
"X > |ks;"
|
|
"v > |b;"
|
|
"V > |b;"
|
|
"r > |l;"
|
|
"R > |l;"
|
|
"q > |k;"
|
|
"Q > |k;"
|
|
"f > |p;"
|
|
"F > |p;"
|
|
//{ "c > |k;" } masked
|
|
"C > |k;"
|
|
|
|
|
|
// A y or w that none of the earlier vowel rules (ya, yu, wa, wi, ...)
// consumed maps to a jungseong directly; there is no revisit marker.
"y > \u1172;" // hangul jungseong yu
|
|
"w > \u1171;" // hangul jungseong wi
|
|
|
|
|
|
// ====================================
|
|
// Normal final rule: remove '
|
|
// ====================================
|
|
|
|
// '' is the apostrophe as written inside these rule strings (same form
// as in the apostrophe-insertion rules); the right-hand side is empty,
// so the apostrophe maps to nothing. Listed last so that every rule above
// gets its chance first.
"''>;"
|
|
}
|
|
}
|