563d9e5006
X-SVN-Rev: 1422
748 lines
30 KiB
Plaintext
748 lines
30 KiB
Plaintext
//--------------------------------------------------------------------
|
|
// Copyright (c) 1999-2000, International Business Machines
|
|
// Corporation and others. All Rights Reserved.
|
|
//--------------------------------------------------------------------
|
|
// THIS IS A MACHINE-GENERATED FILE
|
|
// Tool: src\com\ibm\tools\translit\dumpICUrules.bat
|
|
// Source: src\com\ibm\text\resources/TransliterationRule_Latin_Jamo.java
|
|
// Date: Fri May 19 15:50:22 2000
|
|
//--------------------------------------------------------------------
|
|
|
|
// Latin-Jamo
|
|
|
|
ljamo {
|
|
Rule {
|
|
|
|
// VARIABLES
|
|
|
|
"$initial=[\u1100-\u115F];" // choseong (initial consonant) jamo
|
|
"$INITIAL=[bcdghjklmnpst];" // Latin letters that spell an initial; used as a left context
|
|
"$medial=[\u1160-\u11A7];" // jungseong (medial vowel) jamo
|
|
"$MEDIAL=[aeiou];" // Latin vowels, as a left context
|
|
"$comp_med=[\u1160\u1176-\u11A7];" // compound medials and filler
|
|
"$final=[\u11A8-\u11F9];" // jongseong (final consonant) jamo; added - aliu
|
|
"$vowel=[aeiouwy$medial];" // Latin vowel letters (including w, y) plus medial jamo
|
|
// following line used to read "..$medial$final]"
|
|
// assume this was a typo - liu
|
|
"$consonant=[bcdfghjklmnpqrstvxz$initial$final];" // Latin consonants plus initial and final jamo
|
|
// The letter sets from $ye_ through $bl_ (each listing both cases) are
// referenced only by the commented-out apostrophe-insertion rules in this file.
"$ye_=[yeYE];"
|
|
"$ywe_=[yweYWE];"
|
|
"$yw_=[ywYW];"
|
|
"$nl_=[nlNL];"
|
|
"$gnl_=[gnlGNL];"
|
|
"$lsgb_=[lsgbLSGB];"
|
|
"$ywao_=[ywaoYWAO];"
|
|
"$bl_=[blBL];"
|
|
|
|
"$ieung = \u110b;" // choseong IEUNG, the empty initial consonant
|
|
|
|
// RULES
|
|
|
|
// Hangul structure is IMF or IM
|
|
// So you can have, because of adjacent sequences
|
|
// IM, but not II or IF
|
|
// MF or MI, but not MM
|
|
// FI, but not FF or FM
|
|
|
|
// For English, we just have C or V.
|
|
// To generate valid Hangul:
|
|
// Vowels:
|
|
// We insert IEUNG between VV, and otherwise map V to M
|
|
// We also insert IEUNG if there is no initial in front of the vowel
|
|
// Consonants:
|
|
// We don't break doubles
|
|
// Cases like lmgg, we have to break at lm
|
|
// So to guess whether a consonant is I or F
|
|
// we map all C's to F, except when followed by a vowel, e.g.
|
|
// X[{vowel}>CHOSEONG (initial)
|
|
// X>JONGSEONG (final)
|
|
|
|
// Below, insert an empty consonant in front of a vowel, if there is no Initial in front.
|
|
|
|
|
|
// General strategy.
|
|
//
|
|
// 1. We support both the normal Jamo block, 1100 - 117F, and the
|
|
// compatibility block, 3130 - 318F. The former uses lowercase latin;
|
|
// the latter uses uppercase. See notes below for details of the
|
|
// compatibility block. Remaining items in this list pertain to the
|
|
// normal Jamo block.
|
|
//
|
|
// 2. Canonical syllables should transliterate without special
|
|
// characters. Canonical syllables are either IMF or IM.
|
|
//
|
|
// 3. We want to support round-trip integrity from jamo to latin and back
|
|
// to Jamo. To do this we have to mark the jamo with special characters
|
|
// when they occur in non-canonical positions.
|
|
//
|
|
// 4. When initial jamo occur in a non-canonical position, they are
|
|
// marked with a leading '['.
|
|
//
|
|
// 5. When final jamo occur in a non-canonical position, they are marked
|
|
// with a trailing ']'.
|
|
//
|
|
// 6. When medial jamo occur in a non-canonical position, they are marked
|
|
// with a leading '~'.
|
|
//
|
|
// 7. Compound jamo characters are handled by enclosing them in
|
|
// parentheses. Initials are '((x)', medials are '(x)', and finals are
|
|
// '(x))'.
|
|
//
|
|
// 8. Disambiguation of 'g' + 'g' vs. 'gg' is accomplished by inserting an
|
|
// apostrophe (written '' in the rules) between them.
|
|
//
|
|
// 9. IEUNG is used to mark medials not occurring after initials.
|
|
// Isolated IEUNG is transliterated as a back tick.
|
|
//
|
|
// 10. Some old special case and completeness rules have been commented
|
|
// out. These can be reintroduced (and the existing rules modified as
|
|
// needed) so long as round-trip integrity is maintained.
|
|
|
|
// We use the uppercase latin letters for the compatibility Jamo
|
|
// U+3130 - U+318F. The following rules are generated
|
|
// programmatically by a perl script that analyzes the Unicode
|
|
// database. These rules are much simpler because there are no
|
|
// separate code points for initial vs. final consonants, so no
|
|
// contextual rules are needed. The one wrinkle is, as usual, the
|
|
// need to distinguish doubles from two singles, that is, GG vs G G.
|
|
// The perl script finds these special cases by exhaustive search and
|
|
// adds only the minimal rules needed to resolve these cases. The one
|
|
// modification that is made by hand is to replace '' with '/' so as
|
|
// not to conflict with the normal IEUNG in the standard Jamo range. -
|
|
// liu
|
|
"A '' <> {\u314F} [\u3153\u3161\u3154];"
|
|
"B '' <> {\u3142} [\u3142\u3143];"
|
|
"D '' <> {\u3137} [\u3137\u3138];"
|
|
"E '' <> {\u3154} [\u315A\u3157\u315C];"
|
|
"G '' <> {\u3131} [\u3132\u3133\u3131\u3146\u3145];"
|
|
"J '' <> {\u3148} [\u3149\u3148];"
|
|
"L '' <> {\u3139} [\u3132\u3133\u3131\u3141\u3142\u3143\u3146\u3145\u314C\u314D];"
|
|
"N '' <> {\u3134} [\u3149\u3148\u314E];"
|
|
"O '' <> {\u3157} [\u3153\u3161\u3154];"
|
|
"S '' <> {\u3145} [\u3146\u3145];"
|
|
"WA '' <> {\u3158} [\u3153\u3161\u3154];"
|
|
"WE '' <> {\u315E} [\u315A\u3157];"
|
|
"YA '' <> {\u3151} [\u3153\u3161\u3154];"
|
|
"YE '' <> {\u3156} [\u315A\u3157];"
|
|
"YU <> \u3160;"
|
|
"YO <> \u315B;"
|
|
"YI <> \u3162;"
|
|
"YEO <> \u3155;"
|
|
"YE <> \u3156;"
|
|
"YAE <> \u3152;"
|
|
"YA <> \u3151;"
|
|
"WI <> \u315F;"
|
|
"WEO <> \u315D;"
|
|
"WE <> \u315E;"
|
|
"WAE <> \u3159;"
|
|
"WA <> \u3158;"
|
|
"U <> \u315C;"
|
|
"T <> \u314C;"
|
|
"S S <> \u3146;"
|
|
"S <> \u3145;"
|
|
"P <> \u314D;"
|
|
"OE <> \u315A;"
|
|
"O <> \u3157;"
|
|
"N J <> \u3135;"
|
|
"N H <> \u3136;"
|
|
"N <> \u3134;"
|
|
"M <> \u3141;"
|
|
"L T <> \u313E;"
|
|
"L S <> \u313D;"
|
|
"L P <> \u313F;"
|
|
"L M <> \u313B;"
|
|
"L G <> \u313A;"
|
|
"L B <> \u313C;"
|
|
"L <> \u3139;"
|
|
"K <> \u314B;"
|
|
"J J <> \u3149;"
|
|
"J <> \u3148;"
|
|
"I <> \u3163;"
|
|
"H <> \u314E;"
|
|
"G S <> \u3133;"
|
|
"G G <> \u3132;"
|
|
"G <> \u3131;"
|
|
"EU <> \u3161;"
|
|
"EO <> \u3153;"
|
|
"E <> \u3154;"
|
|
"D D <> \u3138;"
|
|
"D <> \u3137;"
|
|
"C <> \u314A;"
|
|
"B B <> \u3143;"
|
|
"B <> \u3142;"
|
|
"AE <> \u3150;"
|
|
"A <> \u314F;"
|
|
"'/' <> \u3147;"
|
|
"'(' YU YEO ')' <> \u318A;"
|
|
"'(' YU YE ')' <> \u318B;"
|
|
"'(' YU I ')' <> \u318C;"
|
|
"'(' YR ')' <> \u3186;"
|
|
"'(' YO YAE ')' <> \u3188;"
|
|
"'(' YO YA ')' <> \u3187;"
|
|
"'(' YO I ')' <> \u3189;"
|
|
"'(' YES S ')' <> \u3182;"
|
|
"'(' YES PAN ')' <> \u3183;"
|
|
"'(' YES ')' <> \u3181;"
|
|
"'(' S N ')' <> \u317B;"
|
|
"'(' S J ')' <> \u317E;"
|
|
"'(' S G ')' <> \u317A;"
|
|
"'(' S D ')' <> \u317C;"
|
|
"'(' S B ')' <> \u317D;"
|
|
"'(' PAN ')' <> \u317F;"
|
|
"'(' P '' ')' <> \u3184;"
|
|
"'(' N S ')' <> \u3167;"
|
|
"'(' N PAN ')' <> \u3168;"
|
|
"'(' N N ')' <> \u3165;"
|
|
"'(' N D ')' <> \u3166;"
|
|
"'(' M S ')' <> \u316F;"
|
|
"'(' M PAN ')' <> \u3170;"
|
|
"'(' M B ')' <> \u316E;"
|
|
"'(' M '' ')' <> \u3171;"
|
|
"'(' L YR ')' <> \u316D;"
|
|
"'(' L PAN ')' <> \u316C;"
|
|
"'(' L H ')' <> \u3140;"
|
|
"'(' L G S ')' <> \u3169;"
|
|
"'(' L D ')' <> \u316A;"
|
|
"'(' L B S ')' <> \u316B;"
|
|
"'(' HJF ')' <> \u3164;"
|
|
"'(' H H ')' <> \u3185;"
|
|
"'(' B T ')' <> \u3177;"
|
|
"'(' B S G ')' <> \u3174;"
|
|
"'(' B S D ')' <> \u3175;"
|
|
"'(' B S ')' <> \u3144;"
|
|
"'(' B J ')' <> \u3176;"
|
|
"'(' B G ')' <> \u3172;"
|
|
"'(' B D ')' <> \u3173;"
|
|
"'(' B B '' ')' <> \u3179;"
|
|
"'(' B '' ')' <> \u3178;"
|
|
"'(' AR I ')' <> \u318E;"
|
|
"'(' AR ')' <> \u318D;"
|
|
"'(' '' '' ')' <> \u3180;"
|
|
|
|
// APOSTROPHE
|
|
|
|
// As always, an apostrophe is used to separate digraphs into
|
|
// singles. That is, if you really wanted [KAN][GGAN], instead
|
|
// of [KANG][GAN] you would write "kan'ggan".
|
|
|
|
// Rules for inserting ' when mapping separated digraphs back
|
|
// from Hangul to Latin. Catch every letter that can be the
|
|
// LAST of a digraph (or multigraph) AND first of an initial
|
|
|
|
// special insertion for funny sequences of vowels, and for empty consonant
|
|
|
|
// + "'' < l{ }\u11c0;" // hangul jongseong thieuth
|
|
// + "'' < $lsgb_{}\u11ba;" // hangul jongseong sios
|
|
// + "'' < l{ }\u11c1;" // hangul jongseong phieuph
|
|
// + "'' < l{ }\u11b7;" // hangul jongseong mieum
|
|
// + "'' < n{ }\u11bd;" // hangul jongseong cieuc
|
|
// + "'' < $nl_{}\u11c2;" // hangul jongseong hieuh
|
|
// + "'' < $gnl_{}\u11a9;" // hangul jongseong ssangkiyeok
|
|
// + "'' < $bl_{}\u11b8;" // hangul jongseong pieup
|
|
// + "'' < d{ }\u11ae;" // hangul jongseong tikeut
|
|
//
|
|
// + "'' < $ye_{}\u116e;" // hangul jungseong u
|
|
// + "'' < $ywe_{}\u1169;" // hangul jungseong o
|
|
// + "'' < $yw_{}\u1175;" // hangul jungseong i
|
|
// + "'' < $ywao_{}\u1166;" // hangul jungseong e
|
|
// + "'' < $yw_{}\u1161;" // hangul jungseong a
|
|
//
|
|
// + "'' < l{ }\u1110;" // hangul choseong thieuth
|
|
// + "'' < $lsgb_{}\u110a;" // hangul choseong ssangsios
|
|
// + "'' < $lsgb_{}\u1109;" // hangul choseong sios
|
|
// + "'' < l{ }\u1111;" // hangul choseong phieuph
|
|
// + "'' < l{ }\u1106;" // hangul choseong mieum
|
|
// + "'' < n{ }\u110c;" // hangul choseong cieuc
|
|
// + "'' < n{ }\u110d;"
|
|
// + "'' < $nl_{}\u1112;" // hangul choseong hieuh
|
|
// + "'' < $gnl_{}\u1101;" // hangul choseong ssangkiyeok
|
|
// + "'' < $gnl_{}\u1100;" // hangul choseong kiyeok
|
|
// + "'' < d{ }\u1103;" // hangul choseong tikeut
|
|
// + "'' < d{ }\u1104;"
|
|
// + "'' < $bl_{}\u1107;" // hangul choseong pieup
|
|
// + "'' < $bl_{}\u1108;"
|
|
|
|
// We transliterate the compound Jamo code points using ((x) for
|
|
// initials, (x) for medials, and (x)) for finals. - liu
|
|
" '((' n g ')' <> \u1113;"
|
|
" '((' n n ')' <> \u1114;"
|
|
" '((' n d ')' <> \u1115;"
|
|
" '((' n b ')' <> \u1116;"
|
|
" '((' d g ')' <> \u1117;"
|
|
" '((' l n ')' <> \u1118;"
|
|
" '((' l l ')' <> \u1119;"
|
|
" '((' l h ')' <> \u111A;"
|
|
" '((' l '' ')' <> \u111B;"
|
|
" '((' m b ')' <> \u111C;"
|
|
" '((' m '' ')' <> \u111D;"
|
|
" '((' b g ')' <> \u111E;"
|
|
" '((' b n ')' <> \u111F;"
|
|
" '((' b d ')' <> \u1120;"
|
|
" '((' b s ')' <> \u1121;"
|
|
" '((' b s g ')' <> \u1122;"
|
|
" '((' b s d ')' <> \u1123;"
|
|
" '((' b s b ')' <> \u1124;"
|
|
" '((' b s s ')' <> \u1125;"
|
|
" '((' b s j ')' <> \u1126;"
|
|
" '((' b j ')' <> \u1127;"
|
|
" '((' b c ')' <> \u1128;"
|
|
" '((' b t ')' <> \u1129;"
|
|
" '((' b p ')' <> \u112A;"
|
|
" '((' b '' ')' <> \u112B;"
|
|
" '((' b b '' ')' <> \u112C;"
|
|
" '((' s g ')' <> \u112D;"
|
|
" '((' s n ')' <> \u112E;"
|
|
" '((' s d ')' <> \u112F;"
|
|
" '((' s l ')' <> \u1130;"
|
|
" '((' s m ')' <> \u1131;"
|
|
" '((' s b ')' <> \u1132;"
|
|
" '((' s b g ')' <> \u1133;"
|
|
" '((' s s s ')' <> \u1134;"
|
|
" '((' s '' ')' <> \u1135;"
|
|
" '((' s j ')' <> \u1136;"
|
|
" '((' s c ')' <> \u1137;"
|
|
" '((' s k ')' <> \u1138;"
|
|
" '((' s t ')' <> \u1139;"
|
|
" '((' s p ')' <> \u113A;"
|
|
" '((' s h ')' <> \u113B;"
|
|
" '((' chs ')' <> \u113C;"
|
|
" '((' chs chs ')' <> \u113D;"
|
|
" '((' ces ')' <> \u113E;"
|
|
" '((' ces ces ')' <> \u113F;"
|
|
" '((' pan ')' <> \u1140;"
|
|
" '((' '' g ')' <> \u1141;"
|
|
" '((' '' d ')' <> \u1142;"
|
|
" '((' '' m ')' <> \u1143;"
|
|
" '((' '' b ')' <> \u1144;"
|
|
" '((' '' s ')' <> \u1145;"
|
|
" '((' '' pan ')' <> \u1146;"
|
|
" '((' '' '' ')' <> \u1147;"
|
|
" '((' '' j ')' <> \u1148;"
|
|
" '((' '' c ')' <> \u1149;"
|
|
" '((' '' t ')' <> \u114A;"
|
|
" '((' '' p ')' <> \u114B;"
|
|
" '((' yes ')' <> \u114C;"
|
|
" '((' j '' ')' <> \u114D;"
|
|
" '((' chc ')' <> \u114E;"
|
|
" '((' chc chc ')' <> \u114F;"
|
|
" '((' cec ')' <> \u1150;"
|
|
" '((' cec cec ')' <> \u1151;"
|
|
" '((' c k ')' <> \u1152;"
|
|
" '((' c h ')' <> \u1153;"
|
|
" '((' cch ')' <> \u1154;"
|
|
" '((' ceh ')' <> \u1155;"
|
|
" '((' p b ')' <> \u1156;"
|
|
" '((' p '' ')' <> \u1157;"
|
|
" '((' h h ')' <> \u1158;"
|
|
" '((' yr ')' <> \u1159;"
|
|
" '((' hcf ')' <> \u115F;"
|
|
" '(' ahjf ')' <> \u1160;" // must start with vowel, hence 'a' + hjf
|
|
" '(' a o ')' <> \u1176;"
|
|
" '(' a u ')' <> \u1177;"
|
|
" '(' ya o ')' <> \u1178;"
|
|
" '(' ya yo ')' <> \u1179;"
|
|
" '(' eo o ')' <> \u117A;"
|
|
" '(' eo u ')' <> \u117B;"
|
|
" '(' eo eu ')' <> \u117C;"
|
|
" '(' yeo o ')' <> \u117D;"
|
|
" '(' yeo u ')' <> \u117E;"
|
|
" '(' o eo ')' <> \u117F;"
|
|
" '(' o e ')' <> \u1180;"
|
|
" '(' o ye ')' <> \u1181;"
|
|
" '(' o o ')' <> \u1182;"
|
|
" '(' o u ')' <> \u1183;"
|
|
" '(' yo ya ')' <> \u1184;"
|
|
" '(' yo yae ')' <> \u1185;"
|
|
" '(' yo yeo ')' <> \u1186;"
|
|
" '(' yo o ')' <> \u1187;"
|
|
" '(' yo i ')' <> \u1188;"
|
|
" '(' u a ')' <> \u1189;"
|
|
" '(' u ae ')' <> \u118A;"
|
|
" '(' u eo eu ')' <> \u118B;"
|
|
" '(' u ye ')' <> \u118C;"
|
|
" '(' u u ')' <> \u118D;"
|
|
" '(' yu a ')' <> \u118E;"
|
|
" '(' yu eo ')' <> \u118F;"
|
|
" '(' yu e ')' <> \u1190;"
|
|
" '(' yu yeo ')' <> \u1191;"
|
|
" '(' yu ye ')' <> \u1192;"
|
|
" '(' yu u ')' <> \u1193;"
|
|
" '(' yu i ')' <> \u1194;"
|
|
" '(' eu u ')' <> \u1195;"
|
|
" '(' eu eu ')' <> \u1196;"
|
|
" '(' yi u ')' <> \u1197;"
|
|
" '(' i a ')' <> \u1198;"
|
|
" '(' i ya ')' <> \u1199;"
|
|
" '(' i o ')' <> \u119A;"
|
|
" '(' i u ')' <> \u119B;"
|
|
" '(' i eu ')' <> \u119C;"
|
|
" '(' i ar ')' <> \u119D;"
|
|
" '(' ar ')' <> \u119E;"
|
|
" '(' ar eo ')' <> \u119F;"
|
|
" '(' ar u ')' <> \u11A0;"
|
|
" '(' ar i ')' <> \u11A1;"
|
|
" '(' ar ar ')' <> \u11A2;"
|
|
" '(' g l '))' <> \u11C3;"
|
|
" '(' g s g '))' <> \u11C4;"
|
|
" '(' n g '))' <> \u11C5;"
|
|
" '(' n d '))' <> \u11C6;"
|
|
" '(' n s '))' <> \u11C7;"
|
|
" '(' n pan '))' <> \u11C8;"
|
|
" '(' n t '))' <> \u11C9;"
|
|
" '(' d g '))' <> \u11CA;"
|
|
" '(' d l '))' <> \u11CB;"
|
|
" '(' l g s '))' <> \u11CC;"
|
|
" '(' l n '))' <> \u11CD;"
|
|
" '(' l d '))' <> \u11CE;"
|
|
" '(' l d h '))' <> \u11CF;"
|
|
" '(' l l '))' <> \u11D0;"
|
|
" '(' l m g '))' <> \u11D1;"
|
|
" '(' l m s '))' <> \u11D2;"
|
|
" '(' l b s '))' <> \u11D3;"
|
|
" '(' l b h '))' <> \u11D4;"
|
|
" '(' l b ng '))' <> \u11D5;"
|
|
" '(' l s s '))' <> \u11D6;"
|
|
" '(' l pan '))' <> \u11D7;"
|
|
" '(' l k '))' <> \u11D8;"
|
|
" '(' l yr '))' <> \u11D9;"
|
|
" '(' m g '))' <> \u11DA;"
|
|
" '(' m l '))' <> \u11DB;"
|
|
" '(' m b '))' <> \u11DC;"
|
|
" '(' m s '))' <> \u11DD;"
|
|
" '(' m s s '))' <> \u11DE;"
|
|
" '(' m pan '))' <> \u11DF;"
|
|
" '(' m c '))' <> \u11E0;"
|
|
" '(' m h '))' <> \u11E1;"
|
|
" '(' m ng '))' <> \u11E2;"
|
|
" '(' b l '))' <> \u11E3;"
|
|
" '(' b p '))' <> \u11E4;"
|
|
" '(' b h '))' <> \u11E5;"
|
|
" '(' b ng '))' <> \u11E6;"
|
|
" '(' s g '))' <> \u11E7;"
|
|
" '(' s d '))' <> \u11E8;"
|
|
" '(' s l '))' <> \u11E9;"
|
|
" '(' s b '))' <> \u11EA;"
|
|
" '(' pan '))' <> \u11EB;"
|
|
" '(' ng g '))' <> \u11EC;"
|
|
" '(' ng g g '))' <> \u11ED;"
|
|
" '(' ng ng '))' <> \u11EE;"
|
|
" '(' ng k '))' <> \u11EF;"
|
|
" '(' yes '))' <> \u11F0;"
|
|
" '(' yes s '))' <> \u11F1;"
|
|
" '(' yes pan '))' <> \u11F2;"
|
|
" '(' p b '))' <> \u11F3;"
|
|
" '(' p ng '))' <> \u11F4;"
|
|
" '(' h n '))' <> \u11F5;"
|
|
" '(' h l '))' <> \u11F6;"
|
|
" '(' h m '))' <> \u11F7;"
|
|
" '(' h b '))' <> \u11F8;"
|
|
" '(' yr '))' <> \u11F9;"
|
|
|
|
|
|
// INITIALS
|
|
|
|
// Added }$vowel post context - liu
|
|
"bb}$vowel<>\u1108 } $vowel;"
|
|
"jj}$vowel<>\u110d } $vowel;"
|
|
"dd}$vowel<>\u1104 } $vowel;"
|
|
"t }$vowel<>\u1110 } $vowel;" // hangul choseong thieuth
|
|
"ss}$vowel<>\u110a } $vowel;" // hangul choseong ssangsios
|
|
"s }$vowel<>\u1109 } $vowel;" // hangul choseong sios
|
|
"p }$vowel<>\u1111 } $vowel;" // hangul choseong phieuph
|
|
"n }$vowel<>\u1102 } $vowel;" // hangul choseong nieun
|
|
"m }$vowel<>\u1106 } $vowel;" // hangul choseong mieum
|
|
"l }$vowel<>\u1105 } $vowel;" // hangul choseong rieul
|
|
"k }$vowel<>\u110f } $vowel;" // hangul choseong khieukh
|
|
"j }$vowel<>\u110c } $vowel;" // hangul choseong cieuc
|
|
"h }$vowel<>\u1112 } $vowel;" // hangul choseong hieuh
|
|
"gg}$vowel<>\u1101 } $vowel;" // hangul choseong ssangkiyeok
|
|
"g }$vowel<>\u1100 } $vowel;" // hangul choseong kiyeok
|
|
"d }$vowel<>\u1103 } $vowel;" // hangul choseong tikeut
|
|
"c }$vowel<>\u110e } $vowel;" // hangul choseong chieuch
|
|
"b }$vowel<>\u1107 } $vowel;" // hangul choseong pieup
|
|
|
|
// Take care of initial-compound medial - '(' $vowel - liu
|
|
"bb} '(' $vowel <> \u1108 } $comp_med;"
|
|
"jj} '(' $vowel <> \u110d } $comp_med;"
|
|
"dd} '(' $vowel <> \u1104 } $comp_med;"
|
|
"t } '(' $vowel <> \u1110 } $comp_med;" // hangul choseong thieuth
|
|
"ss} '(' $vowel <> \u110a } $comp_med;" // hangul choseong ssangsios
|
|
"s } '(' $vowel <> \u1109 } $comp_med;" // hangul choseong sios
|
|
"p } '(' $vowel <> \u1111 } $comp_med;" // hangul choseong phieuph
|
|
"n } '(' $vowel <> \u1102 } $comp_med;" // hangul choseong nieun
|
|
"m } '(' $vowel <> \u1106 } $comp_med;" // hangul choseong mieum
|
|
"l } '(' $vowel <> \u1105 } $comp_med;" // hangul choseong rieul
|
|
"k } '(' $vowel <> \u110f } $comp_med;" // hangul choseong khieukh
|
|
"j } '(' $vowel <> \u110c } $comp_med;" // hangul choseong cieuc
|
|
"h } '(' $vowel <> \u1112 } $comp_med;" // hangul choseong hieuh
|
|
"gg} '(' $vowel <> \u1101 } $comp_med;" // hangul choseong ssangkiyeok
|
|
"g } '(' $vowel <> \u1100 } $comp_med;" // hangul choseong kiyeok
|
|
"d } '(' $vowel <> \u1103 } $comp_med;" // hangul choseong tikeut
|
|
"c } '(' $vowel <> \u110e } $comp_med;" // hangul choseong chieuch
|
|
"b } '(' $vowel <> \u1107 } $comp_med;" // hangul choseong pieup
|
|
|
|
// Mark non-canonical initials with '[' - liu
|
|
"'[' bb <> \u1108;"
|
|
"'[' jj <> \u110d;"
|
|
"'[' dd <> \u1104;"
|
|
"'[' t <> \u1110;" // hangul choseong thieuth
|
|
"'[' ss <> \u110a;" // hangul choseong ssangsios
|
|
"'[' s <> \u1109;" // hangul choseong sios
|
|
"'[' p <> \u1111;" // hangul choseong phieuph
|
|
"'[' n <> \u1102;" // hangul choseong nieun
|
|
"'[' m <> \u1106;" // hangul choseong mieum
|
|
"'[' l <> \u1105;" // hangul choseong rieul
|
|
"'[' k <> \u110f;" // hangul choseong khieukh
|
|
"'[' j <> \u110c;" // hangul choseong cieuc
|
|
"'[' h <> \u1112;" // hangul choseong hieuh
|
|
"'[' gg <> \u1101;" // hangul choseong ssangkiyeok
|
|
"'[' g <> \u1100;" // hangul choseong kiyeok
|
|
"'[' d <> \u1103;" // hangul choseong tikeut
|
|
"'[' c <> \u110e;" // hangul choseong chieuch
|
|
"'[' b <> \u1107;" // hangul choseong pieup
|
|
|
|
|
|
// If we have gotten through to these rules, and we start with
|
|
// a consonant, then the remaining mappings would be to F,
|
|
// because we must have CC (or C<non-letter>), not CV.
|
|
// If we have F before us, then
|
|
// we would end up with FF, which is wrong. The simplest fix is
|
|
// to still make it an initial, but also insert an "u",
|
|
// so we end up with F, I, u, and then continue with the C
|
|
|
|
// special, only initial
|
|
// + "bb > \u1108\u116e;" // bb u hangul choseong ssangpieup
|
|
// + "jj > \u110d\u116e;" // jj u hangul choseong ssangcieuc
|
|
// + "dd > \u1104\u116e;" // dd u hangul choseong ssangtikeut
|
|
|
|
// + "$final{ t > \u1110\u116e;" // hangul choseong thieuth
|
|
// + "$final{ ss> \u110a\u116e;" // hangul choseong ssangsios
|
|
// + "$final{ s > \u1109\u116e;" // hangul choseong sios
|
|
// + "$final{ p > \u1111\u116e;" // hangul choseong phieuph
|
|
// + "$final{ n > \u1102\u116e;" // hangul choseong nieun
|
|
// + "$final{ m > \u1106\u116e;" // hangul choseong mieum
|
|
// + "$final{ l > \u1105\u116e;" // hangul choseong rieul
|
|
// + "$final{ k > \u110f\u116e;" // hangul choseong khieukh
|
|
// + "$final{ j > \u110c\u116e;" // hangul choseong cieuc
|
|
// + "$final{ h > \u1112\u116e;" // hangul choseong hieuh
|
|
// + "$final{ gg> \u1101\u116e;" // hangul choseong ssangkiyeok
|
|
// + "$final{ g > \u1100\u116e;" // hangul choseong kiyeok
|
|
// + "$final{ d > \u1103\u116e;" // hangul choseong tikeut
|
|
// + "$final{ c > \u110e\u116e;" // hangul choseong chieuch
|
|
// + "$final{ b > \u1107\u116e;" // hangul choseong pieup
|
|
|
|
// MEDIALS after INITIALS
|
|
|
|
// MEDIALS (vowels) after INITIALs (no IEUNG needed)
|
|
// Added left $initial context - liu
|
|
"$initial{ yu <> $INITIAL{ \u1172;" // hangul jungseong yu
|
|
"$initial{ yo <> $INITIAL{ \u116d;" // hangul jungseong yo
|
|
"$initial{ yi <> $INITIAL{ \u1174;" // hangul jungseong yi
|
|
"$initial{ yeo<> $INITIAL{ \u1167;" // hangul jungseong yeo
|
|
"$initial{ ye <> $INITIAL{ \u1168;" // hangul jungseong ye
|
|
"$initial{ yae<> $INITIAL{ \u1164;" // hangul jungseong yae
|
|
"$initial{ ya <> $INITIAL{ \u1163;" // hangul jungseong ya
|
|
"$initial{ wi <> $INITIAL{ \u1171;" // hangul jungseong wi
|
|
"$initial{ weo<> $INITIAL{ \u116f;" // hangul jungseong weo
|
|
"$initial{ we <> $INITIAL{ \u1170;" // hangul jungseong we
|
|
"$initial{ wae<> $INITIAL{ \u116b;" // hangul jungseong wae
|
|
"$initial{ wa <> $INITIAL{ \u116a;" // hangul jungseong wa
|
|
"$initial{ u <> $INITIAL{ \u116e;" // hangul jungseong u
|
|
"$initial{ oe <> $INITIAL{ \u116c;" // hangul jungseong oe
|
|
"$initial{ o <> $INITIAL{ \u1169;" // hangul jungseong o
|
|
"$initial{ i <> $INITIAL{ \u1175;" // hangul jungseong i
|
|
"$initial{ eu <> $INITIAL{ \u1173;" // hangul jungseong eu
|
|
"$initial{ eo <> $INITIAL{ \u1165;" // hangul jungseong eo
|
|
"$initial{ e <> $INITIAL{ \u1166;" // hangul jungseong e
|
|
"$initial{ ae <> $INITIAL{ \u1162;" // hangul jungseong ae
|
|
"$initial{ a <> $INITIAL{ \u1161;" // hangul jungseong a
|
|
|
|
// Handle non-canonical isolated jungseong - liu
|
|
"'~'yu <> \u1172;" // hangul jungseong yu
|
|
"'~'yo <> \u116d;" // hangul jungseong yo
|
|
"'~'yi <> \u1174;" // hangul jungseong yi
|
|
"'~'yeo<> \u1167;" // hangul jungseong yeo
|
|
"'~'ye <> \u1168;" // hangul jungseong ye
|
|
"'~'yae<> \u1164;" // hangul jungseong yae
|
|
"'~'ya <> \u1163;" // hangul jungseong ya
|
|
"'~'wi <> \u1171;" // hangul jungseong wi
|
|
"'~'weo<> \u116f;" // hangul jungseong weo
|
|
"'~'we <> \u1170;" // hangul jungseong we
|
|
"'~'wae<> \u116b;" // hangul jungseong wae
|
|
"'~'wa <> \u116a;" // hangul jungseong wa
|
|
"'~'u <> \u116e;" // hangul jungseong u
|
|
"'~'oe <> \u116c;" // hangul jungseong oe
|
|
"'~'o <> \u1169;" // hangul jungseong o
|
|
"'~'i <> \u1175;" // hangul jungseong i
|
|
"'~'eu <> \u1173;" // hangul jungseong eu
|
|
"'~'eo <> \u1165;" // hangul jungseong eo
|
|
"'~'e <> \u1166;" // hangul jungseong e
|
|
"'~'ae <> \u1162;" // hangul jungseong ae
|
|
"'~'a <> \u1161;" // hangul jungseong a
|
|
|
|
// MEDIALS (vowels) not after INITIALs
|
|
// Changed from > to <> - liu
|
|
"yu <> $ieung \u1172;" // hangul jungseong yu
|
|
"yo <> $ieung \u116d;" // hangul jungseong yo
|
|
"yi <> $ieung \u1174;" // hangul jungseong yi
|
|
"yeo<> $ieung \u1167;" // hangul jungseong yeo
|
|
"ye <> $ieung \u1168;" // hangul jungseong ye
|
|
"yae<> $ieung \u1164;" // hangul jungseong yae
|
|
"ya <> $ieung \u1163;" // hangul jungseong ya
|
|
"wi <> $ieung \u1171;" // hangul jungseong wi
|
|
"weo<> $ieung \u116f;" // hangul jungseong weo
|
|
"we <> $ieung \u1170;" // hangul jungseong we
|
|
"wae<> $ieung \u116b;" // hangul jungseong wae
|
|
"wa <> $ieung \u116a;" // hangul jungseong wa
|
|
"u <> $ieung \u116e;" // hangul jungseong u
|
|
"oe <> $ieung \u116c;" // hangul jungseong oe
|
|
"o <> $ieung \u1169;" // hangul jungseong o
|
|
"i <> $ieung \u1175;" // hangul jungseong i
|
|
"eu <> $ieung \u1173;" // hangul jungseong eu
|
|
"eo <> $ieung \u1165;" // hangul jungseong eo
|
|
"e <> $ieung \u1166;" // hangul jungseong e
|
|
"ae <> $ieung \u1162;" // hangul jungseong ae
|
|
"a <> $ieung \u1161;" // hangul jungseong a
|
|
|
|
"\\` <> $ieung;"
|
|
// Moved down so as not to mask above rules - liu
|
|
// + "'' < $consonant{$ieung;" // insert a break between any consonant and the empty consonant.
|
|
// + "$medial{}$vowel<>$ieung;" // HANGUL CHOSEONG IEUNG
|
|
|
|
|
|
// FINALS
|
|
|
|
" '' t <> $consonant { \u11c0;" // hangul jongseong thieuth
|
|
" '' ss <> $consonant { \u11bb;" // hangul jongseong ssangsios
|
|
" '' s <> $consonant { \u11ba;" // hangul jongseong sios
|
|
" '' p <> $consonant { \u11c1;" // hangul jongseong phieuph
|
|
" '' nj <> $consonant { \u11ac;" // hangul jongseong nieun-cieuc
|
|
" '' nh <> $consonant { \u11ad;" // hangul jongseong nieun-hieuh
|
|
" '' ng <> $consonant { \u11bc;" // hangul jongseong ieung
|
|
" '' n <> $consonant { \u11ab;" // hangul jongseong nieun
|
|
" '' m <> $consonant { \u11b7;" // hangul jongseong mieum
|
|
" '' lt <> $consonant { \u11b4;" // hangul jongseong rieul-thieuth
|
|
" '' ls <> $consonant { \u11b3;" // hangul jongseong rieul-sios
|
|
" '' lp <> $consonant { \u11b5;" // hangul jongseong rieul-phieuph
|
|
" '' lm <> $consonant { \u11b1;" // hangul jongseong rieul-mieum
|
|
" '' lh <> $consonant { \u11b6;" // hangul jongseong rieul-hieuh
|
|
" '' lg <> $consonant { \u11b0;" // hangul jongseong rieul-kiyeok
|
|
" '' lb <> $consonant { \u11b2;" // hangul jongseong rieul-pieup
|
|
" '' l <> $consonant { \u11af;" // hangul jongseong rieul
|
|
" '' k <> $consonant { \u11bf;" // hangul jongseong khieukh
|
|
" '' j <> $consonant { \u11bd;" // hangul jongseong cieuc
|
|
" '' h <> $consonant { \u11c2;" // hangul jongseong hieuh
|
|
" '' gs <> $consonant { \u11aa;" // hangul jongseong kiyeok-sios
|
|
" '' gg <> $consonant { \u11a9;" // hangul jongseong ssangkiyeok
|
|
" '' g <> $consonant { \u11a8;" // hangul jongseong kiyeok
|
|
" '' d <> $consonant { \u11ae;" // hangul jongseong tikeut
|
|
" '' c <> $consonant { \u11be;" // hangul jongseong chieuch
|
|
" '' bs <> $consonant { \u11b9;" // hangul jongseong pieup-sios
|
|
" '' b <> $consonant { \u11b8;" // hangul jongseong pieup
|
|
|
|
"t ']'> \u11c0;" // hangul jongseong thieuth
|
|
"ss ']'> \u11bb;" // hangul jongseong ssangsios
|
|
"s ']'> \u11ba;" // hangul jongseong sios
|
|
"p ']'> \u11c1;" // hangul jongseong phieuph
|
|
"nj ']'> \u11ac;" // hangul jongseong nieun-cieuc
|
|
"nh ']'> \u11ad;" // hangul jongseong nieun-hieuh
|
|
"ng ']'> \u11bc;" // hangul jongseong ieung
|
|
"n ']'> \u11ab;" // hangul jongseong nieun
|
|
"m ']'> \u11b7;" // hangul jongseong mieum
|
|
"lt ']'> \u11b4;" // hangul jongseong rieul-thieuth
|
|
"ls ']'> \u11b3;" // hangul jongseong rieul-sios
|
|
"lp ']'> \u11b5;" // hangul jongseong rieul-phieuph
|
|
"lm ']'> \u11b1;" // hangul jongseong rieul-mieum
|
|
"lh ']'> \u11b6;" // hangul jongseong rieul-hieuh
|
|
"lg ']'> \u11b0;" // hangul jongseong rieul-kiyeok
|
|
"lb ']'> \u11b2;" // hangul jongseong rieul-pieup
|
|
"l ']'> \u11af;" // hangul jongseong rieul
|
|
"k ']'> \u11bf;" // hangul jongseong khieukh
|
|
"j ']'> \u11bd;" // hangul jongseong cieuc
|
|
"h ']'> \u11c2;" // hangul jongseong hieuh
|
|
"gs ']'> \u11aa;" // hangul jongseong kiyeok-sios
|
|
"gg ']'> \u11a9;" // hangul jongseong ssangkiyeok
|
|
"g ']'> \u11a8;" // hangul jongseong kiyeok
|
|
"d ']'> \u11ae;" // hangul jongseong tikeut
|
|
"c ']'> \u11be;" // hangul jongseong chieuch
|
|
"bs ']'> \u11b9;" // hangul jongseong pieup-sios
|
|
"b ']'> \u11b8;" // hangul jongseong pieup
|
|
|
|
"$medial{ t <> $MEDIAL{ \u11c0;" // hangul jongseong thieuth
|
|
"$medial{ ss <> $MEDIAL{ \u11bb;" // hangul jongseong ssangsios
|
|
"$medial{ s <> $MEDIAL{ \u11ba;" // hangul jongseong sios
|
|
"$medial{ p <> $MEDIAL{ \u11c1;" // hangul jongseong phieuph
|
|
"$medial{ nj <> $MEDIAL{ \u11ac;" // hangul jongseong nieun-cieuc
|
|
"$medial{ nh <> $MEDIAL{ \u11ad;" // hangul jongseong nieun-hieuh
|
|
"$medial{ ng <> $MEDIAL{ \u11bc;" // hangul jongseong ieung
|
|
"$medial{ n <> $MEDIAL{ \u11ab;" // hangul jongseong nieun
|
|
"$medial{ m <> $MEDIAL{ \u11b7;" // hangul jongseong mieum
|
|
"$medial{ lt <> $MEDIAL{ \u11b4;" // hangul jongseong rieul-thieuth
|
|
"$medial{ ls <> $MEDIAL{ \u11b3;" // hangul jongseong rieul-sios
|
|
"$medial{ lp <> $MEDIAL{ \u11b5;" // hangul jongseong rieul-phieuph
|
|
"$medial{ lm <> $MEDIAL{ \u11b1;" // hangul jongseong rieul-mieum
|
|
"$medial{ lh <> $MEDIAL{ \u11b6;" // hangul jongseong rieul-hieuh
|
|
"$medial{ lg <> $MEDIAL{ \u11b0;" // hangul jongseong rieul-kiyeok
|
|
"$medial{ lb <> $MEDIAL{ \u11b2;" // hangul jongseong rieul-pieup
|
|
"$medial{ l <> $MEDIAL{ \u11af;" // hangul jongseong rieul
|
|
"$medial{ k <> $MEDIAL{ \u11bf;" // hangul jongseong khieukh
|
|
"$medial{ j <> $MEDIAL{ \u11bd;" // hangul jongseong cieuc
|
|
"$medial{ h <> $MEDIAL{ \u11c2;" // hangul jongseong hieuh
|
|
"$medial{ gs <> $MEDIAL{ \u11aa;" // hangul jongseong kiyeok-sios
|
|
"$medial{ gg <> $MEDIAL{ \u11a9;" // hangul jongseong ssangkiyeok
|
|
"$medial{ g <> $MEDIAL{ \u11a8;" // hangul jongseong kiyeok
|
|
"$medial{ d <> $MEDIAL{ \u11ae;" // hangul jongseong tikeut
|
|
"$medial{ c <> $MEDIAL{ \u11be;" // hangul jongseong chieuch
|
|
"$medial{ bs <> $MEDIAL{ \u11b9;" // hangul jongseong pieup-sios
|
|
"$medial{ b <> $MEDIAL{ \u11b8;" // hangul jongseong pieup
|
|
|
|
"t ']'< \u11c0;" // hangul jongseong thieuth
|
|
"ss ']'< \u11bb;" // hangul jongseong ssangsios
|
|
"s ']'< \u11ba;" // hangul jongseong sios
|
|
"p ']'< \u11c1;" // hangul jongseong phieuph
|
|
"nj ']'< \u11ac;" // hangul jongseong nieun-cieuc
|
|
"nh ']'< \u11ad;" // hangul jongseong nieun-hieuh
|
|
"ng ']'< \u11bc;" // hangul jongseong ieung
|
|
"n ']'< \u11ab;" // hangul jongseong nieun
|
|
"m ']'< \u11b7;" // hangul jongseong mieum
|
|
"lt ']'< \u11b4;" // hangul jongseong rieul-thieuth
|
|
"ls ']'< \u11b3;" // hangul jongseong rieul-sios
|
|
"lp ']'< \u11b5;" // hangul jongseong rieul-phieuph
|
|
"lm ']'< \u11b1;" // hangul jongseong rieul-mieum
|
|
"lh ']'< \u11b6;" // hangul jongseong rieul-hieuh
|
|
"lg ']'< \u11b0;" // hangul jongseong rieul-kiyeok
|
|
"lb ']'< \u11b2;" // hangul jongseong rieul-pieup
|
|
"l ']'< \u11af;" // hangul jongseong rieul
|
|
"k ']'< \u11bf;" // hangul jongseong khieukh
|
|
"j ']'< \u11bd;" // hangul jongseong cieuc
|
|
"h ']'< \u11c2;" // hangul jongseong hieuh
|
|
"gs ']'< \u11aa;" // hangul jongseong kiyeok-sios
|
|
"gg ']'< \u11a9;" // hangul jongseong ssangkiyeok
|
|
"g ']'< \u11a8;" // hangul jongseong kiyeok
|
|
"d ']'< \u11ae;" // hangul jongseong tikeut
|
|
"c ']'< \u11be;" // hangul jongseong chieuch
|
|
"bs ']'< \u11b9;" // hangul jongseong pieup-sios
|
|
"b ']'< \u11b8;" // hangul jongseong pieup
|
|
|
|
// extra English letters
|
|
|
|
// + "z > |s;"
|
|
// //{ + "Z > |s;" } masked
|
|
// + "x > |ks;"
|
|
// + "X > |ks;"
|
|
// + "v > |b;"
|
|
// + "V > |b;"
|
|
// + "r > |l;"
|
|
// + "R > |l;"
|
|
// + "q > |k;"
|
|
// + "Q > |k;"
|
|
// + "f > |p;"
|
|
// + "F > |p;"
|
|
// //{ + "c > |k;" } masked
|
|
// + "C > |k;"
|
|
|
|
// + "y > \u1172;" // hangul jungseong yu
|
|
// + "w > \u1171;" // hangul jungseong wi
|
|
}
|
|
}
|