2001-10-26 05:41:16 +00:00
|
|
|
|
// -*- Coding: utf-8; -*-
|
|
|
|
|
//--------------------------------------------------------------------
|
|
|
|
|
// Copyright (c) 1999-2001, International Business Machines
|
|
|
|
|
// Corporation and others. All Rights Reserved.
|
|
|
|
|
//--------------------------------------------------------------------
|
|
|
|
|
// THIS IS A MACHINE-GENERATED FILE
|
2001-12-03 18:30:30 +00:00
|
|
|
|
// Tool: dumpicurules.bat
|
2001-10-26 05:41:16 +00:00
|
|
|
|
// Source: ../../text/resources/Transliterator_Latin_Katakana.txt
|
2001-12-03 18:30:30 +00:00
|
|
|
|
// Date: Mon Dec 3 10:28:39 2001
|
2001-10-26 05:41:16 +00:00
|
|
|
|
//--------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
// Latin_Katakana
|
|
|
|
|
|
|
|
|
|
translit_Latin_Katakana {
|
|
|
|
|
Rule {
|
|
|
|
|
//--------------------------------------------------------------------
|
|
|
|
|
// Copyright (c) 1999-2001, International Business Machines
|
|
|
|
|
// Corporation and others. All Rights Reserved.
|
|
|
|
|
//--------------------------------------------------------------------
|
|
|
|
|
// $Source: /xsrl/Nsvn/icu/icu/data/Attic/translit_Latin_Katakana.txt,v $
|
2001-12-03 18:30:30 +00:00
|
|
|
|
// $Date: 2001/12/03 18:30:30 $
|
|
|
|
|
// $Revision: 1.7 $
|
2001-10-26 05:41:16 +00:00
|
|
|
|
//--------------------------------------------------------------------
|
|
|
|
|
|
2001-11-22 05:50:51 +00:00
|
|
|
|
// note: a global filter is more efficient, but MUST include all source chars
|
2001-11-30 21:24:16 +00:00
|
|
|
|
//:: [\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9F [:Latin:][:Katakana:] [:nonspacing mark:]] ;
|
|
|
|
|
// MINIMAL FILTER GENERATED FOR: Latin-Katakana
|
2001-12-01 04:30:28 +00:00
|
|
|
|
//## WARNING -- must add width filter, both here and below!!! ###
|
|
|
|
|
":: [[\u1100-\u1112\u111A\u1121\u1160-\u1175\u11AA\u11AC-\u11AD\u11B0-\u11B5\u2190-\u2193\u2502\u25A0\u25CB\u3000-\u3002\u300C-\u300D\u3099-\u309A\u30A1-\u30ED\u30EF\u30F2-\u30F4\u30F7\u30FA-\u30FC\uFF01-\uFF5E\uFFE0-\uFFE6][',.A-Za-z~\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0304\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1FB1\u1FB9\u1FD1\u1FD9\u1FE1\u1FE9\u212A-\u212B]] ;"
|
2001-11-22 05:50:51 +00:00
|
|
|
|
|
2001-12-03 18:30:30 +00:00
|
|
|
|
":: [:Latin:] fullwidth-halfwidth ();"
|
2001-12-01 04:30:28 +00:00
|
|
|
|
":: NFD (NFC);"
|
2001-11-19 22:23:36 +00:00
|
|
|
|
":: Lower ();" // whenever transliterating from cased to uncased script, include this
|
|
|
|
|
// :: NFD () ; # this would catch the odd cases where a lowercase is not in NFD, but none are important for Japanese
|
2001-10-26 05:41:16 +00:00
|
|
|
|
|
|
|
|
|
// Uses modified Hepburn. Small changes to make unambiguous.
|
|
|
|
|
|
|
|
|
|
// | Kunrei-shiki: Hepburn/MHepburn
|
|
|
|
|
// | ------------------------------
|
|
|
|
|
// | si: shi
|
|
|
|
|
// | si ~ya: sha
|
|
|
|
|
// | si ~yu: shu
|
|
|
|
|
// | si ~yo: sho
|
|
|
|
|
// | zi: ji
|
|
|
|
|
// | zi ~ya: ja
|
|
|
|
|
// | zi ~yu: ju
|
|
|
|
|
// | zi ~yo: jo
|
|
|
|
|
// | ti: chi
|
|
|
|
|
// | ti ~ya: cha
|
|
|
|
|
// | ti ~yu: chu
|
|
|
|
|
// | ti ~yo: cho
|
|
|
|
|
// | tu: tsu
|
|
|
|
|
// | di: ji/dji
|
|
|
|
|
// | du: zu/dzu
|
|
|
|
|
// | hu: fu
|
|
|
|
|
|
|
|
|
|
// | For foreign words:
|
|
|
|
|
// | -----------------
|
|
|
|
|
// | se ~i si
|
|
|
|
|
// | si ~e she
|
|
|
|
|
// |
|
|
|
|
|
// | ze ~i zi
|
|
|
|
|
// | zi ~e je
|
|
|
|
|
// |
|
|
|
|
|
// | te ~i ti
|
|
|
|
|
// | ti ~e che
|
|
|
|
|
// | te ~u tu
|
|
|
|
|
// |
|
|
|
|
|
// | de ~i di
|
|
|
|
|
// | de ~u du
|
|
|
|
|
// | de ~i di
|
|
|
|
|
// |
|
|
|
|
|
// | he ~u: hu
|
|
|
|
|
// | hu ~a fa
|
|
|
|
|
// | hu ~i fi
|
|
|
|
|
// | hu ~e fe
|
|
|
|
|
// | hu ~o fo
|
|
|
|
|
|
|
|
|
|
// Most small forms are generated, but if necessary
|
|
|
|
|
// explicit small forms are given with ~a, ~ya, etc.
|
|
|
|
|
|
|
|
|
|
//------------------------------------------------------
|
|
|
|
|
// Variables
|
|
|
|
|
|
|
|
|
|
"$vowel = [aeiou] ;"
|
2001-11-19 22:23:36 +00:00
|
|
|
|
"$consonant = [bcdfghjklmnpqrstvwxyz] ;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
"$macron = \u0304 ;"
|
|
|
|
|
|
|
|
|
|
// Variables used for doubled-consonants with tsu
|
|
|
|
|
|
|
|
|
|
"$kana = [\u3041-\u3094] ;"
|
|
|
|
|
|
|
|
|
|
"$voice = [\u3099\u309B];"
|
|
|
|
|
"$semivoice = [\u309A\u309C];"
|
|
|
|
|
|
|
|
|
|
"$k_start = [カキクケコかきくけこ] ;"
|
|
|
|
|
|
|
|
|
|
"$s_start = [サシスセソさしすせそ] ;"
|
|
|
|
|
|
|
|
|
|
"$j_start = [シし] $voice ;"
|
|
|
|
|
|
|
|
|
|
"$t_start = [タチツテトたちつてと] ;"
|
|
|
|
|
|
|
|
|
|
"$n_start = [ナニヌネノンなにぬねの] ;"
|
|
|
|
|
|
|
|
|
|
"$h_start = [ハヒヘホはひへほ] ;"
|
|
|
|
|
"$f_start = [フふ] ;"
|
|
|
|
|
|
|
|
|
|
"$m_start = [マミムメモまみむめも] ;"
|
|
|
|
|
|
|
|
|
|
"$y_start = [ヤユヨやゆよ] ;"
|
|
|
|
|
|
|
|
|
|
"$r_start = [ラリルレロらりるれろ] ;"
|
|
|
|
|
|
|
|
|
|
"$w_start = [ワヰヱヲわゐゑを] ;"
|
|
|
|
|
|
|
|
|
|
"$v_start = [ワヰヱヲ]゙ ;"
|
|
|
|
|
|
|
|
|
|
// if ン is followed by $n_quoter, then it needs an
|
|
|
|
|
// apostrophe after its romaji form to disambiguate it.
|
|
|
|
|
// e.g., ン ア != ナ, so represent as "n'a", not "na".
|
|
|
|
|
|
|
|
|
|
"$n_quoter = [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ;"
|
|
|
|
|
|
|
|
|
|
"$small_y = [ャィュェョ] ;"
|
|
|
|
|
|
|
|
|
|
"$iteration = \u309D ;"
|
|
|
|
|
|
|
|
|
|
//------------------------------------------------------
|
|
|
|
|
// katakana rules
|
|
|
|
|
|
|
|
|
|
// Punctuation
|
|
|
|
|
|
|
|
|
|
"'.' <> 。;"
|
|
|
|
|
"',' <> 、;"
|
|
|
|
|
// ' ' } [a-z] > ; # delete spaces before latin
|
|
|
|
|
// ' ' < [^' '\u30A0-\u30ff] {} ['\u30A0-\u30ff] ; #insert spaces before katakana
|
|
|
|
|
|
|
|
|
|
// Iteration Mark
|
|
|
|
|
// Copy previous letter & marks
|
|
|
|
|
|
|
|
|
|
// TODO
|
|
|
|
|
// | $1 $1 < ($kana [[:M:]$voice$semivoice]?) $iteration
|
|
|
|
|
|
|
|
|
|
// Specials for katakana -- not shared with hiragana
|
|
|
|
|
|
|
|
|
|
"va <> ヷ ;"
|
|
|
|
|
"vi <> ヸ ;"
|
|
|
|
|
"ve <> ヹ ;"
|
|
|
|
|
"vo <> ヺ ;"
|
|
|
|
|
"'~ka' <> ヵ ;"
|
|
|
|
|
"'~ke' <> ヶ ;"
|
|
|
|
|
|
|
|
|
|
// ~~~ begin shared rules ~~~
|
|
|
|
|
|
|
|
|
|
//special
|
|
|
|
|
|
|
|
|
|
"ya < '~'ャ;"
|
|
|
|
|
"yi < '~'ィ ;"
|
|
|
|
|
"yu < '~'ュ;"
|
|
|
|
|
"ye < '~'ェ;"
|
|
|
|
|
"yo < '~'ョ;"
|
|
|
|
|
|
|
|
|
|
//normal
|
|
|
|
|
|
|
|
|
|
"a <> ア ;"
|
|
|
|
|
|
|
|
|
|
"b | '~' < ヒ ゙} $small_y ;"
|
|
|
|
|
"by } $vowel > ビ | '~y' ;"
|
|
|
|
|
|
|
|
|
|
"ba <> バ ;"
|
|
|
|
|
"bi <> ビ ;"
|
|
|
|
|
"bu <> ブ ;"
|
|
|
|
|
"be <> ベ ;"
|
|
|
|
|
"bo <> ボ ;"
|
|
|
|
|
|
|
|
|
|
"c } i > | s ;"
|
|
|
|
|
"c } e > | s ;"
|
|
|
|
|
|
|
|
|
|
"da <> ダ ;"
|
|
|
|
|
"di <> ディ ;"
|
|
|
|
|
"du <> デゥ ;"
|
|
|
|
|
"de <> デ ;"
|
|
|
|
|
"do <> ド ;"
|
|
|
|
|
"dzu <> ヅ ;"
|
|
|
|
|
"dja < ヂャ ;"
|
|
|
|
|
"dji'~i' < ヂィ ;" // liu
|
|
|
|
|
"dju < ヂュ ;"
|
|
|
|
|
"dje < ヂェ ;"
|
|
|
|
|
"djo < ヂョ ;"
|
|
|
|
|
"dji <> ヂ ;"
|
|
|
|
|
"dj } $vowel > ヂ | '~y' ;"
|
|
|
|
|
|
|
|
|
|
// TODO: QUESTION: use ĵĴżŻ instead of dj, dz
|
|
|
|
|
|
|
|
|
|
"cha < チャ ;"
|
|
|
|
|
"chi'~i' < チィ ;" // liu
|
|
|
|
|
"chu < チュ ;"
|
|
|
|
|
"che < チェ ;"
|
|
|
|
|
"cho < チョ ;"
|
|
|
|
|
"chi <> チ ;"
|
|
|
|
|
"ch } $vowel > チ | '~y' ;"
|
|
|
|
|
|
|
|
|
|
"e <> エ ;"
|
|
|
|
|
|
|
|
|
|
"g | '~' < ギ} $small_y ;"
|
|
|
|
|
"gy } $vowel > ギ | '~y' ;"
|
|
|
|
|
|
|
|
|
|
"ga <> ガ ;"
|
|
|
|
|
"gi <> ギ ;"
|
|
|
|
|
"gu <> グ ;"
|
|
|
|
|
"ge <> ゲ ;"
|
|
|
|
|
"go <> ゴ ;"
|
|
|
|
|
|
|
|
|
|
"i <> イ ;"
|
|
|
|
|
|
|
|
|
|
// j } $vowel > ジ | '~y' ;
|
|
|
|
|
|
|
|
|
|
"ja <> ジャ ;"
|
|
|
|
|
"ji'~i' < ジィ ;" // liu
|
|
|
|
|
"ju <> ジュ ;"
|
|
|
|
|
"je <> ジェ ;"
|
|
|
|
|
"jo <> ジョ ;"
|
|
|
|
|
"ji <> ジ ;"
|
|
|
|
|
|
|
|
|
|
"k | '~' < キ} $small_y ;"
|
|
|
|
|
"ky } $vowel > キ | '~y' ;"
|
|
|
|
|
|
|
|
|
|
"ka <> カ ;"
|
|
|
|
|
"ki <> キ ;"
|
|
|
|
|
"ku <> ク ;"
|
|
|
|
|
"ke <> ケ ;"
|
|
|
|
|
"ko <> コ ;"
|
|
|
|
|
|
|
|
|
|
"m | '~' < ミ} $small_y ;"
|
|
|
|
|
"my } $vowel > ミ | '~y' ;"
|
|
|
|
|
|
|
|
|
|
"ma <> マ ;"
|
|
|
|
|
"mi <> ミ ;"
|
|
|
|
|
"mu <> ム ;"
|
|
|
|
|
"me <> メ ;"
|
|
|
|
|
"mo <> モ ;"
|
|
|
|
|
|
|
|
|
|
"m } [pbfv] > ン ;"
|
|
|
|
|
|
|
|
|
|
"n | '~' < ニ } $small_y ;"
|
|
|
|
|
"ny } $vowel > ニ | '~y' ;"
|
|
|
|
|
|
|
|
|
|
"na <> ナ ;"
|
|
|
|
|
"ni <> ニ ;"
|
|
|
|
|
"nu <> ヌ ;"
|
|
|
|
|
"ne <> ネ ;"
|
|
|
|
|
"no <> ノ ;"
|
|
|
|
|
|
|
|
|
|
"o <> オ ;"
|
|
|
|
|
|
|
|
|
|
"p | '~' < ピ } $small_y ;"
|
|
|
|
|
"py } $vowel > ピ | '~y' ;"
|
|
|
|
|
|
|
|
|
|
"pa <> パ ;"
|
|
|
|
|
"pi <> ピ ;"
|
|
|
|
|
"pu <> プ ;"
|
|
|
|
|
"pe <> ペ ;"
|
|
|
|
|
"po <> ポ ;"
|
|
|
|
|
|
|
|
|
|
"h | '~' < ヒ } $small_y ;"
|
|
|
|
|
"hy } $vowel > ヒ | '~y' ;"
|
|
|
|
|
|
|
|
|
|
"ha <> ハ ;"
|
|
|
|
|
"hi <> ヒ ;"
|
|
|
|
|
"hu <> ヘゥ ;"
|
|
|
|
|
"he <> ヘ ;"
|
|
|
|
|
"ho <> ホ ;"
|
|
|
|
|
|
|
|
|
|
// f | '~' < フ } $small_y ;
|
|
|
|
|
// f } $vowel > フ | '~' ;
|
|
|
|
|
|
|
|
|
|
"fa <> ファ ;"
|
|
|
|
|
"fi <> フィ ;"
|
|
|
|
|
"fe <> フェ ;"
|
|
|
|
|
"fo <> フォ ;"
|
|
|
|
|
"fu <> フ ;"
|
|
|
|
|
|
|
|
|
|
"r | '~' < リ } $small_y ;"
|
|
|
|
|
"ry } $vowel > リ | '~y' ;"
|
|
|
|
|
|
|
|
|
|
"ra <> ラ ;"
|
|
|
|
|
"ri <> リ ;"
|
|
|
|
|
"ru <> ル ;"
|
|
|
|
|
"re <> レ ;"
|
|
|
|
|
"ro <> ロ ;"
|
|
|
|
|
|
|
|
|
|
"za <> ザ ;"
|
|
|
|
|
"zi <> ゼィ ;"
|
|
|
|
|
"zu <> ズ ;"
|
|
|
|
|
"ze <> ゼ ;"
|
|
|
|
|
"zo <> ゾ ;"
|
|
|
|
|
|
|
|
|
|
"sa <> サ ;"
|
|
|
|
|
"si <> セィ ;"
|
|
|
|
|
"su <> ス ;"
|
|
|
|
|
"se <> セ ;"
|
|
|
|
|
"so <> ソ ;"
|
|
|
|
|
|
|
|
|
|
"sha < シャ ;"
|
|
|
|
|
"shi'~i' < シィ ;" // liu
|
|
|
|
|
"shu < シュ ;"
|
|
|
|
|
"she < シェ ;"
|
|
|
|
|
"sho < ショ ;"
|
|
|
|
|
"shi <> シ ;"
|
|
|
|
|
"sh } $vowel > シ | '~y' ;"
|
|
|
|
|
|
|
|
|
|
"ta <> タ ;"
|
|
|
|
|
"ti <> ティ ;"
|
|
|
|
|
"tu <> テゥ ;"
|
|
|
|
|
"te <> テ ;"
|
|
|
|
|
"to <> ト ;"
|
|
|
|
|
|
|
|
|
|
"tsu <> ツ ;"
|
|
|
|
|
|
|
|
|
|
// v } $vowel > ヴ | '~' ;
|
|
|
|
|
|
|
|
|
|
//'v~a' < ヴァ ; # liu
|
|
|
|
|
//'v~i' < ヴィ ; # liu
|
|
|
|
|
//'v~e' < ヴェ ; # liu
|
|
|
|
|
//'v~o' < ヴォ ; # liu
|
|
|
|
|
"vu <> ヴ ;"
|
|
|
|
|
|
|
|
|
|
"u <> ウ ;"
|
|
|
|
|
|
|
|
|
|
// w } $vowel > ウ | '~' ;
|
|
|
|
|
|
|
|
|
|
"wa <> ワ ;"
|
|
|
|
|
"wi <> ヰ ;"
|
|
|
|
|
"wu > ウ ;"
|
|
|
|
|
"we <> ヱ ;"
|
|
|
|
|
"wo <> ヲ ;"
|
|
|
|
|
|
|
|
|
|
"ya <> ヤ ;"
|
|
|
|
|
"yi > イ ;"
|
|
|
|
|
"yu <> ユ ;"
|
|
|
|
|
"ye > エ ;"
|
|
|
|
|
"yo <> ヨ ;"
|
|
|
|
|
|
|
|
|
|
// double consonants
|
|
|
|
|
|
|
|
|
|
//specials
|
|
|
|
|
"s } sh > ッ ;"
|
|
|
|
|
"t } ch > ッ ;"
|
|
|
|
|
|
|
|
|
|
//voiced
|
|
|
|
|
|
|
|
|
|
"j } j <> ッ } $j_start ;"
|
|
|
|
|
"b } b <> ッ } [$h_start$f_start] $voice;"
|
|
|
|
|
"d } d <> ッ } $t_start $voice;"
|
|
|
|
|
"g } g <> ッ } $k_start $voice;"
|
|
|
|
|
"p } p <> ッ } [$h_start$f_start] $semivoice;"
|
|
|
|
|
// v } v <> ッ } [ワヰウヱヲう] $voice ;
|
|
|
|
|
"z } z <> ッ } $s_start $voice;"
|
|
|
|
|
"v } v <> ッ } $v_start;"
|
|
|
|
|
|
|
|
|
|
// normal
|
|
|
|
|
|
|
|
|
|
"k } k <> ッ } $k_start ;"
|
|
|
|
|
"m } m <> ッ } $m_start ;"
|
|
|
|
|
"n } n <> ッ } $n_start ;"
|
|
|
|
|
"h } h <> ッ } $h_start ;"
|
|
|
|
|
"f } f <> ッ } $f_start ;"
|
|
|
|
|
"r } r <> ッ } $r_start ;"
|
|
|
|
|
"t } t <> ッ } $t_start ;"
|
|
|
|
|
"s } s <> ッ } $s_start ;"
|
|
|
|
|
|
|
|
|
|
"w } w <> ッ } $w_start;"
|
|
|
|
|
"y } y <> ッ } $y_start;"
|
|
|
|
|
|
|
|
|
|
// completeness
|
|
|
|
|
"x } x > ッ ;"
|
|
|
|
|
"c } k > ッ ;"
|
|
|
|
|
"c } c > ッ ;"
|
|
|
|
|
"c } q > ッ ;"
|
|
|
|
|
"l } l > ッ ;"
|
|
|
|
|
"q } q > ッ ;"
|
|
|
|
|
// y } y > ッ ;
|
|
|
|
|
// w } w > ッ ;
|
|
|
|
|
|
|
|
|
|
// prolonged vowel mark. this indicates a doubling of
|
|
|
|
|
// the preceding vowel sound
|
|
|
|
|
|
|
|
|
|
//a < a { ー ; # liu
|
|
|
|
|
//e < e { ー ; # liu
|
|
|
|
|
//i < i { ー ; # liu
|
|
|
|
|
//o < o { ー ; # liu
|
|
|
|
|
//u < u { ー ; # liu
|
|
|
|
|
|
|
|
|
|
"$macron <> ー ;"
|
|
|
|
|
|
|
|
|
|
// small forms
|
|
|
|
|
|
|
|
|
|
"'~a' <> ァ ;"
|
|
|
|
|
"'~i' <> ィ ;"
|
|
|
|
|
"'~u' <> ゥ ;"
|
|
|
|
|
"'~e' <> ェ ;"
|
|
|
|
|
"'~o' <> ォ ;"
|
|
|
|
|
"'~tsu' <> ッ ;"
|
|
|
|
|
"'~wa' <> ヮ ;"
|
|
|
|
|
"'~ya' <> ャ ;"
|
|
|
|
|
"'~yi' > ィ ;"
|
|
|
|
|
"'~yu' <> ュ ;"
|
|
|
|
|
"'~ye' > ェ ;"
|
|
|
|
|
"'~yo' <> ョ ;"
|
|
|
|
|
|
2001-11-19 22:23:36 +00:00
|
|
|
|
// iteration marks
|
|
|
|
|
// TODO: make more accurate
|
|
|
|
|
|
|
|
|
|
"j $1 < sh (y* $vowel) {ヽ$voice ;"
|
|
|
|
|
"dj $1 < ch (y* $vowel) {ヽ$voice ;"
|
|
|
|
|
"dz $1 < ts (y* $vowel) {ヽ$voice ;"
|
|
|
|
|
|
|
|
|
|
"g $1 < k (y* $vowel) {ヽ$voice ;"
|
|
|
|
|
"z $1 < s (y* $vowel) {ヽ$voice ;"
|
|
|
|
|
"d $1 < t (y* $vowel) {ヽ$voice ;"
|
|
|
|
|
"h $1 < b (y* $vowel) {ヽ$voice ;"
|
|
|
|
|
"v $1 < w (y* $vowel) {ヽ$voice ;"
|
|
|
|
|
|
|
|
|
|
"sh $1 < sh (y* $vowel) {ヽ$voice ;"
|
|
|
|
|
"j $1 < j (y* $vowel) {ヽ$voice ;"
|
|
|
|
|
"ch $1 < ch (y* $vowel) {ヽ$voice ;"
|
|
|
|
|
"dj $1 < dj(y* $vowel) {ヽ$voice ;"
|
|
|
|
|
"ts $1 < ts (y* $vowel) {ヽ$voice ;"
|
|
|
|
|
"dz $1 < dz (y* $vowel) {ヽ$voice ;"
|
|
|
|
|
|
|
|
|
|
"$1 < ($consonant y* $vowel) {ヽ$voice? ;"
|
|
|
|
|
"$1 < (.) {ヽ $voice? ;" // otherwise repeat last character
|
|
|
|
|
"< ヽ $voice? ;" // delete if no characters found
|
|
|
|
|
|
2001-10-26 05:41:16 +00:00
|
|
|
|
// h- rule: lengthens vowel if not followed by a vowel
|
|
|
|
|
|
|
|
|
|
"[aeiou] } h > ー ;"
|
|
|
|
|
|
|
|
|
|
// one-way latin -> kana rules. these do not occur in
|
|
|
|
|
// well-formed romaji representing actual japanese text.
|
|
|
|
|
// their purpose is to make all romaji map to kana of
|
|
|
|
|
// some sort.
|
|
|
|
|
|
|
|
|
|
// the following are not really necessary, but produce
|
|
|
|
|
// slightly more natural results.
|
|
|
|
|
|
|
|
|
|
"cy > セィ ;"
|
|
|
|
|
"dy > ディ ;"
|
|
|
|
|
"hy > ヒ ;"
|
|
|
|
|
"sy > セィ ;"
|
|
|
|
|
"ty > ティ ;"
|
|
|
|
|
"zy > ゼィ ;"
|
|
|
|
|
|
|
|
|
|
"h > ヘ ;"
|
|
|
|
|
|
|
|
|
|
// isolated consonants listed here so as not to mask
|
|
|
|
|
// longer rules above.
|
|
|
|
|
|
|
|
|
|
"ch > チ;"
|
|
|
|
|
"sh > シ ;"
|
|
|
|
|
"dz > ヅ ;"
|
|
|
|
|
"dj > ヂ;"
|
|
|
|
|
|
|
|
|
|
"b > ブ ;"
|
|
|
|
|
"d > デ ;"
|
|
|
|
|
"g > グ ;"
|
|
|
|
|
"k > ク ;"
|
|
|
|
|
"m > ム ;"
|
|
|
|
|
"n'' < ン } $n_quoter ;"
|
|
|
|
|
"n <> ン ;"
|
|
|
|
|
"p > プ ;"
|
|
|
|
|
"r > ル ;"
|
|
|
|
|
"s > ス ;"
|
|
|
|
|
"t > テ ;"
|
|
|
|
|
"y > イ ;"
|
|
|
|
|
"z > ズ ;"
|
|
|
|
|
"v > ヴ ;"
|
|
|
|
|
|
|
|
|
|
"f > フ;"
|
|
|
|
|
"j > ジ;"
|
|
|
|
|
"w > ウ;"
|
|
|
|
|
|
2001-11-19 22:23:36 +00:00
|
|
|
|
"ß > | ss ;"
|
|
|
|
|
"æ > | e ;"
|
|
|
|
|
"ð > | d ;"
|
|
|
|
|
"ø > | u ;"
|
|
|
|
|
"þ > | th ;"
|
|
|
|
|
|
2001-10-26 05:41:16 +00:00
|
|
|
|
// simple substitutions using backup
|
|
|
|
|
|
|
|
|
|
"c > | k ;"
|
|
|
|
|
"l > | r ;"
|
|
|
|
|
"q > | k ;"
|
|
|
|
|
"x > | ks ;"
|
|
|
|
|
|
|
|
|
|
// ~~~ END shared rules ~~~
|
|
|
|
|
|
|
|
|
|
//------------------------------------------------------
|
|
|
|
|
// Final cleanup
|
|
|
|
|
|
|
|
|
|
"'~' > ;" // delete stray tildes between letters
|
|
|
|
|
"[:Katakana:] { '' } [:Latin:] > ;" // delete stray quotes between letters
|
2001-11-30 21:24:16 +00:00
|
|
|
|
// [\u02BE[:Nonspacing Mark:]-[\u3099-\u309C]] > ; # delete any non-spacing marks that we didn't use
|
2001-11-19 22:23:36 +00:00
|
|
|
|
|
2001-12-01 04:30:28 +00:00
|
|
|
|
":: NFC (NFD) ;"
|
2001-12-03 18:30:30 +00:00
|
|
|
|
":: ([:Katakana:] halfwidth-fullwidth);"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
|
2001-11-19 22:23:36 +00:00
|
|
|
|
// note: a global filter is more efficient, but MUST include all source chars!!
|
2001-11-30 21:24:16 +00:00
|
|
|
|
//:: ([\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9F [:Latin:][:Katakana:] [:nonspacing mark:]]);
|
|
|
|
|
// MINIMAL FILTER GENERATED FOR: Latin-Katakana BACKWARD
|
2001-12-01 04:30:28 +00:00
|
|
|
|
":: ( [[\\\ -~\u00A2-\u00A3\u00A5-\u00A6\u00AC\u0304\u20A9\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE8-\uFFEE][~\u3001-\u3002\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u3099-\u309B\u309E\u30A1-\u30FA\u30FC-\u30FE]] ) ;"
|
2001-10-26 05:41:16 +00:00
|
|
|
|
|
|
|
|
|
// eof
|
|
|
|
|
}
|
|
|
|
|
}
|