// -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others.  All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Hiragana_Katakana.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------

// Hiragana_Katakana
//
// Rule table for the Hiragana <-> Katakana transliterator.  The table
// is a sequence of adjacent string literals; each literal holds one
// rule (or one fragment of a multi-part set definition).  In the rules
// below, "<>" marks a two-way mapping and "<" a mapping that only
// applies when converting Katakana to Hiragana (see the comments that
// accompany each group).
//
// NOTE(review): because this file is generated, any lasting change
// should presumably be made in the Source file named above and the
// dump re-run -- confirm with the owner of dumpICUrules.bat.

t_Hira_Kana {
  Rule {
    //--------------------------------------------------------------------
    // Global filter and normalization for the Hiragana -> Katakana
    // direction.
    //--------------------------------------------------------------------

    // note: a global filter is more efficient, but MUST include all source chars
    // NOTE(review): the first escape is written "\\u0000" (two
    // backslashes) while the remaining ones use a single backslash.
    // This looks deliberate (presumably so that U+0000 reaches the rule
    // parser as an escape rather than as a raw NUL) -- confirm before
    // "normalizing" it.
    ":: [\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;"

    // NOTE(review): "NFKC ()" presumably means "apply NFKC in the
    // forward direction, nothing in the reverse direction"; the
    // matching reverse-only "(NFKC)" appears near the end of the table
    // -- confirm against the ICU transform-rule syntax.
    ":: NFKC ();"

    // Hiragana-Katakana

    // This is largely a one-to-one mapping, but it has a
    // few kinks:

    // 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
    //    Hiragana equivalents.  We use Hiragana wa/wi/we/wo
    //    (308F-3092) with a voicing mark (3099), which is
    //    semantically equivalent.  However, this is a non-
    //    roundtripping transformation.

    // 2. The Katakana small ka/ke (30F5,30F6) have no
    //    Hiragana equivalents.  We convert them to normal
    //    Hiragana ka/ke (304B,3051).  This is a one-way
    //    information-losing transformation and precludes
    //    round-tripping of 30F5 and 30F6.

    // 3. The combining marks 3099-309C are in the Hiragana
    //    block, but they apply to Katakana as well, so we
    //    leave them untouched.

    // 4. The Katakana prolonged sound mark 30FC doubles the
    //    preceding vowel.  This is a one-way information-
    //    losing transformation from Katakana to Hiragana.

    // 5. The Katakana middle dot separates words in foreign
    //    expressions; we leave this unmodified.

    // The above points preclude successful round-trip
    // transformations of arbitrary input text.  However,
    // they provide naturalistic results that should conform
    // to user expectations.

    // Combining equivalents va/vi/ve/vo
    // Each left-hand side is a Hiragana wa/wi/we/wo followed by the
    // combining voicing mark 3099 (two code points); each right-hand
    // side is a single Katakana letter.
    // NOTE(review): these rules are listed before the plain
    // wa/wi/we/wo rules in the main block, presumably so that the
    // two-code-point sequence is matched before its first character
    // alone -- keep this order unless the rule-ordering semantics have
    // been checked.
    "わ゙ <> ヷ;"
    "ゐ゙ <> ヸ;"
    "ゑ゙ <> ヹ;"
    "を゙ <> ヺ;"

    // One-to-one mappings, main block
    // 3041:3094 <> 30A1:30F4
    // 309D,E <> 30FD,E
    "ぁ <> ァ;" "あ <> ア;" "ぃ <> ィ;" "い <> イ;" "ぅ <> ゥ;"
    "う <> ウ;" "ぇ <> ェ;" "え <> エ;" "ぉ <> ォ;" "お <> オ;"
    "か <> カ;" "が <> ガ;" "き <> キ;" "ぎ <> ギ;" "く <> ク;"
    "ぐ <> グ;" "け <> ケ;" "げ <> ゲ;" "こ <> コ;" "ご <> ゴ;"
    "さ <> サ;" "ざ <> ザ;" "し <> シ;" "じ <> ジ;" "す <> ス;"
    "ず <> ズ;" "せ <> セ;" "ぜ <> ゼ;" "そ <> ソ;" "ぞ <> ゾ;"
    "た <> タ;" "だ <> ダ;" "ち <> チ;" "ぢ <> ヂ;" "っ <> ッ;"
    "つ <> ツ;" "づ <> ヅ;" "て <> テ;" "で <> デ;" "と <> ト;"
    "ど <> ド;"
    "な <> ナ;" "に <> ニ;" "ぬ <> ヌ;" "ね <> ネ;" "の <> ノ;"
    "は <> ハ;" "ば <> バ;" "ぱ <> パ;" "ひ <> ヒ;" "び <> ビ;"
    "ぴ <> ピ;" "ふ <> フ;" "ぶ <> ブ;" "ぷ <> プ;" "へ <> ヘ;"
    "べ <> ベ;" "ぺ <> ペ;" "ほ <> ホ;" "ぼ <> ボ;" "ぽ <> ポ;"
    "ま <> マ;" "み <> ミ;" "む <> ム;" "め <> メ;" "も <> モ;"
    "ゃ <> ャ;" "や <> ヤ;" "ゅ <> ュ;" "ゆ <> ユ;" "ょ <> ョ;"
    "よ <> ヨ;"
    "ら <> ラ;" "り <> リ;" "る <> ル;" "れ <> レ;" "ろ <> ロ;"
    "ゎ <> ヮ;" "わ <> ワ;" "ゐ <> ヰ;" "ゑ <> ヱ;" "を <> ヲ;"
    "ん <> ン;" "ゔ <> ヴ;"
    // Iteration marks (309D,E <> 30FD,E).
    "ゝ <> ヽ;" "ゞ <> ヾ;"

    // One-way Katakana-Hiragana xform of small K ka/ke to
    // normal H ka/ke.
    "か < ヵ;"
    "け < ヶ;"

    // Katakana followed by a prolonged sound mark 30FC has
    // its final vowel doubled.  This is a Katakana-Hiragana
    // one-way information-losing transformation.  We
    // include the small kana (e.g., small a, which is 3041
    // once converted to Hiragana; its Katakana form is 30A1)
    // and do not distinguish them from their large
    // counterparts.  It doesn't make sense to double a
    // small counterpart vowel as a small Hiragana vowel, so
    // we don't do so.  In natural text this should never
    // occur anyway.  If a 30FC is seen without a preceding
    // vowel sound (e.g., after n 30F3) we do not change it.

    // The line below is a disabled variable definition kept from the
    // source rules; the long-vowel rules further down spell the
    // prolonged sound mark out literally instead of using $long.
    //## $long = ー;

    // The following categories are Hiragana, not Katakana
    // as might be expected, since by the time we get to the
    // 30FC, the preceding character will have already been
    // transformed to Hiragana.

    // {The following mechanically generated from the
    // Unicode 3.0 data:}

    // Each $x? variable is a set of Hiragana grouped by final vowel
    // (a, i, u, e, o).  A set definition is split across several
    // adjacent string literals: the opening "[", the members, and the
    // closing "];".
    "$xa = ["
    "ぁ あ か が さ ざ"
    "た だ な は ば ぱ"
    "ま ゃ や ら ゎ わ"
    "];"

    "$xi = ["
    "ぃ い き ぎ し じ"
    "ち ぢ に ひ び ぴ"
    "み り ゐ"
    "];"

    "$xu = ["
    "ぅ う く ぐ す ず"
    "っ つ づ ぬ ふ ぶ"
    "ぷ む ゅ ゆ る ゔ"
    "];"

    "$xe = ["
    "ぇ え け げ せ ぜ"
    "て で ね へ べ ぺ"
    "め れ ゑ"
    "];"

    "$xo = ["
    "ぉ お こ ご そ ぞ"
    "と ど の ほ ぼ ぽ"
    "も ょ よ ろ を"
    "];"

    // Long-vowel rules: the prolonged sound mark in braces becomes the
    // large Hiragana vowel on the left when it follows a member of the
    // corresponding vowel set.
    // NOTE(review): the "$set {x}" form presumably treats $set as
    // required preceding context and only the braced part as the text
    // being replaced -- confirm against the ICU transform-rule syntax.
    "あ < $xa {ー};"
    "い < $xi {ー};"
    "う < $xu {ー};"
    "え < $xe {ー};"
    "お < $xo {ー};"

    // NOTE(review): presumably the reverse-direction counterpart of
    // the "NFKC ()" rule near the top of the table -- confirm.
    ":: (NFKC) ;"

    // note: a global filter is more efficient, but MUST include all source chars!!
    // This filter lists the same characters as the one at the top of
    // the table but is wrapped in parentheses.
    // NOTE(review): presumably that makes it the global filter for the
    // Katakana -> Hiragana direction; keep the two character lists in
    // sync if either is edited.
    ":: ([\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);"

    // eof
  }
}