5f481fb5c7
X-SVN-Rev: 9391
224 lines
5.1 KiB
Plaintext
224 lines
5.1 KiB
Plaintext
// -*- Coding: utf-8; -*-
|
||
//--------------------------------------------------------------------
|
||
// Copyright (c) 1999-2002, International Business Machines
|
||
// Corporation and others. All Rights Reserved.
|
||
//--------------------------------------------------------------------
|
||
// THIS IS A MACHINE-GENERATED FILE
|
||
// Tool: dumpicurules.bat
|
||
// Source: ../../../impl/data/Transliterator_Hiragana_Katakana.txt
|
||
// Date: Sat Jul 27 10:31:07 2002
|
||
//--------------------------------------------------------------------
|
||
|
||
// Hiragana_Katakana
|
||
|
||
t_Hira_Kana {
|
||
Rule {
|
||
//--------------------------------------------------------------------
|
||
//--------------------------------------------------------------------
|
||
//--------------------------------------------------------------------
|
||
|
||
// note: a global filter is more efficient, but MUST include all source chars
|
||
":: [\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;"
|
||
":: NFKC ();"
|
||
|
||
// Hiragana-Katakana
|
||
|
||
// This is largely a one-to-one mapping, but it has a
|
||
// few kinks:
|
||
|
||
// 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
|
||
// Hiragana equivalents. We use Hiragana wa/wi/we/wo
|
||
// (308F-3092) with a voicing mark (3099), which is
|
||
// semantically equivalent. However, this is a non-
|
||
// roundtripping transformation.
|
||
|
||
// 2. The Katakana small ka/ke (30F5,30F6) have no
|
||
// Hiragana equivalents. We convert them to normal
|
||
// Hiragana ka/ke (304B,3051). This is a one-way
|
||
// information-losing transformation and precludes
|
||
// round-tripping of 30F5 and 30F6.
|
||
|
||
// 3. The combining marks 3099-309C are in the Hiragana
|
||
// block, but they apply to Katakana as well, so we
|
||
// leave them untouched.
|
||
|
||
// 4. The Katakana prolonged sound mark 30FC doubles the
|
||
// preceding vowel. This is a one-way information-
|
||
// losing transformation from Katakana to Hiragana.
|
||
|
||
// 5. The Katakana middle dot separates words in foreign
|
||
// expressions; we leave this unmodified.
|
||
|
||
// The above points preclude successful round-trip
|
||
// transformations of arbitrary input text. However,
|
||
// they provide naturalistic results that should conform
|
||
// to user expectations.
|
||
|
||
|
||
// Combining equivalents va/vi/ve/vo
|
||
"わ゙ <> ヷ;"
|
||
"ゐ゙ <> ヸ;"
|
||
"ゑ゙ <> ヹ;"
|
||
"を゙ <> ヺ;"
|
||
|
||
// One-to-one mappings, main block
|
||
// 3041:3094 <> 30A1:30F4
|
||
// 309D,E <> 30FD,E
|
||
"ぁ <> ァ;"
|
||
"あ <> ア;"
|
||
"ぃ <> ィ;"
|
||
"い <> イ;"
|
||
"ぅ <> ゥ;"
|
||
"う <> ウ;"
|
||
"ぇ <> ェ;"
|
||
"え <> エ;"
|
||
"ぉ <> ォ;"
|
||
"お <> オ;"
|
||
"か <> カ;"
|
||
"が <> ガ;"
|
||
"き <> キ;"
|
||
"ぎ <> ギ;"
|
||
"く <> ク;"
|
||
"ぐ <> グ;"
|
||
"け <> ケ;"
|
||
"げ <> ゲ;"
|
||
"こ <> コ;"
|
||
"ご <> ゴ;"
|
||
"さ <> サ;"
|
||
"ざ <> ザ;"
|
||
"し <> シ;"
|
||
"じ <> ジ;"
|
||
"す <> ス;"
|
||
"ず <> ズ;"
|
||
"せ <> セ;"
|
||
"ぜ <> ゼ;"
|
||
"そ <> ソ;"
|
||
"ぞ <> ゾ;"
|
||
"た <> タ;"
|
||
"だ <> ダ;"
|
||
"ち <> チ;"
|
||
"ぢ <> ヂ;"
|
||
"っ <> ッ;"
|
||
"つ <> ツ;"
|
||
"づ <> ヅ;"
|
||
"て <> テ;"
|
||
"で <> デ;"
|
||
"と <> ト;"
|
||
"ど <> ド;"
|
||
"な <> ナ;"
|
||
"に <> ニ;"
|
||
"ぬ <> ヌ;"
|
||
"ね <> ネ;"
|
||
"の <> ノ;"
|
||
"は <> ハ;"
|
||
"ば <> バ;"
|
||
"ぱ <> パ;"
|
||
"ひ <> ヒ;"
|
||
"び <> ビ;"
|
||
"ぴ <> ピ;"
|
||
"ふ <> フ;"
|
||
"ぶ <> ブ;"
|
||
"ぷ <> プ;"
|
||
"へ <> ヘ;"
|
||
"べ <> ベ;"
|
||
"ぺ <> ペ;"
|
||
"ほ <> ホ;"
|
||
"ぼ <> ボ;"
|
||
"ぽ <> ポ;"
|
||
"ま <> マ;"
|
||
"み <> ミ;"
|
||
"む <> ム;"
|
||
"め <> メ;"
|
||
"も <> モ;"
|
||
"ゃ <> ャ;"
|
||
"や <> ヤ;"
|
||
"ゅ <> ュ;"
|
||
"ゆ <> ユ;"
|
||
"ょ <> ョ;"
|
||
"よ <> ヨ;"
|
||
"ら <> ラ;"
|
||
"り <> リ;"
|
||
"る <> ル;"
|
||
"れ <> レ;"
|
||
"ろ <> ロ;"
|
||
"ゎ <> ヮ;"
|
||
"わ <> ワ;"
|
||
"ゐ <> ヰ;"
|
||
"ゑ <> ヱ;"
|
||
"を <> ヲ;"
|
||
"ん <> ン;"
|
||
"ゔ <> ヴ;"
|
||
"ゝ <> ヽ;"
|
||
"ゞ <> ヾ;"
|
||
|
||
// One-way Katakana-to-Hiragana transformation of small Katakana ka/ke to
|
||
// normal Hiragana ka/ke.
|
||
"か < ヵ;"
|
||
"け < ヶ;"
|
||
|
||
// Katakana followed by a prolonged sound mark 30FC has
|
||
// its final vowel doubled. This is a Katakana-Hiragana
|
||
// one-way information-losing transformation. We
|
||
// include the small kana (e.g., Hiragana small A 3041) and
|
||
// do not distinguish them from their large
|
||
// counterparts. It doesn't make sense to double a
|
||
// small counterpart vowel as a small Hiragana vowel, so
|
||
// we don't do so. In natural text this should never
|
||
// occur anyway. If a 30FC is seen without a preceding
|
||
// vowel sound (e.g., after n 30F3) we do not change it.
|
||
|
||
//## $long = ー;
|
||
|
||
// The following categories are Hiragana, not Katakana
|
||
// as might be expected, since by the time we get to the
|
||
// 30FC, the preceding character will have already been
|
||
// transformed to Hiragana.
|
||
|
||
// {The following mechanically generated from the
|
||
// Unicode 3.0 data:}
|
||
|
||
"$xa = ["
|
||
"ぁ あ か が さ ざ"
|
||
"た だ な は ば ぱ"
|
||
"ま ゃ や ら ゎ わ"
|
||
"];"
|
||
|
||
"$xi = ["
|
||
"ぃ い き ぎ し じ"
|
||
"ち ぢ に ひ び ぴ"
|
||
"み り ゐ"
|
||
"];"
|
||
|
||
"$xu = ["
|
||
"ぅ う く ぐ す ず"
|
||
"っ つ づ ぬ ふ ぶ"
|
||
"ぷ む ゅ ゆ る ゔ"
|
||
"];"
|
||
|
||
"$xe = ["
|
||
"ぇ え け げ せ ぜ"
|
||
"て で ね へ べ ぺ"
|
||
"め れ ゑ"
|
||
"];"
|
||
|
||
"$xo = ["
|
||
"ぉ お こ ご そ ぞ"
|
||
"と ど の ほ ぼ ぽ"
|
||
"も ょ よ ろ を"
|
||
"];"
|
||
|
||
"あ < $xa {ー};"
|
||
"い < $xi {ー};"
|
||
"う < $xu {ー};"
|
||
"え < $xe {ー};"
|
||
"お < $xo {ー};"
|
||
|
||
":: (NFKC) ;"
|
||
|
||
// note: a global filter is more efficient, but MUST include all source chars!!
|
||
":: ([\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);"
|
||
|
||
// eof
|
||
}
|
||
}
|