scuffed-code/icu4c/source/data/translit/t_Hira_Kana.txt
2004-05-19 04:17:37 +00:00

224 lines
5.1 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Hiragana_Katakana.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// Hiragana_Katakana
t_Hira_Kana {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// note: a global filter is more efficient, but MUST include all source chars
":: [\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;"
":: NFKC ();"
// Hiragana-Katakana
// This is largely a one-to-one mapping, but it has a
// few kinks:
// 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
// Hiragana equivalents. We use Hiragana wa/wi/we/wo
// (308F-3092) with a voicing mark (3099), which is
// semantically equivalent. However, this is a non-
// roundtripping transformation.
// 2. The Katakana small ka/ke (30F5,30F6) have no
// Hiragana equivalents. We convert them to normal
// Hiragana ka/ke (304B,3051). This is a one-way
// information-losing transformation and precludes
// round-tripping of 30F5 and 30F6.
// 3. The combining marks 3099-309C are in the Hiragana
// block, but they apply to Katakana as well, so we
// leave them untouched.
// 4. The Katakana prolonged sound mark 30FC doubles the
// preceding vowel. This is a one-way information-
// losing transformation from Katakana to Hiragana.
// 5. The Katakana middle dot separates words in foreign
// expressions; we leave this unmodified.
// The above points preclude successful round-trip
// transformations of arbitrary input text. However,
// they provide naturalistic results that should conform
// to user expectations.
// Combining equivalents va/vi/ve/vo
"わ゙ <> ヷ;"
"ゐ゙ <> ヸ;"
"ゑ゙ <> ヹ;"
"を゙ <> ヺ;"
// One-to-one mappings, main block
// 3041:3094 <> 30A1:30F4
// 309D,E <> 30FD,E
// Every pair below is bidirectional ("<>"): the left side is the
// Hiragana letter and the right side is the Katakana letter exactly
// 0x60 code points above it. Both sides of every rule must be
// non-empty.
"ぁ <> ァ;"
"あ <> ア;"
"ぃ <> ィ;"
"い <> イ;"
"ぅ <> ゥ;"
"う <> ウ;"
"ぇ <> ェ;"
"え <> エ;"
"ぉ <> ォ;"
"お <> オ;"
"か <> カ;"
"が <> ガ;"
"き <> キ;"
"ぎ <> ギ;"
"く <> ク;"
"ぐ <> グ;"
"け <> ケ;"
"げ <> ゲ;"
"こ <> コ;"
"ご <> ゴ;"
"さ <> サ;"
"ざ <> ザ;"
"し <> シ;"
"じ <> ジ;"
"す <> ス;"
"ず <> ズ;"
"せ <> セ;"
"ぜ <> ゼ;"
"そ <> ソ;"
"ぞ <> ゾ;"
"た <> タ;"
"だ <> ダ;"
"ち <> チ;"
"ぢ <> ヂ;"
"っ <> ッ;"
"つ <> ツ;"
"づ <> ヅ;"
"て <> テ;"
"で <> デ;"
"と <> ト;"
"ど <> ド;"
"な <> ナ;"
"に <> ニ;"
"ぬ <> ヌ;"
"ね <> ネ;"
// 306E <> 30CE (Katakana NO; easily mistaken for a slash)
"の <> ノ;"
"は <> ハ;"
"ば <> バ;"
"ぱ <> パ;"
"ひ <> ヒ;"
"び <> ビ;"
"ぴ <> ピ;"
"ふ <> フ;"
"ぶ <> ブ;"
"ぷ <> プ;"
"へ <> ヘ;"
"べ <> ベ;"
"ぺ <> ペ;"
"ほ <> ホ;"
"ぼ <> ボ;"
"ぽ <> ポ;"
"ま <> マ;"
"み <> ミ;"
"む <> ム;"
"め <> メ;"
"も <> モ;"
"ゃ <> ャ;"
"や <> ヤ;"
"ゅ <> ュ;"
"ゆ <> ユ;"
"ょ <> ョ;"
"よ <> ヨ;"
"ら <> ラ;"
"り <> リ;"
"る <> ル;"
"れ <> レ;"
"ろ <> ロ;"
"ゎ <> ヮ;"
"わ <> ワ;"
"ゐ <> ヰ;"
"ゑ <> ヱ;"
"を <> ヲ;"
"ん <> ン;"
"ゔ <> ヴ;"
"ゝ <> ヽ;"
"ゞ <> ヾ;"
// One-way Katakana-Hiragana xform of small K ka/ke to
// normal H ka/ke.
"か < ヵ;"
"け < ヶ;"
// Katakana followed by a prolonged sound mark 30FC has
// its final vowel doubled. This is a Katakana-Hiragana
// one-way information-losing transformation. We
// include the small Katakana (e.g., small A 30A1) and
// do not distinguish them from their large
// counterparts. It doesn't make sense to double a
// small counterpart vowel as a small Hiragana vowel, so
// we don't do so. In natural text this should never
// occur anyway. If a 30FC is seen without a preceding
// vowel sound (e.g., after n 30F3) we do not change it.
//## $long = ー;
// The following categories are Hiragana, not Katakana
// as might be expected, since by the time we get to the
// 30FC, the preceding character will have already been
// transformed to Hiragana.
// {The following mechanically generated from the
// Unicode 3.0 data:}
// Each set below collects the Hiragana whose final vowel is the one
// named by the variable: $xa = a, $xi = i, $xu = u, $xe = e, $xo = o.
// Hiragana n (3093) is in none of the sets, so a 30FC following it
// is matched by none of the rules at the end of this section.
"$xa = ["
"ぁ あ か が さ ざ"
"た だ な は ば ぱ"
"ま ゃ や ら ゎ わ"
"];"
"$xi = ["
"ぃ い き ぎ し じ"
"ち ぢ に ひ び ぴ"
"み り ゐ"
"];"
"$xu = ["
"ぅ う く ぐ す ず"
"っ つ づ ぬ ふ ぶ"
"ぷ む ゅ ゆ る ゔ"
"];"
"$xe = ["
"ぇ え け げ せ ぜ"
"て で ね へ べ ぺ"
"め れ ゑ"
"];"
"$xo = ["
"ぉ お こ ご そ ぞ"
"と ど の ほ ぼ ぽ"
"も ょ よ ろ を"
"];"
// One-way rules ("<"), applied only in the Katakana-to-Hiragana
// direction. The text inside { } (the prolonged sound mark 30FC) is
// what gets replaced; the set in front of it is context that must
// match but is itself left unchanged. The replacement is always the
// large Hiragana vowel, even after a small kana.
"あ < $xa {ー};"
"い < $xi {ー};"
"う < $xu {ー};"
"え < $xe {ー};"
"お < $xo {ー};"
":: (NFKC) ;"
// note: a global filter is more efficient, but MUST include all source chars!!
":: ([\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);"
// eof
}
}