//--------------------------------------------------------------------
// Copyright (c) 1999-2000, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: src\com\ibm\tools\translit\dumpICUrules.bat
// Source: src/com/ibm/text/resources/TransliterationRule_Hiragana_Katakana.java
// Date: Thu Jun 29 17:03:15 2000
//--------------------------------------------------------------------

// Hiragana-Katakana
//
// Resource "kana" holds a single "Rule" entry: a sequence of string
// fragments that together form the transliteration rule text.  In
// each rule the Hiragana side is on the left and the Katakana side is
// on the right.  "<>" rules are the two-way mappings; "<" rules are
// the one-way Katakana-to-Hiragana fallbacks described below.

kana {
    Rule {

        // Hiragana-Katakana

        // This is largely a one-to-one mapping, but it has a
        // few kinks:

        // 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
        // Hiragana equivalents.  We use Hiragana wa/wi/we/wo
        // (308F-3092) with a voicing mark (3099), which is
        // semantically equivalent.  However, this is a non-
        // roundtripping transformation.

        // 2. The Katakana small ka/ke (30F5,30F6) have no
        // Hiragana equivalents.  We convert them to normal
        // Hiragana ka/ke (304B,3051).  This is a one-way
        // information-losing transformation and precludes
        // round-tripping of 30F5 and 30F6.

        // 3. The combining marks 3099-309C are in the Hiragana
        // block, but they apply to Katakana as well, so we
        // leave them untouched.

        // 4. The Katakana prolonged sound mark 30FC doubles the
        // preceding vowel.  This is a one-way information-
        // losing transformation from Katakana to Hiragana.

        // 5. The Katakana middle dot separates words in foreign
        // expressions; we leave this unmodified.

        // The above points preclude successful round-trip
        // transformations of arbitrary input text.  However,
        // they provide naturalistic results that should conform
        // to natural language expectations.

        // Combining equivalents
        // (Hiragana base + voicing mark 3099 <> single Katakana
        // code point.  These two-character rules are listed ahead
        // of the single-character mappings for 308F-3092 below;
        // presumably the rule engine tries rules in order, so the
        // position matters -- confirm before reordering.)

        "\u308F\u3099 <> \u30F7;"
        "\u3090\u3099 <> \u30F8;"
        "\u3091\u3099 <> \u30F9;"
        "\u3092\u3099 <> \u30FA;"

        // One-to-one mappings, main block
        // 3041:3094 <> 30A1:30F4
        // 309D,E <> 30FD,E
        // (Each Katakana code point is the Hiragana code point
        // plus 0x60.)

        "\u3041 <> \u30A1;"
        "\u3042 <> \u30A2;"
        "\u3043 <> \u30A3;"
        "\u3044 <> \u30A4;"
        "\u3045 <> \u30A5;"
        "\u3046 <> \u30A6;"
        "\u3047 <> \u30A7;"
        "\u3048 <> \u30A8;"
        "\u3049 <> \u30A9;"
        "\u304A <> \u30AA;"
        "\u304B <> \u30AB;"
        "\u304C <> \u30AC;"
        "\u304D <> \u30AD;"
        "\u304E <> \u30AE;"
        "\u304F <> \u30AF;"
        "\u3050 <> \u30B0;"
        "\u3051 <> \u30B1;"
        "\u3052 <> \u30B2;"
        "\u3053 <> \u30B3;"
        "\u3054 <> \u30B4;"
        "\u3055 <> \u30B5;"
        "\u3056 <> \u30B6;"
        "\u3057 <> \u30B7;"
        "\u3058 <> \u30B8;"
        "\u3059 <> \u30B9;"
        "\u305A <> \u30BA;"
        "\u305B <> \u30BB;"
        "\u305C <> \u30BC;"
        "\u305D <> \u30BD;"
        "\u305E <> \u30BE;"
        "\u305F <> \u30BF;"
        "\u3060 <> \u30C0;"
        "\u3061 <> \u30C1;"
        "\u3062 <> \u30C2;"
        "\u3063 <> \u30C3;"
        "\u3064 <> \u30C4;"
        "\u3065 <> \u30C5;"
        "\u3066 <> \u30C6;"
        "\u3067 <> \u30C7;"
        "\u3068 <> \u30C8;"
        "\u3069 <> \u30C9;"
        "\u306A <> \u30CA;"
        "\u306B <> \u30CB;"
        "\u306C <> \u30CC;"
        "\u306D <> \u30CD;"
        "\u306E <> \u30CE;"
        "\u306F <> \u30CF;"
        "\u3070 <> \u30D0;"
        "\u3071 <> \u30D1;"
        "\u3072 <> \u30D2;"
        "\u3073 <> \u30D3;"
        "\u3074 <> \u30D4;"
        "\u3075 <> \u30D5;"
        "\u3076 <> \u30D6;"
        "\u3077 <> \u30D7;"
        "\u3078 <> \u30D8;"
        "\u3079 <> \u30D9;"
        "\u307A <> \u30DA;"
        "\u307B <> \u30DB;"
        "\u307C <> \u30DC;"
        "\u307D <> \u30DD;"
        "\u307E <> \u30DE;"
        "\u307F <> \u30DF;"
        "\u3080 <> \u30E0;"
        "\u3081 <> \u30E1;"
        "\u3082 <> \u30E2;"
        "\u3083 <> \u30E3;"
        "\u3084 <> \u30E4;"
        "\u3085 <> \u30E5;"
        "\u3086 <> \u30E6;"
        "\u3087 <> \u30E7;"
        "\u3088 <> \u30E8;"
        "\u3089 <> \u30E9;"
        "\u308A <> \u30EA;"
        "\u308B <> \u30EB;"
        "\u308C <> \u30EC;"
        "\u308D <> \u30ED;"
        "\u308E <> \u30EE;"
        "\u308F <> \u30EF;"
        "\u3090 <> \u30F0;"
        "\u3091 <> \u30F1;"
        "\u3092 <> \u30F2;"
        "\u3093 <> \u30F3;"
        "\u3094 <> \u30F4;"

        "\u309D <> \u30FD;"
        "\u309E <> \u30FE;"

        // Fallback; this is a one-way Katakana-Hiragana xform.
        // Small ka/ke (30F5, 30F6) become ordinary Hiragana ka/ke
        // (304B, 3051); there is no rule in the other direction.

        "\u304B < \u30F5;"
        "\u3051 < \u30F6;"

        // Anything followed by a prolonged sound mark 30FC has
        // its final vowel doubled.  This is a Katakana-Hiragana
        // one-way information-losing transformation.  We
        // include the small kana (e.g., small A, which is 3041
        // once converted to Hiragana) and do not distinguish
        // them from their large counterparts.  It doesn't make
        // sense to double a small counterpart vowel as a small
        // Hiragana vowel, so we don't do so.  In natural text
        // this should never occur anyway.  If a 30FC is seen
        // without a preceding vowel sound (e.g., after n 30F3)
        // we do not change it.

        "$long = \u30FC;"

        // The following categories are Hiragana, not Katakana
        // as might be expected, since by the time we get to the
        // 30FC, the preceding character will have already been
        // transformed to Hiragana.

        // NOTE(review): the voiced wa/wi/we/wo sequences produced
        // by the combining-equivalent rules end in 3099, which is
        // in none of the sets below, so a 30FC that follows
        // 30F7-30FA appears to be left unchanged -- confirm that
        // this is intended.

        // {The following mechanically generated from the
        // Unicode 3.0 data:}

        // $xa: characters after which 30FC is replaced by 3042.
        "$xa = ["
        "\u3041 \u3042 \u304B \u304C \u3055 \u3056"
        "\u305F \u3060 \u306A \u306F \u3070 \u3071"
        "\u307E \u3083 \u3084 \u3089 \u308E \u308F"
        "];"

        // $xi: characters after which 30FC is replaced by 3044.
        "$xi = ["
        "\u3043 \u3044 \u304D \u304E \u3057 \u3058"
        "\u3061 \u3062 \u306B \u3072 \u3073 \u3074"
        "\u307F \u308A \u3090"
        "];"

        // $xu: characters after which 30FC is replaced by 3046.
        "$xu = ["
        "\u3045 \u3046 \u304F \u3050 \u3059 \u305A"
        "\u3063 \u3064 \u3065 \u306C \u3075 \u3076"
        "\u3077 \u3080 \u3085 \u3086 \u308B \u3094"
        "];"

        // $xe: characters after which 30FC is replaced by 3048.
        "$xe = ["
        "\u3047 \u3048 \u3051 \u3052 \u305B \u305C"
        "\u3066 \u3067 \u306D \u3078 \u3079 \u307A"
        "\u3081 \u308C \u3091"
        "];"

        // $xo: characters after which 30FC is replaced by 304A.
        "$xo = ["
        "\u3049 \u304A \u3053 \u3054 \u305D \u305E"
        "\u3068 \u3069 \u306E \u307B \u307C \u307D"
        "\u3082 \u3087 \u3088 \u308D \u3092"
        "];"

        // One-way (Katakana-to-Hiragana) rules: the set in front
        // of the braces is the preceding context and {$long} is
        // the text that gets replaced by the large Hiragana vowel
        // on the left -- presumably standard context syntax for
        // this rule engine; confirm against its documentation.

        "\u3042 < $xa {$long};"
        "\u3044 < $xi {$long};"
        "\u3046 < $xu {$long};"
        "\u3048 < $xe {$long};"
        "\u304A < $xo {$long};"

        // Empty trailing fragment; presumably emitted by the
        // generator tool and contributes nothing to the rules.
        ""
    }
}