210 lines
6.0 KiB
Plaintext
210 lines
6.0 KiB
Plaintext
|
//--------------------------------------------------------------------
|
||
|
// Copyright (c) 1999-2000, International Business Machines
|
||
|
// Corporation and others. All Rights Reserved.
|
||
|
//--------------------------------------------------------------------
|
||
|
// THIS IS A MACHINE-GENERATED FILE
|
||
|
// Tool: src\com\ibm\tools\translit\dumpICUrules.bat
|
||
|
// Source: src/com/ibm/text/resources/TransliterationRule_Hiragana_Katakana.java
|
||
|
// Date: Thu Jun 29 17:03:15 2000
|
||
|
//--------------------------------------------------------------------
|
||
|
|
||
|
// Hiragana-Katakana
|
||
|
|
||
|
kana {
|
||
|
Rule {
|
||
|
|
||
|
// Hiragana-Katakana
|
||
|
|
||
|
// This is largely a one-to-one mapping, but it has a
|
||
|
// few kinks:
|
||
|
|
||
|
// 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
|
||
|
// Hiragana equivalents. We use Hiragana wa/wi/we/wo
|
||
|
// (308F-3092) with a voicing mark (3099), which is
|
||
|
// semantically equivalent. However, this is a non-
|
||
|
// roundtripping transformation.
|
||
|
|
||
|
// 2. The Katakana small ka/ke (30F5,30F6) have no
|
||
|
// Hiragana equivalents. We convert them to normal
|
||
|
// Hiragana ka/ke (304B,3051). This is a one-way
|
||
|
// information-losing transformation and precludes
|
||
|
// round-tripping of 30F5 and 30F6.
|
||
|
|
||
|
// 3. The combining marks 3099-309C are in the Hiragana
|
||
|
// block, but they apply to Katakana as well, so we
|
||
|
// leave them untouched.
|
||
|
|
||
|
// 4. The Katakana prolonged sound mark 30FC doubles the
|
||
|
// preceding vowel. This is a one-way information-
|
||
|
// losing transformation from Katakana to Hiragana.
|
||
|
|
||
|
// 5. The Katakana middle dot separates words in foreign
|
||
|
// expressions; we leave this unmodified.
|
||
|
|
||
|
// The above points preclude successful round-trip
|
||
|
// transformations of arbitrary input text. However,
|
||
|
// they provide naturalistic results that should conform
|
||
|
// to natural language expectations.
|
||
|
|
||
|
|
||
|
// Combining equivalents
|
||
|
"\u308F\u3099 <> \u30F7;"
|
||
|
"\u3090\u3099 <> \u30F8;"
|
||
|
"\u3091\u3099 <> \u30F9;"
|
||
|
"\u3092\u3099 <> \u30FA;"
|
||
|
|
||
|
// One-to-one mappings, main block
|
||
|
// 3041:3094 <> 30A1:30F4
|
||
|
// 309D,E <> 30FD,E
|
||
|
"\u3041 <> \u30A1;"
|
||
|
"\u3042 <> \u30A2;"
|
||
|
"\u3043 <> \u30A3;"
|
||
|
"\u3044 <> \u30A4;"
|
||
|
"\u3045 <> \u30A5;"
|
||
|
"\u3046 <> \u30A6;"
|
||
|
"\u3047 <> \u30A7;"
|
||
|
"\u3048 <> \u30A8;"
|
||
|
"\u3049 <> \u30A9;"
|
||
|
"\u304A <> \u30AA;"
|
||
|
"\u304B <> \u30AB;"
|
||
|
"\u304C <> \u30AC;"
|
||
|
"\u304D <> \u30AD;"
|
||
|
"\u304E <> \u30AE;"
|
||
|
"\u304F <> \u30AF;"
|
||
|
"\u3050 <> \u30B0;"
|
||
|
"\u3051 <> \u30B1;"
|
||
|
"\u3052 <> \u30B2;"
|
||
|
"\u3053 <> \u30B3;"
|
||
|
"\u3054 <> \u30B4;"
|
||
|
"\u3055 <> \u30B5;"
|
||
|
"\u3056 <> \u30B6;"
|
||
|
"\u3057 <> \u30B7;"
|
||
|
"\u3058 <> \u30B8;"
|
||
|
"\u3059 <> \u30B9;"
|
||
|
"\u305A <> \u30BA;"
|
||
|
"\u305B <> \u30BB;"
|
||
|
"\u305C <> \u30BC;"
|
||
|
"\u305D <> \u30BD;"
|
||
|
"\u305E <> \u30BE;"
|
||
|
"\u305F <> \u30BF;"
|
||
|
"\u3060 <> \u30C0;"
|
||
|
"\u3061 <> \u30C1;"
|
||
|
"\u3062 <> \u30C2;"
|
||
|
"\u3063 <> \u30C3;"
|
||
|
"\u3064 <> \u30C4;"
|
||
|
"\u3065 <> \u30C5;"
|
||
|
"\u3066 <> \u30C6;"
|
||
|
"\u3067 <> \u30C7;"
|
||
|
"\u3068 <> \u30C8;"
|
||
|
"\u3069 <> \u30C9;"
|
||
|
"\u306A <> \u30CA;"
|
||
|
"\u306B <> \u30CB;"
|
||
|
"\u306C <> \u30CC;"
|
||
|
"\u306D <> \u30CD;"
|
||
|
"\u306E <> \u30CE;"
|
||
|
"\u306F <> \u30CF;"
|
||
|
"\u3070 <> \u30D0;"
|
||
|
"\u3071 <> \u30D1;"
|
||
|
"\u3072 <> \u30D2;"
|
||
|
"\u3073 <> \u30D3;"
|
||
|
"\u3074 <> \u30D4;"
|
||
|
"\u3075 <> \u30D5;"
|
||
|
"\u3076 <> \u30D6;"
|
||
|
"\u3077 <> \u30D7;"
|
||
|
"\u3078 <> \u30D8;"
|
||
|
"\u3079 <> \u30D9;"
|
||
|
"\u307A <> \u30DA;"
|
||
|
"\u307B <> \u30DB;"
|
||
|
"\u307C <> \u30DC;"
|
||
|
"\u307D <> \u30DD;"
|
||
|
"\u307E <> \u30DE;"
|
||
|
"\u307F <> \u30DF;"
|
||
|
"\u3080 <> \u30E0;"
|
||
|
"\u3081 <> \u30E1;"
|
||
|
"\u3082 <> \u30E2;"
|
||
|
"\u3083 <> \u30E3;"
|
||
|
"\u3084 <> \u30E4;"
|
||
|
"\u3085 <> \u30E5;"
|
||
|
"\u3086 <> \u30E6;"
|
||
|
"\u3087 <> \u30E7;"
|
||
|
"\u3088 <> \u30E8;"
|
||
|
"\u3089 <> \u30E9;"
|
||
|
"\u308A <> \u30EA;"
|
||
|
"\u308B <> \u30EB;"
|
||
|
"\u308C <> \u30EC;"
|
||
|
"\u308D <> \u30ED;"
|
||
|
"\u308E <> \u30EE;"
|
||
|
"\u308F <> \u30EF;"
|
||
|
"\u3090 <> \u30F0;"
|
||
|
"\u3091 <> \u30F1;"
|
||
|
"\u3092 <> \u30F2;"
|
||
|
"\u3093 <> \u30F3;"
|
||
|
"\u3094 <> \u30F4;"
|
||
|
"\u309D <> \u30FD;"
|
||
|
"\u309E <> \u30FE;"
|
||
|
|
||
|
// Fallback; this is a one-way Katakana-Hiragana xform.
|
||
|
"\u304B < \u30F5;"
|
||
|
"\u3051 < \u30F6;"
|
||
|
|
||
|
// Anything followed by a prolonged sound mark 30FC has
|
||
|
// its final vowel doubled. This is a Katakana-Hiragana
|
||
|
// one-way information-losing transformation. We
|
||
|
// include the small kana (e.g., Hiragana small A 3041) and
|
||
|
// do not distinguish them from their large
|
||
|
// counterparts. It doesn't make sense to double a
|
||
|
// small counterpart vowel as a small Hiragana vowel, so
|
||
|
// we don't do so. In natural text this should never
|
||
|
// occur anyway. If a 30FC is seen without a preceding
|
||
|
// vowel sound (e.g., after n 30F3) we do not change it.
|
||
|
|
||
|
"$long = \u30FC;"
|
||
|
|
||
|
// The following categories are Hiragana, not Katakana
|
||
|
// as might be expected, since by the time we get to the
|
||
|
// 30FC, the preceding character will have already been
|
||
|
// transformed to Hiragana.
|
||
|
|
||
|
// {The following mechanically generated from the
|
||
|
// Unicode 3.0 data:}
|
||
|
|
||
|
"$xa = ["
|
||
|
"\u3041 \u3042 \u304B \u304C \u3055 \u3056"
|
||
|
"\u305F \u3060 \u306A \u306F \u3070 \u3071"
|
||
|
"\u307E \u3083 \u3084 \u3089 \u308E \u308F"
|
||
|
"];"
|
||
|
|
||
|
"$xi = ["
|
||
|
"\u3043 \u3044 \u304D \u304E \u3057 \u3058"
|
||
|
"\u3061 \u3062 \u306B \u3072 \u3073 \u3074"
|
||
|
"\u307F \u308A \u3090"
|
||
|
"];"
|
||
|
|
||
|
"$xu = ["
|
||
|
"\u3045 \u3046 \u304F \u3050 \u3059 \u305A"
|
||
|
"\u3063 \u3064 \u3065 \u306C \u3075 \u3076"
|
||
|
"\u3077 \u3080 \u3085 \u3086 \u308B \u3094"
|
||
|
"];"
|
||
|
|
||
|
"$xe = ["
|
||
|
"\u3047 \u3048 \u3051 \u3052 \u305B \u305C"
|
||
|
"\u3066 \u3067 \u306D \u3078 \u3079 \u307A"
|
||
|
"\u3081 \u308C \u3091"
|
||
|
"];"
|
||
|
|
||
|
"$xo = ["
|
||
|
"\u3049 \u304A \u3053 \u3054 \u305D \u305E"
|
||
|
"\u3068 \u3069 \u306E \u307B \u307C \u307D"
|
||
|
"\u3082 \u3087 \u3088 \u308D \u3092"
|
||
|
"];"
|
||
|
|
||
|
"\u3042 < $xa {$long};"
|
||
|
"\u3044 < $xi {$long};"
|
||
|
"\u3046 < $xu {$long};"
|
||
|
"\u3048 < $xe {$long};"
|
||
|
"\u304A < $xo {$long};"
|
||
|
|
||
|
""
|
||
|
}
|
||
|
}
|