2006-07-21 01:08:32 +00:00
|
|
|
|
# ***************************************************************************
|
|
|
|
|
# *
|
2011-03-12 14:57:18 +00:00
|
|
|
|
# * Copyright (C) 2004-2011, International Business Machines
|
2006-07-21 01:08:32 +00:00
|
|
|
|
# * Corporation; Unicode, Inc.; and others. All Rights Reserved.
|
|
|
|
|
# *
|
|
|
|
|
# ***************************************************************************
|
|
|
|
|
# File: Latin_Katakana.txt
|
2006-10-27 17:55:54 +00:00
|
|
|
|
# Generated from CLDR
|
2006-07-21 01:08:32 +00:00
|
|
|
|
#
|
2009-04-10 07:47:09 +00:00
|
|
|
|
:: [[ᄀ-ᄒᄚᄡ\u1160-ᅵᆪᆬ-ᆭᆰ-ᆵ←-↓│■○\u3000-。「-」\u3099-\u309Aァ-ロワヲ-ヴヷヺ-ー!-~¢-₩][',.A-Za-z~À-ÖØ-öø-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0304Ӣ-ӣӮ-ӯḀ-ẙẠ-ỹᾱᾹῑῙῡῩK-Å]] ;
|
2004-08-02 20:06:55 +00:00
|
|
|
|
# Forward direction only: fold fullwidth Latin characters to halfwidth before
# the rules run. The parenthesized reverse side is empty, so nothing is done
# at this step when converting back.
:: [:Latin:] fullwidth-halfwidth ();
|
|
|
|
|
# Forward: decompose to NFD so the rules see base characters and combining
# marks (\u0304, \u3099, \u309A) separately. The reverse side of this step is NFC.
:: NFD (NFC);
|
|
|
|
|
:: Lower (); # whenever transliterating from cased to uncased script, include this
|
|
|
|
|
# Latin character classes used as context by the rules below.
$vowel = [aeiou] ;
|
|
|
|
|
$consonant = [bcdfghjklmnpqrstvwxyz] ;
|
|
|
|
|
# Combining macron; mapped to and from the long-vowel mark ー further down.
$macron = \u0304 ;
|
2006-07-21 01:08:32 +00:00
|
|
|
|
# Hiragana range. NOTE(review): not referenced by any rule visible in this file.
$kana = [ぁ-ゔ] ;
|
|
|
|
|
# Voicing and semi-voicing marks: the combining form (\u3099, \u309A) and the
# spacing form (゛, ゜) of each.
$voice = [\u3099゛];
|
|
|
|
|
$semivoice = [\u309A゜];
|
2004-08-02 20:06:55 +00:00
|
|
|
|
# Kana (katakana and hiragana) that start each consonant row. They are used as
# look-ahead context by the doubled-consonant (small ッ) rules.
$k_start = [カキクケコかきくけこ] ;
|
|
|
|
|
$s_start = [サシスセソさしすせそ] ;
|
|
|
|
|
# j is written as シ/し followed by a voicing mark.
$j_start = [シし] $voice ;
|
|
|
|
|
$t_start = [タチツテトたちつてと] ;
|
|
|
|
|
$n_start = [ナニヌネノンなにぬねの] ;
|
|
|
|
|
# フ/ふ is kept out of the h-row and treated as its own f-row.
$h_start = [ハヒヘホはひへほ] ;
|
|
|
|
|
$f_start = [フふ] ;
|
|
|
|
|
$m_start = [マミムメモまみむめも] ;
|
|
|
|
|
$y_start = [ヤユヨやゆよ] ;
|
|
|
|
|
$r_start = [ラリルレロらりるれろ] ;
|
|
|
|
|
$w_start = [ワヰヱヲわゐゑを] ;
|
2006-07-21 01:08:32 +00:00
|
|
|
|
# v-row: a w-row katakana followed by the combining voicing mark.
$v_start = [ワヰヱヲ]\u3099 ;
|
2009-03-13 03:21:29 +00:00
|
|
|
|
# Katakana after which a leftover Latin "h" is turned into the long-vowel mark ー.
$voweled_basekana = [ァ-オカキクケコサシスセソタチッツテトナ-ノハヒフヘホマ-ヲヵヶ] ;
|
2004-08-02 20:06:55 +00:00
|
|
|
|
# Kana before which ン is romanized as n plus an apostrophe instead of a bare n
# (presumably so "n" + vowel/y is not re-read as a single syllable -- confirm).
$n_quoter = [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ;
|
|
|
|
|
# Small kana that may follow an i-column kana in the "<consonant> | '~' ←" rules.
$small_y = [ャィュェョ] ;
|
2006-07-21 01:08:32 +00:00
|
|
|
|
# NOTE(review): this is the hiragana iteration mark and the variable is not
# referenced by any rule visible here; the iteration rules match the katakana
# mark ヽ literally.
$iteration = ゝ ;
|
2009-04-10 07:47:09 +00:00
|
|
|
|
# Punctuation: ASCII period and comma <-> ideographic full stop and comma.
'.' ↔ 。;
|
|
|
|
|
',' ↔ 、;
|
|
|
|
|
# v-row (va, vi, ve, vo): w-row katakana plus the combining voicing mark.
va ↔ ワ\u3099 ;
|
|
|
|
|
vi ↔ ヰ\u3099 ;
|
|
|
|
|
ve ↔ ヱ\u3099 ;
|
|
|
|
|
vo ↔ ヲ\u3099 ;
|
|
|
|
|
# Small ヵ / ヶ are written in Latin with a leading tilde.
'~ka' ↔ ヵ ;
|
|
|
|
|
'~ke' ↔ ヶ ;
|
|
|
|
|
# Reverse only: a literal "~" followed by a small kana becomes a plain
# y-syllable. The "~" is the one re-queued by the "<consonant> | '~' ←" rules
# further down, so e.g. "k" + "~" + ャ ends up as "kya".
ya ← '~'ャ;
|
|
|
|
|
yi ← '~'ィ ;
|
|
|
|
|
yu ← '~'ュ;
|
|
|
|
|
ye ← '~'ェ;
|
|
|
|
|
yo ← '~'ョ;
|
|
|
|
|
# Plain vowel a.
a ↔ ア ;
|
|
|
|
|
# b-row: h-row kana plus the voicing mark \u3099.
# Reverse: ヒ+\u3099 in front of a small y-kana emits "b" and places a "~" after
# the cursor, so the '~' + small-kana rules can finish the syllable.
b | '~' ← ヒ \u3099} $small_y ;
|
|
|
|
|
# Forward: "by" before a vowel emits ヒ+\u3099 and re-queues "~y", so the vowel
# that follows is written as a small y-kana.
by } $vowel → ヒ\u3099 | '~y' ;
|
|
|
|
|
ba ↔ ハ\u3099 ;
|
|
|
|
|
bi ↔ ヒ\u3099 ;
|
|
|
|
|
bu ↔ フ\u3099 ;
|
|
|
|
|
be ↔ ヘ\u3099 ;
|
|
|
|
|
bo ↔ ホ\u3099 ;
|
|
|
|
|
# Forward only: "c" before i or e is rewritten to "s" and processed again.
c } i → | s ;
|
|
|
|
|
c } e → | s ;
|
|
|
|
|
# d-row: t-row kana plus the voicing mark. di and du are spelled with テ+\u3099
# followed by a small ィ / ゥ.
da ↔ タ\u3099 ;
|
|
|
|
|
di ↔ テ\u3099ィ ;
|
|
|
|
|
du ↔ テ\u3099ゥ ;
|
|
|
|
|
de ↔ テ\u3099 ;
|
|
|
|
|
do ↔ ト\u3099 ;
|
|
|
|
|
dzu ↔ ツ\u3099 ;
|
|
|
|
|
# dj-row (チ + voicing mark). The combinations with a small kana are reverse
# only; the forward direction goes through "dj } $vowel" below.
dja ← チ\u3099ャ ;
|
|
|
|
|
# With a small ィ the Latin output keeps an explicit "~i" after "dji".
dji'~i' ← チ\u3099ィ ; # liu
|
|
|
|
|
dju ← チ\u3099ュ ;
|
|
|
|
|
dje ← チ\u3099ェ ;
|
|
|
|
|
djo ← チ\u3099ョ ;
|
|
|
|
|
dji ↔ チ\u3099 ;
|
|
|
|
|
# Forward: "dj" before a vowel emits チ+\u3099 and re-queues "~y" so the vowel
# becomes a small y-kana.
dj } $vowel → チ\u3099 | '~y' ;
|
|
|
|
|
# ch-row (チ): same layout as the dj-row, without the voicing mark.
cha ← チャ ;
|
|
|
|
|
chi'~i' ← チィ ; # liu
|
|
|
|
|
chu ← チュ ;
|
|
|
|
|
che ← チェ ;
|
|
|
|
|
cho ← チョ ;
|
|
|
|
|
chi ↔ チ ;
|
|
|
|
|
ch } $vowel → チ | '~y' ;
|
|
|
|
|
# Plain vowel e.
e ↔ エ ;
|
|
|
|
|
# g-row: k-row kana plus the voicing mark. The first two rules handle
# "gy" + vowel via the re-queued "~" / "~y" (reverse and forward respectively).
g | '~' ← キ\u3099} $small_y ;
|
|
|
|
|
gy } $vowel → キ\u3099 | '~y' ;
|
|
|
|
|
ga ↔ カ\u3099 ;
|
|
|
|
|
gi ↔ キ\u3099 ;
|
|
|
|
|
gu ↔ ク\u3099 ;
|
|
|
|
|
ge ↔ ケ\u3099 ;
|
|
|
|
|
go ↔ コ\u3099 ;
|
|
|
|
|
# Plain vowel i.
i ↔ イ ;
|
|
|
|
|
# j-row: シ + voicing mark, followed by a small y-kana for ja/ju/je/jo.
ja ↔ シ\u3099ャ ;
|
|
|
|
|
# Reverse only: with a small ィ the Latin output keeps an explicit "~i".
ji'~i' ← シ\u3099ィ ; # liu
|
|
|
|
|
ju ↔ シ\u3099ュ ;
|
|
|
|
|
je ↔ シ\u3099ェ ;
|
|
|
|
|
jo ↔ シ\u3099ョ ;
|
|
|
|
|
ji ↔ シ\u3099 ;
|
|
|
|
|
# k-row, including "ky" + vowel via the re-queued "~" / "~y".
k | '~' ← キ} $small_y ;
|
|
|
|
|
ky } $vowel → キ | '~y' ;
|
|
|
|
|
ka ↔ カ ;
|
|
|
|
|
ki ↔ キ ;
|
|
|
|
|
ku ↔ ク ;
|
|
|
|
|
ke ↔ ケ ;
|
|
|
|
|
ko ↔ コ ;
|
|
|
|
|
# m-row, including "my" + vowel via the re-queued "~" / "~y".
m | '~' ← ミ} $small_y ;
|
|
|
|
|
my } $vowel → ミ | '~y' ;
|
|
|
|
|
ma ↔ マ ;
|
|
|
|
|
mi ↔ ミ ;
|
|
|
|
|
mu ↔ ム ;
|
|
|
|
|
me ↔ メ ;
|
|
|
|
|
mo ↔ モ ;
|
|
|
|
|
# Forward only: "m" directly before p, b, f or v is written as ン.
m } [pbfv] → ン ;
|
|
|
|
|
# n-row, including "ny" + vowel via the re-queued "~" / "~y".
n | '~' ← ニ } $small_y ;
|
|
|
|
|
ny } $vowel → ニ | '~y' ;
|
|
|
|
|
na ↔ ナ ;
|
|
|
|
|
ni ↔ ニ ;
|
|
|
|
|
nu ↔ ヌ ;
|
|
|
|
|
ne ↔ ネ ;
|
|
|
|
|
no ↔ ノ ;
|
|
|
|
|
# Plain vowel o.
o ↔ オ ;
|
|
|
|
|
# p-row: h-row kana plus the semi-voicing mark \u309A.
p | '~' ← ヒ\u309A } $small_y ;
|
|
|
|
|
py } $vowel → ヒ\u309A | '~y' ;
|
|
|
|
|
pa ↔ ハ\u309A ;
|
|
|
|
|
pi ↔ ヒ\u309A ;
|
|
|
|
|
pu ↔ フ\u309A ;
|
|
|
|
|
pe ↔ ヘ\u309A ;
|
|
|
|
|
po ↔ ホ\u309A ;
|
|
|
|
|
# h-row, including "hy" + vowel via the re-queued "~" / "~y".
h | '~' ← ヒ } $small_y ;
|
|
|
|
|
hy } $vowel → ヒ | '~y' ;
|
|
|
|
|
ha ↔ ハ ;
|
|
|
|
|
hi ↔ ヒ ;
|
|
|
|
|
# "hu" is spelled ヘ + small ゥ; plain フ is used for "fu" below.
hu ↔ ヘゥ ;
|
|
|
|
|
he ↔ ヘ ;
|
|
|
|
|
ho ↔ ホ ;
|
|
|
|
|
# f-row: フ followed by a small vowel kana, with plain フ for "fu".
fa ↔ ファ ;
|
|
|
|
|
fi ↔ フィ ;
|
|
|
|
|
fe ↔ フェ ;
|
|
|
|
|
fo ↔ フォ ;
|
|
|
|
|
fu ↔ フ ;
|
|
|
|
|
# r-row, including "ry" + vowel via the re-queued "~" / "~y".
r | '~' ← リ } $small_y ;
|
|
|
|
|
ry } $vowel → リ | '~y' ;
|
|
|
|
|
ra ↔ ラ ;
|
|
|
|
|
ri ↔ リ ;
|
|
|
|
|
ru ↔ ル ;
|
|
|
|
|
re ↔ レ ;
|
|
|
|
|
ro ↔ ロ ;
|
|
|
|
|
# z-row: s-row kana plus the voicing mark. "zi" is spelled セ+\u3099 followed by
# a small ィ (シ+\u3099 is used for "ji" elsewhere in this file).
za ↔ サ\u3099 ;
|
|
|
|
|
zi ↔ セ\u3099ィ ;
|
|
|
|
|
zu ↔ ス\u3099 ;
|
|
|
|
|
ze ↔ セ\u3099 ;
|
|
|
|
|
zo ↔ ソ\u3099 ;
|
|
|
|
|
# s-row. "si" is spelled セ + small ィ; plain シ is "shi" below.
sa ↔ サ ;
|
|
|
|
|
si ↔ セィ ;
|
|
|
|
|
su ↔ ス ;
|
|
|
|
|
se ↔ セ ;
|
|
|
|
|
so ↔ ソ ;
|
|
|
|
|
# sh-row (シ). The combinations with a small kana are reverse only; the forward
# direction goes through "sh } $vowel" at the end of this group.
sha ← シャ ;
|
|
|
|
|
shi'~i' ← シィ ; # liu
|
|
|
|
|
shu ← シュ ;
|
|
|
|
|
she ← シェ ;
|
|
|
|
|
sho ← ショ ;
|
|
|
|
|
shi ↔ シ ;
|
|
|
|
|
sh } $vowel → シ | '~y' ;
|
|
|
|
|
# t-row. "ti" and "tu" are spelled テ + small ィ / ゥ; チ and ツ are "chi" / "tsu".
ta ↔ タ ;
|
|
|
|
|
ti ↔ ティ ;
|
|
|
|
|
tu ↔ テゥ ;
|
|
|
|
|
te ↔ テ ;
|
|
|
|
|
to ↔ ト ;
|
|
|
|
|
tsu ↔ ツ ;
|
|
|
|
|
# "vu" is ウ plus the voicing mark; it is listed before plain "u".
vu ↔ ウ\u3099 ;
|
|
|
|
|
u ↔ ウ ;
|
|
|
|
|
# w-row. "wu" has no kana of its own and maps forward-only to ウ.
wa ↔ ワ ;
|
|
|
|
|
wi ↔ ヰ ;
|
|
|
|
|
wu → ウ ;
|
|
|
|
|
we ↔ ヱ ;
|
|
|
|
|
wo ↔ ヲ ;
|
|
|
|
|
# y-row. "yi" and "ye" map forward-only to the plain vowels イ and エ.
ya ↔ ヤ ;
|
|
|
|
|
yi → イ ;
|
|
|
|
|
yu ↔ ユ ;
|
|
|
|
|
ye → エ ;
|
|
|
|
|
yo ↔ ヨ ;
|
|
|
|
|
# Doubled consonants <-> small ッ.
# Forward only: "s" before "sh" and "t" before "ch" become ッ.
s } sh → ッ ;
|
|
|
|
|
t } ch → ッ ;
|
|
|
|
|
# Bidirectional: forward, the first of two identical consonants becomes ッ;
# reverse, ッ is romanized as that consonant when the kana that follows belongs
# to the matching row (plus a voicing / semi-voicing mark where required).
# The voiced and semi-voiced rows are listed before the plain rows.
j } j ↔ ッ } $j_start ;
|
|
|
|
|
b } b ↔ ッ } [$h_start$f_start] $voice;
|
|
|
|
|
d } d ↔ ッ } $t_start $voice;
|
|
|
|
|
g } g ↔ ッ } $k_start $voice;
|
|
|
|
|
p } p ↔ ッ } [$h_start$f_start] $semivoice;
|
|
|
|
|
z } z ↔ ッ } $s_start $voice;
|
|
|
|
|
v } v ↔ ッ } $v_start;
|
|
|
|
|
k } k ↔ ッ } $k_start ;
|
|
|
|
|
m } m ↔ ッ } $m_start ;
|
|
|
|
|
n } n ↔ ッ } $n_start ;
|
|
|
|
|
h } h ↔ ッ } $h_start ;
|
|
|
|
|
f } f ↔ ッ } $f_start ;
|
|
|
|
|
r } r ↔ ッ } $r_start ;
|
|
|
|
|
t } t ↔ ッ } $t_start ;
|
|
|
|
|
s } s ↔ ッ } $s_start ;
|
|
|
|
|
w } w ↔ ッ } $w_start;
|
|
|
|
|
y } y ↔ ッ } $y_start;
|
|
|
|
|
# Forward only: doubled letters that have no kana row of their own
# (x, c, l, q) and "c" before k / q.
x } x → ッ ;
|
|
|
|
|
c } k → ッ ;
|
|
|
|
|
c } c → ッ ;
|
|
|
|
|
c } q → ッ ;
|
|
|
|
|
l } l → ッ ;
|
|
|
|
|
q } q → ッ ;
|
|
|
|
|
# Combining macron <-> long-vowel mark.
$macron ↔ ー ;
|
|
|
|
|
# Small kana are written in Latin as a tilde followed by the syllable of the
# full-size kana.
'~a' ↔ ァ ;
|
|
|
|
|
'~i' ↔ ィ ;
|
|
|
|
|
'~u' ↔ ゥ ;
|
|
|
|
|
'~e' ↔ ェ ;
|
|
|
|
|
'~o' ↔ ォ ;
|
|
|
|
|
'~tsu' ↔ ッ ;
|
|
|
|
|
'~wa' ↔ ヮ ;
|
|
|
|
|
'~ya' ↔ ャ ;
|
|
|
|
|
# "~yi" and "~ye" are forward only; in reverse ィ and ェ come back as "~i" / "~e".
'~yi' → ィ ;
|
|
|
|
|
'~yu' ↔ ュ ;
|
|
|
|
|
'~ye' → ェ ;
|
|
|
|
|
'~yo' ↔ ョ ;
|
|
|
|
|
# Iteration mark ヽ followed by a voicing mark (reverse direction only).
# The text before "{" is context: the Latin syllable that precedes the mark.
# The mark is replaced by that syllable with its consonant voiced; $1 is the
# captured "y* $vowel" tail. Multi-letter consonants (sh, ch, ts) are listed
# before the single letters s and t that would otherwise match their endings.
j $1 ← sh (y* $vowel) {ヽ$voice ;
|
|
|
|
|
dj $1 ← ch (y* $vowel) {ヽ$voice ;
|
|
|
|
|
dz $1 ← ts (y* $vowel) {ヽ$voice ;
|
|
|
|
|
g $1 ← k (y* $vowel) {ヽ$voice ;
|
|
|
|
|
z $1 ← s (y* $vowel) {ヽ$voice ;
|
|
|
|
|
d $1 ← t (y* $vowel) {ヽ$voice ;
|
|
|
|
|
# Voiced iteration mark after an h-row syllable repeats it as the b-row
# syllable ("ha" + ヽ + voicing mark -> "haba"), in line with the k->g, s->z
# and t->d rules of this group. The rule previously had the two sides swapped
# (h $1 ← b ...), which left "ha" unvoiced and turned "ba" + mark into "baha".
b $1 ← h (y* $vowel) {ヽ$voice ;
|
|
|
|
|
# Voiced iteration mark after a w-row syllable repeats it as the v-row syllable.
v $1 ← w (y* $vowel) {ヽ$voice ;
|
|
|
|
|
# Voiced iteration mark where the repeated syllable keeps its consonant.
# NOTE(review): the sh, ch and ts rules here have the same source side as the
# earlier "j $1 ← sh", "dj $1 ← ch" and "dz $1 ← ts" rules, so they look
# unreachable if the earlier rule wins -- confirm against ICU rule ordering.
sh $1 ← sh (y* $vowel) {ヽ$voice ;
|
|
|
|
|
j $1 ← j (y* $vowel) {ヽ$voice ;
|
|
|
|
|
ch $1 ← ch (y* $vowel) {ヽ$voice ;
|
|
|
|
|
dj $1 ← dj(y* $vowel) {ヽ$voice ;
|
|
|
|
|
ts $1 ← ts (y* $vowel) {ヽ$voice ;
|
|
|
|
|
dz $1 ← dz (y* $vowel) {ヽ$voice ;
|
|
|
|
|
# Any other consonant (+ optional y) + vowel: repeat the syllable unchanged,
# whether or not a voicing mark follows the iteration mark.
$1 ← ($consonant y* $vowel) {ヽ$voice? ;
|
|
|
|
|
$1 ← (.) {ヽ $voice? ; # otherwise repeat last character
|
|
|
|
|
← ヽ $voice? ; # delete if no characters found
|
|
|
|
|
# Leftover consonants: Latin letters that were not consumed by any syllable
# rule earlier in the file.
# An "h" directly after a voweled katakana (optionally carrying a voicing or
# semi-voicing mark) becomes the long-vowel mark.
$voweled_basekana [\u3099 \u309A]? { h → ー ;
|
|
|
|
|
# Forward only: consonant + "y" that did not match an earlier "<c>y } $vowel" rule.
cy → セィ ;
|
|
|
|
|
dy → テ\u3099ィ ;
|
|
|
|
|
hy → ヒ ;
|
|
|
|
|
sy → セィ ;
|
|
|
|
|
ty → ティ ;
|
|
|
|
|
zy → セ\u3099ィ ;
|
|
|
|
|
# Forward only: each bare consonant is written as one fixed kana of its row.
# Two-letter consonants (ch, sh, dz, dj) are listed before the single letters.
h → ヘ ;
|
|
|
|
|
ch → チ;
|
|
|
|
|
sh → シ ;
|
|
|
|
|
dz → ツ\u3099 ;
|
|
|
|
|
dj → チ\u3099;
|
|
|
|
|
b → フ\u3099 ;
|
|
|
|
|
d → テ\u3099 ;
|
|
|
|
|
g → ク\u3099 ;
|
|
|
|
|
k → ク ;
|
|
|
|
|
m → ム ;
|
|
|
|
|
# Reverse only: ン before a kana in $n_quoter is written as n followed by an
# apostrophe ('' is an escaped single quote). Otherwise ン <-> plain n.
n'' ← ン } $n_quoter ;
|
|
|
|
|
n ↔ ン ;
|
|
|
|
|
p → フ\u309A ;
|
|
|
|
|
r → ル ;
|
|
|
|
|
s → ス ;
|
|
|
|
|
t → テ ;
|
|
|
|
|
y → イ ;
|
|
|
|
|
z → ス\u3099 ;
|
|
|
|
|
v → ウ\u3099 ;
|
|
|
|
|
f → フ;
|
|
|
|
|
j → シ\u3099;
|
|
|
|
|
w → ウ;
|
|
|
|
|
# Forward only: Latin letters with no rule of their own are rewritten to
# letters that have one. The cursor "|" is placed before the replacement, so
# the result is run through the rules again.
ß → | ss ;
|
|
|
|
|
æ → | e ;
|
|
|
|
|
ð → | d ;
|
|
|
|
|
ø → | u ;
|
|
|
|
|
þ → | th ;
|
|
|
|
|
c → | k ;
|
|
|
|
|
l → | r ;
|
|
|
|
|
q → | k ;
|
|
|
|
|
x → | ks ;
|
|
|
|
|
'~' → ; # delete stray tildes between letters
|
|
|
|
|
[:Katakana:] { '' } [:Latin:] → ; # delete stray quotes between letters
|
2006-07-21 01:08:32 +00:00
|
|
|
|
# Forward: recompose the output to NFC. The reverse side of this step is NFD,
# so the rules above also see decomposed text when converting back.
:: NFC (NFD) ;
|
2007-07-26 01:38:26 +00:00
|
|
|
|
# Reverse direction only (the whole step is parenthesized): convert halfwidth
# forms to fullwidth for Katakana and the listed sound / length marks.
:: ([[:Katakana:][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] halfwidth-fullwidth);
|
|
|
|
|
:: ( [[\ -~¢-£¥-¦¬\u0304₩。-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ│-○][~、-。がぎぐげござじずぜぞだぢづでどば-ぱび-ぴぶ-ぷべ-ぺぼ-ぽゔ\u3099-゛ゞァ-ヺー-ヾ][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] ) ;
|