ICU-1575 Fixed the normalizing transliterator so that it converts closer to the end of the text in incremental mode; Latin-Katakana and Fullwidth-Halfwidth now pass the incremental tests. Added some package-private utilities to UnicodeSet -- they should be made public in the next release.

X-SVN-Rev: 7259
This commit is contained in:
Mark Davis 2001-12-03 02:10:26 +00:00
parent 514ea083f8
commit 444318a847
8 changed files with 1444 additions and 556 deletions

View File

@ -1,6 +1,6 @@
#--------------------------------------------------------------------
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:41:57 2001
#--------------------------------------------------------------------
@ -8,263 +8,266 @@
# Fullwidth-Halfwidth
# Mechanically generated from Unicode Character Database
# IDEOGRAPHIC SPACE then added, and
# FULLWIDTH MACRON changed to map to MACRON, not SPACE + COMBINING MACRON
# multicharacter
ガ<>ガ; # to KATAKANA LETTER GA
ギ<>ギ; # to KATAKANA LETTER GI
グ<>グ; # to KATAKANA LETTER GU
ゲ<>ゲ; # to KATAKANA LETTER GE
ゴ<>ゴ; # to KATAKANA LETTER GO
ザ<>ザ; # to KATAKANA LETTER ZA
ジ<>ジ; # to KATAKANA LETTER ZI
ズ<>ズ; # to KATAKANA LETTER ZU
ゼ<>ゼ; # to KATAKANA LETTER ZE
ゾ<>ゾ; # to KATAKANA LETTER ZO
ダ<>ダ; # to KATAKANA LETTER DA
ヂ<>ヂ; # to KATAKANA LETTER DI
ヅ<>ヅ; # to KATAKANA LETTER DU
デ<>デ; # to KATAKANA LETTER DE
ド<>ド; # to KATAKANA LETTER DO
バ<>バ; # to KATAKANA LETTER BA
パ<>パ; # to KATAKANA LETTER PA
ビ<>ビ; # to KATAKANA LETTER BI
ピ<>ピ; # to KATAKANA LETTER PI
ブ<>ブ; # to KATAKANA LETTER BU
プ<>プ; # to KATAKANA LETTER PU
ベ<>ベ; # to KATAKANA LETTER BE
ペ<>ペ; # to KATAKANA LETTER PE
ボ<>ボ; # to KATAKANA LETTER BO
ポ<>ポ; # to KATAKANA LETTER PO
ヴ<>ヴ; # to KATAKANA LETTER VU
ヷ<>ヷ; # to KATAKANA LETTER VA
ヺ<>ヺ; # to KATAKANA LETTER VO
ガ<>ガ; # to KATAKANA LETTER GA
ギ<>ギ; # to KATAKANA LETTER GI
グ<>グ; # to KATAKANA LETTER GU
ゲ<>ゲ; # to KATAKANA LETTER GE
ゴ<>ゴ; # to KATAKANA LETTER GO
ザ<>ザ; # to KATAKANA LETTER ZA
ジ<>ジ; # to KATAKANA LETTER ZI
ズ<>ズ; # to KATAKANA LETTER ZU
ゼ<>ゼ; # to KATAKANA LETTER ZE
ゾ<>ゾ; # to KATAKANA LETTER ZO
ダ<>ダ; # to KATAKANA LETTER DA
ヂ<>ヂ; # to KATAKANA LETTER DI
ヅ<>ヅ; # to KATAKANA LETTER DU
デ<>デ; # to KATAKANA LETTER DE
ド<>ド; # to KATAKANA LETTER DO
バ<>バ; # to KATAKANA LETTER BA
パ<>パ; # to KATAKANA LETTER PA
ビ<>ビ; # to KATAKANA LETTER BI
ピ<>ピ; # to KATAKANA LETTER PI
ブ<>ブ; # to KATAKANA LETTER BU
プ<>プ; # to KATAKANA LETTER PU
ベ<>ベ; # to KATAKANA LETTER BE
ペ<>ペ; # to KATAKANA LETTER PE
ボ<>ボ; # to KATAKANA LETTER BO
ポ<>ポ; # to KATAKANA LETTER PO
ヴ<>ヴ; # to KATAKANA LETTER VU
ヷ<>ヷ; # to KATAKANA LETTER VA
ヺ<>ヺ; # to KATAKANA LETTER VO
# single character
<>'!'; # from FULLWIDTH EXCLAMATION MARK
<>'\"'; # from FULLWIDTH QUOTATION MARK
<>'#'; # from FULLWIDTH NUMBER SIGN
<>'$'; # from FULLWIDTH DOLLAR SIGN
<>'%'; # from FULLWIDTH PERCENT SIGN
<>'&'; # from FULLWIDTH AMPERSAND
<>''; # from FULLWIDTH APOSTROPHE
<>'('; # from FULLWIDTH LEFT PARENTHESIS
<>')'; # from FULLWIDTH RIGHT PARENTHESIS
<>'*'; # from FULLWIDTH ASTERISK
<>'+'; # from FULLWIDTH PLUS SIGN
<>','; # from FULLWIDTH COMMA
<>'-'; # from FULLWIDTH HYPHEN-MINUS
<>'.'; # from FULLWIDTH FULL STOP
<>'/'; # from FULLWIDTH SOLIDUS
<>'0'; # from FULLWIDTH DIGIT ZERO
<>'1'; # from FULLWIDTH DIGIT ONE
<>'2'; # from FULLWIDTH DIGIT TWO
<>'3'; # from FULLWIDTH DIGIT THREE
<>'4'; # from FULLWIDTH DIGIT FOUR
<>'5'; # from FULLWIDTH DIGIT FIVE
<>'6'; # from FULLWIDTH DIGIT SIX
<>'7'; # from FULLWIDTH DIGIT SEVEN
<>'8'; # from FULLWIDTH DIGIT EIGHT
<>'9'; # from FULLWIDTH DIGIT NINE
<>':'; # from FULLWIDTH COLON
<>';'; # from FULLWIDTH SEMICOLON
<>'<'; # from FULLWIDTH LESS-THAN SIGN
<>'='; # from FULLWIDTH EQUALS SIGN
<>'>'; # from FULLWIDTH GREATER-THAN SIGN
<>'?'; # from FULLWIDTH QUESTION MARK
<>'@'; # from FULLWIDTH COMMERCIAL AT
<>A; # from FULLWIDTH LATIN CAPITAL LETTER A
<>B; # from FULLWIDTH LATIN CAPITAL LETTER B
<>C; # from FULLWIDTH LATIN CAPITAL LETTER C
<>D; # from FULLWIDTH LATIN CAPITAL LETTER D
<>E; # from FULLWIDTH LATIN CAPITAL LETTER E
<>F; # from FULLWIDTH LATIN CAPITAL LETTER F
<>G; # from FULLWIDTH LATIN CAPITAL LETTER G
<>H; # from FULLWIDTH LATIN CAPITAL LETTER H
<>I; # from FULLWIDTH LATIN CAPITAL LETTER I
<>J; # from FULLWIDTH LATIN CAPITAL LETTER J
<>K; # from FULLWIDTH LATIN CAPITAL LETTER K
<>L; # from FULLWIDTH LATIN CAPITAL LETTER L
<>M; # from FULLWIDTH LATIN CAPITAL LETTER M
<>N; # from FULLWIDTH LATIN CAPITAL LETTER N
<>O; # from FULLWIDTH LATIN CAPITAL LETTER O
<>P; # from FULLWIDTH LATIN CAPITAL LETTER P
<>Q; # from FULLWIDTH LATIN CAPITAL LETTER Q
<>R; # from FULLWIDTH LATIN CAPITAL LETTER R
<>S; # from FULLWIDTH LATIN CAPITAL LETTER S
<>T; # from FULLWIDTH LATIN CAPITAL LETTER T
<>U; # from FULLWIDTH LATIN CAPITAL LETTER U
<>V; # from FULLWIDTH LATIN CAPITAL LETTER V
<>W; # from FULLWIDTH LATIN CAPITAL LETTER W
<>X; # from FULLWIDTH LATIN CAPITAL LETTER X
<>Y; # from FULLWIDTH LATIN CAPITAL LETTER Y
<>Z; # from FULLWIDTH LATIN CAPITAL LETTER Z
<>'['; # from FULLWIDTH LEFT SQUARE BRACKET
<>'\\'; # from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
<>']'; # from FULLWIDTH RIGHT SQUARE BRACKET
<>'^'; # from FULLWIDTH CIRCUMFLEX ACCENT
_<>'_'; # from FULLWIDTH LOW LINE
<>'`'; # from FULLWIDTH GRAVE ACCENT
<>a; # from FULLWIDTH LATIN SMALL LETTER A
<>b; # from FULLWIDTH LATIN SMALL LETTER B
<>c; # from FULLWIDTH LATIN SMALL LETTER C
<>d; # from FULLWIDTH LATIN SMALL LETTER D
<>e; # from FULLWIDTH LATIN SMALL LETTER E
<>f; # from FULLWIDTH LATIN SMALL LETTER F
<>g; # from FULLWIDTH LATIN SMALL LETTER G
<>h; # from FULLWIDTH LATIN SMALL LETTER H
<>i; # from FULLWIDTH LATIN SMALL LETTER I
<>j; # from FULLWIDTH LATIN SMALL LETTER J
<>k; # from FULLWIDTH LATIN SMALL LETTER K
<>l; # from FULLWIDTH LATIN SMALL LETTER L
<>m; # from FULLWIDTH LATIN SMALL LETTER M
<>n; # from FULLWIDTH LATIN SMALL LETTER N
<>o; # from FULLWIDTH LATIN SMALL LETTER O
<>p; # from FULLWIDTH LATIN SMALL LETTER P
<>q; # from FULLWIDTH LATIN SMALL LETTER Q
<>r; # from FULLWIDTH LATIN SMALL LETTER R
<>s; # from FULLWIDTH LATIN SMALL LETTER S
<>t; # from FULLWIDTH LATIN SMALL LETTER T
<>u; # from FULLWIDTH LATIN SMALL LETTER U
<>v; # from FULLWIDTH LATIN SMALL LETTER V
<>w; # from FULLWIDTH LATIN SMALL LETTER W
<>x; # from FULLWIDTH LATIN SMALL LETTER X
<>y; # from FULLWIDTH LATIN SMALL LETTER Y
<>z; # from FULLWIDTH LATIN SMALL LETTER Z
<>'{'; # from FULLWIDTH LEFT CURLY BRACKET
<>'|'; # from FULLWIDTH VERTICAL LINE
<>'}'; # from FULLWIDTH RIGHT CURLY BRACKET
<>'~'; # from FULLWIDTH TILDE
。<>。; # to HALFWIDTH IDEOGRAPHIC FULL STOP
「<>「; # to HALFWIDTH LEFT CORNER BRACKET
」<>」; # to HALFWIDTH RIGHT CORNER BRACKET
、<>、; # to HALFWIDTH IDEOGRAPHIC COMMA
・<>・; # to HALFWIDTH KATAKANA MIDDLE DOT
ヲ<>ヲ; # to HALFWIDTH KATAKANA LETTER WO
ァ<>ァ; # to HALFWIDTH KATAKANA LETTER SMALL A
ィ<>ィ; # to HALFWIDTH KATAKANA LETTER SMALL I
ゥ<>ゥ; # to HALFWIDTH KATAKANA LETTER SMALL U
ェ<>ェ; # to HALFWIDTH KATAKANA LETTER SMALL E
ォ<>ォ; # to HALFWIDTH KATAKANA LETTER SMALL O
ャ<>ャ; # to HALFWIDTH KATAKANA LETTER SMALL YA
ュ<>ュ; # to HALFWIDTH KATAKANA LETTER SMALL YU
ョ<>ョ; # to HALFWIDTH KATAKANA LETTER SMALL YO
ッ<>ッ; # to HALFWIDTH KATAKANA LETTER SMALL TU
ー<>ー; # to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
ア<>ア; # to HALFWIDTH KATAKANA LETTER A
イ<>イ; # to HALFWIDTH KATAKANA LETTER I
ウ<>ウ; # to HALFWIDTH KATAKANA LETTER U
エ<>エ; # to HALFWIDTH KATAKANA LETTER E
オ<>オ; # to HALFWIDTH KATAKANA LETTER O
カ<>カ; # to HALFWIDTH KATAKANA LETTER KA
キ<>キ; # to HALFWIDTH KATAKANA LETTER KI
ク<>ク; # to HALFWIDTH KATAKANA LETTER KU
ケ<>ケ; # to HALFWIDTH KATAKANA LETTER KE
コ<>コ; # to HALFWIDTH KATAKANA LETTER KO
サ<>サ; # to HALFWIDTH KATAKANA LETTER SA
シ<>シ; # to HALFWIDTH KATAKANA LETTER SI
ス<>ス; # to HALFWIDTH KATAKANA LETTER SU
セ<>セ; # to HALFWIDTH KATAKANA LETTER SE
ソ<>ソ; # to HALFWIDTH KATAKANA LETTER SO
タ<>タ; # to HALFWIDTH KATAKANA LETTER TA
チ<>チ; # to HALFWIDTH KATAKANA LETTER TI
ツ<>ツ; # to HALFWIDTH KATAKANA LETTER TU
テ<>テ; # to HALFWIDTH KATAKANA LETTER TE
ト<>ト; # to HALFWIDTH KATAKANA LETTER TO
ナ<>ナ; # to HALFWIDTH KATAKANA LETTER NA
ニ<>ニ; # to HALFWIDTH KATAKANA LETTER NI
ヌ<>ヌ; # to HALFWIDTH KATAKANA LETTER NU
ネ<>ネ; # to HALFWIDTH KATAKANA LETTER NE
<>ノ; # to HALFWIDTH KATAKANA LETTER NO
ハ<>ハ; # to HALFWIDTH KATAKANA LETTER HA
ヒ<>ヒ; # to HALFWIDTH KATAKANA LETTER HI
フ<>フ; # to HALFWIDTH KATAKANA LETTER HU
ヘ<>ヘ; # to HALFWIDTH KATAKANA LETTER HE
ホ<>ホ; # to HALFWIDTH KATAKANA LETTER HO
マ<>マ; # to HALFWIDTH KATAKANA LETTER MA
ミ<>ミ; # to HALFWIDTH KATAKANA LETTER MI
ム<>ム; # to HALFWIDTH KATAKANA LETTER MU
メ<>メ; # to HALFWIDTH KATAKANA LETTER ME
モ<>モ; # to HALFWIDTH KATAKANA LETTER MO
ヤ<>ヤ; # to HALFWIDTH KATAKANA LETTER YA
ユ<>ユ; # to HALFWIDTH KATAKANA LETTER YU
ヨ<>ヨ; # to HALFWIDTH KATAKANA LETTER YO
ラ<>ラ; # to HALFWIDTH KATAKANA LETTER RA
リ<>リ; # to HALFWIDTH KATAKANA LETTER RI
ル<>ル; # to HALFWIDTH KATAKANA LETTER RU
レ<>レ; # to HALFWIDTH KATAKANA LETTER RE
ロ<>ロ; # to HALFWIDTH KATAKANA LETTER RO
ワ<>ワ; # to HALFWIDTH KATAKANA LETTER WA
ン<>ン; # to HALFWIDTH KATAKANA LETTER N
゙<>゙; # to HALFWIDTH KATAKANA VOICED SOUND MARK
゚<>゚; # to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
<>; # to HALFWIDTH HANGUL FILLER
ᄀ<>ᄀ; # to HALFWIDTH HANGUL LETTER KIYEOK
ᄁ<>ᄁ; # to HALFWIDTH HANGUL LETTER SSANGKIYEOK
ᆪ<>ᆪ; # to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
ᄂ<>ᄂ; # to HALFWIDTH HANGUL LETTER NIEUN
ᆬ<>ᆬ; # to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
ᆭ<>ᆭ; # to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
ᄃ<>ᄃ; # to HALFWIDTH HANGUL LETTER TIKEUT
ᄄ<>ᄄ; # to HALFWIDTH HANGUL LETTER SSANGTIKEUT
ᄅ<>ᄅ; # to HALFWIDTH HANGUL LETTER RIEUL
ᆰ<>ᆰ; # to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
ᆱ<>ᆱ; # to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
ᆲ<>ᆲ; # to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
ᆳ<>ᆳ; # to HALFWIDTH HANGUL LETTER RIEUL-SIOS
ᆴ<>ᆴ; # to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
ᆵ<>ᆵ; # to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
ᄚ<>ᄚ; # to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
ᄆ<>ᄆ; # to HALFWIDTH HANGUL LETTER MIEUM
ᄇ<>ᄇ; # to HALFWIDTH HANGUL LETTER PIEUP
ᄈ<>ᄈ; # to HALFWIDTH HANGUL LETTER SSANGPIEUP
ᄡ<>ᄡ; # to HALFWIDTH HANGUL LETTER PIEUP-SIOS
ᄉ<>ᄉ; # to HALFWIDTH HANGUL LETTER SIOS
ᄊ<>ᄊ; # to HALFWIDTH HANGUL LETTER SSANGSIOS
ᄋ<>ᄋ; # to HALFWIDTH HANGUL LETTER IEUNG
ᄌ<>ᄌ; # to HALFWIDTH HANGUL LETTER CIEUC
ᄍ<>ᄍ; # to HALFWIDTH HANGUL LETTER SSANGCIEUC
ᄎ<>ᄎ; # to HALFWIDTH HANGUL LETTER CHIEUCH
ᄏ<>ᄏ; # to HALFWIDTH HANGUL LETTER KHIEUKH
ᄐ<>ᄐ; # to HALFWIDTH HANGUL LETTER THIEUTH
ᄑ<>ᄑ; # to HALFWIDTH HANGUL LETTER PHIEUPH
ᄒ<>ᄒ; # to HALFWIDTH HANGUL LETTER HIEUH
ᅡ<>ᅡ; # to HALFWIDTH HANGUL LETTER A
ᅢ<>ᅢ; # to HALFWIDTH HANGUL LETTER AE
ᅣ<>ᅣ; # to HALFWIDTH HANGUL LETTER YA
ᅤ<>ᅤ; # to HALFWIDTH HANGUL LETTER YAE
ᅥ<>ᅥ; # to HALFWIDTH HANGUL LETTER EO
ᅦ<>ᅦ; # to HALFWIDTH HANGUL LETTER E
ᅧ<>ᅧ; # to HALFWIDTH HANGUL LETTER YEO
ᅨ<>ᅨ; # to HALFWIDTH HANGUL LETTER YE
ᅩ<>ᅩ; # to HALFWIDTH HANGUL LETTER O
ᅪ<>ᅪ; # to HALFWIDTH HANGUL LETTER WA
ᅫ<>ᅫ; # to HALFWIDTH HANGUL LETTER WAE
ᅬ<>ᅬ; # to HALFWIDTH HANGUL LETTER OE
ᅭ<>ᅭ; # to HALFWIDTH HANGUL LETTER YO
ᅮ<>ᅮ; # to HALFWIDTH HANGUL LETTER U
ᅯ<>ᅯ; # to HALFWIDTH HANGUL LETTER WEO
ᅰ<>ᅰ; # to HALFWIDTH HANGUL LETTER WE
ᅱ<>ᅱ; # to HALFWIDTH HANGUL LETTER WI
ᅲ<>ᅲ; # to HALFWIDTH HANGUL LETTER YU
ᅳ<>ᅳ; # to HALFWIDTH HANGUL LETTER EU
ᅴ<>ᅴ; # to HALFWIDTH HANGUL LETTER YI
ᅵ<>ᅵ; # to HALFWIDTH HANGUL LETTER I
¢<>'¢'; # from FULLWIDTH CENT SIGN
£<>'£'; # from FULLWIDTH POUND SIGN
¬<>'¬'; # from FULLWIDTH NOT SIGN
 ̄<>' '̄; # from FULLWIDTH MACRON
<>'!'; # from FULLWIDTH EXCLAMATION MARK
<>'\"'; # from FULLWIDTH QUOTATION MARK
<>'#'; # from FULLWIDTH NUMBER SIGN
<>'$'; # from FULLWIDTH DOLLAR SIGN
<>'%'; # from FULLWIDTH PERCENT SIGN
<>'&'; # from FULLWIDTH AMPERSAND
<>''; # from FULLWIDTH APOSTROPHE
<>'('; # from FULLWIDTH LEFT PARENTHESIS
<>')'; # from FULLWIDTH RIGHT PARENTHESIS
<>'*'; # from FULLWIDTH ASTERISK
<>'+'; # from FULLWIDTH PLUS SIGN
<>','; # from FULLWIDTH COMMA
<>'-'; # from FULLWIDTH HYPHEN-MINUS
<>'.'; # from FULLWIDTH FULL STOP
<>'/'; # from FULLWIDTH SOLIDUS
<>'0'; # from FULLWIDTH DIGIT ZERO
<>'1'; # from FULLWIDTH DIGIT ONE
<>'2'; # from FULLWIDTH DIGIT TWO
<>'3'; # from FULLWIDTH DIGIT THREE
<>'4'; # from FULLWIDTH DIGIT FOUR
<>'5'; # from FULLWIDTH DIGIT FIVE
<>'6'; # from FULLWIDTH DIGIT SIX
<>'7'; # from FULLWIDTH DIGIT SEVEN
<>'8'; # from FULLWIDTH DIGIT EIGHT
<>'9'; # from FULLWIDTH DIGIT NINE
<>':'; # from FULLWIDTH COLON
<>';'; # from FULLWIDTH SEMICOLON
<>'<'; # from FULLWIDTH LESS-THAN SIGN
<>'='; # from FULLWIDTH EQUALS SIGN
<>'>'; # from FULLWIDTH GREATER-THAN SIGN
<>'?'; # from FULLWIDTH QUESTION MARK
<>'@'; # from FULLWIDTH COMMERCIAL AT
<>A; # from FULLWIDTH LATIN CAPITAL LETTER A
<>B; # from FULLWIDTH LATIN CAPITAL LETTER B
<>C; # from FULLWIDTH LATIN CAPITAL LETTER C
<>D; # from FULLWIDTH LATIN CAPITAL LETTER D
<>E; # from FULLWIDTH LATIN CAPITAL LETTER E
<>F; # from FULLWIDTH LATIN CAPITAL LETTER F
<>G; # from FULLWIDTH LATIN CAPITAL LETTER G
<>H; # from FULLWIDTH LATIN CAPITAL LETTER H
<>I; # from FULLWIDTH LATIN CAPITAL LETTER I
<>J; # from FULLWIDTH LATIN CAPITAL LETTER J
<>K; # from FULLWIDTH LATIN CAPITAL LETTER K
<>L; # from FULLWIDTH LATIN CAPITAL LETTER L
<>M; # from FULLWIDTH LATIN CAPITAL LETTER M
<>N; # from FULLWIDTH LATIN CAPITAL LETTER N
<>O; # from FULLWIDTH LATIN CAPITAL LETTER O
<>P; # from FULLWIDTH LATIN CAPITAL LETTER P
<>Q; # from FULLWIDTH LATIN CAPITAL LETTER Q
<>R; # from FULLWIDTH LATIN CAPITAL LETTER R
<>S; # from FULLWIDTH LATIN CAPITAL LETTER S
<>T; # from FULLWIDTH LATIN CAPITAL LETTER T
<>U; # from FULLWIDTH LATIN CAPITAL LETTER U
<>V; # from FULLWIDTH LATIN CAPITAL LETTER V
<>W; # from FULLWIDTH LATIN CAPITAL LETTER W
<>X; # from FULLWIDTH LATIN CAPITAL LETTER X
<>Y; # from FULLWIDTH LATIN CAPITAL LETTER Y
<>Z; # from FULLWIDTH LATIN CAPITAL LETTER Z
<>'['; # from FULLWIDTH LEFT SQUARE BRACKET
<>'\\'; # from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
<>']'; # from FULLWIDTH RIGHT SQUARE BRACKET
<>'^'; # from FULLWIDTH CIRCUMFLEX ACCENT
_<>'_'; # from FULLWIDTH LOW LINE
<>'`'; # from FULLWIDTH GRAVE ACCENT
<>a; # from FULLWIDTH LATIN SMALL LETTER A
<>b; # from FULLWIDTH LATIN SMALL LETTER B
<>c; # from FULLWIDTH LATIN SMALL LETTER C
<>d; # from FULLWIDTH LATIN SMALL LETTER D
<>e; # from FULLWIDTH LATIN SMALL LETTER E
<>f; # from FULLWIDTH LATIN SMALL LETTER F
<>g; # from FULLWIDTH LATIN SMALL LETTER G
<>h; # from FULLWIDTH LATIN SMALL LETTER H
<>i; # from FULLWIDTH LATIN SMALL LETTER I
<>j; # from FULLWIDTH LATIN SMALL LETTER J
<>k; # from FULLWIDTH LATIN SMALL LETTER K
<>l; # from FULLWIDTH LATIN SMALL LETTER L
<>m; # from FULLWIDTH LATIN SMALL LETTER M
<>n; # from FULLWIDTH LATIN SMALL LETTER N
<>o; # from FULLWIDTH LATIN SMALL LETTER O
<>p; # from FULLWIDTH LATIN SMALL LETTER P
<>q; # from FULLWIDTH LATIN SMALL LETTER Q
<>r; # from FULLWIDTH LATIN SMALL LETTER R
<>s; # from FULLWIDTH LATIN SMALL LETTER S
<>t; # from FULLWIDTH LATIN SMALL LETTER T
<>u; # from FULLWIDTH LATIN SMALL LETTER U
<>v; # from FULLWIDTH LATIN SMALL LETTER V
<>w; # from FULLWIDTH LATIN SMALL LETTER W
<>x; # from FULLWIDTH LATIN SMALL LETTER X
<>y; # from FULLWIDTH LATIN SMALL LETTER Y
<>z; # from FULLWIDTH LATIN SMALL LETTER Z
<>'{'; # from FULLWIDTH LEFT CURLY BRACKET
<>'|'; # from FULLWIDTH VERTICAL LINE
<>'}'; # from FULLWIDTH RIGHT CURLY BRACKET
<>'~'; # from FULLWIDTH TILDE
。<>。; # to HALFWIDTH IDEOGRAPHIC FULL STOP
「<>「; # to HALFWIDTH LEFT CORNER BRACKET
」<>」; # to HALFWIDTH RIGHT CORNER BRACKET
、<>、; # to HALFWIDTH IDEOGRAPHIC COMMA
・<>・; # to HALFWIDTH KATAKANA MIDDLE DOT
ヲ<>ヲ; # to HALFWIDTH KATAKANA LETTER WO
ァ<>ァ; # to HALFWIDTH KATAKANA LETTER SMALL A
ィ<>ィ; # to HALFWIDTH KATAKANA LETTER SMALL I
ゥ<>ゥ; # to HALFWIDTH KATAKANA LETTER SMALL U
ェ<>ェ; # to HALFWIDTH KATAKANA LETTER SMALL E
ォ<>ォ; # to HALFWIDTH KATAKANA LETTER SMALL O
ャ<>ャ; # to HALFWIDTH KATAKANA LETTER SMALL YA
ュ<>ュ; # to HALFWIDTH KATAKANA LETTER SMALL YU
ョ<>ョ; # to HALFWIDTH KATAKANA LETTER SMALL YO
ッ<>ッ; # to HALFWIDTH KATAKANA LETTER SMALL TU
ー<>ー; # to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
ア<>ア; # to HALFWIDTH KATAKANA LETTER A
イ<>イ; # to HALFWIDTH KATAKANA LETTER I
ウ<>ウ; # to HALFWIDTH KATAKANA LETTER U
エ<>エ; # to HALFWIDTH KATAKANA LETTER E
オ<>オ; # to HALFWIDTH KATAKANA LETTER O
カ<>カ; # to HALFWIDTH KATAKANA LETTER KA
キ<>キ; # to HALFWIDTH KATAKANA LETTER KI
ク<>ク; # to HALFWIDTH KATAKANA LETTER KU
ケ<>ケ; # to HALFWIDTH KATAKANA LETTER KE
コ<>コ; # to HALFWIDTH KATAKANA LETTER KO
サ<>サ; # to HALFWIDTH KATAKANA LETTER SA
シ<>シ; # to HALFWIDTH KATAKANA LETTER SI
ス<>ス; # to HALFWIDTH KATAKANA LETTER SU
セ<>セ; # to HALFWIDTH KATAKANA LETTER SE
ソ<>ソ; # to HALFWIDTH KATAKANA LETTER SO
タ<>タ; # to HALFWIDTH KATAKANA LETTER TA
チ<>チ; # to HALFWIDTH KATAKANA LETTER TI
ツ<>ツ; # to HALFWIDTH KATAKANA LETTER TU
テ<>テ; # to HALFWIDTH KATAKANA LETTER TE
ト<>ト; # to HALFWIDTH KATAKANA LETTER TO
ナ<>ナ; # to HALFWIDTH KATAKANA LETTER NA
ニ<>ニ; # to HALFWIDTH KATAKANA LETTER NI
ヌ<>ヌ; # to HALFWIDTH KATAKANA LETTER NU
ネ<>ネ; # to HALFWIDTH KATAKANA LETTER NE
<>ノ; # to HALFWIDTH KATAKANA LETTER NO
ハ<>ハ; # to HALFWIDTH KATAKANA LETTER HA
ヒ<>ヒ; # to HALFWIDTH KATAKANA LETTER HI
フ<>フ; # to HALFWIDTH KATAKANA LETTER HU
ヘ<>ヘ; # to HALFWIDTH KATAKANA LETTER HE
ホ<>ホ; # to HALFWIDTH KATAKANA LETTER HO
マ<>マ; # to HALFWIDTH KATAKANA LETTER MA
ミ<>ミ; # to HALFWIDTH KATAKANA LETTER MI
ム<>ム; # to HALFWIDTH KATAKANA LETTER MU
メ<>メ; # to HALFWIDTH KATAKANA LETTER ME
モ<>モ; # to HALFWIDTH KATAKANA LETTER MO
ヤ<>ヤ; # to HALFWIDTH KATAKANA LETTER YA
ユ<>ユ; # to HALFWIDTH KATAKANA LETTER YU
ヨ<>ヨ; # to HALFWIDTH KATAKANA LETTER YO
ラ<>ラ; # to HALFWIDTH KATAKANA LETTER RA
リ<>リ; # to HALFWIDTH KATAKANA LETTER RI
ル<>ル; # to HALFWIDTH KATAKANA LETTER RU
レ<>レ; # to HALFWIDTH KATAKANA LETTER RE
ロ<>ロ; # to HALFWIDTH KATAKANA LETTER RO
ワ<>ワ; # to HALFWIDTH KATAKANA LETTER WA
ン<>ン; # to HALFWIDTH KATAKANA LETTER N
゙<>゙; # to HALFWIDTH KATAKANA VOICED SOUND MARK
゚<>゚; # to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
<>; # to HALFWIDTH HANGUL FILLER
ᄀ<>ᄀ; # to HALFWIDTH HANGUL LETTER KIYEOK
ᄁ<>ᄁ; # to HALFWIDTH HANGUL LETTER SSANGKIYEOK
ᆪ<>ᆪ; # to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
ᄂ<>ᄂ; # to HALFWIDTH HANGUL LETTER NIEUN
ᆬ<>ᆬ; # to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
ᆭ<>ᆭ; # to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
ᄃ<>ᄃ; # to HALFWIDTH HANGUL LETTER TIKEUT
ᄄ<>ᄄ; # to HALFWIDTH HANGUL LETTER SSANGTIKEUT
ᄅ<>ᄅ; # to HALFWIDTH HANGUL LETTER RIEUL
ᆰ<>ᆰ; # to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
ᆱ<>ᆱ; # to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
ᆲ<>ᆲ; # to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
ᆳ<>ᆳ; # to HALFWIDTH HANGUL LETTER RIEUL-SIOS
ᆴ<>ᆴ; # to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
ᆵ<>ᆵ; # to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
ᄚ<>ᄚ; # to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
ᄆ<>ᄆ; # to HALFWIDTH HANGUL LETTER MIEUM
ᄇ<>ᄇ; # to HALFWIDTH HANGUL LETTER PIEUP
ᄈ<>ᄈ; # to HALFWIDTH HANGUL LETTER SSANGPIEUP
ᄡ<>ᄡ; # to HALFWIDTH HANGUL LETTER PIEUP-SIOS
ᄉ<>ᄉ; # to HALFWIDTH HANGUL LETTER SIOS
ᄊ<>ᄊ; # to HALFWIDTH HANGUL LETTER SSANGSIOS
ᄋ<>ᄋ; # to HALFWIDTH HANGUL LETTER IEUNG
ᄌ<>ᄌ; # to HALFWIDTH HANGUL LETTER CIEUC
ᄍ<>ᄍ; # to HALFWIDTH HANGUL LETTER SSANGCIEUC
ᄎ<>ᄎ; # to HALFWIDTH HANGUL LETTER CHIEUCH
ᄏ<>ᄏ; # to HALFWIDTH HANGUL LETTER KHIEUKH
ᄐ<>ᄐ; # to HALFWIDTH HANGUL LETTER THIEUTH
ᄑ<>ᄑ; # to HALFWIDTH HANGUL LETTER PHIEUPH
ᄒ<>ᄒ; # to HALFWIDTH HANGUL LETTER HIEUH
ᅡ<>ᅡ; # to HALFWIDTH HANGUL LETTER A
ᅢ<>ᅢ; # to HALFWIDTH HANGUL LETTER AE
ᅣ<>ᅣ; # to HALFWIDTH HANGUL LETTER YA
ᅤ<>ᅤ; # to HALFWIDTH HANGUL LETTER YAE
ᅥ<>ᅥ; # to HALFWIDTH HANGUL LETTER EO
ᅦ<>ᅦ; # to HALFWIDTH HANGUL LETTER E
ᅧ<>ᅧ; # to HALFWIDTH HANGUL LETTER YEO
ᅨ<>ᅨ; # to HALFWIDTH HANGUL LETTER YE
ᅩ<>ᅩ; # to HALFWIDTH HANGUL LETTER O
ᅪ<>ᅪ; # to HALFWIDTH HANGUL LETTER WA
ᅫ<>ᅫ; # to HALFWIDTH HANGUL LETTER WAE
ᅬ<>ᅬ; # to HALFWIDTH HANGUL LETTER OE
ᅭ<>ᅭ; # to HALFWIDTH HANGUL LETTER YO
ᅮ<>ᅮ; # to HALFWIDTH HANGUL LETTER U
ᅯ<>ᅯ; # to HALFWIDTH HANGUL LETTER WEO
ᅰ<>ᅰ; # to HALFWIDTH HANGUL LETTER WE
ᅱ<>ᅱ; # to HALFWIDTH HANGUL LETTER WI
ᅲ<>ᅲ; # to HALFWIDTH HANGUL LETTER YU
ᅳ<>ᅳ; # to HALFWIDTH HANGUL LETTER EU
ᅴ<>ᅴ; # to HALFWIDTH HANGUL LETTER YI
ᅵ<>ᅵ; # to HALFWIDTH HANGUL LETTER I
¢<>'¢'; # from FULLWIDTH CENT SIGN
£<>'£'; # from FULLWIDTH POUND SIGN
¬<>'¬'; # from FULLWIDTH NOT SIGN
 ̄<>'¯'; # from FULLWIDTH MACRON
' '<>' '; # ideographic space (place this after MACRON)
¦<>'¦'; # from FULLWIDTH BROKEN BAR
¥<>'¥'; # from FULLWIDTH YEN SIGN
₩<>₩; # from FULLWIDTH WON SIGN
│<>; # to HALFWIDTH FORMS LIGHT VERTICAL
←<>←; # to HALFWIDTH LEFTWARDS ARROW
↑<>↑; # to HALFWIDTH UPWARDS ARROW
→<>→; # to HALFWIDTH RIGHTWARDS ARROW
↓<>↓; # to HALFWIDTH DOWNWARDS ARROW
■<>■; # to HALFWIDTH BLACK SQUARE
○<>○; # to HALFWIDTH WHITE CIRCLE
¦<>'¦'; # from FULLWIDTH BROKEN BAR
¥<>'¥'; # from FULLWIDTH YEN SIGN
₩<>₩; # from FULLWIDTH WON SIGN
│<>; # to HALFWIDTH FORMS LIGHT VERTICAL
←<>←; # to HALFWIDTH LEFTWARDS ARROW
↑<>↑; # to HALFWIDTH UPWARDS ARROW
→<>→; # to HALFWIDTH RIGHTWARDS ARROW
↓<>↓; # to HALFWIDTH DOWNWARDS ARROW
■<>■; # to HALFWIDTH BLACK SQUARE
○<>○; # to HALFWIDTH WHITE CIRCLE
# eof

View File

@ -3,8 +3,8 @@
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_Latin_Katakana.txt,v $
# $Date: 2001/12/01 00:51:28 $
# $Revision: 1.18 $
# $Date: 2001/12/03 02:10:26 $
# $Revision: 1.19 $
#--------------------------------------------------------------------
# note: a global filter is more efficient, but MUST include all source chars
@ -13,7 +13,7 @@
### WARNING -- must add width filter, both here and below!!! ###
:: [[\u1100-\u1112\u111A\u1121\u1160-\u1175\u11AA\u11AC-\u11AD\u11B0-\u11B5\u2190-\u2193\u2502\u25A0\u25CB\u3000-\u3002\u300C-\u300D\u3099-\u309A\u30A1-\u30ED\u30EF\u30F2-\u30F4\u30F7\u30FA-\u30FC\uFF01-\uFF5E\uFFE0-\uFFE6][',.A-Za-z~\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0304\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1FB1\u1FB9\u1FD1\u1FD9\u1FE1\u1FE9\u212A-\u212B]] ;
:: fullwidth-halfwidth ();
:: [:Latin:] fullwidth-halfwidth ();
:: NFD (NFC);
:: Lower (); # whenever transliterating from cased to uncased script, include this
# :: NFD () ; # this would catch the odd cases where a lowercase is not in NFD, but none are important for Japanese
@ -489,7 +489,7 @@ x > | ks ;
# [\u02BE[:Nonspacing Mark:]-[\u3099-\u309C]] > ; # delete any non-spacing marks that we didn't use
:: NFC (NFD) ;
:: (halfwidth-fullwidth);
:: ([:Katakana:] halfwidth-fullwidth);
# note: a global filter is more efficient, but MUST include all source chars!!
#:: ([\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9F [:Latin:][:Katakana:] [:nonspacing mark:]]);

View File

@ -13,9 +13,11 @@ import java.util.*;
/**
* @author Alan Liu
* @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.13 $ $Date: 2001/11/29 17:27:44 $
* @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.14 $ $Date: 2001/12/03 02:10:26 $
*/
final class NormalizationTransliterator extends Transliterator {
static final boolean DEBUG = false;
/**
* The normalization mode of this transliterator.
@ -33,29 +35,19 @@ final class NormalizationTransliterator extends Transliterator {
* effectively consider these to be cc!=0, for our purposes.
*
* From http://www.macchiato.com/utc/NFUnsafeStart-3.1.1dX.txt
* Generated in unicodetools, NFSkippable
*
* TODO Update this to 4 separate sets, one for each norm. form.
*/
static final UnicodeSet[] UNSAFE_STARTS = new UnicodeSet[4];
static final UnicodeSet[] SKIPPABLES = new UnicodeSet[4];
static final int
D = 0, C = 1, KD= 2, KC = 3;
// TODO: Set to exact values for different NFs for more accuracy
static {
UNSAFE_STARTS[D] = new UnicodeSet("[\u0F73\u0F75\u0F81]", false);
UNSAFE_STARTS[C] = new UnicodeSet("[\u09BE\u09D7\u0B3E\u0B56-\u0B57\u0BBE\u0BD7\u0CC2\u0CD5-\u0CD6"
+ "\u0D3E\u0D57\u0DCF\u0DDF\u0F73\u0F75\u0F81\u102E\u1161-\u1175\u11A8-\u11C2]", false);
UNSAFE_STARTS[KD] = new UnicodeSet("[\u0F73\u0F75\u0F81\uFF9E-\uFF9F]", false);
UNSAFE_STARTS[KC] = new UnicodeSet("[\u09BE\u09D7\u0B3E\u0B56-\u0B57\u0BBE\u0BD7\u0CC2\u0CD5-\u0CD6"
+ "\u0D3E\u0D57\u0DCF\u0DDF\u0F73\u0F75\u0F81\u102E\u1161-\u1175\u11A8-\u11C2\u3133\u3135-\u3136"
+ "\u313A-\u313F\u314F-\u3163\uFF9E-\uFF9F\uFFA3\uFFA5-\uFFA6\uFFAA-\uFFAF\uFFC2-\uFFC7\uFFCA-\uFFCF"
+ "\uFFD2-\uFFD7\uFFDA-\uFFDC]", false);
}
// Instance data, simply pointer to one of the above
// Instance data, simply pointer to one of the sets below
final UnicodeSet UNSAFE_START;
final UnicodeSet SKIPPABLE;
/**
* System registration hook.
@ -125,6 +117,7 @@ final class NormalizationTransliterator extends Transliterator {
mode = m;
options = opt;
UNSAFE_START = UNSAFE_STARTS[startChoice];
SKIPPABLE = SKIPPABLES[startChoice];
}
/**
@ -152,7 +145,7 @@ final class NormalizationTransliterator extends Transliterator {
for (int i = start+1; i < limit; i += UTF16.getCharCount(cp)) {
cp = UTF16.charAt(text, i);
if (UCharacter.getCombiningClass(cp) == 0 && !UNSAFE_START.contains(cp)) {
int delta = convert(text, lastSafe, i);
int delta = convert(text, lastSafe, i, null);
i += delta;
limit += delta;
overallDelta += delta;
@ -160,16 +153,29 @@ final class NormalizationTransliterator extends Transliterator {
}
}
if (!isIncremental) {
int delta = convert(text, lastSafe, limit);
int delta = convert(text, lastSafe, limit, null);
overallDelta += delta;
lastSafe = limit + delta;
} else {
// We are incremental, so accept the last characters IF they turn into SKIPPABLEs
int delta = convert(text, lastSafe, limit, SKIPPABLE);
if (delta != Integer.MIN_VALUE) {
overallDelta += delta;
lastSafe = limit + delta;
}
}
offsets.contextLimit += overallDelta;
offsets.limit += overallDelta;
offsets.start = lastSafe;
}
int convert(Replaceable text, int lastSafe, int limit) {
/**
* Converts the range from lastSafe to limit.
* @param verify If non-null, check to see that all replacement characters are in it. If not,
* abort the conversion and return Integer.MIN_VALUE.
* @return the delta in length (new - old), or Integer.MIN_VALUE if verification failed and the conversion was aborted.
*/
int convert(Replaceable text, int lastSafe, int limit, UnicodeSet verify) {
//System.out.println("t: " + com.ibm.util.Utility.hex(text.toString()) + ", s: " + lastSafe + ", l: " + limit);
int len = limit - lastSafe;
@ -179,6 +185,18 @@ final class NormalizationTransliterator extends Transliterator {
text.getChars(lastSafe, limit, buffer, 0);
String input = new String(buffer, 0, len); // TODO: fix normalizer to take char[]
String output = Normalizer.normalize(input, mode, options);
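// If a verification set was supplied, abort unless every character of the normalized output is in it.
// NOTE(review): the check below tests the SKIPPABLE field rather than the 'verify' argument; the callers shown here pass only null or SKIPPABLE, so the result is the same today.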
if (verify != null) {
boolean skip = !SKIPPABLE.containsAll(output);
if (DEBUG) {
System.out.println((skip ? " SKIP: " : "NOSKIP: ")
+ com.ibm.util.Utility.escape(input)
+ " => " + com.ibm.util.Utility.escape(output));
}
if (skip) return Integer.MIN_VALUE;
}
if (output.equals(input)) {
return 0;
}
@ -188,4 +206,400 @@ final class NormalizationTransliterator extends Transliterator {
private char buffer[] = new char[30];
static {
UNSAFE_STARTS[D] = new UnicodeSet("[\u0F73\u0F75\u0F81]", false);
UNSAFE_STARTS[C] = new UnicodeSet("[\u09BE\u09D7\u0B3E\u0B56-\u0B57\u0BBE\u0BD7\u0CC2\u0CD5-\u0CD6"
+ "\u0D3E\u0D57\u0DCF\u0DDF\u0F73\u0F75\u0F81\u102E\u1161-\u1175\u11A8-\u11C2]", false);
UNSAFE_STARTS[KD] = new UnicodeSet("[\u0F73\u0F75\u0F81\uFF9E-\uFF9F]", false);
UNSAFE_STARTS[KC] = new UnicodeSet("[\u09BE\u09D7\u0B3E\u0B56-\u0B57\u0BBE\u0BD7\u0CC2\u0CD5-\u0CD6"
+ "\u0D3E\u0D57\u0DCF\u0DDF\u0F73\u0F75\u0F81\u102E\u1161-\u1175\u11A8-\u11C2\u3133\u3135-\u3136"
+ "\u313A-\u313F\u314F-\u3163\uFF9E-\uFF9F\uFFA3\uFFA5-\uFFA6\uFFAA-\uFFAF\uFFC2-\uFFC7\uFFCA-\uFFCF"
+ "\uFFD2-\uFFD7\uFFDA-\uFFDC]", false);
SKIPPABLES[D] = new UnicodeSet(
"[^\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-"
+ "\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112"
+ "-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148"
+ "\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-"
+ "\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8"
+ "-\u021B\u021E-\u021F\u0226-\u0233\u0300-\u034E\u0360-\u0362"
+ "\u0374\u037E\u0385-\u038A\u038C\u038E-\u0390\u03AA-\u03B0\u03CA-"
+ "\u03CE\u03D3-\u03D4\u0400-\u0401\u0403\u0407\u040C-\u040E\u0419"
+ "\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0476-\u0477\u0483-"
+ "\u0486\u04C1-\u04C2\u04D0-\u04D3\u04D6-\u04D7\u04DA-\u04DF\u04E2"
+ "-\u04E7\u04EA-\u04F5\u04F8-\u04F9\u0591-\u05A1\u05A3-\u05B9"
+ "\u05BB-\u05BD\u05BF\u05C1-\u05C2\u05C4\u0622-\u0626\u064B-\u0655"
+ "\u0670\u06C0\u06C2\u06D3\u06D6-\u06DC\u06DF-\u06E4\u06E7-\u06E8"
+ "\u06EA-\u06ED\u0711\u0730-\u074A\u0929\u0931\u0934\u093C\u094D"
+ "\u0951-\u0954\u0958-\u095F\u09BC\u09CB-\u09CD\u09DC-\u09DD\u09DF"
+ "\u0A33\u0A36\u0A3C\u0A4D\u0A59-\u0A5B\u0A5E\u0ABC\u0ACD\u0B3C"
+ "\u0B48\u0B4B-\u0B4D\u0B5C-\u0B5D\u0B94\u0BCA-\u0BCD\u0C48\u0C4D"
+ "\u0C55-\u0C56\u0CC0\u0CC7-\u0CC8\u0CCA-\u0CCB\u0CCD\u0D4A-\u0D4D"
+ "\u0DCA\u0DDA\u0DDC-\u0DDE\u0E38-\u0E3A\u0E48-\u0E4B\u0EB8-\u0EB9"
+ "\u0EC8-\u0ECB\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F43\u0F4D\u0F52"
+ "\u0F57\u0F5C\u0F69\u0F71-\u0F76\u0F78\u0F7A-\u0F7D\u0F80-\u0F84"
+ "\u0F86-\u0F87\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u0FC6\u1026"
+ "\u1037\u1039\u17D2\u18A9\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F00-"
+ "\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59"
+ "\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-"
+ "\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\u1FF6"
+ "-\u1FFD\u2000-\u2001\u20D0-\u20DC\u20E1\u2126\u212A-\u212B\u219A"
+ "-\u219B\u21AE\u21CD-\u21CF\u2204\u2209\u220C\u2224\u2226\u2241"
+ "\u2244\u2247\u2249\u2260\u2262\u226D-\u2271\u2274-\u2275\u2278-"
+ "\u2279\u2280-\u2281\u2284-\u2285\u2288-\u2289\u22AC-\u22AF\u22E0"
+ "-\u22E3\u22EA-\u22ED\u2329-\u232A\u302A-\u302F\u304C\u304E\u3050"
+ "\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065"
+ "\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A"
+ "\u307C-\u307D\u3094\u3099-\u309A\u309E\u30AC\u30AE\u30B0\u30B2"
+ "\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7"
+ "\u30C9\u30D0-\u30D1\u30D3-\u30D4\u30D6-\u30D7\u30D9-\u30DA\u30DC"
+ "-\u30DD\u30F4\u30F7-\u30FA\u30FE\uAC00-\uD7A3\uF900-\uFA0D\uFA10"
+ "\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB1D-"
+ "\uFB1F\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44"
+ "\uFB46-\uFB4E\uFE20-\uFE23\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001"
+ "D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-\\U0001D"
+ "1AD\\U0001D1BB-\\U0001D1C0\\U0002F800-\\U0002FA1D]", false);
/*Unicode:
"[^À-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-dE-hI-IJ-kL-lN-nO-oR-tU-žO-oU-uA-uA-"
+ "?G-j?-??-??-??-?`-??-??;?-???-??-??-??-??-????-????-????-??-??-?"
+ "?-??-??-??-??-??-??-??-??-??-???-???-??-??????-??-??-??-???-????"
+ "???-??-???-??-???????-???????-??-???-????-???-??-???-????-??-??-"
+ "??-??-??-???????????-???-??-??-??????????????-???-??-??-??-??-??"
+ "-?????-??-??-???-??-??-??-??-??-?\ -\ ?-???K-Å?-???-????????????"
+ "?-??-??-??-??-??-??-??-??-?<->?-?????????????????-??-??-??-??-??"
+ "?-??????????????????-??-??-??-??-???-???-??-????-????-??-??-??-?"
+ "?-???-??-??-??-???-????-????-????-????-????-????-??]"*/
SKIPPABLES[C] = new UnicodeSet(
"[^<->A-PR-Za-pr-z\u00A8\u00C0-\u00CF\u00D1-\u00D6\u00D8-\u00DD"
+ "\u00E0-\u00EF\u00F1-\u00F6\u00F8-\u00FD\u00FF-\u0103\u0106-"
+ "\u010F\u0112-\u0117\u011A-\u0121\u0124-\u0125\u0128-\u012D\u0130"
+ "\u0139-\u013A\u013D-\u013E\u0143-\u0144\u0147-\u0148\u014C-"
+ "\u0151\u0154-\u0155\u0158-\u015D\u0160-\u0161\u0164-\u0165\u0168"
+ "-\u0171\u0174-\u017F\u01A0-\u01A1\u01AF-\u01B0\u01B7\u01CD-"
+ "\u01DC\u01DE-\u01E1\u01E6-\u01EB\u01F4-\u01F5\u01F8-\u01FB\u0200"
+ "-\u021B\u021E-\u021F\u0226-\u0233\u0292\u0300-\u034E\u0360-"
+ "\u0362\u0374\u037E\u0387\u0391\u0395\u0397\u0399\u039F\u03A1"
+ "\u03A5\u03A9\u03AC\u03AE\u03B1\u03B5\u03B7\u03B9\u03BF\u03C1"
+ "\u03C5\u03C9-\u03CB\u03CE\u03D2\u0406\u0410\u0413\u0415-\u0418"
+ "\u041A\u041E\u0423\u0427\u042B\u042D\u0430\u0433\u0435-\u0438"
+ "\u043A\u043E\u0443\u0447\u044B\u044D\u0456\u0474-\u0475\u0483-"
+ "\u0486\u04D8-\u04D9\u04E8-\u04E9\u0591-\u05A1\u05A3-\u05B9\u05BB"
+ "-\u05BD\u05BF\u05C1-\u05C2\u05C4\u0622-\u0623\u0627\u0648\u064A-"
+ "\u0655\u0670\u06C1\u06D2\u06D5-\u06DC\u06DF-\u06E4\u06E7-\u06E8"
+ "\u06EA-\u06ED\u0711\u0730-\u074A\u0928\u0930\u0933\u093C\u094D"
+ "\u0951-\u0954\u0958-\u095F\u09BC\u09BE\u09C7\u09CD\u09D7\u09DC-"
+ "\u09DD\u09DF\u0A33\u0A36\u0A3C\u0A4D\u0A59-\u0A5B\u0A5E\u0ABC"
+ "\u0ACD\u0B3C\u0B3E\u0B47\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B92"
+ "\u0BBE\u0BC6-\u0BC7\u0BCD\u0BD7\u0C46\u0C4D\u0C55-\u0C56\u0CBF"
+ "\u0CC2\u0CC6\u0CCA\u0CCD\u0CD5-\u0CD6\u0D3E\u0D46-\u0D47\u0D4D"
+ "\u0D57\u0DCA\u0DCF\u0DD9\u0DDC\u0DDF\u0E38-\u0E3A\u0E48-\u0E4B"
+ "\u0EB8-\u0EB9\u0EC8-\u0ECB\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F43"
+ "\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F71-\u0F76\u0F78\u0F7A-\u0F7D"
+ "\u0F80-\u0F84\u0F86-\u0F87\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9"
+ "\u0FC6\u1025\u102E\u1037\u1039\u1100-\u1112\u1161-\u1175\u11A8-"
+ "\u11C2\u17D2\u18A9\u1E00-\u1E03\u1E0A-\u1E0F\u1E12-\u1E1B\u1E20-"
+ "\u1E27\u1E2A-\u1E41\u1E44-\u1E53\u1E58-\u1E7D\u1E80-\u1E87\u1E8E"
+ "-\u1E91\u1E96-\u1E99\u1EA0-\u1EF3\u1EF6-\u1EF9\u1F00-\u1F11"
+ "\u1F18-\u1F19\u1F20-\u1F31\u1F38-\u1F39\u1F40-\u1F41\u1F48-"
+ "\u1F49\u1F50-\u1F51\u1F59\u1F60-\u1F71\u1F73-\u1F75\u1F77\u1F79"
+ "\u1F7B-\u1F7D\u1F80-\u1F81\u1F88-\u1F89\u1F90-\u1F91\u1F98-"
+ "\u1F99\u1FA0-\u1FA1\u1FA8-\u1FA9\u1FB3\u1FB6\u1FBB-\u1FBC\u1FBE-"
+ "\u1FBF\u1FC3\u1FC6\u1FC9\u1FCB-\u1FCC\u1FD3\u1FDB\u1FE3\u1FEB"
+ "\u1FEE-\u1FEF\u1FF3\u1FF6\u1FF9\u1FFB-\u1FFE\u2000-\u2001\u20D0-"
+ "\u20DC\u20E1\u2126\u212A-\u212B\u2190\u2192\u2194\u21D0\u21D2"
+ "\u21D4\u2203\u2208\u220B\u2223\u2225\u223C\u2243\u2245\u2248"
+ "\u224D\u2261\u2264-\u2265\u2272-\u2273\u2276-\u2277\u227A-\u227D"
+ "\u2282-\u2283\u2286-\u2287\u2291-\u2292\u22A2\u22A8-\u22A9\u22AB"
+ "\u22B2-\u22B5\u2329-\u232A\u302A-\u302F\u3046\u304B\u304D\u304F"
+ "\u3051\u3053\u3055\u3057\u3059\u305B\u305D\u305F\u3061\u3064"
+ "\u3066\u3068\u306F\u3072\u3075\u3078\u307B\u3099-\u309A\u309D"
+ "\u30A6\u30AB\u30AD\u30AF\u30B1\u30B3\u30B5\u30B7\u30B9\u30BB"
+ "\u30BD\u30BF\u30C1\u30C4\u30C6\u30C8\u30CF\u30D2\u30D5\u30D8"
+ "\u30DB\u30EF-\u30F2\u30FD\uAC00\uAC1C\uAC38\uAC54\uAC70\uAC8C"
+ "\uACA8\uACC4\uACE0\uACFC\uAD18\uAD34\uAD50\uAD6C\uAD88\uADA4"
+ "\uADC0\uADDC\uADF8\uAE14\uAE30\uAE4C\uAE68\uAE84\uAEA0\uAEBC"
+ "\uAED8\uAEF4\uAF10\uAF2C\uAF48\uAF64\uAF80\uAF9C\uAFB8\uAFD4"
+ "\uAFF0\uB00C\uB028\uB044\uB060\uB07C\uB098\uB0B4\uB0D0\uB0EC"
+ "\uB108\uB124\uB140\uB15C\uB178\uB194\uB1B0\uB1CC\uB1E8\uB204"
+ "\uB220\uB23C\uB258\uB274\uB290\uB2AC\uB2C8\uB2E4\uB300\uB31C"
+ "\uB338\uB354\uB370\uB38C\uB3A8\uB3C4\uB3E0\uB3FC\uB418\uB434"
+ "\uB450\uB46C\uB488\uB4A4\uB4C0\uB4DC\uB4F8\uB514\uB530\uB54C"
+ "\uB568\uB584\uB5A0\uB5BC\uB5D8\uB5F4\uB610\uB62C\uB648\uB664"
+ "\uB680\uB69C\uB6B8\uB6D4\uB6F0\uB70C\uB728\uB744\uB760\uB77C"
+ "\uB798\uB7B4\uB7D0\uB7EC\uB808\uB824\uB840\uB85C\uB878\uB894"
+ "\uB8B0\uB8CC\uB8E8\uB904\uB920\uB93C\uB958\uB974\uB990\uB9AC"
+ "\uB9C8\uB9E4\uBA00\uBA1C\uBA38\uBA54\uBA70\uBA8C\uBAA8\uBAC4"
+ "\uBAE0\uBAFC\uBB18\uBB34\uBB50\uBB6C\uBB88\uBBA4\uBBC0\uBBDC"
+ "\uBBF8\uBC14\uBC30\uBC4C\uBC68\uBC84\uBCA0\uBCBC\uBCD8\uBCF4"
+ "\uBD10\uBD2C\uBD48\uBD64\uBD80\uBD9C\uBDB8\uBDD4\uBDF0\uBE0C"
+ "\uBE28\uBE44\uBE60\uBE7C\uBE98\uBEB4\uBED0\uBEEC\uBF08\uBF24"
+ "\uBF40\uBF5C\uBF78\uBF94\uBFB0\uBFCC\uBFE8\uC004\uC020\uC03C"
+ "\uC058\uC074\uC090\uC0AC\uC0C8\uC0E4\uC100\uC11C\uC138\uC154"
+ "\uC170\uC18C\uC1A8\uC1C4\uC1E0\uC1FC\uC218\uC234\uC250\uC26C"
+ "\uC288\uC2A4\uC2C0\uC2DC\uC2F8\uC314\uC330\uC34C\uC368\uC384"
+ "\uC3A0\uC3BC\uC3D8\uC3F4\uC410\uC42C\uC448\uC464\uC480\uC49C"
+ "\uC4B8\uC4D4\uC4F0\uC50C\uC528\uC544\uC560\uC57C\uC598\uC5B4"
+ "\uC5D0\uC5EC\uC608\uC624\uC640\uC65C\uC678\uC694\uC6B0\uC6CC"
+ "\uC6E8\uC704\uC720\uC73C\uC758\uC774\uC790\uC7AC\uC7C8\uC7E4"
+ "\uC800\uC81C\uC838\uC854\uC870\uC88C\uC8A8\uC8C4\uC8E0\uC8FC"
+ "\uC918\uC934\uC950\uC96C\uC988\uC9A4\uC9C0\uC9DC\uC9F8\uCA14"
+ "\uCA30\uCA4C\uCA68\uCA84\uCAA0\uCABC\uCAD8\uCAF4\uCB10\uCB2C"
+ "\uCB48\uCB64\uCB80\uCB9C\uCBB8\uCBD4\uCBF0\uCC0C\uCC28\uCC44"
+ "\uCC60\uCC7C\uCC98\uCCB4\uCCD0\uCCEC\uCD08\uCD24\uCD40\uCD5C"
+ "\uCD78\uCD94\uCDB0\uCDCC\uCDE8\uCE04\uCE20\uCE3C\uCE58\uCE74"
+ "\uCE90\uCEAC\uCEC8\uCEE4\uCF00\uCF1C\uCF38\uCF54\uCF70\uCF8C"
+ "\uCFA8\uCFC4\uCFE0\uCFFC\uD018\uD034\uD050\uD06C\uD088\uD0A4"
+ "\uD0C0\uD0DC\uD0F8\uD114\uD130\uD14C\uD168\uD184\uD1A0\uD1BC"
+ "\uD1D8\uD1F4\uD210\uD22C\uD248\uD264\uD280\uD29C\uD2B8\uD2D4"
+ "\uD2F0\uD30C\uD328\uD344\uD360\uD37C\uD398\uD3B4\uD3D0\uD3EC"
+ "\uD408\uD424\uD440\uD45C\uD478\uD494\uD4B0\uD4CC\uD4E8\uD504"
+ "\uD520\uD53C\uD558\uD574\uD590\uD5AC\uD5C8\uD5E4\uD600\uD61C"
+ "\uD638\uD654\uD670\uD68C\uD6A8\uD6C4\uD6E0\uD6FC\uD718\uD734"
+ "\uD750\uD76C\uD788\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20"
+ "\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB1D-\uFB1F\uFB2A-\uFB36\uFB38"
+ "-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFB4E\uFE20-"
+ "\uFE23\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001D172\\U0001D17B-"
+ "\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001D1BB-"
+ "\\U0001D1C0\\U0002F800-\\U0002FA1D]", false);
/*Unicode:
"[^<->A-PR-Za-pr-z¨À-ÏÑ-ÖØ-Ýà-ïñ-öø-ýÿ-aC-dE-eE-gH-hI-iIL-lL-lN-n"
+ "N-nO-oR-rR-sŠ-šT-tU-uW-?O-oU-u?A-uA-?G-o?-??-??-??-??-??`-??-??;"
+ "????????O??ae??????-???????-??????????-?????????-??-??-??-??-??-"
+ "??-???-???-????-?????-??-??-??-???-???????-??-???????-???????-??"
+ "???????-??-????-??????-???????-???-?????????-??-??-??-??-???????"
+ "????-???-??-??-?????????????-??-??-????-??-??-??-??-??-??-??-??-"
+ "??-??-??-??-??-??-??-??-??-??-???-??-????-??-??-??-??-??-??-????"
+ "-??-?????-??????-?????-?\ -\ ?-???K-Å?????????|?~??˜?==-=?-??-??"
+ "-??-??-??-???-???-?<->?-???????????????????????-????????????????"
+ "????????-???????????????????????????????????????????????????????"
+ "????????????????????????????????????????????????????????????????"
+ "????????????????????????????????????????????????????????????????"
+ "????????????????????????????????????????????????????????????????"
+ "????????????????????????????????????????????????????????????????"
+ "????????????????????????????????????????????????????????????????"
+ "???????????????????????????-????-????-??-??-??-??-???-??-??-??-?"
+ "??-????-????-????-????-????-????-??]"*/
SKIPPABLES[KD] = new UnicodeSet(
"[^\u00A0\u00A8\u00AA\u00AF\u00B2-\u00B5\u00B8-\u00BA\u00BC-"
+ "\u00BE\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0"
+ "-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F"
+ "\u0112-\u0125\u0128-\u0130\u0132-\u0137\u0139-\u0140\u0143-"
+ "\u0149\u014C-\u0151\u0154-\u0165\u0168-\u017F\u01A0-\u01A1\u01AF"
+ "-\u01B0\u01C4-\u01DC\u01DE-\u01E3\u01E6-\u01F5\u01F8-\u021B"
+ "\u021E-\u021F\u0226-\u0233\u02B0-\u02B8\u02D8-\u02DD\u02E0-"
+ "\u02E4\u0300-\u034E\u0360-\u0362\u0374\u037A\u037E\u0384-\u038A"
+ "\u038C\u038E-\u0390\u03AA-\u03B0\u03CA-\u03CE\u03D0-\u03D6\u03F0"
+ "-\u03F2\u03F4-\u03F5\u0400-\u0401\u0403\u0407\u040C-\u040E\u0419"
+ "\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0476-\u0477\u0483-"
+ "\u0486\u04C1-\u04C2\u04D0-\u04D3\u04D6-\u04D7\u04DA-\u04DF\u04E2"
+ "-\u04E7\u04EA-\u04F5\u04F8-\u04F9\u0587\u0591-\u05A1\u05A3-"
+ "\u05B9\u05BB-\u05BD\u05BF\u05C1-\u05C2\u05C4\u0622-\u0626\u064B-"
+ "\u0655\u0670\u0675-\u0678\u06C0\u06C2\u06D3\u06D6-\u06DC\u06DF-"
+ "\u06E4\u06E7-\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u0929\u0931"
+ "\u0934\u093C\u094D\u0951-\u0954\u0958-\u095F\u09BC\u09CB-\u09CD"
+ "\u09DC-\u09DD\u09DF\u0A33\u0A36\u0A3C\u0A4D\u0A59-\u0A5B\u0A5E"
+ "\u0ABC\u0ACD\u0B3C\u0B48\u0B4B-\u0B4D\u0B5C-\u0B5D\u0B94\u0BCA-"
+ "\u0BCD\u0C48\u0C4D\u0C55-\u0C56\u0CC0\u0CC7-\u0CC8\u0CCA-\u0CCB"
+ "\u0CCD\u0D4A-\u0D4D\u0DCA\u0DDA\u0DDC-\u0DDE\u0E33\u0E38-\u0E3A"
+ "\u0E48-\u0E4B\u0EB3\u0EB8-\u0EB9\u0EC8-\u0ECB\u0EDC-\u0EDD\u0F0C"
+ "\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F43\u0F4D\u0F52\u0F57\u0F5C"
+ "\u0F69\u0F71-\u0F7D\u0F80-\u0F84\u0F86-\u0F87\u0F93\u0F9D\u0FA2"
+ "\u0FA7\u0FAC\u0FB9\u0FC6\u1026\u1037\u1039\u17D2\u18A9\u1E00-"
+ "\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48"
+ "-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4"
+ "\u1FB6-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-"
+ "\u1FF4\u1FF6-\u1FFE\u2000-\u200A\u2011\u2017\u2024-\u2026\u202F"
+ "\u2033-\u2034\u2036-\u2037\u203C\u203E\u2048-\u2049\u2070\u2074-"
+ "\u208E\u20A8\u20D0-\u20DC\u20E1\u2100-\u2103\u2105-\u2107\u2109-"
+ "\u2113\u2115-\u2116\u2119-\u211D\u2120-\u2122\u2124\u2126\u2128"
+ "\u212A-\u212D\u212F-\u2131\u2133-\u2139\u2153-\u217F\u219A-"
+ "\u219B\u21AE\u21CD-\u21CF\u2204\u2209\u220C\u2224\u2226\u222C-"
+ "\u222D\u222F-\u2230\u2241\u2244\u2247\u2249\u2260\u2262\u226D-"
+ "\u2271\u2274-\u2275\u2278-\u2279\u2280-\u2281\u2284-\u2285\u2288"
+ "-\u2289\u22AC-\u22AF\u22E0-\u22E3\u22EA-\u22ED\u2329-\u232A"
+ "\u2460-\u24EA\u2E9F\u2EF3\u2F00-\u2FD5\u3000\u302A-\u302F\u3036"
+ "\u3038-\u303A\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A"
+ "\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-"
+ "\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u3099-\u309C"
+ "\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC"
+ "\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0-\u30D1\u30D3-\u30D4"
+ "\u30D6-\u30D7\u30D9-\u30DA\u30DC-\u30DD\u30F4\u30F7-\u30FA\u30FE"
+ "\u3131-\u318E\u3192-\u319F\u3200-\u321C\u3220-\u3243\u3260-"
+ "\u327B\u3280-\u32B0\u32C0-\u32CB\u32D0-\u32FE\u3300-\u3376\u337B"
+ "-\u33DD\u33E0-\u33FE\uAC00-\uD7A3\uF900-\uFA0D\uFA10\uFA12\uFA15"
+ "-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB00-\uFB06\uFB13"
+ "-\uFB17\uFB1D-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-"
+ "\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0"
+ "-\uFDFB\uFE20-\uFE23\uFE30-\uFE44\uFE49-\uFE52\uFE54-\uFE66"
+ "\uFE68-\uFE6B\uFE70-\uFE72\uFE74\uFE76-\uFEFC\uFF01-\uFF5E\uFF61"
+ "-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC"
+ "\uFFE0-\uFFE6\uFFE8-\uFFEE\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001"
+ "D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-\\U0001D"
+ "1AD\\U0001D1BB-\\U0001D1C0\\U0001D400-\\U0001D454\\U0001D456-\\U0001D4"
+ "9C\\U0001D49E-\\U0001D49F\\U0001D4A2\\U0001D4A5-\\U0001D4A6\\U0001D4A9"
+ "-\\U0001D4AC\\U0001D4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-\\U0001D4C0"
+ "\\U0001D4C2-\\U0001D4C3\\U0001D4C5-\\U0001D505\\U0001D507-\\U0001D50A"
+ "\\U0001D50D-\\U0001D514\\U0001D516-\\U0001D51C\\U0001D51E-\\U0001D539"
+ "\\U0001D53B-\\U0001D53E\\U0001D540-\\U0001D544\\U0001D546\\U0001D54A-"
+ "\\U0001D550\\U0001D552-\\U0001D6A3\\U0001D6A8-\\U0001D7C9\\U0001D7CE-"
+ "\\U0001D7FF\\U0002F800-\\U0002FA1D]", false);
/*Unicode:
"[^ ¨ª¯²-µ¸-º¼-¾À-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-dE-hI-I?-kL-?N-?O-oR-tU"
+ "-?O-oU-u?-uA-?G-??-??-??-??-??-??-?`-??-???;?-???-??-??-??-??-??"
+ "-??-????-????-????-??-??-??-??-??-??-??-??-??-???-??-??-???-???-"
+ "??-???-?????-??-??-??-???-???????-??-???-??-???????-???????-??-?"
+ "??-????-???-??-???-????-???-??-???-??-??-???-???????????-??-??-?"
+ "?????????????-??-??-??-??-??-??-?????-??-??-??-??-??-??-??-?\ -"
+ "\?-=·-…??-??-????-?°4-???-???-??-E?-lN-?P-R?-™Z?ZK-Ce-FM-??-??-?"
+ "??-???????-??-????????-??-??-??-??-??-??-??-??-?<->?-????-?"
+ "\ ?-???-?????????????????-??-??-??-??-???-??????????????????-??-"
+ "??-??-??-???-???-??-??-??-??-??-??-??-??-??-??-??-??-????-????-?"
+ "?-??-??-??-??-???-??-??-??-??-??-??-??-??-??-??-??-??-???-?!-~?-"
+ "??-??-??-??-??-??-???-????-????-????-????-????-????-????-????-??"
+ "????-????-????-??????-????-????-????-????-????-????-????-????-??"
+ "????-????-????-????-????-??]"*/
SKIPPABLES[KC] = new UnicodeSet(
"[^<->A-PR-Za-pr-z\u00A0\u00A8\u00AA\u00AF\u00B2-\u00B5\u00B8-"
+ "\u00BA\u00BC-\u00BE\u00C0-\u00CF\u00D1-\u00D6\u00D8-\u00DD\u00E0"
+ "-\u00EF\u00F1-\u00F6\u00F8-\u00FD\u00FF-\u0103\u0106-\u010F"
+ "\u0112-\u0117\u011A-\u0121\u0124-\u0125\u0128-\u012D\u0130\u0132"
+ "-\u0133\u0139-\u013A\u013D-\u0140\u0143-\u0144\u0147-\u0149"
+ "\u014C-\u0151\u0154-\u0155\u0158-\u015D\u0160-\u0161\u0164-"
+ "\u0165\u0168-\u0171\u0174-\u017F\u01A0-\u01A1\u01AF-\u01B0\u01B7"
+ "\u01C4-\u01DC\u01DE-\u01E1\u01E6-\u01EB\u01F1-\u01F5\u01F8-"
+ "\u01FB\u0200-\u021B\u021E-\u021F\u0226-\u0233\u0292\u02B0-\u02B8"
+ "\u02D8-\u02DD\u02E0-\u02E4\u0300-\u034E\u0360-\u0362\u0374\u037A"
+ "\u037E\u0384-\u0385\u0387\u0391\u0395\u0397\u0399\u039F\u03A1"
+ "\u03A5\u03A9\u03AC\u03AE\u03B1\u03B5\u03B7\u03B9\u03BF\u03C1"
+ "\u03C5\u03C9-\u03CB\u03CE\u03D0-\u03D6\u03F0-\u03F2\u03F4-\u03F5"
+ "\u0406\u0410\u0413\u0415-\u0418\u041A\u041E\u0423\u0427\u042B"
+ "\u042D\u0430\u0433\u0435-\u0438\u043A\u043E\u0443\u0447\u044B"
+ "\u044D\u0456\u0474-\u0475\u0483-\u0486\u04D8-\u04D9\u04E8-\u04E9"
+ "\u0587\u0591-\u05A1\u05A3-\u05B9\u05BB-\u05BD\u05BF\u05C1-\u05C2"
+ "\u05C4\u0622-\u0623\u0627\u0648\u064A-\u0655\u0670\u0675-\u0678"
+ "\u06C1\u06D2\u06D5-\u06DC\u06DF-\u06E4\u06E7-\u06E8\u06EA-\u06ED"
+ "\u0711\u0730-\u074A\u0928\u0930\u0933\u093C\u094D\u0951-\u0954"
+ "\u0958-\u095F\u09BC\u09BE\u09C7\u09CD\u09D7\u09DC-\u09DD\u09DF"
+ "\u0A33\u0A36\u0A3C\u0A4D\u0A59-\u0A5B\u0A5E\u0ABC\u0ACD\u0B3C"
+ "\u0B3E\u0B47\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B92\u0BBE\u0BC6-"
+ "\u0BC7\u0BCD\u0BD7\u0C46\u0C4D\u0C55-\u0C56\u0CBF\u0CC2\u0CC6"
+ "\u0CCA\u0CCD\u0CD5-\u0CD6\u0D3E\u0D46-\u0D47\u0D4D\u0D57\u0DCA"
+ "\u0DCF\u0DD9\u0DDC\u0DDF\u0E33\u0E38-\u0E3A\u0E48-\u0E4B\u0EB3"
+ "\u0EB8-\u0EB9\u0EC8-\u0ECB\u0EDC-\u0EDD\u0F0C\u0F18-\u0F19\u0F35"
+ "\u0F37\u0F39\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F71-\u0F7D"
+ "\u0F80-\u0F84\u0F86-\u0F87\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9"
+ "\u0FC6\u1025\u102E\u1037\u1039\u1100-\u1112\u1161-\u1175\u11A8-"
+ "\u11C2\u17D2\u18A9\u1E00-\u1E03\u1E0A-\u1E0F\u1E12-\u1E1B\u1E20-"
+ "\u1E27\u1E2A-\u1E41\u1E44-\u1E53\u1E58-\u1E7D\u1E80-\u1E87\u1E8E"
+ "-\u1E91\u1E96-\u1E9B\u1EA0-\u1EF3\u1EF6-\u1EF9\u1F00-\u1F11"
+ "\u1F18-\u1F19\u1F20-\u1F31\u1F38-\u1F39\u1F40-\u1F41\u1F48-"
+ "\u1F49\u1F50-\u1F51\u1F59\u1F60-\u1F71\u1F73-\u1F75\u1F77\u1F79"
+ "\u1F7B-\u1F7D\u1F80-\u1F81\u1F88-\u1F89\u1F90-\u1F91\u1F98-"
+ "\u1F99\u1FA0-\u1FA1\u1FA8-\u1FA9\u1FB3\u1FB6\u1FBB-\u1FC1\u1FC3"
+ "\u1FC6\u1FC9\u1FCB-\u1FCF\u1FD3\u1FDB\u1FDD-\u1FDF\u1FE3\u1FEB"
+ "\u1FED-\u1FEF\u1FF3\u1FF6\u1FF9\u1FFB-\u1FFE\u2000-\u200A\u2011"
+ "\u2017\u2024-\u2026\u202F\u2033-\u2034\u2036-\u2037\u203C\u203E"
+ "\u2048-\u2049\u2070\u2074-\u208E\u20A8\u20D0-\u20DC\u20E1\u2100-"
+ "\u2103\u2105-\u2107\u2109-\u2113\u2115-\u2116\u2119-\u211D\u2120"
+ "-\u2122\u2124\u2126\u2128\u212A-\u212D\u212F-\u2131\u2133-\u2139"
+ "\u2153-\u217F\u2190\u2192\u2194\u21D0\u21D2\u21D4\u2203\u2208"
+ "\u220B\u2223\u2225\u222C-\u222D\u222F-\u2230\u223C\u2243\u2245"
+ "\u2248\u224D\u2261\u2264-\u2265\u2272-\u2273\u2276-\u2277\u227A-"
+ "\u227D\u2282-\u2283\u2286-\u2287\u2291-\u2292\u22A2\u22A8-\u22A9"
+ "\u22AB\u22B2-\u22B5\u2329-\u232A\u2460-\u24EA\u2E9F\u2EF3\u2F00-"
+ "\u2FD5\u3000\u302A-\u302F\u3036\u3038-\u303A\u3046\u304B\u304D"
+ "\u304F\u3051\u3053\u3055\u3057\u3059\u305B\u305D\u305F\u3061"
+ "\u3064\u3066\u3068\u306F\u3072\u3075\u3078\u307B\u3099-\u309D"
+ "\u30A6\u30AB\u30AD\u30AF\u30B1\u30B3\u30B5\u30B7\u30B9\u30BB"
+ "\u30BD\u30BF\u30C1\u30C4\u30C6\u30C8\u30CF\u30D2\u30D5\u30D8"
+ "\u30DB\u30EF-\u30F2\u30FD\u3131-\u318E\u3192-\u319F\u3200-\u321C"
+ "\u3220-\u3243\u3260-\u327B\u3280-\u32B0\u32C0-\u32CB\u32D0-"
+ "\u32FE\u3300-\u3376\u337B-\u33DD\u33E0-\u33FE\uAC00\uAC1C\uAC38"
+ "\uAC54\uAC70\uAC8C\uACA8\uACC4\uACE0\uACFC\uAD18\uAD34\uAD50"
+ "\uAD6C\uAD88\uADA4\uADC0\uADDC\uADF8\uAE14\uAE30\uAE4C\uAE68"
+ "\uAE84\uAEA0\uAEBC\uAED8\uAEF4\uAF10\uAF2C\uAF48\uAF64\uAF80"
+ "\uAF9C\uAFB8\uAFD4\uAFF0\uB00C\uB028\uB044\uB060\uB07C\uB098"
+ "\uB0B4\uB0D0\uB0EC\uB108\uB124\uB140\uB15C\uB178\uB194\uB1B0"
+ "\uB1CC\uB1E8\uB204\uB220\uB23C\uB258\uB274\uB290\uB2AC\uB2C8"
+ "\uB2E4\uB300\uB31C\uB338\uB354\uB370\uB38C\uB3A8\uB3C4\uB3E0"
+ "\uB3FC\uB418\uB434\uB450\uB46C\uB488\uB4A4\uB4C0\uB4DC\uB4F8"
+ "\uB514\uB530\uB54C\uB568\uB584\uB5A0\uB5BC\uB5D8\uB5F4\uB610"
+ "\uB62C\uB648\uB664\uB680\uB69C\uB6B8\uB6D4\uB6F0\uB70C\uB728"
+ "\uB744\uB760\uB77C\uB798\uB7B4\uB7D0\uB7EC\uB808\uB824\uB840"
+ "\uB85C\uB878\uB894\uB8B0\uB8CC\uB8E8\uB904\uB920\uB93C\uB958"
+ "\uB974\uB990\uB9AC\uB9C8\uB9E4\uBA00\uBA1C\uBA38\uBA54\uBA70"
+ "\uBA8C\uBAA8\uBAC4\uBAE0\uBAFC\uBB18\uBB34\uBB50\uBB6C\uBB88"
+ "\uBBA4\uBBC0\uBBDC\uBBF8\uBC14\uBC30\uBC4C\uBC68\uBC84\uBCA0"
+ "\uBCBC\uBCD8\uBCF4\uBD10\uBD2C\uBD48\uBD64\uBD80\uBD9C\uBDB8"
+ "\uBDD4\uBDF0\uBE0C\uBE28\uBE44\uBE60\uBE7C\uBE98\uBEB4\uBED0"
+ "\uBEEC\uBF08\uBF24\uBF40\uBF5C\uBF78\uBF94\uBFB0\uBFCC\uBFE8"
+ "\uC004\uC020\uC03C\uC058\uC074\uC090\uC0AC\uC0C8\uC0E4\uC100"
+ "\uC11C\uC138\uC154\uC170\uC18C\uC1A8\uC1C4\uC1E0\uC1FC\uC218"
+ "\uC234\uC250\uC26C\uC288\uC2A4\uC2C0\uC2DC\uC2F8\uC314\uC330"
+ "\uC34C\uC368\uC384\uC3A0\uC3BC\uC3D8\uC3F4\uC410\uC42C\uC448"
+ "\uC464\uC480\uC49C\uC4B8\uC4D4\uC4F0\uC50C\uC528\uC544\uC560"
+ "\uC57C\uC598\uC5B4\uC5D0\uC5EC\uC608\uC624\uC640\uC65C\uC678"
+ "\uC694\uC6B0\uC6CC\uC6E8\uC704\uC720\uC73C\uC758\uC774\uC790"
+ "\uC7AC\uC7C8\uC7E4\uC800\uC81C\uC838\uC854\uC870\uC88C\uC8A8"
+ "\uC8C4\uC8E0\uC8FC\uC918\uC934\uC950\uC96C\uC988\uC9A4\uC9C0"
+ "\uC9DC\uC9F8\uCA14\uCA30\uCA4C\uCA68\uCA84\uCAA0\uCABC\uCAD8"
+ "\uCAF4\uCB10\uCB2C\uCB48\uCB64\uCB80\uCB9C\uCBB8\uCBD4\uCBF0"
+ "\uCC0C\uCC28\uCC44\uCC60\uCC7C\uCC98\uCCB4\uCCD0\uCCEC\uCD08"
+ "\uCD24\uCD40\uCD5C\uCD78\uCD94\uCDB0\uCDCC\uCDE8\uCE04\uCE20"
+ "\uCE3C\uCE58\uCE74\uCE90\uCEAC\uCEC8\uCEE4\uCF00\uCF1C\uCF38"
+ "\uCF54\uCF70\uCF8C\uCFA8\uCFC4\uCFE0\uCFFC\uD018\uD034\uD050"
+ "\uD06C\uD088\uD0A4\uD0C0\uD0DC\uD0F8\uD114\uD130\uD14C\uD168"
+ "\uD184\uD1A0\uD1BC\uD1D8\uD1F4\uD210\uD22C\uD248\uD264\uD280"
+ "\uD29C\uD2B8\uD2D4\uD2F0\uD30C\uD328\uD344\uD360\uD37C\uD398"
+ "\uD3B4\uD3D0\uD3EC\uD408\uD424\uD440\uD45C\uD478\uD494\uD4B0"
+ "\uD4CC\uD4E8\uD504\uD520\uD53C\uD558\uD574\uD590\uD5AC\uD5C8"
+ "\uD5E4\uD600\uD61C\uD638\uD654\uD670\uD68C\uD6A8\uD6C4\uD6E0"
+ "\uD6FC\uD718\uD734\uD750\uD76C\uD788\uF900-\uFA0D\uFA10\uFA12"
+ "\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB00-\uFB06"
+ "\uFB13-\uFB17\uFB1D-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43"
+ "-\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7"
+ "\uFDF0-\uFDFB\uFE20-\uFE23\uFE30-\uFE44\uFE49-\uFE52\uFE54-"
+ "\uFE66\uFE68-\uFE6B\uFE70-\uFE72\uFE74\uFE76-\uFEFC\uFF01-\uFF5E"
+ "\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-"
+ "\uFFDC\uFFE0-\uFFE6\uFFE8-\uFFEE\\U0001D15E-\\U0001D169\\U0001D16D-"
+ "\\U0001D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
+ "\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D400-\\U0001D454\\U0001D456-"
+ "\\U0001D49C\\U0001D49E-\\U0001D49F\\U0001D4A2\\U0001D4A5-\\U0001D4A6"
+ "\\U0001D4A9-\\U0001D4AC\\U0001D4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-"
+ "\\U0001D4C0\\U0001D4C2-\\U0001D4C3\\U0001D4C5-\\U0001D505\\U0001D507-"
+ "\\U0001D50A\\U0001D50D-\\U0001D514\\U0001D516-\\U0001D51C\\U0001D51E-"
+ "\\U0001D539\\U0001D53B-\\U0001D53E\\U0001D540-\\U0001D544\\U0001D546"
+ "\\U0001D54A-\\U0001D550\\U0001D552-\\U0001D6A3\\U0001D6A8-\\U0001D7C9"
+ "\\U0001D7CE-\\U0001D7FF\\U0002F800-\\U0002FA1D]", false);
/*Unicode:
"[^<->A-PR-Za-pr-z ¨ª¯²-µ¸-º¼-¾À-ÏÑ-ÖØ-Ýà-ïñ-öø-ýÿ-aC-dE-eE-gH-hI"
+ "-iI?-?L-lL-?N-nN-?O-oR-rR-sŠ-šT-tU-uW-?O-oU-u??-uA-?G-o?-??-??-?"
+ "?-??-???-??-??-?`-??-???;?-?????????O??ae??????-???-??-??-?????-"
+ "??????????-?????????-??-??-??-???-??-??-???-???-????-???-????-??"
+ "-??-??-???-???????-??-???????-???????-?????????-??-????-??????-?"
+ "??????-???-??????????-??-???-??-??-???-???????????-??-??-???????"
+ "??????-??-??-????-??-??-??-??-??-??-??-??-??-??-??-??-??-??-??-?"
+ "?-??-??-???-??-????-??-??-??-??-??-??-????-?????-????-????-?????"
+ "-?\ -\?-=·-…??-??-????-?°4-???-???-??-E?-lN-?P-R?-™Z?ZK-Ce-FM-??"
+ "-??????????|??-??-?~??˜?==-=?-??-??-??-??-??-???-???-?<->?-????-"
+ "?\ ?-???-???????????????????????-???????????????????????-???-??-"
+ "??-??-??-??-??-??-??-??-??-?????????????????????????????????????"
+ "????????????????????????????????????????????????????????????????"
+ "????????????????????????????????????????????????????????????????"
+ "????????????????????????????????????????????????????????????????"
+ "????????????????????????????????????????????????????????????????"
+ "????????????????????????????????????????????????????????????????"
+ "????????????????????????????????????????????-????-????-??-??-??-"
+ "??-??-???-??-??-??-??-??-??-??-??-??-??-??-??-???-?!-~?-??-??-??"
+ "-??-??-??-???-????-????-????-????-????-????-????-????-??????-???"
+ "?-????-??????-????-????-????-????-????-????-????-????-??????-???"
+ "?-????-????-????-??]"*/
}
}

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UnicodeSet.java,v $
* $Date: 2001/12/01 21:46:25 $
* $Revision: 1.50 $
* $Date: 2001/12/03 02:10:24 $
* $Revision: 1.51 $
*
*****************************************************************************************
*/
@ -204,7 +204,7 @@ import com.ibm.util.Utility;
* Unicode property
* </table>
* @author Alan Liu
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.50 $ $Date: 2001/12/01 21:46:25 $
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.51 $ $Date: 2001/12/03 02:10:24 $
*/
public class UnicodeSet extends UnicodeFilter {
@ -905,6 +905,33 @@ public class UnicodeSet extends UnicodeFilter {
}
return true;
}
// TODO: Make this public
/**
 * Tests whether at least one code point of the given string is in this set.
 * The string is walked by code point, not by UTF-16 code unit.
 * @param s the string to examine
 * @return true if some code point of <code>s</code> is contained in this
 * set; false otherwise, including when <code>s</code> is empty.
 */
boolean containsSome(String s) {
    int cp;
    // Advance by the width of the code point just read. The earlier code
    // passed the index i to getCharCount, so the step size depended on the
    // position in the string rather than on the character found there.
    for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
        cp = UTF16.charAt(s, i);
        if (contains(cp)) return true;
    }
    return false;
}
// TODO: Make this public
/**
 * Tests whether every code point of the given string is in this set.
 * The string is walked by code point, not by UTF-16 code unit.
 * @param s the string to examine
 * @return true if every code point of <code>s</code> is contained in this
 * set (trivially true when <code>s</code> is empty); false as soon as one
 * code point is found that is not in the set.
 */
boolean containsAll(String s) {
    int cp;
    // Advance by the width of the code point just read. The earlier code
    // passed the index i to getCharCount, so the step size depended on the
    // position in the string rather than on the character found there.
    for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
        cp = UTF16.charAt(s, i);
        if (!contains(cp)) return false;
    }
    return true;
}
/**
* Adds all of the elements in the specified set to this set if

View File

@ -13,9 +13,11 @@ import java.util.*;
/**
* @author Alan Liu
* @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.13 $ $Date: 2001/11/29 17:27:44 $
* @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.14 $ $Date: 2001/12/03 02:10:26 $
*/
final class NormalizationTransliterator extends Transliterator {
static final boolean DEBUG = false;
/**
* The normalization mode of this transliterator.
@ -33,29 +35,19 @@ final class NormalizationTransliterator extends Transliterator {
* effectively consider these to be cc!=0, for our purposes.
*
* From http://www.macchiato.com/utc/NFUnsafeStart-3.1.1dX.txt
* Generated in unicodetools, NFSkippable
*
* TODO Update this to 4 separate sets, one for each norm. form.
*/
static final UnicodeSet[] UNSAFE_STARTS = new UnicodeSet[4];
static final UnicodeSet[] SKIPPABLES = new UnicodeSet[4];
static final int
D = 0, C = 1, KD= 2, KC = 3;
// TODO: Set to exact values for different NFs for more accuracy
static {
UNSAFE_STARTS[D] = new UnicodeSet("[\u0F73\u0F75\u0F81]", false);
UNSAFE_STARTS[C] = new UnicodeSet("[\u09BE\u09D7\u0B3E\u0B56-\u0B57\u0BBE\u0BD7\u0CC2\u0CD5-\u0CD6"
+ "\u0D3E\u0D57\u0DCF\u0DDF\u0F73\u0F75\u0F81\u102E\u1161-\u1175\u11A8-\u11C2]", false);
UNSAFE_STARTS[KD] = new UnicodeSet("[\u0F73\u0F75\u0F81\uFF9E-\uFF9F]", false);
UNSAFE_STARTS[KC] = new UnicodeSet("[\u09BE\u09D7\u0B3E\u0B56-\u0B57\u0BBE\u0BD7\u0CC2\u0CD5-\u0CD6"
+ "\u0D3E\u0D57\u0DCF\u0DDF\u0F73\u0F75\u0F81\u102E\u1161-\u1175\u11A8-\u11C2\u3133\u3135-\u3136"
+ "\u313A-\u313F\u314F-\u3163\uFF9E-\uFF9F\uFFA3\uFFA5-\uFFA6\uFFAA-\uFFAF\uFFC2-\uFFC7\uFFCA-\uFFCF"
+ "\uFFD2-\uFFD7\uFFDA-\uFFDC]", false);
}
// Instance data, simply pointer to one of the above
// Instance data, simply pointer to one of the sets below
final UnicodeSet UNSAFE_START;
final UnicodeSet SKIPPABLE;
/**
* System registration hook.
@ -125,6 +117,7 @@ final class NormalizationTransliterator extends Transliterator {
mode = m;
options = opt;
UNSAFE_START = UNSAFE_STARTS[startChoice];
SKIPPABLE = SKIPPABLES[startChoice];
}
/**
@ -152,7 +145,7 @@ final class NormalizationTransliterator extends Transliterator {
for (int i = start+1; i < limit; i += UTF16.getCharCount(cp)) {
cp = UTF16.charAt(text, i);
if (UCharacter.getCombiningClass(cp) == 0 && !UNSAFE_START.contains(cp)) {
int delta = convert(text, lastSafe, i);
int delta = convert(text, lastSafe, i, null);
i += delta;
limit += delta;
overallDelta += delta;
@ -160,16 +153,29 @@ final class NormalizationTransliterator extends Transliterator {
}
}
if (!isIncremental) {
int delta = convert(text, lastSafe, limit);
int delta = convert(text, lastSafe, limit, null);
overallDelta += delta;
lastSafe = limit + delta;
} else {
// We are incremental, so accept the last characters IF they turn into SKIPPABLEs
int delta = convert(text, lastSafe, limit, SKIPPABLE);
if (delta != Integer.MIN_VALUE) {
overallDelta += delta;
lastSafe = limit + delta;
}
}
offsets.contextLimit += overallDelta;
offsets.limit += overallDelta;
offsets.start = lastSafe;
}
int convert(Replaceable text, int lastSafe, int limit) {
/**
* Converts the range from lastSafe to limit.
* @param verify If non-null, check to see that all replacement characters are in it. If not,
* abort the conversion and return Integer.MIN_VALUE.
* @return the delta in length (new - old), or Integer.MIN_VALUE if the verify check aborted the conversion.
*/
int convert(Replaceable text, int lastSafe, int limit, UnicodeSet verify) {
//System.out.println("t: " + com.ibm.util.Utility.hex(text.toString()) + ", s: " + lastSafe + ", l: " + limit);
int len = limit - lastSafe;
@ -179,6 +185,18 @@ final class NormalizationTransliterator extends Transliterator {
text.getChars(lastSafe, limit, buffer, 0);
String input = new String(buffer, 0, len); // TODO: fix normalizer to take char[]
String output = Normalizer.normalize(input, mode, options);
// verify OK, if specified
if (verify != null) {
    // Test the normalized output against the set the caller passed in.
    // The previous code ignored the parameter and always consulted the
    // SKIPPABLE field, contradicting the documented contract of `verify`.
    // The only non-null argument visible in this file is SKIPPABLE itself,
    // so that call site behaves exactly as before.
    boolean skip = !verify.containsAll(output);
    if (DEBUG) {
        System.out.println((skip ? " SKIP: " : "NOSKIP: ")
            + com.ibm.util.Utility.escape(input)
            + " => " + com.ibm.util.Utility.escape(output));
    }
    // Abort without touching the text; the caller treats MIN_VALUE as "not converted".
    if (skip) return Integer.MIN_VALUE;
}
if (output.equals(input)) {
return 0;
}
@ -188,4 +206,400 @@ final class NormalizationTransliterator extends Transliterator {
private char buffer[] = new char[30];
static {
// Fill the UNSAFE_STARTS table, one UnicodeSet per slot D, C, KD and KC.
// NOTE(review): judging by the name, these are presumably the characters at which
// a normalization segment must not begin, because they can interact with the text
// before them -- confirm against the code that reads UNSAFE_STARTS.
// NOTE(review): the meaning of the second constructor argument (false) is not
// visible in this chunk -- confirm against the UnicodeSet constructor.

// Slot D: three code points only (U+0F73, U+0F75, U+0F81).
UNSAFE_STARTS[D] = new UnicodeSet("[\u0F73\u0F75\u0F81]", false);
// Slot C: the three code points listed for D plus further code points and ranges,
// ending with the ranges U+1161-U+1175 and U+11A8-U+11C2.
UNSAFE_STARTS[C] = new UnicodeSet("[\u09BE\u09D7\u0B3E\u0B56-\u0B57\u0BBE\u0BD7\u0CC2\u0CD5-\u0CD6"
+ "\u0D3E\u0D57\u0DCF\u0DDF\u0F73\u0F75\u0F81\u102E\u1161-\u1175\u11A8-\u11C2]", false);
// Slot KD: the D entries plus U+FF9E-U+FF9F (the halfwidth katakana voiced and
// semi-voiced sound marks).
UNSAFE_STARTS[KD] = new UnicodeSet("[\u0F73\u0F75\u0F81\uFF9E-\uFF9F]", false);
// Slot KC: every entry listed for C, plus U+FF9E-U+FF9F and additional ranges in
// the U+3133-U+3163 and U+FFA3-U+FFDC areas.
UNSAFE_STARTS[KC] = new UnicodeSet("[\u09BE\u09D7\u0B3E\u0B56-\u0B57\u0BBE\u0BD7\u0CC2\u0CD5-\u0CD6"
+ "\u0D3E\u0D57\u0DCF\u0DDF\u0F73\u0F75\u0F81\u102E\u1161-\u1175\u11A8-\u11C2\u3133\u3135-\u3136"
+ "\u313A-\u313F\u314F-\u3163\uFF9E-\uFF9F\uFFA3\uFFA5-\uFFA6\uFFAA-\uFFAF\uFFC2-\uFFC7\uFFCA-\uFFCF"
+ "\uFFD2-\uFFD7\uFFDA-\uFFDC]", false);
SKIPPABLES[D] = new UnicodeSet(
"[^\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-"
+ "\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112"
+ "-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148"
+ "\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-"
+ "\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8"
+ "-\u021B\u021E-\u021F\u0226-\u0233\u0300-\u034E\u0360-\u0362"
+ "\u0374\u037E\u0385-\u038A\u038C\u038E-\u0390\u03AA-\u03B0\u03CA-"
+ "\u03CE\u03D3-\u03D4\u0400-\u0401\u0403\u0407\u040C-\u040E\u0419"
+ "\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0476-\u0477\u0483-"
+ "\u0486\u04C1-\u04C2\u04D0-\u04D3\u04D6-\u04D7\u04DA-\u04DF\u04E2"
+ "-\u04E7\u04EA-\u04F5\u04F8-\u04F9\u0591-\u05A1\u05A3-\u05B9"
+ "\u05BB-\u05BD\u05BF\u05C1-\u05C2\u05C4\u0622-\u0626\u064B-\u0655"
+ "\u0670\u06C0\u06C2\u06D3\u06D6-\u06DC\u06DF-\u06E4\u06E7-\u06E8"
+ "\u06EA-\u06ED\u0711\u0730-\u074A\u0929\u0931\u0934\u093C\u094D"
+ "\u0951-\u0954\u0958-\u095F\u09BC\u09CB-\u09CD\u09DC-\u09DD\u09DF"
+ "\u0A33\u0A36\u0A3C\u0A4D\u0A59-\u0A5B\u0A5E\u0ABC\u0ACD\u0B3C"
+ "\u0B48\u0B4B-\u0B4D\u0B5C-\u0B5D\u0B94\u0BCA-\u0BCD\u0C48\u0C4D"
+ "\u0C55-\u0C56\u0CC0\u0CC7-\u0CC8\u0CCA-\u0CCB\u0CCD\u0D4A-\u0D4D"
+ "\u0DCA\u0DDA\u0DDC-\u0DDE\u0E38-\u0E3A\u0E48-\u0E4B\u0EB8-\u0EB9"
+ "\u0EC8-\u0ECB\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F43\u0F4D\u0F52"
+ "\u0F57\u0F5C\u0F69\u0F71-\u0F76\u0F78\u0F7A-\u0F7D\u0F80-\u0F84"
+ "\u0F86-\u0F87\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9\u0FC6\u1026"
+ "\u1037\u1039\u17D2\u18A9\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F00-"
+ "\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59"
+ "\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-"
+ "\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\u1FF6"
+ "-\u1FFD\u2000-\u2001\u20D0-\u20DC\u20E1\u2126\u212A-\u212B\u219A"
+ "-\u219B\u21AE\u21CD-\u21CF\u2204\u2209\u220C\u2224\u2226\u2241"
+ "\u2244\u2247\u2249\u2260\u2262\u226D-\u2271\u2274-\u2275\u2278-"
+ "\u2279\u2280-\u2281\u2284-\u2285\u2288-\u2289\u22AC-\u22AF\u22E0"
+ "-\u22E3\u22EA-\u22ED\u2329-\u232A\u302A-\u302F\u304C\u304E\u3050"
+ "\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065"
+ "\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A"
+ "\u307C-\u307D\u3094\u3099-\u309A\u309E\u30AC\u30AE\u30B0\u30B2"
+ "\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7"
+ "\u30C9\u30D0-\u30D1\u30D3-\u30D4\u30D6-\u30D7\u30D9-\u30DA\u30DC"
+ "-\u30DD\u30F4\u30F7-\u30FA\u30FE\uAC00-\uD7A3\uF900-\uFA0D\uFA10"
+ "\uFA12\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB1D-"
+ "\uFB1F\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44"
+ "\uFB46-\uFB4E\uFE20-\uFE23\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001"
+ "D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-\\U0001D"
+ "1AD\\U0001D1BB-\\U0001D1C0\\U0002F800-\\U0002FA1D]", false);
/*Unicode:
"[^À-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-dE-hI-IJ-kL-lN-nO-oR-tU-žO-oU-uA-uA-"
+ "?G-j?-??-??-??-?`-??-??;?-???-??-??-??-??-????-????-????-??-??-?"
+ "?-??-??-??-??-??-??-??-??-??-???-???-??-??????-??-??-??-???-????"
+ "???-??-???-??-???????-???????-??-???-????-???-??-???-????-??-??-"
+ "??-??-??-???????????-???-??-??-??????????????-???-??-??-??-??-??"
+ "-?????-??-??-???-??-??-??-??-??-?\ -\ ?-???K-Å?-???-????????????"
+ "?-??-??-??-??-??-??-??-??-?<->?-?????????????????-??-??-??-??-??"
+ "?-??????????????????-??-??-??-??-???-???-??-????-????-??-??-??-?"
+ "?-???-??-??-??-???-????-????-????-????-????-????-??]"*/
SKIPPABLES[C] = new UnicodeSet(
"[^<->A-PR-Za-pr-z\u00A8\u00C0-\u00CF\u00D1-\u00D6\u00D8-\u00DD"
+ "\u00E0-\u00EF\u00F1-\u00F6\u00F8-\u00FD\u00FF-\u0103\u0106-"
+ "\u010F\u0112-\u0117\u011A-\u0121\u0124-\u0125\u0128-\u012D\u0130"
+ "\u0139-\u013A\u013D-\u013E\u0143-\u0144\u0147-\u0148\u014C-"
+ "\u0151\u0154-\u0155\u0158-\u015D\u0160-\u0161\u0164-\u0165\u0168"
+ "-\u0171\u0174-\u017F\u01A0-\u01A1\u01AF-\u01B0\u01B7\u01CD-"
+ "\u01DC\u01DE-\u01E1\u01E6-\u01EB\u01F4-\u01F5\u01F8-\u01FB\u0200"
+ "-\u021B\u021E-\u021F\u0226-\u0233\u0292\u0300-\u034E\u0360-"
+ "\u0362\u0374\u037E\u0387\u0391\u0395\u0397\u0399\u039F\u03A1"
+ "\u03A5\u03A9\u03AC\u03AE\u03B1\u03B5\u03B7\u03B9\u03BF\u03C1"
+ "\u03C5\u03C9-\u03CB\u03CE\u03D2\u0406\u0410\u0413\u0415-\u0418"
+ "\u041A\u041E\u0423\u0427\u042B\u042D\u0430\u0433\u0435-\u0438"
+ "\u043A\u043E\u0443\u0447\u044B\u044D\u0456\u0474-\u0475\u0483-"
+ "\u0486\u04D8-\u04D9\u04E8-\u04E9\u0591-\u05A1\u05A3-\u05B9\u05BB"
+ "-\u05BD\u05BF\u05C1-\u05C2\u05C4\u0622-\u0623\u0627\u0648\u064A-"
+ "\u0655\u0670\u06C1\u06D2\u06D5-\u06DC\u06DF-\u06E4\u06E7-\u06E8"
+ "\u06EA-\u06ED\u0711\u0730-\u074A\u0928\u0930\u0933\u093C\u094D"
+ "\u0951-\u0954\u0958-\u095F\u09BC\u09BE\u09C7\u09CD\u09D7\u09DC-"
+ "\u09DD\u09DF\u0A33\u0A36\u0A3C\u0A4D\u0A59-\u0A5B\u0A5E\u0ABC"
+ "\u0ACD\u0B3C\u0B3E\u0B47\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B92"
+ "\u0BBE\u0BC6-\u0BC7\u0BCD\u0BD7\u0C46\u0C4D\u0C55-\u0C56\u0CBF"
+ "\u0CC2\u0CC6\u0CCA\u0CCD\u0CD5-\u0CD6\u0D3E\u0D46-\u0D47\u0D4D"
+ "\u0D57\u0DCA\u0DCF\u0DD9\u0DDC\u0DDF\u0E38-\u0E3A\u0E48-\u0E4B"
+ "\u0EB8-\u0EB9\u0EC8-\u0ECB\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F43"
+ "\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F71-\u0F76\u0F78\u0F7A-\u0F7D"
+ "\u0F80-\u0F84\u0F86-\u0F87\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9"
+ "\u0FC6\u1025\u102E\u1037\u1039\u1100-\u1112\u1161-\u1175\u11A8-"
+ "\u11C2\u17D2\u18A9\u1E00-\u1E03\u1E0A-\u1E0F\u1E12-\u1E1B\u1E20-"
+ "\u1E27\u1E2A-\u1E41\u1E44-\u1E53\u1E58-\u1E7D\u1E80-\u1E87\u1E8E"
+ "-\u1E91\u1E96-\u1E99\u1EA0-\u1EF3\u1EF6-\u1EF9\u1F00-\u1F11"
+ "\u1F18-\u1F19\u1F20-\u1F31\u1F38-\u1F39\u1F40-\u1F41\u1F48-"
+ "\u1F49\u1F50-\u1F51\u1F59\u1F60-\u1F71\u1F73-\u1F75\u1F77\u1F79"
+ "\u1F7B-\u1F7D\u1F80-\u1F81\u1F88-\u1F89\u1F90-\u1F91\u1F98-"
+ "\u1F99\u1FA0-\u1FA1\u1FA8-\u1FA9\u1FB3\u1FB6\u1FBB-\u1FBC\u1FBE-"
+ "\u1FBF\u1FC3\u1FC6\u1FC9\u1FCB-\u1FCC\u1FD3\u1FDB\u1FE3\u1FEB"
+ "\u1FEE-\u1FEF\u1FF3\u1FF6\u1FF9\u1FFB-\u1FFE\u2000-\u2001\u20D0-"
+ "\u20DC\u20E1\u2126\u212A-\u212B\u2190\u2192\u2194\u21D0\u21D2"
+ "\u21D4\u2203\u2208\u220B\u2223\u2225\u223C\u2243\u2245\u2248"
+ "\u224D\u2261\u2264-\u2265\u2272-\u2273\u2276-\u2277\u227A-\u227D"
+ "\u2282-\u2283\u2286-\u2287\u2291-\u2292\u22A2\u22A8-\u22A9\u22AB"
+ "\u22B2-\u22B5\u2329-\u232A\u302A-\u302F\u3046\u304B\u304D\u304F"
+ "\u3051\u3053\u3055\u3057\u3059\u305B\u305D\u305F\u3061\u3064"
+ "\u3066\u3068\u306F\u3072\u3075\u3078\u307B\u3099-\u309A\u309D"
+ "\u30A6\u30AB\u30AD\u30AF\u30B1\u30B3\u30B5\u30B7\u30B9\u30BB"
+ "\u30BD\u30BF\u30C1\u30C4\u30C6\u30C8\u30CF\u30D2\u30D5\u30D8"
+ "\u30DB\u30EF-\u30F2\u30FD\uAC00\uAC1C\uAC38\uAC54\uAC70\uAC8C"
+ "\uACA8\uACC4\uACE0\uACFC\uAD18\uAD34\uAD50\uAD6C\uAD88\uADA4"
+ "\uADC0\uADDC\uADF8\uAE14\uAE30\uAE4C\uAE68\uAE84\uAEA0\uAEBC"
+ "\uAED8\uAEF4\uAF10\uAF2C\uAF48\uAF64\uAF80\uAF9C\uAFB8\uAFD4"
+ "\uAFF0\uB00C\uB028\uB044\uB060\uB07C\uB098\uB0B4\uB0D0\uB0EC"
+ "\uB108\uB124\uB140\uB15C\uB178\uB194\uB1B0\uB1CC\uB1E8\uB204"
+ "\uB220\uB23C\uB258\uB274\uB290\uB2AC\uB2C8\uB2E4\uB300\uB31C"
+ "\uB338\uB354\uB370\uB38C\uB3A8\uB3C4\uB3E0\uB3FC\uB418\uB434"
+ "\uB450\uB46C\uB488\uB4A4\uB4C0\uB4DC\uB4F8\uB514\uB530\uB54C"
+ "\uB568\uB584\uB5A0\uB5BC\uB5D8\uB5F4\uB610\uB62C\uB648\uB664"
+ "\uB680\uB69C\uB6B8\uB6D4\uB6F0\uB70C\uB728\uB744\uB760\uB77C"
+ "\uB798\uB7B4\uB7D0\uB7EC\uB808\uB824\uB840\uB85C\uB878\uB894"
+ "\uB8B0\uB8CC\uB8E8\uB904\uB920\uB93C\uB958\uB974\uB990\uB9AC"
+ "\uB9C8\uB9E4\uBA00\uBA1C\uBA38\uBA54\uBA70\uBA8C\uBAA8\uBAC4"
+ "\uBAE0\uBAFC\uBB18\uBB34\uBB50\uBB6C\uBB88\uBBA4\uBBC0\uBBDC"
+ "\uBBF8\uBC14\uBC30\uBC4C\uBC68\uBC84\uBCA0\uBCBC\uBCD8\uBCF4"
+ "\uBD10\uBD2C\uBD48\uBD64\uBD80\uBD9C\uBDB8\uBDD4\uBDF0\uBE0C"
+ "\uBE28\uBE44\uBE60\uBE7C\uBE98\uBEB4\uBED0\uBEEC\uBF08\uBF24"
+ "\uBF40\uBF5C\uBF78\uBF94\uBFB0\uBFCC\uBFE8\uC004\uC020\uC03C"
+ "\uC058\uC074\uC090\uC0AC\uC0C8\uC0E4\uC100\uC11C\uC138\uC154"
+ "\uC170\uC18C\uC1A8\uC1C4\uC1E0\uC1FC\uC218\uC234\uC250\uC26C"
+ "\uC288\uC2A4\uC2C0\uC2DC\uC2F8\uC314\uC330\uC34C\uC368\uC384"
+ "\uC3A0\uC3BC\uC3D8\uC3F4\uC410\uC42C\uC448\uC464\uC480\uC49C"
+ "\uC4B8\uC4D4\uC4F0\uC50C\uC528\uC544\uC560\uC57C\uC598\uC5B4"
+ "\uC5D0\uC5EC\uC608\uC624\uC640\uC65C\uC678\uC694\uC6B0\uC6CC"
+ "\uC6E8\uC704\uC720\uC73C\uC758\uC774\uC790\uC7AC\uC7C8\uC7E4"
+ "\uC800\uC81C\uC838\uC854\uC870\uC88C\uC8A8\uC8C4\uC8E0\uC8FC"
+ "\uC918\uC934\uC950\uC96C\uC988\uC9A4\uC9C0\uC9DC\uC9F8\uCA14"
+ "\uCA30\uCA4C\uCA68\uCA84\uCAA0\uCABC\uCAD8\uCAF4\uCB10\uCB2C"
+ "\uCB48\uCB64\uCB80\uCB9C\uCBB8\uCBD4\uCBF0\uCC0C\uCC28\uCC44"
+ "\uCC60\uCC7C\uCC98\uCCB4\uCCD0\uCCEC\uCD08\uCD24\uCD40\uCD5C"
+ "\uCD78\uCD94\uCDB0\uCDCC\uCDE8\uCE04\uCE20\uCE3C\uCE58\uCE74"
+ "\uCE90\uCEAC\uCEC8\uCEE4\uCF00\uCF1C\uCF38\uCF54\uCF70\uCF8C"
+ "\uCFA8\uCFC4\uCFE0\uCFFC\uD018\uD034\uD050\uD06C\uD088\uD0A4"
+ "\uD0C0\uD0DC\uD0F8\uD114\uD130\uD14C\uD168\uD184\uD1A0\uD1BC"
+ "\uD1D8\uD1F4\uD210\uD22C\uD248\uD264\uD280\uD29C\uD2B8\uD2D4"
+ "\uD2F0\uD30C\uD328\uD344\uD360\uD37C\uD398\uD3B4\uD3D0\uD3EC"
+ "\uD408\uD424\uD440\uD45C\uD478\uD494\uD4B0\uD4CC\uD4E8\uD504"
+ "\uD520\uD53C\uD558\uD574\uD590\uD5AC\uD5C8\uD5E4\uD600\uD61C"
+ "\uD638\uD654\uD670\uD68C\uD6A8\uD6C4\uD6E0\uD6FC\uD718\uD734"
+ "\uD750\uD76C\uD788\uF900-\uFA0D\uFA10\uFA12\uFA15-\uFA1E\uFA20"
+ "\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB1D-\uFB1F\uFB2A-\uFB36\uFB38"
+ "-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-\uFB44\uFB46-\uFB4E\uFE20-"
+ "\uFE23\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001D172\\U0001D17B-"
+ "\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001D1BB-"
+ "\\U0001D1C0\\U0002F800-\\U0002FA1D]", false);
/*Unicode:
"[^<->A-PR-Za-pr-z¨À-ÏÑ-ÖØ-Ýà-ïñ-öø-ýÿ-aC-dE-eE-gH-hI-iIL-lL-lN-n"
+ "N-nO-oR-rR-sŠ-šT-tU-uW-?O-oU-u?A-uA-?G-o?-??-??-??-??-??`-??-??;"
+ "????????O??ae??????-???????-??????????-?????????-??-??-??-??-??-"
+ "??-???-???-????-?????-??-??-??-???-???????-??-???????-???????-??"
+ "???????-??-????-??????-???????-???-?????????-??-??-??-??-???????"
+ "????-???-??-??-?????????????-??-??-????-??-??-??-??-??-??-??-??-"
+ "??-??-??-??-??-??-??-??-??-??-???-??-????-??-??-??-??-??-??-????"
+ "-??-?????-??????-?????-?\ -\ ?-???K-Å?????????|?~??˜?==-=?-??-??"
+ "-??-??-??-???-???-?<->?-???????????????????????-????????????????"
+ "????????-???????????????????????????????????????????????????????"
+ "????????????????????????????????????????????????????????????????"
+ "????????????????????????????????????????????????????????????????"
+ "????????????????????????????????????????????????????????????????"
+ "????????????????????????????????????????????????????????????????"
+ "????????????????????????????????????????????????????????????????"
+ "???????????????????????????-????-????-??-??-??-??-???-??-??-??-?"
+ "??-????-????-????-????-????-????-??]"*/
SKIPPABLES[KD] = new UnicodeSet(
"[^\u00A0\u00A8\u00AA\u00AF\u00B2-\u00B5\u00B8-\u00BA\u00BC-"
+ "\u00BE\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0"
+ "-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F"
+ "\u0112-\u0125\u0128-\u0130\u0132-\u0137\u0139-\u0140\u0143-"
+ "\u0149\u014C-\u0151\u0154-\u0165\u0168-\u017F\u01A0-\u01A1\u01AF"
+ "-\u01B0\u01C4-\u01DC\u01DE-\u01E3\u01E6-\u01F5\u01F8-\u021B"
+ "\u021E-\u021F\u0226-\u0233\u02B0-\u02B8\u02D8-\u02DD\u02E0-"
+ "\u02E4\u0300-\u034E\u0360-\u0362\u0374\u037A\u037E\u0384-\u038A"
+ "\u038C\u038E-\u0390\u03AA-\u03B0\u03CA-\u03CE\u03D0-\u03D6\u03F0"
+ "-\u03F2\u03F4-\u03F5\u0400-\u0401\u0403\u0407\u040C-\u040E\u0419"
+ "\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0476-\u0477\u0483-"
+ "\u0486\u04C1-\u04C2\u04D0-\u04D3\u04D6-\u04D7\u04DA-\u04DF\u04E2"
+ "-\u04E7\u04EA-\u04F5\u04F8-\u04F9\u0587\u0591-\u05A1\u05A3-"
+ "\u05B9\u05BB-\u05BD\u05BF\u05C1-\u05C2\u05C4\u0622-\u0626\u064B-"
+ "\u0655\u0670\u0675-\u0678\u06C0\u06C2\u06D3\u06D6-\u06DC\u06DF-"
+ "\u06E4\u06E7-\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u0929\u0931"
+ "\u0934\u093C\u094D\u0951-\u0954\u0958-\u095F\u09BC\u09CB-\u09CD"
+ "\u09DC-\u09DD\u09DF\u0A33\u0A36\u0A3C\u0A4D\u0A59-\u0A5B\u0A5E"
+ "\u0ABC\u0ACD\u0B3C\u0B48\u0B4B-\u0B4D\u0B5C-\u0B5D\u0B94\u0BCA-"
+ "\u0BCD\u0C48\u0C4D\u0C55-\u0C56\u0CC0\u0CC7-\u0CC8\u0CCA-\u0CCB"
+ "\u0CCD\u0D4A-\u0D4D\u0DCA\u0DDA\u0DDC-\u0DDE\u0E33\u0E38-\u0E3A"
+ "\u0E48-\u0E4B\u0EB3\u0EB8-\u0EB9\u0EC8-\u0ECB\u0EDC-\u0EDD\u0F0C"
+ "\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F43\u0F4D\u0F52\u0F57\u0F5C"
+ "\u0F69\u0F71-\u0F7D\u0F80-\u0F84\u0F86-\u0F87\u0F93\u0F9D\u0FA2"
+ "\u0FA7\u0FAC\u0FB9\u0FC6\u1026\u1037\u1039\u17D2\u18A9\u1E00-"
+ "\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48"
+ "-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4"
+ "\u1FB6-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-"
+ "\u1FF4\u1FF6-\u1FFE\u2000-\u200A\u2011\u2017\u2024-\u2026\u202F"
+ "\u2033-\u2034\u2036-\u2037\u203C\u203E\u2048-\u2049\u2070\u2074-"
+ "\u208E\u20A8\u20D0-\u20DC\u20E1\u2100-\u2103\u2105-\u2107\u2109-"
+ "\u2113\u2115-\u2116\u2119-\u211D\u2120-\u2122\u2124\u2126\u2128"
+ "\u212A-\u212D\u212F-\u2131\u2133-\u2139\u2153-\u217F\u219A-"
+ "\u219B\u21AE\u21CD-\u21CF\u2204\u2209\u220C\u2224\u2226\u222C-"
+ "\u222D\u222F-\u2230\u2241\u2244\u2247\u2249\u2260\u2262\u226D-"
+ "\u2271\u2274-\u2275\u2278-\u2279\u2280-\u2281\u2284-\u2285\u2288"
+ "-\u2289\u22AC-\u22AF\u22E0-\u22E3\u22EA-\u22ED\u2329-\u232A"
+ "\u2460-\u24EA\u2E9F\u2EF3\u2F00-\u2FD5\u3000\u302A-\u302F\u3036"
+ "\u3038-\u303A\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A"
+ "\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-"
+ "\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u3099-\u309C"
+ "\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC"
+ "\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0-\u30D1\u30D3-\u30D4"
+ "\u30D6-\u30D7\u30D9-\u30DA\u30DC-\u30DD\u30F4\u30F7-\u30FA\u30FE"
+ "\u3131-\u318E\u3192-\u319F\u3200-\u321C\u3220-\u3243\u3260-"
+ "\u327B\u3280-\u32B0\u32C0-\u32CB\u32D0-\u32FE\u3300-\u3376\u337B"
+ "-\u33DD\u33E0-\u33FE\uAC00-\uD7A3\uF900-\uFA0D\uFA10\uFA12\uFA15"
+ "-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB00-\uFB06\uFB13"
+ "-\uFB17\uFB1D-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43-"
+ "\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0"
+ "-\uFDFB\uFE20-\uFE23\uFE30-\uFE44\uFE49-\uFE52\uFE54-\uFE66"
+ "\uFE68-\uFE6B\uFE70-\uFE72\uFE74\uFE76-\uFEFC\uFF01-\uFF5E\uFF61"
+ "-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC"
+ "\uFFE0-\uFFE6\uFFE8-\uFFEE\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001"
+ "D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-\\U0001D"
+ "1AD\\U0001D1BB-\\U0001D1C0\\U0001D400-\\U0001D454\\U0001D456-\\U0001D4"
+ "9C\\U0001D49E-\\U0001D49F\\U0001D4A2\\U0001D4A5-\\U0001D4A6\\U0001D4A9"
+ "-\\U0001D4AC\\U0001D4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-\\U0001D4C0"
+ "\\U0001D4C2-\\U0001D4C3\\U0001D4C5-\\U0001D505\\U0001D507-\\U0001D50A"
+ "\\U0001D50D-\\U0001D514\\U0001D516-\\U0001D51C\\U0001D51E-\\U0001D539"
+ "\\U0001D53B-\\U0001D53E\\U0001D540-\\U0001D544\\U0001D546\\U0001D54A-"
+ "\\U0001D550\\U0001D552-\\U0001D6A3\\U0001D6A8-\\U0001D7C9\\U0001D7CE-"
+ "\\U0001D7FF\\U0002F800-\\U0002FA1D]", false);
/*Unicode:
"[^ ¨ª¯²-µ¸-º¼-¾À-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-dE-hI-I?-kL-?N-?O-oR-tU"
+ "-?O-oU-u?-uA-?G-??-??-??-??-??-??-?`-??-???;?-???-??-??-??-??-??"
+ "-??-????-????-????-??-??-??-??-??-??-??-??-??-???-??-??-???-???-"
+ "??-???-?????-??-??-??-???-???????-??-???-??-???????-???????-??-?"
+ "??-????-???-??-???-????-???-??-???-??-??-???-???????????-??-??-?"
+ "?????????????-??-??-??-??-??-??-?????-??-??-??-??-??-??-??-?\ -"
+ "\?-=·-…??-??-????-?°4-???-???-??-E?-lN-?P-R?-™Z?ZK-Ce-FM-??-??-?"
+ "??-???????-??-????????-??-??-??-??-??-??-??-??-?<->?-????-?"
+ "\ ?-???-?????????????????-??-??-??-??-???-??????????????????-??-"
+ "??-??-??-???-???-??-??-??-??-??-??-??-??-??-??-??-??-????-????-?"
+ "?-??-??-??-??-???-??-??-??-??-??-??-??-??-??-??-??-??-???-?!-~?-"
+ "??-??-??-??-??-??-???-????-????-????-????-????-????-????-????-??"
+ "????-????-????-??????-????-????-????-????-????-????-????-????-??"
+ "????-????-????-????-????-??]"*/
SKIPPABLES[KC] = new UnicodeSet(
"[^<->A-PR-Za-pr-z\u00A0\u00A8\u00AA\u00AF\u00B2-\u00B5\u00B8-"
+ "\u00BA\u00BC-\u00BE\u00C0-\u00CF\u00D1-\u00D6\u00D8-\u00DD\u00E0"
+ "-\u00EF\u00F1-\u00F6\u00F8-\u00FD\u00FF-\u0103\u0106-\u010F"
+ "\u0112-\u0117\u011A-\u0121\u0124-\u0125\u0128-\u012D\u0130\u0132"
+ "-\u0133\u0139-\u013A\u013D-\u0140\u0143-\u0144\u0147-\u0149"
+ "\u014C-\u0151\u0154-\u0155\u0158-\u015D\u0160-\u0161\u0164-"
+ "\u0165\u0168-\u0171\u0174-\u017F\u01A0-\u01A1\u01AF-\u01B0\u01B7"
+ "\u01C4-\u01DC\u01DE-\u01E1\u01E6-\u01EB\u01F1-\u01F5\u01F8-"
+ "\u01FB\u0200-\u021B\u021E-\u021F\u0226-\u0233\u0292\u02B0-\u02B8"
+ "\u02D8-\u02DD\u02E0-\u02E4\u0300-\u034E\u0360-\u0362\u0374\u037A"
+ "\u037E\u0384-\u0385\u0387\u0391\u0395\u0397\u0399\u039F\u03A1"
+ "\u03A5\u03A9\u03AC\u03AE\u03B1\u03B5\u03B7\u03B9\u03BF\u03C1"
+ "\u03C5\u03C9-\u03CB\u03CE\u03D0-\u03D6\u03F0-\u03F2\u03F4-\u03F5"
+ "\u0406\u0410\u0413\u0415-\u0418\u041A\u041E\u0423\u0427\u042B"
+ "\u042D\u0430\u0433\u0435-\u0438\u043A\u043E\u0443\u0447\u044B"
+ "\u044D\u0456\u0474-\u0475\u0483-\u0486\u04D8-\u04D9\u04E8-\u04E9"
+ "\u0587\u0591-\u05A1\u05A3-\u05B9\u05BB-\u05BD\u05BF\u05C1-\u05C2"
+ "\u05C4\u0622-\u0623\u0627\u0648\u064A-\u0655\u0670\u0675-\u0678"
+ "\u06C1\u06D2\u06D5-\u06DC\u06DF-\u06E4\u06E7-\u06E8\u06EA-\u06ED"
+ "\u0711\u0730-\u074A\u0928\u0930\u0933\u093C\u094D\u0951-\u0954"
+ "\u0958-\u095F\u09BC\u09BE\u09C7\u09CD\u09D7\u09DC-\u09DD\u09DF"
+ "\u0A33\u0A36\u0A3C\u0A4D\u0A59-\u0A5B\u0A5E\u0ABC\u0ACD\u0B3C"
+ "\u0B3E\u0B47\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B92\u0BBE\u0BC6-"
+ "\u0BC7\u0BCD\u0BD7\u0C46\u0C4D\u0C55-\u0C56\u0CBF\u0CC2\u0CC6"
+ "\u0CCA\u0CCD\u0CD5-\u0CD6\u0D3E\u0D46-\u0D47\u0D4D\u0D57\u0DCA"
+ "\u0DCF\u0DD9\u0DDC\u0DDF\u0E33\u0E38-\u0E3A\u0E48-\u0E4B\u0EB3"
+ "\u0EB8-\u0EB9\u0EC8-\u0ECB\u0EDC-\u0EDD\u0F0C\u0F18-\u0F19\u0F35"
+ "\u0F37\u0F39\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69\u0F71-\u0F7D"
+ "\u0F80-\u0F84\u0F86-\u0F87\u0F93\u0F9D\u0FA2\u0FA7\u0FAC\u0FB9"
+ "\u0FC6\u1025\u102E\u1037\u1039\u1100-\u1112\u1161-\u1175\u11A8-"
+ "\u11C2\u17D2\u18A9\u1E00-\u1E03\u1E0A-\u1E0F\u1E12-\u1E1B\u1E20-"
+ "\u1E27\u1E2A-\u1E41\u1E44-\u1E53\u1E58-\u1E7D\u1E80-\u1E87\u1E8E"
+ "-\u1E91\u1E96-\u1E9B\u1EA0-\u1EF3\u1EF6-\u1EF9\u1F00-\u1F11"
+ "\u1F18-\u1F19\u1F20-\u1F31\u1F38-\u1F39\u1F40-\u1F41\u1F48-"
+ "\u1F49\u1F50-\u1F51\u1F59\u1F60-\u1F71\u1F73-\u1F75\u1F77\u1F79"
+ "\u1F7B-\u1F7D\u1F80-\u1F81\u1F88-\u1F89\u1F90-\u1F91\u1F98-"
+ "\u1F99\u1FA0-\u1FA1\u1FA8-\u1FA9\u1FB3\u1FB6\u1FBB-\u1FC1\u1FC3"
+ "\u1FC6\u1FC9\u1FCB-\u1FCF\u1FD3\u1FDB\u1FDD-\u1FDF\u1FE3\u1FEB"
+ "\u1FED-\u1FEF\u1FF3\u1FF6\u1FF9\u1FFB-\u1FFE\u2000-\u200A\u2011"
+ "\u2017\u2024-\u2026\u202F\u2033-\u2034\u2036-\u2037\u203C\u203E"
+ "\u2048-\u2049\u2070\u2074-\u208E\u20A8\u20D0-\u20DC\u20E1\u2100-"
+ "\u2103\u2105-\u2107\u2109-\u2113\u2115-\u2116\u2119-\u211D\u2120"
+ "-\u2122\u2124\u2126\u2128\u212A-\u212D\u212F-\u2131\u2133-\u2139"
+ "\u2153-\u217F\u2190\u2192\u2194\u21D0\u21D2\u21D4\u2203\u2208"
+ "\u220B\u2223\u2225\u222C-\u222D\u222F-\u2230\u223C\u2243\u2245"
+ "\u2248\u224D\u2261\u2264-\u2265\u2272-\u2273\u2276-\u2277\u227A-"
+ "\u227D\u2282-\u2283\u2286-\u2287\u2291-\u2292\u22A2\u22A8-\u22A9"
+ "\u22AB\u22B2-\u22B5\u2329-\u232A\u2460-\u24EA\u2E9F\u2EF3\u2F00-"
+ "\u2FD5\u3000\u302A-\u302F\u3036\u3038-\u303A\u3046\u304B\u304D"
+ "\u304F\u3051\u3053\u3055\u3057\u3059\u305B\u305D\u305F\u3061"
+ "\u3064\u3066\u3068\u306F\u3072\u3075\u3078\u307B\u3099-\u309D"
+ "\u30A6\u30AB\u30AD\u30AF\u30B1\u30B3\u30B5\u30B7\u30B9\u30BB"
+ "\u30BD\u30BF\u30C1\u30C4\u30C6\u30C8\u30CF\u30D2\u30D5\u30D8"
+ "\u30DB\u30EF-\u30F2\u30FD\u3131-\u318E\u3192-\u319F\u3200-\u321C"
+ "\u3220-\u3243\u3260-\u327B\u3280-\u32B0\u32C0-\u32CB\u32D0-"
+ "\u32FE\u3300-\u3376\u337B-\u33DD\u33E0-\u33FE\uAC00\uAC1C\uAC38"
+ "\uAC54\uAC70\uAC8C\uACA8\uACC4\uACE0\uACFC\uAD18\uAD34\uAD50"
+ "\uAD6C\uAD88\uADA4\uADC0\uADDC\uADF8\uAE14\uAE30\uAE4C\uAE68"
+ "\uAE84\uAEA0\uAEBC\uAED8\uAEF4\uAF10\uAF2C\uAF48\uAF64\uAF80"
+ "\uAF9C\uAFB8\uAFD4\uAFF0\uB00C\uB028\uB044\uB060\uB07C\uB098"
+ "\uB0B4\uB0D0\uB0EC\uB108\uB124\uB140\uB15C\uB178\uB194\uB1B0"
+ "\uB1CC\uB1E8\uB204\uB220\uB23C\uB258\uB274\uB290\uB2AC\uB2C8"
+ "\uB2E4\uB300\uB31C\uB338\uB354\uB370\uB38C\uB3A8\uB3C4\uB3E0"
+ "\uB3FC\uB418\uB434\uB450\uB46C\uB488\uB4A4\uB4C0\uB4DC\uB4F8"
+ "\uB514\uB530\uB54C\uB568\uB584\uB5A0\uB5BC\uB5D8\uB5F4\uB610"
+ "\uB62C\uB648\uB664\uB680\uB69C\uB6B8\uB6D4\uB6F0\uB70C\uB728"
+ "\uB744\uB760\uB77C\uB798\uB7B4\uB7D0\uB7EC\uB808\uB824\uB840"
+ "\uB85C\uB878\uB894\uB8B0\uB8CC\uB8E8\uB904\uB920\uB93C\uB958"
+ "\uB974\uB990\uB9AC\uB9C8\uB9E4\uBA00\uBA1C\uBA38\uBA54\uBA70"
+ "\uBA8C\uBAA8\uBAC4\uBAE0\uBAFC\uBB18\uBB34\uBB50\uBB6C\uBB88"
+ "\uBBA4\uBBC0\uBBDC\uBBF8\uBC14\uBC30\uBC4C\uBC68\uBC84\uBCA0"
+ "\uBCBC\uBCD8\uBCF4\uBD10\uBD2C\uBD48\uBD64\uBD80\uBD9C\uBDB8"
+ "\uBDD4\uBDF0\uBE0C\uBE28\uBE44\uBE60\uBE7C\uBE98\uBEB4\uBED0"
+ "\uBEEC\uBF08\uBF24\uBF40\uBF5C\uBF78\uBF94\uBFB0\uBFCC\uBFE8"
+ "\uC004\uC020\uC03C\uC058\uC074\uC090\uC0AC\uC0C8\uC0E4\uC100"
+ "\uC11C\uC138\uC154\uC170\uC18C\uC1A8\uC1C4\uC1E0\uC1FC\uC218"
+ "\uC234\uC250\uC26C\uC288\uC2A4\uC2C0\uC2DC\uC2F8\uC314\uC330"
+ "\uC34C\uC368\uC384\uC3A0\uC3BC\uC3D8\uC3F4\uC410\uC42C\uC448"
+ "\uC464\uC480\uC49C\uC4B8\uC4D4\uC4F0\uC50C\uC528\uC544\uC560"
+ "\uC57C\uC598\uC5B4\uC5D0\uC5EC\uC608\uC624\uC640\uC65C\uC678"
+ "\uC694\uC6B0\uC6CC\uC6E8\uC704\uC720\uC73C\uC758\uC774\uC790"
+ "\uC7AC\uC7C8\uC7E4\uC800\uC81C\uC838\uC854\uC870\uC88C\uC8A8"
+ "\uC8C4\uC8E0\uC8FC\uC918\uC934\uC950\uC96C\uC988\uC9A4\uC9C0"
+ "\uC9DC\uC9F8\uCA14\uCA30\uCA4C\uCA68\uCA84\uCAA0\uCABC\uCAD8"
+ "\uCAF4\uCB10\uCB2C\uCB48\uCB64\uCB80\uCB9C\uCBB8\uCBD4\uCBF0"
+ "\uCC0C\uCC28\uCC44\uCC60\uCC7C\uCC98\uCCB4\uCCD0\uCCEC\uCD08"
+ "\uCD24\uCD40\uCD5C\uCD78\uCD94\uCDB0\uCDCC\uCDE8\uCE04\uCE20"
+ "\uCE3C\uCE58\uCE74\uCE90\uCEAC\uCEC8\uCEE4\uCF00\uCF1C\uCF38"
+ "\uCF54\uCF70\uCF8C\uCFA8\uCFC4\uCFE0\uCFFC\uD018\uD034\uD050"
+ "\uD06C\uD088\uD0A4\uD0C0\uD0DC\uD0F8\uD114\uD130\uD14C\uD168"
+ "\uD184\uD1A0\uD1BC\uD1D8\uD1F4\uD210\uD22C\uD248\uD264\uD280"
+ "\uD29C\uD2B8\uD2D4\uD2F0\uD30C\uD328\uD344\uD360\uD37C\uD398"
+ "\uD3B4\uD3D0\uD3EC\uD408\uD424\uD440\uD45C\uD478\uD494\uD4B0"
+ "\uD4CC\uD4E8\uD504\uD520\uD53C\uD558\uD574\uD590\uD5AC\uD5C8"
+ "\uD5E4\uD600\uD61C\uD638\uD654\uD670\uD68C\uD6A8\uD6C4\uD6E0"
+ "\uD6FC\uD718\uD734\uD750\uD76C\uD788\uF900-\uFA0D\uFA10\uFA12"
+ "\uFA15-\uFA1E\uFA20\uFA22\uFA25-\uFA26\uFA2A-\uFA2D\uFB00-\uFB06"
+ "\uFB13-\uFB17\uFB1D-\uFB36\uFB38-\uFB3C\uFB3E\uFB40-\uFB41\uFB43"
+ "-\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7"
+ "\uFDF0-\uFDFB\uFE20-\uFE23\uFE30-\uFE44\uFE49-\uFE52\uFE54-"
+ "\uFE66\uFE68-\uFE6B\uFE70-\uFE72\uFE74\uFE76-\uFEFC\uFF01-\uFF5E"
+ "\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-"
+ "\uFFDC\uFFE0-\uFFE6\uFFE8-\uFFEE\\U0001D15E-\\U0001D169\\U0001D16D-"
+ "\\U0001D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-"
+ "\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D400-\\U0001D454\\U0001D456-"
+ "\\U0001D49C\\U0001D49E-\\U0001D49F\\U0001D4A2\\U0001D4A5-\\U0001D4A6"
+ "\\U0001D4A9-\\U0001D4AC\\U0001D4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-"
+ "\\U0001D4C0\\U0001D4C2-\\U0001D4C3\\U0001D4C5-\\U0001D505\\U0001D507-"
+ "\\U0001D50A\\U0001D50D-\\U0001D514\\U0001D516-\\U0001D51C\\U0001D51E-"
+ "\\U0001D539\\U0001D53B-\\U0001D53E\\U0001D540-\\U0001D544\\U0001D546"
+ "\\U0001D54A-\\U0001D550\\U0001D552-\\U0001D6A3\\U0001D6A8-\\U0001D7C9"
+ "\\U0001D7CE-\\U0001D7FF\\U0002F800-\\U0002FA1D]", false);
/*Unicode:
"[^<->A-PR-Za-pr-z ¨ª¯²-µ¸-º¼-¾À-ÏÑ-ÖØ-Ýà-ïñ-öø-ýÿ-aC-dE-eE-gH-hI"
+ "-iI?-?L-lL-?N-nN-?O-oR-rR-sŠ-šT-tU-uW-?O-oU-u??-uA-?G-o?-??-??-?"
+ "?-??-???-??-??-?`-??-???;?-?????????O??ae??????-???-??-??-?????-"
+ "??????????-?????????-??-??-??-???-??-??-???-???-????-???-????-??"
+ "-??-??-???-???????-??-???????-???????-?????????-??-????-??????-?"
+ "??????-???-??????????-??-???-??-??-???-???????????-??-??-???????"
+ "??????-??-??-????-??-??-??-??-??-??-??-??-??-??-??-??-??-??-??-?"
+ "?-??-??-???-??-????-??-??-??-??-??-??-????-?????-????-????-?????"
+ "-?\ -\?-=·-…??-??-????-?°4-???-???-??-E?-lN-?P-R?-™Z?ZK-Ce-FM-??"
+ "-??????????|??-??-?~??˜?==-=?-??-??-??-??-??-???-???-?<->?-????-"
+ "?\ ?-???-???????????????????????-???????????????????????-???-??-"
+ "??-??-??-??-??-??-??-??-??-?????????????????????????????????????"
+ "????????????????????????????????????????????????????????????????"
+ "????????????????????????????????????????????????????????????????"
+ "????????????????????????????????????????????????????????????????"
+ "????????????????????????????????????????????????????????????????"
+ "????????????????????????????????????????????????????????????????"
+ "????????????????????????????????????????????-????-????-??-??-??-"
+ "??-??-???-??-??-??-??-??-??-??-??-??-??-??-??-???-?!-~?-??-??-??"
+ "-??-??-??-???-????-????-????-????-????-????-????-????-??????-???"
+ "?-????-??????-????-????-????-????-????-????-????-????-??????-???"
+ "?-????-????-????-??]"*/
}
}

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UnicodeSet.java,v $
* $Date: 2001/12/01 21:46:25 $
* $Revision: 1.50 $
* $Date: 2001/12/03 02:10:24 $
* $Revision: 1.51 $
*
*****************************************************************************************
*/
@ -204,7 +204,7 @@ import com.ibm.util.Utility;
* Unicode property
* </table>
* @author Alan Liu
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.50 $ $Date: 2001/12/01 21:46:25 $
* @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.51 $ $Date: 2001/12/03 02:10:24 $
*/
public class UnicodeSet extends UnicodeFilter {
@ -905,6 +905,33 @@ public class UnicodeSet extends UnicodeFilter {
}
return true;
}
// TODO: Make this public
/**
 * Tests whether at least one code point of the given string is in this set.
 * The string is walked code point by code point: each position is read with
 * UTF16.charAt and the index then advances by the number of UTF-16 code
 * units that code point occupies.
 * @param s the string to examine
 * @return true if some code point of s is contained in this set; false
 * otherwise, including when s is empty
 */
boolean containsSome(String s) {
    int cp;
    // Step by the width of the code point just read. Stepping by
    // getCharCount of the index is wrong: it revisits the second half of a
    // surrogate pair and, once the index reaches 0x10000, skips characters.
    for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
        cp = UTF16.charAt(s, i);
        if (contains(cp)) return true;
    }
    return false;
}
// TODO: Make this public
/**
 * Tests whether every code point of the given string is in this set.
 * The string is walked code point by code point: each position is read with
 * UTF16.charAt and the index then advances by the number of UTF-16 code
 * units that code point occupies.
 * @param s the string to examine
 * @return true if every code point of s is contained in this set (trivially
 * true for an empty string); false as soon as one is not
 */
boolean containsAll(String s) {
    int cp;
    // Step by the width of the code point just read. Stepping by
    // getCharCount of the index is wrong: it revisits the second half of a
    // surrogate pair and, once the index reaches 0x10000, skips characters.
    for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
        cp = UTF16.charAt(s, i);
        if (!contains(cp)) return false;
    }
    return true;
}
/**
* Adds all of the elements in the specified set to this set if

View File

@ -1,6 +1,6 @@
#--------------------------------------------------------------------
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:41:57 2001
#--------------------------------------------------------------------
@ -8,263 +8,266 @@
# Fullwidth-Halfwidth
# Mechanically generated from Unicode Character Database
# IDEOGRAPHIC SPACE then added, and
# FULLWIDTH MACRON changed to map to MACRON, not SPACE + COMBINING MACRON
# multicharacter
ガ<>ガ; # to KATAKANA LETTER GA
ギ<>ギ; # to KATAKANA LETTER GI
グ<>グ; # to KATAKANA LETTER GU
ゲ<>ゲ; # to KATAKANA LETTER GE
ゴ<>ゴ; # to KATAKANA LETTER GO
ザ<>ザ; # to KATAKANA LETTER ZA
ジ<>ジ; # to KATAKANA LETTER ZI
ズ<>ズ; # to KATAKANA LETTER ZU
ゼ<>ゼ; # to KATAKANA LETTER ZE
ゾ<>ゾ; # to KATAKANA LETTER ZO
ダ<>ダ; # to KATAKANA LETTER DA
ヂ<>ヂ; # to KATAKANA LETTER DI
ヅ<>ヅ; # to KATAKANA LETTER DU
デ<>デ; # to KATAKANA LETTER DE
ド<>ド; # to KATAKANA LETTER DO
バ<>バ; # to KATAKANA LETTER BA
パ<>パ; # to KATAKANA LETTER PA
ビ<>ビ; # to KATAKANA LETTER BI
ピ<>ピ; # to KATAKANA LETTER PI
ブ<>ブ; # to KATAKANA LETTER BU
プ<>プ; # to KATAKANA LETTER PU
ベ<>ベ; # to KATAKANA LETTER BE
ペ<>ペ; # to KATAKANA LETTER PE
ボ<>ボ; # to KATAKANA LETTER BO
ポ<>ポ; # to KATAKANA LETTER PO
ヴ<>ヴ; # to KATAKANA LETTER VU
ヷ<>ヷ; # to KATAKANA LETTER VA
ヺ<>ヺ; # to KATAKANA LETTER VO
ガ<>ガ; # to KATAKANA LETTER GA
ギ<>ギ; # to KATAKANA LETTER GI
グ<>グ; # to KATAKANA LETTER GU
ゲ<>ゲ; # to KATAKANA LETTER GE
ゴ<>ゴ; # to KATAKANA LETTER GO
ザ<>ザ; # to KATAKANA LETTER ZA
ジ<>ジ; # to KATAKANA LETTER ZI
ズ<>ズ; # to KATAKANA LETTER ZU
ゼ<>ゼ; # to KATAKANA LETTER ZE
ゾ<>ゾ; # to KATAKANA LETTER ZO
ダ<>ダ; # to KATAKANA LETTER DA
ヂ<>ヂ; # to KATAKANA LETTER DI
ヅ<>ヅ; # to KATAKANA LETTER DU
デ<>デ; # to KATAKANA LETTER DE
ド<>ド; # to KATAKANA LETTER DO
バ<>バ; # to KATAKANA LETTER BA
パ<>パ; # to KATAKANA LETTER PA
ビ<>ビ; # to KATAKANA LETTER BI
ピ<>ピ; # to KATAKANA LETTER PI
ブ<>ブ; # to KATAKANA LETTER BU
プ<>プ; # to KATAKANA LETTER PU
ベ<>ベ; # to KATAKANA LETTER BE
ペ<>ペ; # to KATAKANA LETTER PE
ボ<>ボ; # to KATAKANA LETTER BO
ポ<>ポ; # to KATAKANA LETTER PO
ヴ<>ヴ; # to KATAKANA LETTER VU
ヷ<>ヷ; # to KATAKANA LETTER VA
ヺ<>ヺ; # to KATAKANA LETTER VO
# single character
<>'!'; # from FULLWIDTH EXCLAMATION MARK
<>'\"'; # from FULLWIDTH QUOTATION MARK
<>'#'; # from FULLWIDTH NUMBER SIGN
<>'$'; # from FULLWIDTH DOLLAR SIGN
<>'%'; # from FULLWIDTH PERCENT SIGN
<>'&'; # from FULLWIDTH AMPERSAND
<>''; # from FULLWIDTH APOSTROPHE
<>'('; # from FULLWIDTH LEFT PARENTHESIS
<>')'; # from FULLWIDTH RIGHT PARENTHESIS
<>'*'; # from FULLWIDTH ASTERISK
<>'+'; # from FULLWIDTH PLUS SIGN
<>','; # from FULLWIDTH COMMA
<>'-'; # from FULLWIDTH HYPHEN-MINUS
<>'.'; # from FULLWIDTH FULL STOP
<>'/'; # from FULLWIDTH SOLIDUS
<>'0'; # from FULLWIDTH DIGIT ZERO
<>'1'; # from FULLWIDTH DIGIT ONE
<>'2'; # from FULLWIDTH DIGIT TWO
<>'3'; # from FULLWIDTH DIGIT THREE
<>'4'; # from FULLWIDTH DIGIT FOUR
<>'5'; # from FULLWIDTH DIGIT FIVE
<>'6'; # from FULLWIDTH DIGIT SIX
<>'7'; # from FULLWIDTH DIGIT SEVEN
<>'8'; # from FULLWIDTH DIGIT EIGHT
<>'9'; # from FULLWIDTH DIGIT NINE
<>':'; # from FULLWIDTH COLON
<>';'; # from FULLWIDTH SEMICOLON
<>'<'; # from FULLWIDTH LESS-THAN SIGN
<>'='; # from FULLWIDTH EQUALS SIGN
<>'>'; # from FULLWIDTH GREATER-THAN SIGN
<>'?'; # from FULLWIDTH QUESTION MARK
<>'@'; # from FULLWIDTH COMMERCIAL AT
<>A; # from FULLWIDTH LATIN CAPITAL LETTER A
<>B; # from FULLWIDTH LATIN CAPITAL LETTER B
<>C; # from FULLWIDTH LATIN CAPITAL LETTER C
<>D; # from FULLWIDTH LATIN CAPITAL LETTER D
<>E; # from FULLWIDTH LATIN CAPITAL LETTER E
<>F; # from FULLWIDTH LATIN CAPITAL LETTER F
<>G; # from FULLWIDTH LATIN CAPITAL LETTER G
<>H; # from FULLWIDTH LATIN CAPITAL LETTER H
<>I; # from FULLWIDTH LATIN CAPITAL LETTER I
<>J; # from FULLWIDTH LATIN CAPITAL LETTER J
<>K; # from FULLWIDTH LATIN CAPITAL LETTER K
<>L; # from FULLWIDTH LATIN CAPITAL LETTER L
<>M; # from FULLWIDTH LATIN CAPITAL LETTER M
<>N; # from FULLWIDTH LATIN CAPITAL LETTER N
<>O; # from FULLWIDTH LATIN CAPITAL LETTER O
<>P; # from FULLWIDTH LATIN CAPITAL LETTER P
<>Q; # from FULLWIDTH LATIN CAPITAL LETTER Q
<>R; # from FULLWIDTH LATIN CAPITAL LETTER R
<>S; # from FULLWIDTH LATIN CAPITAL LETTER S
<>T; # from FULLWIDTH LATIN CAPITAL LETTER T
<>U; # from FULLWIDTH LATIN CAPITAL LETTER U
<>V; # from FULLWIDTH LATIN CAPITAL LETTER V
<>W; # from FULLWIDTH LATIN CAPITAL LETTER W
<>X; # from FULLWIDTH LATIN CAPITAL LETTER X
<>Y; # from FULLWIDTH LATIN CAPITAL LETTER Y
<>Z; # from FULLWIDTH LATIN CAPITAL LETTER Z
<>'['; # from FULLWIDTH LEFT SQUARE BRACKET
<>'\\'; # from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
<>']'; # from FULLWIDTH RIGHT SQUARE BRACKET
<>'^'; # from FULLWIDTH CIRCUMFLEX ACCENT
_<>'_'; # from FULLWIDTH LOW LINE
<>'`'; # from FULLWIDTH GRAVE ACCENT
<>a; # from FULLWIDTH LATIN SMALL LETTER A
<>b; # from FULLWIDTH LATIN SMALL LETTER B
<>c; # from FULLWIDTH LATIN SMALL LETTER C
<>d; # from FULLWIDTH LATIN SMALL LETTER D
<>e; # from FULLWIDTH LATIN SMALL LETTER E
<>f; # from FULLWIDTH LATIN SMALL LETTER F
<>g; # from FULLWIDTH LATIN SMALL LETTER G
<>h; # from FULLWIDTH LATIN SMALL LETTER H
<>i; # from FULLWIDTH LATIN SMALL LETTER I
<>j; # from FULLWIDTH LATIN SMALL LETTER J
<>k; # from FULLWIDTH LATIN SMALL LETTER K
<>l; # from FULLWIDTH LATIN SMALL LETTER L
<>m; # from FULLWIDTH LATIN SMALL LETTER M
<>n; # from FULLWIDTH LATIN SMALL LETTER N
<>o; # from FULLWIDTH LATIN SMALL LETTER O
<>p; # from FULLWIDTH LATIN SMALL LETTER P
<>q; # from FULLWIDTH LATIN SMALL LETTER Q
<>r; # from FULLWIDTH LATIN SMALL LETTER R
<>s; # from FULLWIDTH LATIN SMALL LETTER S
<>t; # from FULLWIDTH LATIN SMALL LETTER T
<>u; # from FULLWIDTH LATIN SMALL LETTER U
<>v; # from FULLWIDTH LATIN SMALL LETTER V
<>w; # from FULLWIDTH LATIN SMALL LETTER W
<>x; # from FULLWIDTH LATIN SMALL LETTER X
<>y; # from FULLWIDTH LATIN SMALL LETTER Y
<>z; # from FULLWIDTH LATIN SMALL LETTER Z
<>'{'; # from FULLWIDTH LEFT CURLY BRACKET
<>'|'; # from FULLWIDTH VERTICAL LINE
<>'}'; # from FULLWIDTH RIGHT CURLY BRACKET
<>'~'; # from FULLWIDTH TILDE
。<>。; # to HALFWIDTH IDEOGRAPHIC FULL STOP
「<>「; # to HALFWIDTH LEFT CORNER BRACKET
」<>」; # to HALFWIDTH RIGHT CORNER BRACKET
、<>、; # to HALFWIDTH IDEOGRAPHIC COMMA
・<>・; # to HALFWIDTH KATAKANA MIDDLE DOT
ヲ<>ヲ; # to HALFWIDTH KATAKANA LETTER WO
ァ<>ァ; # to HALFWIDTH KATAKANA LETTER SMALL A
ィ<>ィ; # to HALFWIDTH KATAKANA LETTER SMALL I
ゥ<>ゥ; # to HALFWIDTH KATAKANA LETTER SMALL U
ェ<>ェ; # to HALFWIDTH KATAKANA LETTER SMALL E
ォ<>ォ; # to HALFWIDTH KATAKANA LETTER SMALL O
ャ<>ャ; # to HALFWIDTH KATAKANA LETTER SMALL YA
ュ<>ュ; # to HALFWIDTH KATAKANA LETTER SMALL YU
ョ<>ョ; # to HALFWIDTH KATAKANA LETTER SMALL YO
ッ<>ッ; # to HALFWIDTH KATAKANA LETTER SMALL TU
ー<>ー; # to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
ア<>ア; # to HALFWIDTH KATAKANA LETTER A
イ<>イ; # to HALFWIDTH KATAKANA LETTER I
ウ<>ウ; # to HALFWIDTH KATAKANA LETTER U
エ<>エ; # to HALFWIDTH KATAKANA LETTER E
オ<>オ; # to HALFWIDTH KATAKANA LETTER O
カ<>カ; # to HALFWIDTH KATAKANA LETTER KA
キ<>キ; # to HALFWIDTH KATAKANA LETTER KI
ク<>ク; # to HALFWIDTH KATAKANA LETTER KU
ケ<>ケ; # to HALFWIDTH KATAKANA LETTER KE
コ<>コ; # to HALFWIDTH KATAKANA LETTER KO
サ<>サ; # to HALFWIDTH KATAKANA LETTER SA
シ<>シ; # to HALFWIDTH KATAKANA LETTER SI
ス<>ス; # to HALFWIDTH KATAKANA LETTER SU
セ<>セ; # to HALFWIDTH KATAKANA LETTER SE
ソ<>ソ; # to HALFWIDTH KATAKANA LETTER SO
タ<>タ; # to HALFWIDTH KATAKANA LETTER TA
チ<>チ; # to HALFWIDTH KATAKANA LETTER TI
ツ<>ツ; # to HALFWIDTH KATAKANA LETTER TU
テ<>テ; # to HALFWIDTH KATAKANA LETTER TE
ト<>ト; # to HALFWIDTH KATAKANA LETTER TO
ナ<>ナ; # to HALFWIDTH KATAKANA LETTER NA
ニ<>ニ; # to HALFWIDTH KATAKANA LETTER NI
ヌ<>ヌ; # to HALFWIDTH KATAKANA LETTER NU
ネ<>ネ; # to HALFWIDTH KATAKANA LETTER NE
ノ<>ノ; # to HALFWIDTH KATAKANA LETTER NO
ハ<>ハ; # to HALFWIDTH KATAKANA LETTER HA
ヒ<>ヒ; # to HALFWIDTH KATAKANA LETTER HI
フ<>フ; # to HALFWIDTH KATAKANA LETTER HU
ヘ<>ヘ; # to HALFWIDTH KATAKANA LETTER HE
ホ<>ホ; # to HALFWIDTH KATAKANA LETTER HO
マ<>マ; # to HALFWIDTH KATAKANA LETTER MA
ミ<>ミ; # to HALFWIDTH KATAKANA LETTER MI
ム<>ム; # to HALFWIDTH KATAKANA LETTER MU
メ<>メ; # to HALFWIDTH KATAKANA LETTER ME
モ<>モ; # to HALFWIDTH KATAKANA LETTER MO
ヤ<>ヤ; # to HALFWIDTH KATAKANA LETTER YA
ユ<>ユ; # to HALFWIDTH KATAKANA LETTER YU
ヨ<>ヨ; # to HALFWIDTH KATAKANA LETTER YO
ラ<>ラ; # to HALFWIDTH KATAKANA LETTER RA
リ<>リ; # to HALFWIDTH KATAKANA LETTER RI
ル<>ル; # to HALFWIDTH KATAKANA LETTER RU
レ<>レ; # to HALFWIDTH KATAKANA LETTER RE
ロ<>ロ; # to HALFWIDTH KATAKANA LETTER RO
ワ<>ワ; # to HALFWIDTH KATAKANA LETTER WA
ン<>ン; # to HALFWIDTH KATAKANA LETTER N
゙<>゙; # to HALFWIDTH KATAKANA VOICED SOUND MARK
゚<>゚; # to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
<>; # to HALFWIDTH HANGUL FILLER
ᄀ<>ᄀ; # to HALFWIDTH HANGUL LETTER KIYEOK
ᄁ<>ᄁ; # to HALFWIDTH HANGUL LETTER SSANGKIYEOK
ᆪ<>ᆪ; # to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
ᄂ<>ᄂ; # to HALFWIDTH HANGUL LETTER NIEUN
ᆬ<>ᆬ; # to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
ᆭ<>ᆭ; # to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
ᄃ<>ᄃ; # to HALFWIDTH HANGUL LETTER TIKEUT
ᄄ<>ᄄ; # to HALFWIDTH HANGUL LETTER SSANGTIKEUT
ᄅ<>ᄅ; # to HALFWIDTH HANGUL LETTER RIEUL
ᆰ<>ᆰ; # to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
ᆱ<>ᆱ; # to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
ᆲ<>ᆲ; # to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
ᆳ<>ᆳ; # to HALFWIDTH HANGUL LETTER RIEUL-SIOS
ᆴ<>ᆴ; # to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
ᆵ<>ᆵ; # to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
ᄚ<>ᄚ; # to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
ᄆ<>ᄆ; # to HALFWIDTH HANGUL LETTER MIEUM
ᄇ<>ᄇ; # to HALFWIDTH HANGUL LETTER PIEUP
ᄈ<>ᄈ; # to HALFWIDTH HANGUL LETTER SSANGPIEUP
ᄡ<>ᄡ; # to HALFWIDTH HANGUL LETTER PIEUP-SIOS
ᄉ<>ᄉ; # to HALFWIDTH HANGUL LETTER SIOS
ᄊ<>ᄊ; # to HALFWIDTH HANGUL LETTER SSANGSIOS
ᄋ<>ᄋ; # to HALFWIDTH HANGUL LETTER IEUNG
ᄌ<>ᄌ; # to HALFWIDTH HANGUL LETTER CIEUC
ᄍ<>ᄍ; # to HALFWIDTH HANGUL LETTER SSANGCIEUC
ᄎ<>ᄎ; # to HALFWIDTH HANGUL LETTER CHIEUCH
ᄏ<>ᄏ; # to HALFWIDTH HANGUL LETTER KHIEUKH
ᄐ<>ᄐ; # to HALFWIDTH HANGUL LETTER THIEUTH
ᄑ<>ᄑ; # to HALFWIDTH HANGUL LETTER PHIEUPH
ᄒ<>ᄒ; # to HALFWIDTH HANGUL LETTER HIEUH
ᅡ<>ᅡ; # to HALFWIDTH HANGUL LETTER A
ᅢ<>ᅢ; # to HALFWIDTH HANGUL LETTER AE
ᅣ<>ᅣ; # to HALFWIDTH HANGUL LETTER YA
ᅤ<>ᅤ; # to HALFWIDTH HANGUL LETTER YAE
ᅥ<>ᅥ; # to HALFWIDTH HANGUL LETTER EO
ᅦ<>ᅦ; # to HALFWIDTH HANGUL LETTER E
ᅧ<>ᅧ; # to HALFWIDTH HANGUL LETTER YEO
ᅨ<>ᅨ; # to HALFWIDTH HANGUL LETTER YE
ᅩ<>ᅩ; # to HALFWIDTH HANGUL LETTER O
ᅪ<>ᅪ; # to HALFWIDTH HANGUL LETTER WA
ᅫ<>ᅫ; # to HALFWIDTH HANGUL LETTER WAE
ᅬ<>ᅬ; # to HALFWIDTH HANGUL LETTER OE
ᅭ<>ᅭ; # to HALFWIDTH HANGUL LETTER YO
ᅮ<>ᅮ; # to HALFWIDTH HANGUL LETTER U
ᅯ<>ᅯ; # to HALFWIDTH HANGUL LETTER WEO
ᅰ<>ᅰ; # to HALFWIDTH HANGUL LETTER WE
ᅱ<>ᅱ; # to HALFWIDTH HANGUL LETTER WI
ᅲ<>ᅲ; # to HALFWIDTH HANGUL LETTER YU
ᅳ<>ᅳ; # to HALFWIDTH HANGUL LETTER EU
ᅴ<>ᅴ; # to HALFWIDTH HANGUL LETTER YI
ᅵ<>ᅵ; # to HALFWIDTH HANGUL LETTER I
¢<>'¢'; # from FULLWIDTH CENT SIGN
£<>'£'; # from FULLWIDTH POUND SIGN
¬<>'¬'; # from FULLWIDTH NOT SIGN
 ̄<>' '̄; # from FULLWIDTH MACRON
<>'!'; # from FULLWIDTH EXCLAMATION MARK
<>'\"'; # from FULLWIDTH QUOTATION MARK
<>'#'; # from FULLWIDTH NUMBER SIGN
<>'$'; # from FULLWIDTH DOLLAR SIGN
<>'%'; # from FULLWIDTH PERCENT SIGN
<>'&'; # from FULLWIDTH AMPERSAND
<>''; # from FULLWIDTH APOSTROPHE
<>'('; # from FULLWIDTH LEFT PARENTHESIS
<>')'; # from FULLWIDTH RIGHT PARENTHESIS
<>'*'; # from FULLWIDTH ASTERISK
<>'+'; # from FULLWIDTH PLUS SIGN
<>','; # from FULLWIDTH COMMA
<>'-'; # from FULLWIDTH HYPHEN-MINUS
<>'.'; # from FULLWIDTH FULL STOP
<>'/'; # from FULLWIDTH SOLIDUS
<>'0'; # from FULLWIDTH DIGIT ZERO
<>'1'; # from FULLWIDTH DIGIT ONE
<>'2'; # from FULLWIDTH DIGIT TWO
<>'3'; # from FULLWIDTH DIGIT THREE
<>'4'; # from FULLWIDTH DIGIT FOUR
<>'5'; # from FULLWIDTH DIGIT FIVE
<>'6'; # from FULLWIDTH DIGIT SIX
<>'7'; # from FULLWIDTH DIGIT SEVEN
<>'8'; # from FULLWIDTH DIGIT EIGHT
<>'9'; # from FULLWIDTH DIGIT NINE
<>':'; # from FULLWIDTH COLON
<>';'; # from FULLWIDTH SEMICOLON
<>'<'; # from FULLWIDTH LESS-THAN SIGN
<>'='; # from FULLWIDTH EQUALS SIGN
<>'>'; # from FULLWIDTH GREATER-THAN SIGN
<>'?'; # from FULLWIDTH QUESTION MARK
<>'@'; # from FULLWIDTH COMMERCIAL AT
<>A; # from FULLWIDTH LATIN CAPITAL LETTER A
<>B; # from FULLWIDTH LATIN CAPITAL LETTER B
<>C; # from FULLWIDTH LATIN CAPITAL LETTER C
<>D; # from FULLWIDTH LATIN CAPITAL LETTER D
<>E; # from FULLWIDTH LATIN CAPITAL LETTER E
<>F; # from FULLWIDTH LATIN CAPITAL LETTER F
<>G; # from FULLWIDTH LATIN CAPITAL LETTER G
<>H; # from FULLWIDTH LATIN CAPITAL LETTER H
<>I; # from FULLWIDTH LATIN CAPITAL LETTER I
<>J; # from FULLWIDTH LATIN CAPITAL LETTER J
<>K; # from FULLWIDTH LATIN CAPITAL LETTER K
<>L; # from FULLWIDTH LATIN CAPITAL LETTER L
<>M; # from FULLWIDTH LATIN CAPITAL LETTER M
<>N; # from FULLWIDTH LATIN CAPITAL LETTER N
<>O; # from FULLWIDTH LATIN CAPITAL LETTER O
<>P; # from FULLWIDTH LATIN CAPITAL LETTER P
<>Q; # from FULLWIDTH LATIN CAPITAL LETTER Q
<>R; # from FULLWIDTH LATIN CAPITAL LETTER R
<>S; # from FULLWIDTH LATIN CAPITAL LETTER S
<>T; # from FULLWIDTH LATIN CAPITAL LETTER T
<>U; # from FULLWIDTH LATIN CAPITAL LETTER U
<>V; # from FULLWIDTH LATIN CAPITAL LETTER V
<>W; # from FULLWIDTH LATIN CAPITAL LETTER W
<>X; # from FULLWIDTH LATIN CAPITAL LETTER X
<>Y; # from FULLWIDTH LATIN CAPITAL LETTER Y
<>Z; # from FULLWIDTH LATIN CAPITAL LETTER Z
<>'['; # from FULLWIDTH LEFT SQUARE BRACKET
<>'\\'; # from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
<>']'; # from FULLWIDTH RIGHT SQUARE BRACKET
<>'^'; # from FULLWIDTH CIRCUMFLEX ACCENT
_<>'_'; # from FULLWIDTH LOW LINE
<>'`'; # from FULLWIDTH GRAVE ACCENT
<>a; # from FULLWIDTH LATIN SMALL LETTER A
<>b; # from FULLWIDTH LATIN SMALL LETTER B
<>c; # from FULLWIDTH LATIN SMALL LETTER C
<>d; # from FULLWIDTH LATIN SMALL LETTER D
<>e; # from FULLWIDTH LATIN SMALL LETTER E
<>f; # from FULLWIDTH LATIN SMALL LETTER F
<>g; # from FULLWIDTH LATIN SMALL LETTER G
<>h; # from FULLWIDTH LATIN SMALL LETTER H
<>i; # from FULLWIDTH LATIN SMALL LETTER I
<>j; # from FULLWIDTH LATIN SMALL LETTER J
<>k; # from FULLWIDTH LATIN SMALL LETTER K
<>l; # from FULLWIDTH LATIN SMALL LETTER L
<>m; # from FULLWIDTH LATIN SMALL LETTER M
<>n; # from FULLWIDTH LATIN SMALL LETTER N
<>o; # from FULLWIDTH LATIN SMALL LETTER O
<>p; # from FULLWIDTH LATIN SMALL LETTER P
<>q; # from FULLWIDTH LATIN SMALL LETTER Q
<>r; # from FULLWIDTH LATIN SMALL LETTER R
<>s; # from FULLWIDTH LATIN SMALL LETTER S
<>t; # from FULLWIDTH LATIN SMALL LETTER T
<>u; # from FULLWIDTH LATIN SMALL LETTER U
<>v; # from FULLWIDTH LATIN SMALL LETTER V
<>w; # from FULLWIDTH LATIN SMALL LETTER W
<>x; # from FULLWIDTH LATIN SMALL LETTER X
<>y; # from FULLWIDTH LATIN SMALL LETTER Y
<>z; # from FULLWIDTH LATIN SMALL LETTER Z
<>'{'; # from FULLWIDTH LEFT CURLY BRACKET
<>'|'; # from FULLWIDTH VERTICAL LINE
<>'}'; # from FULLWIDTH RIGHT CURLY BRACKET
<>'~'; # from FULLWIDTH TILDE
。<>。; # to HALFWIDTH IDEOGRAPHIC FULL STOP
「<>「; # to HALFWIDTH LEFT CORNER BRACKET
」<>」; # to HALFWIDTH RIGHT CORNER BRACKET
、<>、; # to HALFWIDTH IDEOGRAPHIC COMMA
・<>・; # to HALFWIDTH KATAKANA MIDDLE DOT
ヲ<>ヲ; # to HALFWIDTH KATAKANA LETTER WO
ァ<>ァ; # to HALFWIDTH KATAKANA LETTER SMALL A
ィ<>ィ; # to HALFWIDTH KATAKANA LETTER SMALL I
ゥ<>ゥ; # to HALFWIDTH KATAKANA LETTER SMALL U
ェ<>ェ; # to HALFWIDTH KATAKANA LETTER SMALL E
ォ<>ォ; # to HALFWIDTH KATAKANA LETTER SMALL O
ャ<>ャ; # to HALFWIDTH KATAKANA LETTER SMALL YA
ュ<>ュ; # to HALFWIDTH KATAKANA LETTER SMALL YU
ョ<>ョ; # to HALFWIDTH KATAKANA LETTER SMALL YO
ッ<>ッ; # to HALFWIDTH KATAKANA LETTER SMALL TU
ー<>ー; # to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
ア<>ア; # to HALFWIDTH KATAKANA LETTER A
イ<>イ; # to HALFWIDTH KATAKANA LETTER I
ウ<>ウ; # to HALFWIDTH KATAKANA LETTER U
エ<>エ; # to HALFWIDTH KATAKANA LETTER E
オ<>オ; # to HALFWIDTH KATAKANA LETTER O
カ<>カ; # to HALFWIDTH KATAKANA LETTER KA
キ<>キ; # to HALFWIDTH KATAKANA LETTER KI
ク<>ク; # to HALFWIDTH KATAKANA LETTER KU
ケ<>ケ; # to HALFWIDTH KATAKANA LETTER KE
コ<>コ; # to HALFWIDTH KATAKANA LETTER KO
サ<>サ; # to HALFWIDTH KATAKANA LETTER SA
シ<>シ; # to HALFWIDTH KATAKANA LETTER SI
ス<>ス; # to HALFWIDTH KATAKANA LETTER SU
セ<>セ; # to HALFWIDTH KATAKANA LETTER SE
ソ<>ソ; # to HALFWIDTH KATAKANA LETTER SO
タ<>タ; # to HALFWIDTH KATAKANA LETTER TA
チ<>チ; # to HALFWIDTH KATAKANA LETTER TI
ツ<>ツ; # to HALFWIDTH KATAKANA LETTER TU
テ<>テ; # to HALFWIDTH KATAKANA LETTER TE
ト<>ト; # to HALFWIDTH KATAKANA LETTER TO
ナ<>ナ; # to HALFWIDTH KATAKANA LETTER NA
ニ<>ニ; # to HALFWIDTH KATAKANA LETTER NI
ヌ<>ヌ; # to HALFWIDTH KATAKANA LETTER NU
ネ<>ネ; # to HALFWIDTH KATAKANA LETTER NE
ノ<>ノ; # to HALFWIDTH KATAKANA LETTER NO
ハ<>ハ; # to HALFWIDTH KATAKANA LETTER HA
ヒ<>ヒ; # to HALFWIDTH KATAKANA LETTER HI
フ<>フ; # to HALFWIDTH KATAKANA LETTER HU
ヘ<>ヘ; # to HALFWIDTH KATAKANA LETTER HE
ホ<>ホ; # to HALFWIDTH KATAKANA LETTER HO
マ<>マ; # to HALFWIDTH KATAKANA LETTER MA
ミ<>ミ; # to HALFWIDTH KATAKANA LETTER MI
ム<>ム; # to HALFWIDTH KATAKANA LETTER MU
メ<>メ; # to HALFWIDTH KATAKANA LETTER ME
モ<>モ; # to HALFWIDTH KATAKANA LETTER MO
ヤ<>ヤ; # to HALFWIDTH KATAKANA LETTER YA
ユ<>ユ; # to HALFWIDTH KATAKANA LETTER YU
ヨ<>ヨ; # to HALFWIDTH KATAKANA LETTER YO
ラ<>ラ; # to HALFWIDTH KATAKANA LETTER RA
リ<>リ; # to HALFWIDTH KATAKANA LETTER RI
ル<>ル; # to HALFWIDTH KATAKANA LETTER RU
レ<>レ; # to HALFWIDTH KATAKANA LETTER RE
ロ<>ロ; # to HALFWIDTH KATAKANA LETTER RO
ワ<>ワ; # to HALFWIDTH KATAKANA LETTER WA
ン<>ン; # to HALFWIDTH KATAKANA LETTER N
゙<>゙; # to HALFWIDTH KATAKANA VOICED SOUND MARK
゚<>゚; # to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
<>; # to HALFWIDTH HANGUL FILLER
ᄀ<>ᄀ; # to HALFWIDTH HANGUL LETTER KIYEOK
ᄁ<>ᄁ; # to HALFWIDTH HANGUL LETTER SSANGKIYEOK
ᆪ<>ᆪ; # to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
ᄂ<>ᄂ; # to HALFWIDTH HANGUL LETTER NIEUN
ᆬ<>ᆬ; # to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
ᆭ<>ᆭ; # to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
ᄃ<>ᄃ; # to HALFWIDTH HANGUL LETTER TIKEUT
ᄄ<>ᄄ; # to HALFWIDTH HANGUL LETTER SSANGTIKEUT
ᄅ<>ᄅ; # to HALFWIDTH HANGUL LETTER RIEUL
ᆰ<>ᆰ; # to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
ᆱ<>ᆱ; # to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
ᆲ<>ᆲ; # to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
ᆳ<>ᆳ; # to HALFWIDTH HANGUL LETTER RIEUL-SIOS
ᆴ<>ᆴ; # to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
ᆵ<>ᆵ; # to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
ᄚ<>ᄚ; # to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
ᄆ<>ᄆ; # to HALFWIDTH HANGUL LETTER MIEUM
ᄇ<>ᄇ; # to HALFWIDTH HANGUL LETTER PIEUP
ᄈ<>ᄈ; # to HALFWIDTH HANGUL LETTER SSANGPIEUP
ᄡ<>ᄡ; # to HALFWIDTH HANGUL LETTER PIEUP-SIOS
ᄉ<>ᄉ; # to HALFWIDTH HANGUL LETTER SIOS
ᄊ<>ᄊ; # to HALFWIDTH HANGUL LETTER SSANGSIOS
ᄋ<>ᄋ; # to HALFWIDTH HANGUL LETTER IEUNG
ᄌ<>ᄌ; # to HALFWIDTH HANGUL LETTER CIEUC
ᄍ<>ᄍ; # to HALFWIDTH HANGUL LETTER SSANGCIEUC
ᄎ<>ᄎ; # to HALFWIDTH HANGUL LETTER CHIEUCH
ᄏ<>ᄏ; # to HALFWIDTH HANGUL LETTER KHIEUKH
ᄐ<>ᄐ; # to HALFWIDTH HANGUL LETTER THIEUTH
ᄑ<>ᄑ; # to HALFWIDTH HANGUL LETTER PHIEUPH
ᄒ<>ᄒ; # to HALFWIDTH HANGUL LETTER HIEUH
ᅡ<>ᅡ; # to HALFWIDTH HANGUL LETTER A
ᅢ<>ᅢ; # to HALFWIDTH HANGUL LETTER AE
ᅣ<>ᅣ; # to HALFWIDTH HANGUL LETTER YA
ᅤ<>ᅤ; # to HALFWIDTH HANGUL LETTER YAE
ᅥ<>ᅥ; # to HALFWIDTH HANGUL LETTER EO
ᅦ<>ᅦ; # to HALFWIDTH HANGUL LETTER E
ᅧ<>ᅧ; # to HALFWIDTH HANGUL LETTER YEO
ᅨ<>ᅨ; # to HALFWIDTH HANGUL LETTER YE
ᅩ<>ᅩ; # to HALFWIDTH HANGUL LETTER O
ᅪ<>ᅪ; # to HALFWIDTH HANGUL LETTER WA
ᅫ<>ᅫ; # to HALFWIDTH HANGUL LETTER WAE
ᅬ<>ᅬ; # to HALFWIDTH HANGUL LETTER OE
ᅭ<>ᅭ; # to HALFWIDTH HANGUL LETTER YO
ᅮ<>ᅮ; # to HALFWIDTH HANGUL LETTER U
ᅯ<>ᅯ; # to HALFWIDTH HANGUL LETTER WEO
ᅰ<>ᅰ; # to HALFWIDTH HANGUL LETTER WE
ᅱ<>ᅱ; # to HALFWIDTH HANGUL LETTER WI
ᅲ<>ᅲ; # to HALFWIDTH HANGUL LETTER YU
ᅳ<>ᅳ; # to HALFWIDTH HANGUL LETTER EU
ᅴ<>ᅴ; # to HALFWIDTH HANGUL LETTER YI
ᅵ<>ᅵ; # to HALFWIDTH HANGUL LETTER I
¢<>'¢'; # from FULLWIDTH CENT SIGN
£<>'£'; # from FULLWIDTH POUND SIGN
¬<>'¬'; # from FULLWIDTH NOT SIGN
 ̄<>'¯'; # from FULLWIDTH MACRON
' '<>' '; # ideographic space (place this after MACRON)
¦<>'¦'; # from FULLWIDTH BROKEN BAR
¥<>'¥'; # from FULLWIDTH YEN SIGN
₩<>₩; # from FULLWIDTH WON SIGN
│<>￨; # to HALFWIDTH FORMS LIGHT VERTICAL
←<>←; # to HALFWIDTH LEFTWARDS ARROW
↑<>↑; # to HALFWIDTH UPWARDS ARROW
→<>→; # to HALFWIDTH RIGHTWARDS ARROW
↓<>↓; # to HALFWIDTH DOWNWARDS ARROW
■<>■; # to HALFWIDTH BLACK SQUARE
○<>○; # to HALFWIDTH WHITE CIRCLE
¦<>'¦'; # from FULLWIDTH BROKEN BAR
¥<>'¥'; # from FULLWIDTH YEN SIGN
₩<>₩; # from FULLWIDTH WON SIGN
│<>￨; # to HALFWIDTH FORMS LIGHT VERTICAL
←<>←; # to HALFWIDTH LEFTWARDS ARROW
↑<>↑; # to HALFWIDTH UPWARDS ARROW
→<>→; # to HALFWIDTH RIGHTWARDS ARROW
↓<>↓; # to HALFWIDTH DOWNWARDS ARROW
■<>■; # to HALFWIDTH BLACK SQUARE
○<>○; # to HALFWIDTH WHITE CIRCLE
# eof

View File

@ -3,8 +3,8 @@
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/Transliterator_Latin_Katakana.txt,v $
# $Date: 2001/12/01 00:51:28 $
# $Revision: 1.18 $
# $Date: 2001/12/03 02:10:26 $
# $Revision: 1.19 $
#--------------------------------------------------------------------
# note: a global filter is more efficient, but MUST include all source chars
@ -13,7 +13,7 @@
### WARNING -- must add width filter, both here and below!!! ###
:: [[\u1100-\u1112\u111A\u1121\u1160-\u1175\u11AA\u11AC-\u11AD\u11B0-\u11B5\u2190-\u2193\u2502\u25A0\u25CB\u3000-\u3002\u300C-\u300D\u3099-\u309A\u30A1-\u30ED\u30EF\u30F2-\u30F4\u30F7\u30FA-\u30FC\uFF01-\uFF5E\uFFE0-\uFFE6][',.A-Za-z~\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0304\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1FB1\u1FB9\u1FD1\u1FD9\u1FE1\u1FE9\u212A-\u212B]] ;
:: fullwidth-halfwidth ();
:: [:Latin:] fullwidth-halfwidth ();
:: NFD (NFC);
:: Lower (); # whenever transliterating from cased to uncased script, include this
# :: NFD () ; # this would catch the odd cases where a lowercase is not in NFD, but none are important for Japanese
@ -489,7 +489,7 @@ x > | ks ;
# [\u02BE[:Nonspacing Mark:]-[\u3099-\u309C]] > ; # delete any non-spacing marks that we didn't use
:: NFC (NFD) ;
:: (halfwidth-fullwidth);
:: ([:Katakana:] halfwidth-fullwidth);
# note: a global filter is more efficient, but MUST include all source chars!!
#:: ([\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9F [:Latin:][:Katakana:] [:nonspacing mark:]]);