ICU-20893 Unicode 13 data 2020feb19

This commit is contained in:
Markus Scherer 2020-02-19 15:17:30 -08:00
parent d95621c57f
commit af9ef2650b
31 changed files with 2278 additions and 2221 deletions

View File

@ -323,7 +323,7 @@ static const uint16_t ubidi_props_trieIndex[12536]={
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,0xb1,0xb1,0xb1,0xb1,1,0xb1,0xb1,0xb1,0xb1,0xb1,0x81,0x41,0x41,0x41,
0x41,0x41,0x81,0x81,0x41,0x81,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,
0x81,0x41,1,1,1,0xb1,0xb1,0xb1,1,1,1,1,0x4d,0xd,0x4d,0x4d,
0x81,0x41,0x81,0x81,0x81,0xb1,0xb1,0xb1,1,1,1,1,0x4d,0xd,0x4d,0x4d,
0x4d,0x4d,0xd,0x8d,0x4d,0x8d,0x8d,0xd,0xd,0xd,0xd,0xd,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,0xb1,0xb1,5,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,

View File

@ -304,7 +304,7 @@ static const uint16_t ucase_props_trieIndex[12356]={
0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
0x92,0xff91,0x92,0xff91,0,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,
0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0,
0,4,0,0,0,0,0,0,1,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,
0,4,0,0,0,0,0,4,1,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,
0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,
0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0x1719,1,0,0,0,
0,0,0,0,0,0x64,0x44,0x44,0x44,0x44,0x64,0x44,0x44,0x44,0x64,0x64,

File diff suppressed because it is too large Load Diff

View File

@ -27,10 +27,12 @@
# Character Class Definitions.
#
$Han = [:Han:];
$CR = [\p{Word_Break = CR}];
$LF = [\p{Word_Break = LF}];
$Newline = [\p{Word_Break = Newline} ];
$Extend = [\p{Word_Break = Extend}];
$Newline = [\p{Word_Break = Newline}];
$Extend = [\p{Word_Break = Extend}-$Han];
$ZWJ = [\p{Word_Break = ZWJ}];
$Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
$Format = [\p{Word_Break = Format}];
@ -42,12 +44,11 @@ $Double_Quote = [\p{Word_Break = Double_Quote}];
$MidNumLet = [\p{Word_Break = MidNumLet}];
$MidLetter = [\p{Word_Break = MidLetter}];
$MidNum = [\p{Word_Break = MidNum}];
$Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079
$Numeric = [\p{Word_Break = Numeric}];
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
$WSegSpace = [\p{Word_Break = WSegSpace}];
$Extended_Pict = [\p{Extended_Pictographic}];
$Han = [:Han:];
$Hiragana = [:Hiragana:];
$Ideographic = [\p{Ideographic}];

View File

@ -27,10 +27,12 @@
# Character Class Definitions.
#
$Han = [:Han:];
$CR = [\p{Word_Break = CR}];
$LF = [\p{Word_Break = LF}];
$Newline = [\p{Word_Break = Newline} ];
$Extend = [\p{Word_Break = Extend}];
$Newline = [\p{Word_Break = Newline}];
$Extend = [\p{Word_Break = Extend}-$Han];
$ZWJ = [\p{Word_Break = ZWJ}];
$Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
$Format = [\p{Word_Break = Format}];
@ -42,12 +44,11 @@ $Double_Quote = [\p{Word_Break = Double_Quote}];
$MidNumLet = [\p{Word_Break = MidNumLet} - [.]];
$MidLetter = [\p{Word_Break = MidLetter} - [\:]];
$MidNum = [\p{Word_Break = MidNum} [.]];
$Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079
$Numeric = [\p{Word_Break = Numeric}];
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
$WSegSpace = [\p{Word_Break = WSegSpace}];
$Extended_Pict = [\p{Extended_Pictographic}];
$Han = [:Han:];
$Hiragana = [:Hiragana:];
$Ideographic = [\p{Ideographic}];

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,6 +1,6 @@
# DerivedCoreProperties-13.0.0.txt
# Date: 2019-10-21, 14:30:30 GMT
# © 2019 Unicode®, Inc.
# Date: 2020-01-22, 00:07:19 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@ -2873,6 +2873,7 @@ FF41..FF5A ; Cased
0483..0487 ; Case_Ignorable
0488..0489 ; Case_Ignorable
0559 ; Case_Ignorable
055F ; Case_Ignorable
0591..05BD ; Case_Ignorable
05BF ; Case_Ignorable
05C1..05C2 ; Case_Ignorable
@ -3303,7 +3304,7 @@ E0001 ; Case_Ignorable
E0020..E007F ; Case_Ignorable
E0100..E01EF ; Case_Ignorable
# Total code points: 2412
# Total code points: 2413
# ================================================

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
# UCA_Rules_SHORT.txt
# Date: 2019-11-08, 22:14:11 GMT
# © 2019 Unicode®, Inc.
# Date: 2020-02-12, 17:50:33 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# UCA Version: 13.0.0
@ -19518,6 +19518,7 @@
< ꦱ
< ꦲ
< ꦴ
<<< ꦵ
< ꦼ
< ꦶ
< ꦷ
@ -19526,7 +19527,6 @@
< ꦽ
< ꦺ
< ꦻ
< ꦵ
< ꧀
< ᢀ
< ᢁ

View File

@ -62,7 +62,7 @@ https://unicode-org.atlassian.net/browse/ICU-20893
* Command-line environment setup
UNICODE_DATA=~/unidata/uni13/20191106
UNICODE_DATA=~/unidata/uni13/20200212
CLDR_SRC=~/cldr/uni/src
ICU_ROOT=~/icu/uni
ICU_SRC=$ICU_ROOT/src
@ -89,9 +89,12 @@ export LD_LIBRARY_PATH=$ICU_ROOT/dbg/icu4c/lib
- download Unicode files into $UNICODE_DATA
+ subfolders: emoji, idna, security, ucd, uca
+ inside ucd: extract Unihan.zip to "here" (.../ucd/Unihan/*.txt), delete Unihan.zip
+ split Unihan into single-property files
~/unitools/trunk/src$ py/splitunihan.py $UNICODE_DATA/ucd/Unihan
+ get GraphemeBreakTest-cldr.txt from $CLDR_SRC/common/properties/segments/GraphemeBreakTest.txt
or from the ucd/cldr/ output folder of the Unicode Tools:
Since Unicode 12/CLDR 35/ICU 64 CLDR uses modified break rules.
cp $CLDR_SRC/common/properties/segments/GraphemeBreakTest.txt icu4c/source/test/testdata
* for manual diffs and for Unicode Tools input data updates:
remove version suffixes from the file names
@ -155,7 +158,7 @@ export LD_LIBRARY_PATH=$ICU_ROOT/dbg/icu4c/lib
$ICU_ROOT/dbg/icu4c$ echo;echo; date; make -j7 install &> out.txt ; tail -n 30 out.txt ; date
* update spoof checker UnicodeSet initializers:
inclusionPat & recommendedPat in uspoof.cpp
inclusionPat & recommendedPat in i18n/uspoof.cpp
INCLUSION & RECOMMENDED in SpoofChecker.java
- make sure that the Unicode Tools tree contains the latest security data files
- go to Unicode Tools org.unicode.text.tools.RecommendedSetGenerator

View File

@ -1,6 +1,6 @@
# confusables.txt
# Date: 2019-10-22, 13:05:29 GMT
# © 2019 Unicode®, Inc.
# Date: 2020-02-13, 01:38:49 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@ -1358,6 +1358,10 @@ FFED ; 25AA ; MA #* ( ■ → ▪ ) HALFWIDTH BLACK SQUARE → BLACK SMALL SQUAR
266A ; 1D158 1D165 1D16E ; MA #* ( ♪ → 𝅘𝅥𝅮 ) EIGHTH NOTE → MUSICAL SYMBOL NOTEHEAD BLACK, MUSICAL SYMBOL COMBINING STEM, MUSICAL SYMBOL COMBINING FLAG-1 #
24EA ; 1F10D ; MA #* ( ⓪ → 🄍 ) CIRCLED DIGIT ZERO → CIRCLED ZERO WITH SLASH #
21BA ; 1F10E ; MA #* ( ↺ → 🄎 ) ANTICLOCKWISE OPEN CIRCLE ARROW → CIRCLED ANTICLOCKWISE ARROW #
02D9 ; 0971 ; MA #* ( ˙ → ॱ ) DOT ABOVE → DEVANAGARI SIGN HIGH SPACING DOT #
0D4E ; 0971 ; MA # ( ൎ → ॱ ) MALAYALAM LETTER DOT REPH → DEVANAGARI SIGN HIGH SPACING DOT # →˙→
@ -1418,13 +1422,13 @@ A9C6 ; A9D0 ; MA #* ( ꧆ → ꧐ ) JAVANESE PADA WINDU → JAVANESE DIGIT ZERO
1D7E4 ; 0032 ; MA # ( 𝟤 → 2 ) MATHEMATICAL SANS-SERIF DIGIT TWO → DIGIT TWO #
1D7EE ; 0032 ; MA # ( 𝟮 → 2 ) MATHEMATICAL SANS-SERIF BOLD DIGIT TWO → DIGIT TWO #
1D7F8 ; 0032 ; MA # ( 𝟸 → 2 ) MATHEMATICAL MONOSPACE DIGIT TWO → DIGIT TWO #
1FBF2 ; 0032 ; MA # ( 🯲 → 2 ) SEGMENTED DIGIT TWO → DIGIT TWO #
A75A ; 0032 ; MA # ( → 2 ) LATIN CAPITAL LETTER R ROTUNDA → DIGIT TWO #
01A7 ; 0032 ; MA # ( Ƨ → 2 ) LATIN CAPITAL LETTER TONE TWO → DIGIT TWO #
03E8 ; 0032 ; MA # ( Ϩ → 2 ) COPTIC CAPITAL LETTER HORI → DIGIT TWO # →Ƨ→
A644 ; 0032 ; MA # ( → 2 ) CYRILLIC CAPITAL LETTER REVERSED DZE → DIGIT TWO # →Ƨ→
14BF ; 0032 ; MA # ( → 2 ) CANADIAN SYLLABICS SAYISI M → DIGIT TWO #
A6EF ; 0032 ; MA # ( → 2 ) BAMUM LETTER KOGHOM → DIGIT TWO # →Ƨ→
1FBF2 ; 0032 ; MA # ( 🯲 → 2 ) SEGMENTED DIGIT TWO → DIGIT TWO #
A9CF ; 0662 ; MA # ( ꧏ → ‎٢‎ ) JAVANESE PANGRANGKEP → ARABIC-INDIC DIGIT TWO #
06F2 ; 0662 ; MA # ( ۲ → ‎٢‎ ) EXTENDED ARABIC-INDIC DIGIT TWO → ARABIC-INDIC DIGIT TWO #
@ -1491,6 +1495,7 @@ A9CF ; 0662 ; MA # ( ꧏ → ‎٢‎ ) JAVANESE PANGRANGKEP → ARABIC-INDIC DI
1D7E5 ; 0033 ; MA # ( 𝟥 → 3 ) MATHEMATICAL SANS-SERIF DIGIT THREE → DIGIT THREE #
1D7EF ; 0033 ; MA # ( 𝟯 → 3 ) MATHEMATICAL SANS-SERIF BOLD DIGIT THREE → DIGIT THREE #
1D7F9 ; 0033 ; MA # ( 𝟹 → 3 ) MATHEMATICAL MONOSPACE DIGIT THREE → DIGIT THREE #
1FBF3 ; 0033 ; MA # ( 🯳 → 3 ) SEGMENTED DIGIT THREE → DIGIT THREE #
A7AB ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER REVERSED OPEN E → DIGIT THREE #
021C ; 0033 ; MA # ( Ȝ → 3 ) LATIN CAPITAL LETTER YOGH → DIGIT THREE # →Ʒ→
01B7 ; 0033 ; MA # ( Ʒ → 3 ) LATIN CAPITAL LETTER EZH → DIGIT THREE #
@ -1500,7 +1505,6 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
04E0 ; 0033 ; MA # ( Ӡ → 3 ) CYRILLIC CAPITAL LETTER ABKHASIAN DZE → DIGIT THREE # →Ʒ→
16F3B ; 0033 ; MA # ( 𖼻 → 3 ) MIAO LETTER ZA → DIGIT THREE # →Ʒ→
118CA ; 0033 ; MA # ( 𑣊 → 3 ) WARANG CITI SMALL LETTER ANG → DIGIT THREE #
1FBF3 ; 0033 ; MA # ( 🯳 → 3 ) SEGMENTED DIGIT THREE → DIGIT THREE #
06F3 ; 0663 ; MA # ( ۳ → ‎٣‎ ) EXTENDED ARABIC-INDIC DIGIT THREE → ARABIC-INDIC DIGIT THREE #
1E8C9 ; 0663 ; MA #* ( ‎𞣉‎ → ‎٣‎ ) MENDE KIKAKUI DIGIT THREE → ARABIC-INDIC DIGIT THREE #
@ -1530,9 +1534,9 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7E6 ; 0034 ; MA # ( 𝟦 → 4 ) MATHEMATICAL SANS-SERIF DIGIT FOUR → DIGIT FOUR #
1D7F0 ; 0034 ; MA # ( 𝟰 → 4 ) MATHEMATICAL SANS-SERIF BOLD DIGIT FOUR → DIGIT FOUR #
1D7FA ; 0034 ; MA # ( 𝟺 → 4 ) MATHEMATICAL MONOSPACE DIGIT FOUR → DIGIT FOUR #
1FBF4 ; 0034 ; MA # ( 🯴 → 4 ) SEGMENTED DIGIT FOUR → DIGIT FOUR #
13CE ; 0034 ; MA # ( → 4 ) CHEROKEE LETTER SE → DIGIT FOUR #
118AF ; 0034 ; MA # ( 𑢯 → 4 ) WARANG CITI CAPITAL LETTER UC → DIGIT FOUR #
1FBF4 ; 0034 ; MA # ( 🯴 → 4 ) SEGMENTED DIGIT FOUR → DIGIT FOUR #
06F4 ; 0664 ; MA # ( ۴ → ‎٤‎ ) EXTENDED ARABIC-INDIC DIGIT FOUR → ARABIC-INDIC DIGIT FOUR #
@ -1557,9 +1561,9 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7E7 ; 0035 ; MA # ( 𝟧 → 5 ) MATHEMATICAL SANS-SERIF DIGIT FIVE → DIGIT FIVE #
1D7F1 ; 0035 ; MA # ( 𝟱 → 5 ) MATHEMATICAL SANS-SERIF BOLD DIGIT FIVE → DIGIT FIVE #
1D7FB ; 0035 ; MA # ( 𝟻 → 5 ) MATHEMATICAL MONOSPACE DIGIT FIVE → DIGIT FIVE #
1FBF5 ; 0035 ; MA # ( 🯵 → 5 ) SEGMENTED DIGIT FIVE → DIGIT FIVE #
01BC ; 0035 ; MA # ( Ƽ → 5 ) LATIN CAPITAL LETTER TONE FIVE → DIGIT FIVE #
118BB ; 0035 ; MA # ( 𑢻 → 5 ) WARANG CITI CAPITAL LETTER HORR → DIGIT FIVE #
1FBF5 ; 0035 ; MA # ( 🯵 → 5 ) SEGMENTED DIGIT FIVE → DIGIT FIVE #
2464 ; 2784 ; MA #* ( ⑤ → ➄ ) CIRCLED DIGIT FIVE → DINGBAT CIRCLED SANS-SERIF DIGIT FIVE #
@ -1578,11 +1582,11 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7E8 ; 0036 ; MA # ( 𝟨 → 6 ) MATHEMATICAL SANS-SERIF DIGIT SIX → DIGIT SIX #
1D7F2 ; 0036 ; MA # ( 𝟲 → 6 ) MATHEMATICAL SANS-SERIF BOLD DIGIT SIX → DIGIT SIX #
1D7FC ; 0036 ; MA # ( 𝟼 → 6 ) MATHEMATICAL MONOSPACE DIGIT SIX → DIGIT SIX #
1FBF6 ; 0036 ; MA # ( 🯶 → 6 ) SEGMENTED DIGIT SIX → DIGIT SIX #
2CD2 ; 0036 ; MA # ( → 6 ) COPTIC CAPITAL LETTER OLD COPTIC HEI → DIGIT SIX #
0431 ; 0036 ; MA # ( б → 6 ) CYRILLIC SMALL LETTER BE → DIGIT SIX #
13EE ; 0036 ; MA # ( → 6 ) CHEROKEE LETTER WV → DIGIT SIX #
118D5 ; 0036 ; MA # ( 𑣕 → 6 ) WARANG CITI SMALL LETTER AT → DIGIT SIX #
1FBF6 ; 0036 ; MA # ( 🯶 → 6 ) SEGMENTED DIGIT SIX → DIGIT SIX #
06F6 ; 0666 ; MA # ( ۶ → ‎٦‎ ) EXTENDED ARABIC-INDIC DIGIT SIX → ARABIC-INDIC DIGIT SIX #
@ -1606,9 +1610,9 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7E9 ; 0037 ; MA # ( 𝟩 → 7 ) MATHEMATICAL SANS-SERIF DIGIT SEVEN → DIGIT SEVEN #
1D7F3 ; 0037 ; MA # ( 𝟳 → 7 ) MATHEMATICAL SANS-SERIF BOLD DIGIT SEVEN → DIGIT SEVEN #
1D7FD ; 0037 ; MA # ( 𝟽 → 7 ) MATHEMATICAL MONOSPACE DIGIT SEVEN → DIGIT SEVEN #
1FBF7 ; 0037 ; MA # ( 🯷 → 7 ) SEGMENTED DIGIT SEVEN → DIGIT SEVEN #
104D2 ; 0037 ; MA # ( 𐓒 → 7 ) OSAGE CAPITAL LETTER ZA → DIGIT SEVEN #
118C6 ; 0037 ; MA # ( 𑣆 → 7 ) WARANG CITI SMALL LETTER II → DIGIT SEVEN #
1FBF7 ; 0037 ; MA # ( 🯷 → 7 ) SEGMENTED DIGIT SEVEN → DIGIT SEVEN #
2466 ; 2786 ; MA #* ( ⑦ → ➆ ) CIRCLED DIGIT SEVEN → DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN #
@ -1631,10 +1635,10 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7EA ; 0038 ; MA # ( 𝟪 → 8 ) MATHEMATICAL SANS-SERIF DIGIT EIGHT → DIGIT EIGHT #
1D7F4 ; 0038 ; MA # ( 𝟴 → 8 ) MATHEMATICAL SANS-SERIF BOLD DIGIT EIGHT → DIGIT EIGHT #
1D7FE ; 0038 ; MA # ( 𝟾 → 8 ) MATHEMATICAL MONOSPACE DIGIT EIGHT → DIGIT EIGHT #
1FBF8 ; 0038 ; MA # ( 🯸 → 8 ) SEGMENTED DIGIT EIGHT → DIGIT EIGHT #
0223 ; 0038 ; MA # ( ȣ → 8 ) LATIN SMALL LETTER OU → DIGIT EIGHT #
0222 ; 0038 ; MA # ( Ȣ → 8 ) LATIN CAPITAL LETTER OU → DIGIT EIGHT #
1031A ; 0038 ; MA # ( 𐌚 → 8 ) OLD ITALIC LETTER EF → DIGIT EIGHT #
1FBF8 ; 0038 ; MA # ( 🯸 → 8 ) SEGMENTED DIGIT EIGHT → DIGIT EIGHT #
0AEE ; 096E ; MA # ( ૮ → ८ ) GUJARATI DIGIT EIGHT → DEVANAGARI DIGIT EIGHT #
@ -1659,12 +1663,12 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7EB ; 0039 ; MA # ( 𝟫 → 9 ) MATHEMATICAL SANS-SERIF DIGIT NINE → DIGIT NINE #
1D7F5 ; 0039 ; MA # ( 𝟵 → 9 ) MATHEMATICAL SANS-SERIF BOLD DIGIT NINE → DIGIT NINE #
1D7FF ; 0039 ; MA # ( 𝟿 → 9 ) MATHEMATICAL MONOSPACE DIGIT NINE → DIGIT NINE #
1FBF9 ; 0039 ; MA # ( 🯹 → 9 ) SEGMENTED DIGIT NINE → DIGIT NINE #
A76E ; 0039 ; MA # ( → 9 ) LATIN CAPITAL LETTER CON → DIGIT NINE #
2CCA ; 0039 ; MA # ( → 9 ) COPTIC CAPITAL LETTER DIALECT-P HORI → DIGIT NINE #
118CC ; 0039 ; MA # ( 𑣌 → 9 ) WARANG CITI SMALL LETTER KO → DIGIT NINE #
118AC ; 0039 ; MA # ( 𑢬 → 9 ) WARANG CITI CAPITAL LETTER KO → DIGIT NINE #
118D6 ; 0039 ; MA # ( 𑣖 → 9 ) WARANG CITI SMALL LETTER AM → DIGIT NINE #
1FBF9 ; 0039 ; MA # ( 🯹 → 9 ) SEGMENTED DIGIT NINE → DIGIT NINE #
0967 ; 0669 ; MA # ( १ → ‎٩‎ ) DEVANAGARI DIGIT ONE → ARABIC-INDIC DIGIT NINE #
118E4 ; 0669 ; MA # ( 𑣤 → ‎٩‎ ) WARANG CITI DIGIT FOUR → ARABIC-INDIC DIGIT NINE #
@ -2544,6 +2548,7 @@ FFE8 ; 006C ; MA #* ( → l ) HALFWIDTH FORMS LIGHT VERTICAL → LATIN SMALL
1D7E3 ; 006C ; MA # ( 𝟣 → l ) MATHEMATICAL SANS-SERIF DIGIT ONE → LATIN SMALL LETTER L # →1→
1D7ED ; 006C ; MA # ( 𝟭 → l ) MATHEMATICAL SANS-SERIF BOLD DIGIT ONE → LATIN SMALL LETTER L # →1→
1D7F7 ; 006C ; MA # ( 𝟷 → l ) MATHEMATICAL MONOSPACE DIGIT ONE → LATIN SMALL LETTER L # →1→
1FBF1 ; 006C ; MA # ( 🯱 → l ) SEGMENTED DIGIT ONE → LATIN SMALL LETTER L # →1→
0049 ; 006C ; MA # ( I → l ) LATIN CAPITAL LETTER I → LATIN SMALL LETTER L #
FF29 ; 006C ; MA # ( → l ) FULLWIDTH LATIN CAPITAL LETTER I → LATIN SMALL LETTER L # →Ӏ→
2160 ; 006C ; MA # ( → l ) ROMAN NUMERAL ONE → LATIN SMALL LETTER L # →Ӏ→
@ -2601,7 +2606,6 @@ A4F2 ; 006C ; MA # ( → l ) LISU LETTER I → LATIN SMALL LETTER L # →I
16F28 ; 006C ; MA # ( 𖼨 → l ) MIAO LETTER GHA → LATIN SMALL LETTER L # →I→
1028A ; 006C ; MA # ( 𐊊 → l ) LYCIAN LETTER J → LATIN SMALL LETTER L # →I→
10309 ; 006C ; MA # ( 𐌉 → l ) OLD ITALIC LETTER I → LATIN SMALL LETTER L # →I→
1FBF1 ; 006C ; MA # ( 🯱 → l ) SEGMENTED DIGIT ONE → LATIN SMALL LETTER L # →1→
1D22A ; 004C ; MA #* ( 𝈪 → L ) GREEK INSTRUMENTAL NOTATION SYMBOL-23 → LATIN CAPITAL LETTER L #
216C ; 004C ; MA # ( → L ) ROMAN NUMERAL FIFTY → LATIN CAPITAL LETTER L #
@ -2972,6 +2976,7 @@ FBA6 ; 006F ; MA # ( → o ) ARABIC LETTER HEH GOAL ISOLATED FORM →
1D7E2 ; 004F ; MA # ( 𝟢 → O ) MATHEMATICAL SANS-SERIF DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
1D7EC ; 004F ; MA # ( 𝟬 → O ) MATHEMATICAL SANS-SERIF BOLD DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
1D7F6 ; 004F ; MA # ( 𝟶 → O ) MATHEMATICAL MONOSPACE DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
1FBF0 ; 004F ; MA # ( 🯰 → O ) SEGMENTED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
FF2F ; 004F ; MA # ( → O ) FULLWIDTH LATIN CAPITAL LETTER O → LATIN CAPITAL LETTER O # →О→
1D40E ; 004F ; MA # ( 𝐎 → O ) MATHEMATICAL BOLD CAPITAL O → LATIN CAPITAL LETTER O #
1D442 ; 004F ; MA # ( 𝑂 → O ) MATHEMATICAL ITALIC CAPITAL O → LATIN CAPITAL LETTER O #
@ -3005,7 +3010,6 @@ A4F3 ; 004F ; MA # ( → O ) LISU LETTER O → LATIN CAPITAL LETTER O #
102AB ; 004F ; MA # ( 𐊫 → O ) CARIAN LETTER O → LATIN CAPITAL LETTER O #
10404 ; 004F ; MA # ( 𐐄 → O ) DESERET CAPITAL LETTER LONG O → LATIN CAPITAL LETTER O #
10516 ; 004F ; MA # ( 𐔖 → O ) ELBASAN LETTER O → LATIN CAPITAL LETTER O #
1FBF0 ; 004F ; MA # ( 🯰 → O ) SEGMENTED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
2070 ; 00BA ; MA #* ( ⁰ → º ) SUPERSCRIPT ZERO → MASCULINE ORDINAL INDICATOR #
1D52 ; 00BA ; MA # ( ᵒ → º ) MODIFIER LETTER SMALL O → MASCULINE ORDINAL INDICATOR # →⁰→
@ -8024,8 +8028,6 @@ FA92 ; 6717 ; MA # ( 朗 → 朗 ) CJK COMPATIBILITY IDEOGRAPH-FA92 → CJK UNIF
FA93 ; 671B ; MA # ( 望 → 望 ) CJK COMPATIBILITY IDEOGRAPH-FA93 → CJK UNIFIED IDEOGRAPH-671B #
2F8D9 ; 671B ; MA # ( 望 → 望 ) CJK COMPATIBILITY IDEOGRAPH-2F8D9 → CJK UNIFIED IDEOGRAPH-671B #
2F8DA ; 6721 ; MA # ( 朡 → 朡 ) CJK COMPATIBILITY IDEOGRAPH-2F8DA → CJK UNIFIED IDEOGRAPH-6721 #
5E50 ; 3B3A ; MA # ( 幐 → 㬺 ) CJK UNIFIED IDEOGRAPH-5E50 → CJK UNIFIED IDEOGRAPH-3B3A #
4420 ; 3B3B ; MA # ( 䐠 → 㬻 ) CJK UNIFIED IDEOGRAPH-4420 → CJK UNIFIED IDEOGRAPH-3B3B #
@ -8831,6 +8833,8 @@ F953 ; 808B ; MA # ( 肋 → 肋 ) CJK COMPATIBILITY IDEOGRAPH-F953 → CJK UNIF
2F984 ; 440B ; MA # ( 䐋 → 䐋 ) CJK COMPATIBILITY IDEOGRAPH-2F984 → CJK UNIFIED IDEOGRAPH-440B #
2F8DA ; 6721 ; MA # ( 朡 → 朡 ) CJK COMPATIBILITY IDEOGRAPH-2F8DA → CJK UNIFIED IDEOGRAPH-6721 #
2F987 ; 267A7 ; MA # ( 𦞧 → 𦞧 ) CJK COMPATIBILITY IDEOGRAPH-2F987 → CJK UNIFIED IDEOGRAPH-267A7 #
2F988 ; 267B5 ; MA # ( 𦞵 → 𦞵 ) CJK COMPATIBILITY IDEOGRAPH-2F988 → CJK UNIFIED IDEOGRAPH-267B5 #
@ -9630,9 +9634,5 @@ FACE ; 9F9C ; MA # ( 龜 → 龜 ) CJK COMPATIBILITY IDEOGRAPH-FACE → CJK UNIF
2FD5 ; 9FA0 ; MA #* ( ⿕ → 龠 ) KANGXI RADICAL FLUTE → CJK UNIFIED IDEOGRAPH-9FA0 #
24EA ; 1F10D ; MA #* ( ⓪ → 🄍 ) CIRCLED DIGIT ZERO → CIRCLED ZERO WITH SLASH #
21BA ; 1F10E ; MA #* ( ↺ → 🄎 ) ANTICLOCKWISE OPEN CIRCLE ARROW → CIRCLED ANTICLOCKWISE ARROW #
# total: 6311

View File

@ -1718,7 +1718,7 @@ cp;01C0;-Cased;-CWCM;gc=Lo;na=LATIN LETTER DENTAL CLICK;SB=LE
cp;01C1;-Cased;-CWCM;gc=Lo;na=LATIN LETTER LATERAL CLICK;SB=LE
cp;01C2;-Cased;-CWCM;gc=Lo;na=LATIN LETTER ALVEOLAR CLICK;SB=LE
cp;01C3;-Cased;-CWCM;gc=Lo;na=LATIN LETTER RETROFLEX CLICK;SB=LE
# Croatian digraphs matching Serbian Cyrillic letters
# Latin digraphs matching Serbian Cyrillic letters
cp;01C4;cf=01C6;CWCF;CWKCF;CWL;CWT;dm=0044 017D;dt=Com;na=LATIN CAPITAL LETTER DZ WITH CARON;NFKC_CF=0064 017E;NFKC_QC=N;NFKD_QC=N;scf=01C6;slc=01C6;stc=01C5;Upper
cp;01C5;cf=01C6;CWCF;CWKCF;CWL;CWU;dm=0044 017E;dt=Com;gc=Lt;na=LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON;NFKC_CF=0064 017E;NFKC_QC=N;NFKD_QC=N;scf=01C6;slc=01C6;stc=01C5;suc=01C4
cp;01C6;CWKCF;CWT;CWU;dm=0064 017E;dt=Com;gc=Ll;Lower;na=LATIN SMALL LETTER DZ WITH CARON;NFKC_CF=0064 017E;NFKC_QC=N;NFKD_QC=N;SB=LO;stc=01C5;suc=01C4
@ -2034,14 +2034,14 @@ cp;02E2;Alpha;bc=L;Cased;CWKCF;dm=0073;dt=Sup;gc=Lm;IDC;IDS;Lower;na=MODIFIER LE
cp;02E3;Alpha;bc=L;Cased;CWKCF;dm=0078;dt=Sup;gc=Lm;IDC;IDS;Lower;na=MODIFIER LETTER SMALL X;NFKC_CF=0078;NFKC_QC=N;NFKD_QC=N;SB=LO;sc=Latn;XIDC;XIDS
cp;02E4;Alpha;bc=L;Cased;CWKCF;dm=0295;dt=Sup;gc=Lm;IDC;IDS;Lower;na=MODIFIER LETTER SMALL REVERSED GLOTTAL STOP;NFKC_CF=0295;NFKC_QC=N;NFKD_QC=N;SB=LO;sc=Latn;XIDC;XIDS
# Tone letters
cp;02E5;na=MODIFIER LETTER EXTRA-HIGH TONE BAR;WB=XX
cp;02E6;na=MODIFIER LETTER HIGH TONE BAR;WB=XX
cp;02E7;na=MODIFIER LETTER MID TONE BAR;WB=XX
cp;02E8;na=MODIFIER LETTER LOW TONE BAR;WB=XX
cp;02E9;na=MODIFIER LETTER EXTRA-LOW TONE BAR;WB=XX
cp;02E5;na=MODIFIER LETTER EXTRA-HIGH TONE BAR
cp;02E6;na=MODIFIER LETTER HIGH TONE BAR
cp;02E7;na=MODIFIER LETTER MID TONE BAR
cp;02E8;na=MODIFIER LETTER LOW TONE BAR
cp;02E9;na=MODIFIER LETTER EXTRA-LOW TONE BAR
# Extended Bopomofo tone marks
cp;02EA;age=3.0;na=MODIFIER LETTER YIN DEPARTING TONE MARK;sc=Bopo;vo=U;WB=XX
cp;02EB;age=3.0;na=MODIFIER LETTER YANG DEPARTING TONE MARK;sc=Bopo;vo=U;WB=XX
cp;02EA;age=3.0;na=MODIFIER LETTER YIN DEPARTING TONE MARK;sc=Bopo;vo=U
cp;02EB;age=3.0;na=MODIFIER LETTER YANG DEPARTING TONE MARK;sc=Bopo;vo=U
# IPA modifiers
cp;02EC;age=3.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER VOICING;SB=LE;XIDC;XIDS
cp;02ED;age=3.0;na=MODIFIER LETTER UNASPIRATED
@ -2734,12 +2734,12 @@ cp;0556;cf=0586;CWCF;CWKCF;CWL;gc=Lu;na=ARMENIAN CAPITAL LETTER FEH;NFKC_CF=0586
unassigned;0557..0558
# Modifier letters
cp;0559;-Cased;CI;-CWCM;Dia;gc=Lm;na=ARMENIAN MODIFIER LETTER LEFT HALF RING;SB=LE
cp;055A;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN APOSTROPHE;SB=XX;WB=XX;-XIDC;-XIDS
cp;055A;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN APOSTROPHE;SB=XX;-XIDC;-XIDS
cp;055B;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN EMPHASIS MARK;SB=XX;-XIDC;-XIDS
cp;055C;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN EXCLAMATION MARK;SB=XX;-XIDC;-XIDS
cp;055D;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN COMMA;SB=SC;WB=XX;-XIDC;-XIDS
cp;055E;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN QUESTION MARK;SB=XX;-XIDC;-XIDS
cp;055F;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN ABBREVIATION MARK;SB=XX;WB=XX;-XIDC;-XIDS
cp;055F;-Alpha;-Cased;CI;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN ABBREVIATION MARK;SB=XX;WB=ML;-XIDC;-XIDS
# Lowercase letters
cp;0560;age=11.0;-CWCM;Lower;na=ARMENIAN SMALL LETTER TURNED AYB
cp;0561;CWT;CWU;Lower;na=ARMENIAN SMALL LETTER AYB;stc=0531;suc=0531
@ -2783,8 +2783,8 @@ cp;0586;CWT;CWU;Lower;na=ARMENIAN SMALL LETTER FEH;stc=0556;suc=0556
cp;0587;cf=0565 0582;CWCF;CWKCF;CWT;CWU;dm=0565 0582;dt=Com;lc=0587;Lower;na=ARMENIAN SMALL LIGATURE ECH YIWN;NFKC_CF=0565 0582;NFKC_QC=N;NFKD_QC=N;tc=0535 0582;uc=0535 0552
cp;0588;age=11.0;-CWCM;Lower;na=ARMENIAN SMALL LETTER YI WITH STROKE
# Punctuation
cp;0589;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;lb=IS;na=ARMENIAN FULL STOP;SB=ST;sc=Zyyy;scx=Armn Geor;STerm;Term;WB=MN;-XIDC;-XIDS
cp;058A;age=3.0;-Alpha;bc=ON;-Cased;-CWCM;Dash;gc=Pd;Hyphen;-IDC;-IDS;lb=BA;na=ARMENIAN HYPHEN;SB=XX;WB=XX;-XIDC;-XIDS
cp;0589;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;lb=IS;na=ARMENIAN FULL STOP;SB=ST;STerm;Term;WB=MN;-XIDC;-XIDS
cp;058A;age=3.0;-Alpha;bc=ON;-Cased;-CWCM;Dash;gc=Pd;Hyphen;-IDC;-IDS;lb=BA;na=ARMENIAN HYPHEN;SB=XX;-XIDC;-XIDS
unassigned;058B..058C
# Religious symbols
cp;058D;age=7.0;-Alpha;bc=ON;-Cased;-CWCM;gc=So;-IDC;-IDS;na=RIGHT-FACING ARMENIAN ETERNITY SIGN;SB=XX;WB=XX;-XIDC;-XIDS
@ -3567,9 +3567,9 @@ cp;0852;na=MANDAIC LETTER AQ
cp;0853;na=MANDAIC LETTER AR
cp;0854;jt=R;na=MANDAIC LETTER ASH
cp;0855;na=MANDAIC LETTER AT
cp;0856;jt=U;na=MANDAIC LETTER DUSHENNA
cp;0857;jt=U;na=MANDAIC LETTER KAD
cp;0858;jt=U;na=MANDAIC LETTER AIN
cp;0856;jt=R;na=MANDAIC LETTER DUSHENNA
cp;0857;jt=R;na=MANDAIC LETTER KAD
cp;0858;jt=R;na=MANDAIC LETTER AIN
# Diacritics
cp;0859;-Alpha;bc=NSM;ccc=220;CI;gc=Mn;GCB=EX;-Gr_Base;Gr_Ext;-IDS;jt=T;lb=CM;na=MANDAIC AFFRICATION MARK;SB=EX;WB=Extend;-XIDS
cp;085A;-Alpha;bc=NSM;ccc=220;CI;gc=Mn;GCB=EX;-Gr_Base;Gr_Ext;-IDS;jt=T;lb=CM;na=MANDAIC VOCALIZATION MARK;SB=EX;WB=Extend;-XIDS
@ -9102,10 +9102,10 @@ cp;1DF5;age=7.0;Dia;na=COMBINING UP TACK ABOVE
# Typicon marks
cp;1DF6;age=10.0;ccc=232;Dia;na=COMBINING KAVYKA ABOVE RIGHT
cp;1DF7;age=10.0;ccc=228;Dia;na=COMBINING KAVYKA ABOVE LEFT
cp;1DF8;age=10.0;ccc=228;Dia;na=COMBINING DOT ABOVE LEFT
# Miscellaneous marks
cp;1DF8;age=10.0;ccc=228;Dia;na=COMBINING DOT ABOVE LEFT;scx=Cyrl Syrc
cp;1DF9;age=10.0;ccc=220;Dia;na=COMBINING WIDE INVERTED BRIDGE BELOW
unassigned;1DFA
# Miscellaneous mark
cp;1DFB;age=9.0;InPC=Top;InSC=Syllable_Modifier;na=COMBINING DELETION MARK
# Double diacritic mark for UPA
cp;1DFC;age=6.0;ccc=233;na=COMBINING DOUBLE INVERTED BREVE BELOW
@ -9682,9 +9682,10 @@ cp;2024;CI;CWKCF;dm=002E;dt=Com;ea=A;lb=IN;na=ONE DOT LEADER;NFKC_CF=002E;NFKC_Q
cp;2025;CWKCF;dm=002E 002E;dt=Com;ea=A;lb=IN;na=TWO DOT LEADER;NFKC_CF=002E 002E;NFKC_QC=N;NFKD_QC=N
cp;2026;CWKCF;dm=002E 002E 002E;dt=Com;ea=A;lb=IN;na=HORIZONTAL ELLIPSIS;NFKC_CF=002E 002E 002E;NFKC_QC=N;NFKD_QC=N
cp;2027;CI;ea=A;lb=BA;na=HYPHENATION POINT;WB=ML
# Format characters
# Separators
cp;2028;bc=WS;gc=Zl;GCB=CN;-Gr_Base;lb=BK;na=LINE SEPARATOR;-Pat_Syn;Pat_WS;SB=SE;WB=NL;WSpace
cp;2029;bc=B;gc=Zp;GCB=CN;-Gr_Base;lb=BK;na=PARAGRAPH SEPARATOR;-Pat_Syn;Pat_WS;SB=SE;WB=NL;WSpace
# Format characters
cp;202A;bc=LRE;Bidi_C;CI;CWKCF;DI;gc=Cf;GCB=CN;-Gr_Base;jt=T;lb=CM;na=LEFT-TO-RIGHT EMBEDDING;Name_Alias=abbreviation=LRE;NFKC_CF=;-Pat_Syn;SB=FO;WB=FO
cp;202B;bc=RLE;Bidi_C;CI;CWKCF;DI;gc=Cf;GCB=CN;-Gr_Base;jt=T;lb=CM;na=RIGHT-TO-LEFT EMBEDDING;Name_Alias=abbreviation=RLE;NFKC_CF=;-Pat_Syn;SB=FO;WB=FO
cp;202C;bc=PDF;Bidi_C;CI;CWKCF;DI;gc=Cf;GCB=CN;-Gr_Base;jt=T;lb=CM;na=POP DIRECTIONAL FORMATTING;Name_Alias=abbreviation=PDF;NFKC_CF=;-Pat_Syn;SB=FO;WB=FO
@ -11837,8 +11838,9 @@ cp;27C4;age=4.1;bmg=27C3;na=OPEN SUPERSET
# Paired punctuation
cp;27C5;age=4.1;bmg=27C6;bpb=27C6;bpt=o;gc=Ps;lb=OP;na=LEFT S-SHAPED BAG DELIMITER;SB=CL
cp;27C6;age=4.1;bmg=27C5;bpb=27C5;bpt=c;gc=Pe;lb=CL;na=RIGHT S-SHAPED BAG DELIMITER;SB=CL
# Miscellaneous symbols
# Operator
cp;27C7;age=5.0;-Bidi_M;na=OR WITH DOT INSIDE
# Miscellaneous symbols
cp;27C8;age=5.0;bmg=27C9;na=REVERSE SOLIDUS PRECEDING SUBSET
cp;27C9;age=5.0;bmg=27C8;na=SUPERSET PRECEDING SOLIDUS
# Vertical line operator
@ -17107,17 +17109,17 @@ cp;A6F6;-Alpha;gc=Po;-IDC;-IDS;lb=BA;na=BAMUM SEMICOLON;SB=XX;Term;WB=XX;-XIDC;-
cp;A6F7;-Alpha;gc=Po;-IDC;-IDS;lb=BA;na=BAMUM QUESTION MARK;SB=ST;STerm;Term;WB=XX;-XIDC;-XIDS
unassigned;A6F8..A6FF
block;A700..A71F;age=4.1;bc=ON;blk=Modifier_Tone_Letters;CI;Dia;gc=Sk;Gr_Base;lb=AL;sc=Zyyy
block;A700..A71F;age=4.1;bc=ON;blk=Modifier_Tone_Letters;CI;Dia;gc=Sk;Gr_Base;lb=AL;sc=Zyyy;WB=LE
# A700..A71F Modifier Tone Letters
# Corner tone marks for Chinese
cp;A700;na=MODIFIER LETTER CHINESE TONE YIN PING
cp;A701;na=MODIFIER LETTER CHINESE TONE YANG PING
cp;A702;na=MODIFIER LETTER CHINESE TONE YIN SHANG
cp;A703;na=MODIFIER LETTER CHINESE TONE YANG SHANG
cp;A704;na=MODIFIER LETTER CHINESE TONE YIN QU
cp;A705;na=MODIFIER LETTER CHINESE TONE YANG QU
cp;A706;na=MODIFIER LETTER CHINESE TONE YIN RU
cp;A707;na=MODIFIER LETTER CHINESE TONE YANG RU
cp;A700;na=MODIFIER LETTER CHINESE TONE YIN PING;scx=Hani Latn;WB=XX
cp;A701;na=MODIFIER LETTER CHINESE TONE YANG PING;scx=Hani Latn;WB=XX
cp;A702;na=MODIFIER LETTER CHINESE TONE YIN SHANG;scx=Hani Latn;WB=XX
cp;A703;na=MODIFIER LETTER CHINESE TONE YANG SHANG;scx=Hani Latn;WB=XX
cp;A704;na=MODIFIER LETTER CHINESE TONE YIN QU;scx=Hani Latn;WB=XX
cp;A705;na=MODIFIER LETTER CHINESE TONE YANG QU;scx=Hani Latn;WB=XX
cp;A706;na=MODIFIER LETTER CHINESE TONE YIN RU;scx=Hani Latn;WB=XX
cp;A707;na=MODIFIER LETTER CHINESE TONE YANG RU;scx=Hani Latn;WB=XX
# Dotted tone letters
cp;A708;na=MODIFIER LETTER EXTRA-HIGH DOTTED TONE BAR
cp;A709;na=MODIFIER LETTER HIGH DOTTED TONE BAR
@ -17136,16 +17138,16 @@ cp;A714;na=MODIFIER LETTER MID LEFT-STEM TONE BAR
cp;A715;na=MODIFIER LETTER LOW LEFT-STEM TONE BAR
cp;A716;na=MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR
# Chinantec tone marks
cp;A717;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER DOT VERTICAL BAR;SB=LE;WB=LE;XIDC;XIDS
cp;A718;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER DOT SLASH;SB=LE;WB=LE;XIDC;XIDS
cp;A719;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER DOT HORIZONTAL BAR;SB=LE;WB=LE;XIDC;XIDS
cp;A71A;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER LOWER RIGHT CORNER ANGLE;SB=LE;WB=LE;XIDC;XIDS
cp;A717;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER DOT VERTICAL BAR;SB=LE;XIDC;XIDS
cp;A718;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER DOT SLASH;SB=LE;XIDC;XIDS
cp;A719;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER DOT HORIZONTAL BAR;SB=LE;XIDC;XIDS
cp;A71A;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER LOWER RIGHT CORNER ANGLE;SB=LE;XIDC;XIDS
# Africanist tone letters
cp;A71B;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED UP ARROW;SB=LE;WB=LE;XIDC;XIDS
cp;A71C;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED DOWN ARROW;SB=LE;WB=LE;XIDC;XIDS
cp;A71D;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED EXCLAMATION MARK;SB=LE;WB=LE;XIDC;XIDS
cp;A71E;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED INVERTED EXCLAMATION MARK;SB=LE;WB=LE;XIDC;XIDS
cp;A71F;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER LOW INVERTED EXCLAMATION MARK;SB=LE;WB=LE;XIDC;XIDS
cp;A71B;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED UP ARROW;SB=LE;XIDC;XIDS
cp;A71C;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED DOWN ARROW;SB=LE;XIDC;XIDS
cp;A71D;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED EXCLAMATION MARK;SB=LE;XIDC;XIDS
cp;A71E;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED INVERTED EXCLAMATION MARK;SB=LE;XIDC;XIDS
cp;A71F;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER LOW INVERTED EXCLAMATION MARK;SB=LE;XIDC;XIDS
block;A720..A7FF;age=5.1;Alpha;blk=Latin_Ext_D;Cased;CWCM;gc=Ll;Gr_Base;IDC;IDS;lb=AL;SB=LO;sc=Latn;WB=LE;XIDC;XIDS
# A720..A7FF Latin Extended-D
@ -30483,21 +30485,21 @@ cp;16F9D;CI;Dia;gc=Lm;na=MIAO LETTER REFORMED TONE-5;WB=LE
cp;16F9E;CI;Dia;gc=Lm;na=MIAO LETTER REFORMED TONE-6;WB=LE
cp;16F9F;CI;Dia;gc=Lm;na=MIAO LETTER REFORMED TONE-8;WB=LE
block;16FE0..16FFF;age=13.0;Alpha;blk=Ideographic_Symbols;ea=W;gc=Lm;Gr_Base;IDC;lb=NS;SB=LE;sc=Zyyy;vo=U;WB=Extend;XIDC
block;16FE0..16FFF;age=13.0;Alpha;blk=Ideographic_Symbols;ea=W;gc=Lm;Gr_Base;IDC;lb=NS;SB=LE;sc=Hani;vo=U;WB=Extend;XIDC
# 16FE0..16FFF Ideographic Symbols and Punctuation
# Tangut mark
cp;16FE0;age=9.0;CI;Ext;IDS;na=TANGUT ITERATION MARK;sc=Tang;WB=LE;XIDS
# Nushu mark
cp;16FE1;age=10.0;CI;Ext;IDS;na=NUSHU ITERATION MARK;sc=Nshu;WB=LE;XIDS
# Marks used in ancient Chinese texts
cp;16FE2;age=12.0;-Alpha;bc=ON;gc=Po;-IDC;na=OLD CHINESE HOOK MARK;SB=XX;WB=XX;-XIDC
cp;16FE3;age=12.0;CI;Ext;IDS;na=OLD CHINESE ITERATION MARK;WB=LE;XIDS
cp;16FE2;age=12.0;-Alpha;bc=ON;gc=Po;-IDC;na=OLD CHINESE HOOK MARK;SB=XX;sc=Zyyy;WB=XX;-XIDC
cp;16FE3;age=12.0;CI;Ext;IDS;na=OLD CHINESE ITERATION MARK;sc=Zyyy;WB=LE;XIDS
# Small Khitan format character
cp;16FE4;-Alpha;bc=NSM;CI;gc=Mn;GCB=EX;-Gr_Base;Gr_Ext;Ideo;jt=T;lb=GL;na=KHITAN SMALL SCRIPT FILLER;SB=EX;sc=Kits
unassigned;16FE5..16FEF;vo=U
# Combining diacritics for CJK ideographs
cp;16FF0;ccc=6;Dia;gc=Mc;GCB=SM;lb=CM;na=VIETNAMESE ALTERNATE READING MARK CA;SB=EX;sc=Zinh
cp;16FF1;ccc=6;Dia;gc=Mc;GCB=SM;lb=CM;na=VIETNAMESE ALTERNATE READING MARK NHAY;SB=EX;sc=Zinh
cp;16FF0;ccc=6;Dia;gc=Mc;GCB=SM;lb=CM;na=VIETNAMESE ALTERNATE READING MARK CA;SB=EX
cp;16FF1;ccc=6;Dia;gc=Mc;GCB=SM;lb=CM;na=VIETNAMESE ALTERNATE READING MARK NHAY;SB=EX
unassigned;16FF2..16FFF;vo=U
block;17000..187FF;Alpha;blk=Tangut;ea=W;gc=Lo;Gr_Base;IDC;Ideo;IDS;lb=ID;SB=LE;sc=Tang;vo=U;XIDC;XIDS
@ -31294,7 +31296,7 @@ cp;18AFD;age=13.0;na=TANGUT COMPONENT-766
cp;18AFE;age=13.0;na=TANGUT COMPONENT-767
cp;18AFF;age=13.0;na=TANGUT COMPONENT-768
block;18B00..18CFF;age=13.0;Alpha;blk=Khitan_Small_Script;ea=W;gc=Lo;Gr_Base;IDC;Ideo;IDS;lb=ID;SB=LE;sc=Kits;vo=U;XIDC;XIDS
block;18B00..18CFF;age=13.0;Alpha;blk=Khitan_Small_Script;ea=W;gc=Lo;Gr_Base;IDC;Ideo;IDS;lb=AL;SB=LE;sc=Kits;vo=U;XIDC;XIDS
# 18B00..18CFF Khitan Small Script
# Iteration mark
cp;18B00;na=KHITAN SMALL SCRIPT CHARACTER-18B00
@ -38273,7 +38275,7 @@ cp;1F909;ea=N;-Emoji;-EPres;-ExtPict;lb=AL;na=DOWNWARD FACING NOTCHED HOOK
cp;1F90A;ea=N;-Emoji;-EPres;-ExtPict;lb=AL;na=DOWNWARD FACING HOOK WITH DOT
cp;1F90B;ea=N;-Emoji;-EPres;-ExtPict;lb=AL;na=DOWNWARD FACING NOTCHED HOOK WITH DOT
# Hand symbol
cp;1F90C;age=13.0;EBase;na=PINCHED FINGERS
cp;1F90C;age=13.0;EBase;lb=EB;na=PINCHED FINGERS
# Colored heart symbols
cp;1F90D;age=12.0;na=WHITE HEART
cp;1F90E;age=12.0;na=BROWN HEART
@ -38390,7 +38392,7 @@ cp;1F973;age=11.0;na=FACE WITH PARTY HORN AND PARTY HAT
cp;1F974;age=11.0;na=FACE WITH UNEVEN EYES AND WAVY MOUTH
cp;1F975;age=11.0;na=OVERHEATED FACE
cp;1F976;age=11.0;na=FREEZING FACE
cp;1F977;age=13.0;na=NINJA
cp;1F977;age=13.0;EBase;lb=EB;na=NINJA
cp;1F978;age=13.0;na=DISGUISED FACE
unassigned;1F979;ExtPict;lb=ID;vo=U
cp;1F97A;age=11.0;na=FACE WITH PLEADING EYES
@ -38732,7 +38734,7 @@ cp;1FAD5;na=FONDUE
cp;1FAD6;na=TEAPOT
unassigned;1FAD7..1FAFF;ExtPict;lb=ID;vo=U
block;1FB00..1FBFF;age=13.0;bc=ON;blk=Symbols_For_Legacy_Computing;ExtPict;gc=So;Gr_Base;lb=ID;sc=Zyyy
block;1FB00..1FBFF;age=13.0;bc=ON;blk=Symbols_For_Legacy_Computing;gc=So;Gr_Base;lb=AL;sc=Zyyy
# 1FB00..1FBFF Symbols for Legacy Computing
# Block mosaic terminal graphic characters
cp;1FB00;na=BLOCK SEXTANT-1
@ -38885,7 +38887,7 @@ cp;1FB8F;na=LOWER HALF MEDIUM SHADE
cp;1FB90;na=INVERSE MEDIUM SHADE
cp;1FB91;na=UPPER HALF BLOCK AND LOWER HALF INVERSE MEDIUM SHADE
cp;1FB92;na=UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
unassigned;1FB93;ExtPict;lb=ID
unassigned;1FB93
cp;1FB94;na=LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK
# Fill characters
cp;1FB95;na=CHECKER BOARD FILL
@ -38951,19 +38953,19 @@ cp;1FBC7;na=STICK FIGURE LEANING LEFT
cp;1FBC8;na=STICK FIGURE LEANING RIGHT
cp;1FBC9;na=STICK FIGURE WITH DRESS
cp;1FBCA;na=WHITE UP-POINTING CHEVRON
unassigned;1FBCB..1FBEF;ExtPict;lb=ID
unassigned;1FBCB..1FBEF
# Segmented digits
cp;1FBF0;bc=EN;CWKCF;dm=0030;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT ZERO;NFKC_CF=0030;NFKC_QC=N;NFKD_QC=N;nt=De;nv=0;XIDC
cp;1FBF1;bc=EN;CWKCF;dm=0031;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT ONE;NFKC_CF=0031;NFKC_QC=N;NFKD_QC=N;nt=De;nv=1;XIDC
cp;1FBF2;bc=EN;CWKCF;dm=0032;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT TWO;NFKC_CF=0032;NFKC_QC=N;NFKD_QC=N;nt=De;nv=2;XIDC
cp;1FBF3;bc=EN;CWKCF;dm=0033;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT THREE;NFKC_CF=0033;NFKC_QC=N;NFKD_QC=N;nt=De;nv=3;XIDC
cp;1FBF4;bc=EN;CWKCF;dm=0034;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT FOUR;NFKC_CF=0034;NFKC_QC=N;NFKD_QC=N;nt=De;nv=4;XIDC
cp;1FBF5;bc=EN;CWKCF;dm=0035;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT FIVE;NFKC_CF=0035;NFKC_QC=N;NFKD_QC=N;nt=De;nv=5;XIDC
cp;1FBF6;bc=EN;CWKCF;dm=0036;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT SIX;NFKC_CF=0036;NFKC_QC=N;NFKD_QC=N;nt=De;nv=6;XIDC
cp;1FBF7;bc=EN;CWKCF;dm=0037;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT SEVEN;NFKC_CF=0037;NFKC_QC=N;NFKD_QC=N;nt=De;nv=7;XIDC
cp;1FBF8;bc=EN;CWKCF;dm=0038;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT EIGHT;NFKC_CF=0038;NFKC_QC=N;NFKD_QC=N;nt=De;nv=8;XIDC
cp;1FBF9;bc=EN;CWKCF;dm=0039;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT NINE;NFKC_CF=0039;NFKC_QC=N;NFKD_QC=N;nt=De;nv=9;XIDC
unassigned;1FBFA..1FBFF;ExtPict;lb=ID
cp;1FBF0;bc=EN;CWKCF;dm=0030;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT ZERO;NFKC_CF=0030;NFKC_QC=N;NFKD_QC=N;nt=De;nv=0;SB=NU;WB=NU;XIDC
cp;1FBF1;bc=EN;CWKCF;dm=0031;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT ONE;NFKC_CF=0031;NFKC_QC=N;NFKD_QC=N;nt=De;nv=1;SB=NU;WB=NU;XIDC
cp;1FBF2;bc=EN;CWKCF;dm=0032;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT TWO;NFKC_CF=0032;NFKC_QC=N;NFKD_QC=N;nt=De;nv=2;SB=NU;WB=NU;XIDC
cp;1FBF3;bc=EN;CWKCF;dm=0033;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT THREE;NFKC_CF=0033;NFKC_QC=N;NFKD_QC=N;nt=De;nv=3;SB=NU;WB=NU;XIDC
cp;1FBF4;bc=EN;CWKCF;dm=0034;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT FOUR;NFKC_CF=0034;NFKC_QC=N;NFKD_QC=N;nt=De;nv=4;SB=NU;WB=NU;XIDC
cp;1FBF5;bc=EN;CWKCF;dm=0035;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT FIVE;NFKC_CF=0035;NFKC_QC=N;NFKD_QC=N;nt=De;nv=5;SB=NU;WB=NU;XIDC
cp;1FBF6;bc=EN;CWKCF;dm=0036;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT SIX;NFKC_CF=0036;NFKC_QC=N;NFKD_QC=N;nt=De;nv=6;SB=NU;WB=NU;XIDC
cp;1FBF7;bc=EN;CWKCF;dm=0037;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT SEVEN;NFKC_CF=0037;NFKC_QC=N;NFKD_QC=N;nt=De;nv=7;SB=NU;WB=NU;XIDC
cp;1FBF8;bc=EN;CWKCF;dm=0038;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT EIGHT;NFKC_CF=0038;NFKC_QC=N;NFKD_QC=N;nt=De;nv=8;SB=NU;WB=NU;XIDC
cp;1FBF9;bc=EN;CWKCF;dm=0039;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT NINE;NFKC_CF=0039;NFKC_QC=N;NFKD_QC=N;nt=De;nv=9;SB=NU;WB=NU;XIDC
unassigned;1FBFA..1FBFF
# No block
unassigned;1FC00..1FF7F;ExtPict;lb=ID

View File

@ -102,25 +102,25 @@ void U_CALLCONV initializeStatics(UErrorCode &status) {
u"\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55\\u0C56\\u0C60\\u0C61\\u0C66-"
u"\\u0C6F\\u0C80\\u0C82\\u0C83\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8"
u"\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CBC-\\u0CC4\\u0CC6-\\u0CC8\\u0CCA-\\u0CCD"
u"\\u0CD5\\u0CD6\\u0CE0-\\u0CE3\\u0CE6-\\u0CEF\\u0CF1\\u0CF2\\u0D00\\u0D02-"
u"\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D3A\\u0D3D-\\u0D43\\u0D46-\\u0D48\\u0D4A-"
u"\\u0D4E\\u0D54-\\u0D57\\u0D60\\u0D61\\u0D66-\\u0D6F\\u0D7A-\\u0D7F\\u0D81-"
u"\\u0D83\\u0D85-\\u0D8E\\u0D91-\\u0D96\\u0D9A-\\u0DA5\\u0DA7-\\u0DB1\\u0DB3-"
u"\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0DCA\\u0DCF-\\u0DD4\\u0DD6\\u0DD8-\\u0DDE"
u"\\u0DF2\\u0E01-\\u0E32\\u0E34-\\u0E3A\\u0E40-\\u0E4E\\u0E50-\\u0E59\\u0E81"
u"\\u0E82\\u0E84\\u0E86-\\u0E8A\\u0E8C-\\u0EA3\\u0EA5\\u0EA7-\\u0EB2\\u0EB4-"
u"\\u0EBD\\u0EC0-\\u0EC4\\u0EC6\\u0EC8-\\u0ECD\\u0ED0-\\u0ED9\\u0EDE\\u0EDF"
u"\\u0F00\\u0F20-\\u0F29\\u0F35\\u0F37\\u0F3E-\\u0F42\\u0F44-\\u0F47\\u0F49-"
u"\\u0F4C\\u0F4E-\\u0F51\\u0F53-\\u0F56\\u0F58-\\u0F5B\\u0F5D-\\u0F68\\u0F6A-"
u"\\u0F6C\\u0F71\\u0F72\\u0F74\\u0F7A-\\u0F80\\u0F82-\\u0F84\\u0F86-\\u0F92"
u"\\u0F94-\\u0F97\\u0F99-\\u0F9C\\u0F9E-\\u0FA1\\u0FA3-\\u0FA6\\u0FA8-\\u0FAB"
u"\\u0FAD-\\u0FB8\\u0FBA-\\u0FBC\\u0FC6\\u1000-\\u1049\\u1050-\\u109D\\u10C7"
u"\\u10CD\\u10D0-\\u10F0\\u10F7-\\u10FA\\u10FD-\\u10FF\\u1200-\\u1248\\u124A-"
u"\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D"
u"\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-"
u"\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u135D-\\u135F\\u1380-"
u"\\u138F\\u1780-\\u17A2\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-\\u17CA\\u17D2"
u"\\u17D7\\u17DC\\u17E0-\\u17E9\\u1ABF\\u1AC0\\u1C90-\\u1CBA\\u1CBD-\\u1CBF"
u"\\u0CD5\\u0CD6\\u0CE0-\\u0CE3\\u0CE6-\\u0CEF\\u0CF1\\u0CF2\\u0D00\\u0D02"
u"\\u0D03\\u0D05-\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D3A\\u0D3D-\\u0D43\\u0D46-"
u"\\u0D48\\u0D4A-\\u0D4E\\u0D54-\\u0D57\\u0D60\\u0D61\\u0D66-\\u0D6F\\u0D7A-"
u"\\u0D7F\\u0D82\\u0D83\\u0D85-\\u0D8E\\u0D91-\\u0D96\\u0D9A-\\u0DA5\\u0DA7-"
u"\\u0DB1\\u0DB3-\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0DCA\\u0DCF-\\u0DD4\\u0DD6"
u"\\u0DD8-\\u0DDE\\u0DF2\\u0E01-\\u0E32\\u0E34-\\u0E3A\\u0E40-\\u0E4E\\u0E50-"
u"\\u0E59\\u0E81\\u0E82\\u0E84\\u0E86-\\u0E8A\\u0E8C-\\u0EA3\\u0EA5\\u0EA7-"
u"\\u0EB2\\u0EB4-\\u0EBD\\u0EC0-\\u0EC4\\u0EC6\\u0EC8-\\u0ECD\\u0ED0-\\u0ED9"
u"\\u0EDE\\u0EDF\\u0F00\\u0F20-\\u0F29\\u0F35\\u0F37\\u0F3E-\\u0F42\\u0F44-"
u"\\u0F47\\u0F49-\\u0F4C\\u0F4E-\\u0F51\\u0F53-\\u0F56\\u0F58-\\u0F5B\\u0F5D-"
u"\\u0F68\\u0F6A-\\u0F6C\\u0F71\\u0F72\\u0F74\\u0F7A-\\u0F80\\u0F82-\\u0F84"
u"\\u0F86-\\u0F92\\u0F94-\\u0F97\\u0F99-\\u0F9C\\u0F9E-\\u0FA1\\u0FA3-\\u0FA6"
u"\\u0FA8-\\u0FAB\\u0FAD-\\u0FB8\\u0FBA-\\u0FBC\\u0FC6\\u1000-\\u1049\\u1050-"
u"\\u109D\\u10C7\\u10CD\\u10D0-\\u10F0\\u10F7-\\u10FA\\u10FD-\\u10FF\\u1200-"
u"\\u1248\\u124A-\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288"
u"\\u128A-\\u128D\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-"
u"\\u12C5\\u12C8-\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u135D-"
u"\\u135F\\u1380-\\u138F\\u1780-\\u17A2\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-"
u"\\u17CA\\u17D2\\u17D7\\u17DC\\u17E0-\\u17E9\\u1C90-\\u1CBA\\u1CBD-\\u1CBF"
u"\\u1E00-\\u1E99\\u1E9E\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-"
u"\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F70"
u"\\u1F72\\u1F74\\u1F76\\u1F78\\u1F7A\\u1F7C\\u1F80-\\u1FB4\\u1FB6-\\u1FBA"
@ -131,14 +131,14 @@ void U_CALLCONV initializeStatics(UErrorCode &status) {
u"\\u2DD8-\\u2DDE\\u3005-\\u3007\\u3041-\\u3096\\u3099\\u309A\\u309D\\u309E"
u"\\u30A1-\\u30FA\\u30FC-\\u30FE\\u3105-\\u312D\\u312F\\u31A0-\\u31BF\\u3400-"
u"\\u4DBF\\u4E00-\\u9FFC\\uA67F\\uA717-\\uA71F\\uA788\\uA78D\\uA792\\uA793"
u"\\uA7AA\\uA7AE\\uA7B8\\uA7B9\\uA7C2-\\uA7CA\\uA7F5\\uA7F6\\uA9E7-\\uA9FE"
u"\\uAA60-\\uAA76\\uAA7A-\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16"
u"\\uAB20-\\uAB26\\uAB28-\\uAB2E\\uAB66-\\uAB68\\uAC00-\\uD7A3\\uFA0E\\uFA0F"
u"\\uFA11\\uFA13\\uFA14\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00011301"
u"\\U00011303\\U0001133B\\U0001133C\\U00016FF0\\U00016FF1\\U0001B150-"
u"\\U0001B152\\U0001B164-\\U0001B167\\U00020000-\\U0002A6DD\\U0002A700-"
u"\\U0002B734\\U0002B740-\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-"
u"\\U0002EBE0\\U00030000-\\U0003134A]";
u"\\uA7AA\\uA7AE\\uA7B8\\uA7B9\\uA7C2-\\uA7CA\\uA9E7-\\uA9FE\\uAA60-\\uAA76"
u"\\uAA7A-\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16\\uAB20-\\uAB26"
u"\\uAB28-\\uAB2E\\uAB66\\uAB67\\uAC00-\\uD7A3\\uFA0E\\uFA0F\\uFA11\\uFA13"
u"\\uFA14\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00011301\\U00011303"
u"\\U0001133B\\U0001133C\\U00016FF0\\U00016FF1\\U0001B150-\\U0001B152"
u"\\U0001B164-\\U0001B167\\U00020000-\\U0002A6DD\\U0002A700-\\U0002B734"
u"\\U0002B740-\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-\\U0002EBE0"
u"\\U00030000-\\U0003134A]";
gRecommendedSet = new UnicodeSet(UnicodeString(recommendedPat), status);
if (gRecommendedSet == NULL) {

View File

@ -1991,10 +1991,15 @@ RBBIWordMonkey::RBBIWordMonkey()
fMidNumLetSet = new UnicodeSet(u"[\\p{Word_Break = MidNumLet}]", status);
fMidLetterSet = new UnicodeSet(u"[\\p{Word_Break = MidLetter}]", status);
fMidNumSet = new UnicodeSet(u"[\\p{Word_Break = MidNum}]", status);
fNumericSet = new UnicodeSet(u"[[\\p{Word_Break = Numeric}][\\uff10-\\uff19]]", status);
fNumericSet = new UnicodeSet(u"[\\p{Word_Break = Numeric}]", status);
fFormatSet = new UnicodeSet(u"[\\p{Word_Break = Format}]", status);
fExtendNumLetSet = new UnicodeSet(u"[\\p{Word_Break = ExtendNumLet}]", status);
fExtendSet = new UnicodeSet(u"[\\p{Word_Break = Extend}]", status);
// There are some sc=Hani characters with WB=Extend.
// The break rules need to pick one or the other because
// Extend overlapping with something else is messy.
// For Unicode 13, we chose to keep U+16FF0 & U+16FF1
// in $Han (for $dictionary) and out of $Extend.
fExtendSet = new UnicodeSet(u"[\\p{Word_Break = Extend}-[:Hani:]]", status);
fWSegSpaceSet = new UnicodeSet(u"[\\p{Word_Break = WSegSpace}]", status);
fZWJSet = new UnicodeSet(u"[\\p{Word_Break = ZWJ}]", status);

View File

@ -1,6 +1,6 @@
# CollationTest_CLDR_NON_IGNORABLE_SHORT.txt
# Date: 2019-11-08, 22:14:17 GMT
# © 2019 Unicode®, Inc.
# Date: 2020-02-12, 17:50:40 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# UCA Version: 13.0.0
@ -945,9 +945,11 @@ FB1E 0334
0652 0334
0334 0653
0653 0334
0334 10EAC
10EAC 0334
0334 0654
0654 0334
0334 10EAB
10EAB 0334
0334 0655
0655 0334
@ -1111,6 +1113,7 @@ A9B3 0334
116B7 0334
0334 1183A
1183A 0334
0334 11943
11943 0334
0334 11D42
11D42 0334
@ -1232,7 +1235,9 @@ A92D 0334
302E 0334
0334 302F
302F 0334
0334 16FF0
16FF0 0334
0334 16FF1
16FF1 0334
0334 20D0
20D0 0334
@ -66995,6 +67000,7 @@ A75E 0062
0057 0323 0334
0057 0334 0323
1E88 0334
0334 1ABF
1ABF 0334
0334 1DF1
1DF1 0334
@ -67303,6 +67309,7 @@ A7C2 0062
2C72 0041
2C73 0062
2C72 0062
0334 1AC0
1AC0 0334
028D 0021
1AC0 0021
@ -92154,6 +92161,7 @@ A806 003F
A806 0061
A806 0041
A806 0062
0334 A82C
A82C 0334
A82C 0021
A82C 003F
@ -96505,12 +96513,14 @@ A8C4 0062
11938 0041
11935 11930 0062
11938 0062
0334 1193D
1193D 0334
1193D 0021
1193D 003F
1193D 0061
1193D 0041
1193D 0062
0334 1193E
1193E 0334
1193E 0021
1193E 003F
@ -114560,10 +114570,15 @@ A9B2 0061
A9B2 0041
A9B2 0062
A9B4 0021
A9B5 0021
A9B4 003F
A9B5 003F
A9B4 0061
A9B4 0041
A9B5 0061
A9B5 0041
A9B4 0062
A9B5 0062
A9BC 0021
A9BC 003F
A9BC 0061
@ -114604,11 +114619,6 @@ A9BB 003F
A9BB 0061
A9BB 0041
A9BB 0062
A9B5 0021
A9B5 003F
A9B5 0061
A9B5 0041
A9B5 0062
0334 A9C0
A9C0 0334
A9C0 0021

View File

@ -1,6 +1,6 @@
# CollationTest_CLDR_SHIFTED_SHORT.txt
# Date: 2019-11-08, 22:14:19 GMT
# © 2019 Unicode®, Inc.
# Date: 2020-02-12, 17:50:42 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# UCA Version: 13.0.0
@ -5371,9 +5371,11 @@ FB1E 0334
0652 0334
0334 0653
0653 0334
0334 10EAC
10EAC 0334
0334 0654
0654 0334
0334 10EAB
10EAB 0334
0334 0655
0655 0334
@ -5537,6 +5539,7 @@ A9B3 0334
116B7 0334
0334 1183A
1183A 0334
0334 11943
11943 0334
0334 11D42
11D42 0334
@ -5658,7 +5661,9 @@ A92D 0334
302E 0334
0334 302F
302F 0334
0334 16FF0
16FF0 0334
0334 16FF1
16FF1 0334
0334 20D0
20D0 0334
@ -72709,6 +72714,7 @@ FF37 003F
0057 0323 0334
0057 0334 0323
1E88 0334
0334 1ABF
1ABF 0334
0334 1DF1
1DF1 0334
@ -72928,6 +72934,7 @@ A7C2 0062
1AC0 003F
AB69 0021
AB69 003F
0334 1AC0
1AC0 0334
028D 0061
028D 0041
@ -98376,6 +98383,7 @@ A806 0041
A806 0062
A82C 0021
A82C 003F
0334 A82C
A82C 0334
A82C 0061
A82C 0041
@ -103473,12 +103481,14 @@ A8C4 0062
11938 0062
1193D 0021
1193D 003F
0334 1193D
1193D 0334
1193D 0061
1193D 0041
1193D 0062
1193E 0021
1193E 003F
0334 1193E
1193E 0334
1193E 0061
1193E 0041
@ -122266,9 +122276,14 @@ A9B2 0041
A9B2 0062
A9B4 0021
A9B4 003F
A9B5 0021
A9B5 003F
A9B4 0061
A9B4 0041
A9B5 0061
A9B5 0041
A9B4 0062
A9B5 0062
A9BC 0021
A9BC 003F
A9BC 0061
@ -122309,11 +122324,6 @@ A9BB 003F
A9BB 0061
A9BB 0041
A9BB 0062
A9B5 0021
A9B5 003F
A9B5 0061
A9B5 0041
A9B5 0062
A9C0 0021
A9C0 003F
0334 A9C0

View File

@ -1,6 +1,6 @@
# GraphemeBreakTest-13.0.0.txt
# Date: 2019-11-20, 22:53:31 GMT
# © 2019 Unicode®, Inc.
# GraphemeBreakTest-cldr-13.0.0.txt
# Date: 2020-02-07, 21:43:46 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#

View File

@ -14,11 +14,12 @@
type = word; # one of grapheme | word | line | sentence
locale = en;
Han = [:Han:];
CR = [\p{Word_Break = CR}];
LF = [\p{Word_Break = LF}];
Newline = [\p{Word_Break = Newline}];
Extend = [\p{Word_Break = Extend}];
Extend = [\p{Word_Break = Extend}-Han];
ZWJ = [\p{Word_Break = ZWJ}];
Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
Format = [\p{Word_Break = Format}];
@ -30,14 +31,13 @@ Double_Quote = [\p{Word_Break = Double_Quote}];
MidNumLet = [\p{Word_Break = MidNumLet}];
MidLetter = [\p{Word_Break = MidLetter}];
MidNum = [\p{Word_Break = MidNum}];
Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079;
Numeric = [\p{Word_Break = Numeric}];
ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
WSegSpace = [\p{Word_Break = WSegSpace}];
Extended_Pict = [:ExtPict:];
#define dictionary, with the effect being that those characters don't appear in test data.
Han = [:Han:];
Hiragana = [:Hiragana:];
Control = [\p{Grapheme_Cluster_Break = Control}];

View File

@ -13,11 +13,12 @@
type = word; # one of grapheme | word | line | sentence
locale = en_US_POSIX;
Han = [:Han:];
CR = [\p{Word_Break = CR}];
LF = [\p{Word_Break = LF}];
Newline = [\p{Word_Break = Newline}];
Extend = [\p{Word_Break = Extend}];
Extend = [\p{Word_Break = Extend}-Han];
ZWJ = [\p{Word_Break = ZWJ}];
Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
Format = [\p{Word_Break = Format}];
@ -29,14 +30,13 @@ Double_Quote = [\p{Word_Break = Double_Quote}];
MidNumLet = [\p{Word_Break = MidNumLet} - [.]];
MidLetter = [\p{Word_Break = MidLetter} - [\:]];
MidNum = [\p{Word_Break = MidNum} [.]];
Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079;
Numeric = [\p{Word_Break = Numeric}];
ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
WSegSpace = [\p{Word_Break = WSegSpace}];
Extended_Pict = [:ExtPict:];
#define dictionary, with the effect being that those characters don't appear in test data.
Han = [:Han:];
Hiragana = [:Hiragana:];
Control = [\p{Grapheme_Cluster_Break = Control}];

View File

@ -317,25 +317,25 @@ public class SpoofChecker {
+ "\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55\\u0C56\\u0C60\\u0C61\\u0C66-"
+ "\\u0C6F\\u0C80\\u0C82\\u0C83\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8"
+ "\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CBC-\\u0CC4\\u0CC6-\\u0CC8\\u0CCA-\\u0CCD"
+ "\\u0CD5\\u0CD6\\u0CE0-\\u0CE3\\u0CE6-\\u0CEF\\u0CF1\\u0CF2\\u0D00\\u0D02-"
+ "\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D3A\\u0D3D-\\u0D43\\u0D46-\\u0D48\\u0D4A-"
+ "\\u0D4E\\u0D54-\\u0D57\\u0D60\\u0D61\\u0D66-\\u0D6F\\u0D7A-\\u0D7F\\u0D81-"
+ "\\u0D83\\u0D85-\\u0D8E\\u0D91-\\u0D96\\u0D9A-\\u0DA5\\u0DA7-\\u0DB1\\u0DB3-"
+ "\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0DCA\\u0DCF-\\u0DD4\\u0DD6\\u0DD8-\\u0DDE"
+ "\\u0DF2\\u0E01-\\u0E32\\u0E34-\\u0E3A\\u0E40-\\u0E4E\\u0E50-\\u0E59\\u0E81"
+ "\\u0E82\\u0E84\\u0E86-\\u0E8A\\u0E8C-\\u0EA3\\u0EA5\\u0EA7-\\u0EB2\\u0EB4-"
+ "\\u0EBD\\u0EC0-\\u0EC4\\u0EC6\\u0EC8-\\u0ECD\\u0ED0-\\u0ED9\\u0EDE\\u0EDF"
+ "\\u0F00\\u0F20-\\u0F29\\u0F35\\u0F37\\u0F3E-\\u0F42\\u0F44-\\u0F47\\u0F49-"
+ "\\u0F4C\\u0F4E-\\u0F51\\u0F53-\\u0F56\\u0F58-\\u0F5B\\u0F5D-\\u0F68\\u0F6A-"
+ "\\u0F6C\\u0F71\\u0F72\\u0F74\\u0F7A-\\u0F80\\u0F82-\\u0F84\\u0F86-\\u0F92"
+ "\\u0F94-\\u0F97\\u0F99-\\u0F9C\\u0F9E-\\u0FA1\\u0FA3-\\u0FA6\\u0FA8-\\u0FAB"
+ "\\u0FAD-\\u0FB8\\u0FBA-\\u0FBC\\u0FC6\\u1000-\\u1049\\u1050-\\u109D\\u10C7"
+ "\\u10CD\\u10D0-\\u10F0\\u10F7-\\u10FA\\u10FD-\\u10FF\\u1200-\\u1248\\u124A-"
+ "\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D"
+ "\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-"
+ "\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u135D-\\u135F\\u1380-"
+ "\\u138F\\u1780-\\u17A2\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-\\u17CA\\u17D2"
+ "\\u17D7\\u17DC\\u17E0-\\u17E9\\u1ABF\\u1AC0\\u1C90-\\u1CBA\\u1CBD-\\u1CBF"
+ "\\u0CD5\\u0CD6\\u0CE0-\\u0CE3\\u0CE6-\\u0CEF\\u0CF1\\u0CF2\\u0D00\\u0D02"
+ "\\u0D03\\u0D05-\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D3A\\u0D3D-\\u0D43\\u0D46-"
+ "\\u0D48\\u0D4A-\\u0D4E\\u0D54-\\u0D57\\u0D60\\u0D61\\u0D66-\\u0D6F\\u0D7A-"
+ "\\u0D7F\\u0D82\\u0D83\\u0D85-\\u0D8E\\u0D91-\\u0D96\\u0D9A-\\u0DA5\\u0DA7-"
+ "\\u0DB1\\u0DB3-\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0DCA\\u0DCF-\\u0DD4\\u0DD6"
+ "\\u0DD8-\\u0DDE\\u0DF2\\u0E01-\\u0E32\\u0E34-\\u0E3A\\u0E40-\\u0E4E\\u0E50-"
+ "\\u0E59\\u0E81\\u0E82\\u0E84\\u0E86-\\u0E8A\\u0E8C-\\u0EA3\\u0EA5\\u0EA7-"
+ "\\u0EB2\\u0EB4-\\u0EBD\\u0EC0-\\u0EC4\\u0EC6\\u0EC8-\\u0ECD\\u0ED0-\\u0ED9"
+ "\\u0EDE\\u0EDF\\u0F00\\u0F20-\\u0F29\\u0F35\\u0F37\\u0F3E-\\u0F42\\u0F44-"
+ "\\u0F47\\u0F49-\\u0F4C\\u0F4E-\\u0F51\\u0F53-\\u0F56\\u0F58-\\u0F5B\\u0F5D-"
+ "\\u0F68\\u0F6A-\\u0F6C\\u0F71\\u0F72\\u0F74\\u0F7A-\\u0F80\\u0F82-\\u0F84"
+ "\\u0F86-\\u0F92\\u0F94-\\u0F97\\u0F99-\\u0F9C\\u0F9E-\\u0FA1\\u0FA3-\\u0FA6"
+ "\\u0FA8-\\u0FAB\\u0FAD-\\u0FB8\\u0FBA-\\u0FBC\\u0FC6\\u1000-\\u1049\\u1050-"
+ "\\u109D\\u10C7\\u10CD\\u10D0-\\u10F0\\u10F7-\\u10FA\\u10FD-\\u10FF\\u1200-"
+ "\\u1248\\u124A-\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288"
+ "\\u128A-\\u128D\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-"
+ "\\u12C5\\u12C8-\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u135D-"
+ "\\u135F\\u1380-\\u138F\\u1780-\\u17A2\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-"
+ "\\u17CA\\u17D2\\u17D7\\u17DC\\u17E0-\\u17E9\\u1C90-\\u1CBA\\u1CBD-\\u1CBF"
+ "\\u1E00-\\u1E99\\u1E9E\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-"
+ "\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F70"
+ "\\u1F72\\u1F74\\u1F76\\u1F78\\u1F7A\\u1F7C\\u1F80-\\u1FB4\\u1FB6-\\u1FBA"
@ -346,14 +346,14 @@ public class SpoofChecker {
+ "\\u2DD8-\\u2DDE\\u3005-\\u3007\\u3041-\\u3096\\u3099\\u309A\\u309D\\u309E"
+ "\\u30A1-\\u30FA\\u30FC-\\u30FE\\u3105-\\u312D\\u312F\\u31A0-\\u31BF\\u3400-"
+ "\\u4DBF\\u4E00-\\u9FFC\\uA67F\\uA717-\\uA71F\\uA788\\uA78D\\uA792\\uA793"
+ "\\uA7AA\\uA7AE\\uA7B8\\uA7B9\\uA7C2-\\uA7CA\\uA7F5\\uA7F6\\uA9E7-\\uA9FE"
+ "\\uAA60-\\uAA76\\uAA7A-\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16"
+ "\\uAB20-\\uAB26\\uAB28-\\uAB2E\\uAB66-\\uAB68\\uAC00-\\uD7A3\\uFA0E\\uFA0F"
+ "\\uFA11\\uFA13\\uFA14\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00011301"
+ "\\U00011303\\U0001133B\\U0001133C\\U00016FF0\\U00016FF1\\U0001B150-"
+ "\\U0001B152\\U0001B164-\\U0001B167\\U00020000-\\U0002A6DD\\U0002A700-"
+ "\\U0002B734\\U0002B740-\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-"
+ "\\U0002EBE0\\U00030000-\\U0003134A]"
+ "\\uA7AA\\uA7AE\\uA7B8\\uA7B9\\uA7C2-\\uA7CA\\uA9E7-\\uA9FE\\uAA60-\\uAA76"
+ "\\uAA7A-\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16\\uAB20-\\uAB26"
+ "\\uAB28-\\uAB2E\\uAB66\\uAB67\\uAC00-\\uD7A3\\uFA0E\\uFA0F\\uFA11\\uFA13"
+ "\\uFA14\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00011301\\U00011303"
+ "\\U0001133B\\U0001133C\\U00016FF0\\U00016FF1\\U0001B150-\\U0001B152"
+ "\\U0001B164-\\U0001B167\\U00020000-\\U0002A6DD\\U0002A700-\\U0002B734"
+ "\\U0002B740-\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-\\U0002EBE0"
+ "\\U00030000-\\U0003134A]"
).freeze();
// Note: data from IdentifierStatus.txt & IdentifierType.txt
// There is tooling to generate this constant in the unicodetools project:

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8614c8ffed62a613452938a27f3b5398bc6fea93ccb5799c7a540d992b6d22c5
size 12999230
oid sha256:7e641819877ea4d794fa878ed139748a4d60c0ea164e1e7663727c9ae930192c
size 12999311

View File

@ -1,6 +1,6 @@
# CollationTest_CLDR_NON_IGNORABLE_SHORT.txt
# Date: 2019-11-08, 22:14:17 GMT
# © 2019 Unicode®, Inc.
# Date: 2020-02-12, 17:50:40 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# UCA Version: 13.0.0
@ -945,9 +945,11 @@ FB1E 0334
0652 0334
0334 0653
0653 0334
0334 10EAC
10EAC 0334
0334 0654
0654 0334
0334 10EAB
10EAB 0334
0334 0655
0655 0334
@ -1111,6 +1113,7 @@ A9B3 0334
116B7 0334
0334 1183A
1183A 0334
0334 11943
11943 0334
0334 11D42
11D42 0334
@ -1232,7 +1235,9 @@ A92D 0334
302E 0334
0334 302F
302F 0334
0334 16FF0
16FF0 0334
0334 16FF1
16FF1 0334
0334 20D0
20D0 0334
@ -66995,6 +67000,7 @@ A75E 0062
0057 0323 0334
0057 0334 0323
1E88 0334
0334 1ABF
1ABF 0334
0334 1DF1
1DF1 0334
@ -67303,6 +67309,7 @@ A7C2 0062
2C72 0041
2C73 0062
2C72 0062
0334 1AC0
1AC0 0334
028D 0021
1AC0 0021
@ -92154,6 +92161,7 @@ A806 003F
A806 0061
A806 0041
A806 0062
0334 A82C
A82C 0334
A82C 0021
A82C 003F
@ -96505,12 +96513,14 @@ A8C4 0062
11938 0041
11935 11930 0062
11938 0062
0334 1193D
1193D 0334
1193D 0021
1193D 003F
1193D 0061
1193D 0041
1193D 0062
0334 1193E
1193E 0334
1193E 0021
1193E 003F
@ -114560,10 +114570,15 @@ A9B2 0061
A9B2 0041
A9B2 0062
A9B4 0021
A9B5 0021
A9B4 003F
A9B5 003F
A9B4 0061
A9B4 0041
A9B5 0061
A9B5 0041
A9B4 0062
A9B5 0062
A9BC 0021
A9BC 003F
A9BC 0061
@ -114604,11 +114619,6 @@ A9BB 003F
A9BB 0061
A9BB 0041
A9BB 0062
A9B5 0021
A9B5 003F
A9B5 0061
A9B5 0041
A9B5 0062
0334 A9C0
A9C0 0334
A9C0 0021

View File

@ -1,6 +1,6 @@
# CollationTest_CLDR_SHIFTED_SHORT.txt
# Date: 2019-11-08, 22:14:19 GMT
# © 2019 Unicode®, Inc.
# Date: 2020-02-12, 17:50:42 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# UCA Version: 13.0.0
@ -5371,9 +5371,11 @@ FB1E 0334
0652 0334
0334 0653
0653 0334
0334 10EAC
10EAC 0334
0334 0654
0654 0334
0334 10EAB
10EAB 0334
0334 0655
0655 0334
@ -5537,6 +5539,7 @@ A9B3 0334
116B7 0334
0334 1183A
1183A 0334
0334 11943
11943 0334
0334 11D42
11D42 0334
@ -5658,7 +5661,9 @@ A92D 0334
302E 0334
0334 302F
302F 0334
0334 16FF0
16FF0 0334
0334 16FF1
16FF1 0334
0334 20D0
20D0 0334
@ -72709,6 +72714,7 @@ FF37 003F
0057 0323 0334
0057 0334 0323
1E88 0334
0334 1ABF
1ABF 0334
0334 1DF1
1DF1 0334
@ -72928,6 +72934,7 @@ A7C2 0062
1AC0 003F
AB69 0021
AB69 003F
0334 1AC0
1AC0 0334
028D 0061
028D 0041
@ -98376,6 +98383,7 @@ A806 0041
A806 0062
A82C 0021
A82C 003F
0334 A82C
A82C 0334
A82C 0061
A82C 0041
@ -103473,12 +103481,14 @@ A8C4 0062
11938 0062
1193D 0021
1193D 003F
0334 1193D
1193D 0334
1193D 0061
1193D 0041
1193D 0062
1193E 0021
1193E 003F
0334 1193E
1193E 0334
1193E 0061
1193E 0041
@ -122266,9 +122276,14 @@ A9B2 0041
A9B2 0062
A9B4 0021
A9B4 003F
A9B5 0021
A9B5 003F
A9B4 0061
A9B4 0041
A9B5 0061
A9B5 0041
A9B4 0062
A9B5 0062
A9BC 0021
A9BC 003F
A9BC 0061
@ -122309,11 +122324,6 @@ A9BB 003F
A9BB 0061
A9BB 0041
A9BB 0062
A9B5 0021
A9B5 003F
A9B5 0061
A9B5 0041
A9B5 0062
A9C0 0021
A9C0 003F
0334 A9C0

View File

@ -1,6 +1,6 @@
# confusables.txt
# Date: 2019-10-22, 13:05:29 GMT
# © 2019 Unicode®, Inc.
# Date: 2020-02-13, 01:38:49 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@ -1358,6 +1358,10 @@ FFED ; 25AA ; MA #* ( ■ → ▪ ) HALFWIDTH BLACK SQUARE → BLACK SMALL SQUAR
266A ; 1D158 1D165 1D16E ; MA #* ( ♪ → 𝅘𝅥𝅮 ) EIGHTH NOTE → MUSICAL SYMBOL NOTEHEAD BLACK, MUSICAL SYMBOL COMBINING STEM, MUSICAL SYMBOL COMBINING FLAG-1 #
24EA ; 1F10D ; MA #* ( ⓪ → 🄍 ) CIRCLED DIGIT ZERO → CIRCLED ZERO WITH SLASH #
21BA ; 1F10E ; MA #* ( ↺ → 🄎 ) ANTICLOCKWISE OPEN CIRCLE ARROW → CIRCLED ANTICLOCKWISE ARROW #
02D9 ; 0971 ; MA #* ( ˙ → ॱ ) DOT ABOVE → DEVANAGARI SIGN HIGH SPACING DOT #
0D4E ; 0971 ; MA # ( ൎ → ॱ ) MALAYALAM LETTER DOT REPH → DEVANAGARI SIGN HIGH SPACING DOT # →˙→
@ -1418,13 +1422,13 @@ A9C6 ; A9D0 ; MA #* ( ꧆ → ꧐ ) JAVANESE PADA WINDU → JAVANESE DIGIT ZERO
1D7E4 ; 0032 ; MA # ( 𝟤 → 2 ) MATHEMATICAL SANS-SERIF DIGIT TWO → DIGIT TWO #
1D7EE ; 0032 ; MA # ( 𝟮 → 2 ) MATHEMATICAL SANS-SERIF BOLD DIGIT TWO → DIGIT TWO #
1D7F8 ; 0032 ; MA # ( 𝟸 → 2 ) MATHEMATICAL MONOSPACE DIGIT TWO → DIGIT TWO #
1FBF2 ; 0032 ; MA # ( 🯲 → 2 ) SEGMENTED DIGIT TWO → DIGIT TWO #
A75A ; 0032 ; MA # ( → 2 ) LATIN CAPITAL LETTER R ROTUNDA → DIGIT TWO #
01A7 ; 0032 ; MA # ( Ƨ → 2 ) LATIN CAPITAL LETTER TONE TWO → DIGIT TWO #
03E8 ; 0032 ; MA # ( Ϩ → 2 ) COPTIC CAPITAL LETTER HORI → DIGIT TWO # →Ƨ→
A644 ; 0032 ; MA # ( → 2 ) CYRILLIC CAPITAL LETTER REVERSED DZE → DIGIT TWO # →Ƨ→
14BF ; 0032 ; MA # ( → 2 ) CANADIAN SYLLABICS SAYISI M → DIGIT TWO #
A6EF ; 0032 ; MA # ( → 2 ) BAMUM LETTER KOGHOM → DIGIT TWO # →Ƨ→
1FBF2 ; 0032 ; MA # ( 🯲 → 2 ) SEGMENTED DIGIT TWO → DIGIT TWO #
A9CF ; 0662 ; MA # ( ꧏ → ‎٢‎ ) JAVANESE PANGRANGKEP → ARABIC-INDIC DIGIT TWO #
06F2 ; 0662 ; MA # ( ۲ → ‎٢‎ ) EXTENDED ARABIC-INDIC DIGIT TWO → ARABIC-INDIC DIGIT TWO #
@ -1491,6 +1495,7 @@ A9CF ; 0662 ; MA # ( ꧏ → ‎٢‎ ) JAVANESE PANGRANGKEP → ARABIC-INDIC DI
1D7E5 ; 0033 ; MA # ( 𝟥 → 3 ) MATHEMATICAL SANS-SERIF DIGIT THREE → DIGIT THREE #
1D7EF ; 0033 ; MA # ( 𝟯 → 3 ) MATHEMATICAL SANS-SERIF BOLD DIGIT THREE → DIGIT THREE #
1D7F9 ; 0033 ; MA # ( 𝟹 → 3 ) MATHEMATICAL MONOSPACE DIGIT THREE → DIGIT THREE #
1FBF3 ; 0033 ; MA # ( 🯳 → 3 ) SEGMENTED DIGIT THREE → DIGIT THREE #
A7AB ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER REVERSED OPEN E → DIGIT THREE #
021C ; 0033 ; MA # ( Ȝ → 3 ) LATIN CAPITAL LETTER YOGH → DIGIT THREE # →Ʒ→
01B7 ; 0033 ; MA # ( Ʒ → 3 ) LATIN CAPITAL LETTER EZH → DIGIT THREE #
@ -1500,7 +1505,6 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
04E0 ; 0033 ; MA # ( Ӡ → 3 ) CYRILLIC CAPITAL LETTER ABKHASIAN DZE → DIGIT THREE # →Ʒ→
16F3B ; 0033 ; MA # ( 𖼻 → 3 ) MIAO LETTER ZA → DIGIT THREE # →Ʒ→
118CA ; 0033 ; MA # ( 𑣊 → 3 ) WARANG CITI SMALL LETTER ANG → DIGIT THREE #
1FBF3 ; 0033 ; MA # ( 🯳 → 3 ) SEGMENTED DIGIT THREE → DIGIT THREE #
06F3 ; 0663 ; MA # ( ۳ → ‎٣‎ ) EXTENDED ARABIC-INDIC DIGIT THREE → ARABIC-INDIC DIGIT THREE #
1E8C9 ; 0663 ; MA #* ( ‎𞣉‎ → ‎٣‎ ) MENDE KIKAKUI DIGIT THREE → ARABIC-INDIC DIGIT THREE #
@ -1530,9 +1534,9 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7E6 ; 0034 ; MA # ( 𝟦 → 4 ) MATHEMATICAL SANS-SERIF DIGIT FOUR → DIGIT FOUR #
1D7F0 ; 0034 ; MA # ( 𝟰 → 4 ) MATHEMATICAL SANS-SERIF BOLD DIGIT FOUR → DIGIT FOUR #
1D7FA ; 0034 ; MA # ( 𝟺 → 4 ) MATHEMATICAL MONOSPACE DIGIT FOUR → DIGIT FOUR #
1FBF4 ; 0034 ; MA # ( 🯴 → 4 ) SEGMENTED DIGIT FOUR → DIGIT FOUR #
13CE ; 0034 ; MA # ( → 4 ) CHEROKEE LETTER SE → DIGIT FOUR #
118AF ; 0034 ; MA # ( 𑢯 → 4 ) WARANG CITI CAPITAL LETTER UC → DIGIT FOUR #
1FBF4 ; 0034 ; MA # ( 🯴 → 4 ) SEGMENTED DIGIT FOUR → DIGIT FOUR #
06F4 ; 0664 ; MA # ( ۴ → ‎٤‎ ) EXTENDED ARABIC-INDIC DIGIT FOUR → ARABIC-INDIC DIGIT FOUR #
@ -1557,9 +1561,9 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7E7 ; 0035 ; MA # ( 𝟧 → 5 ) MATHEMATICAL SANS-SERIF DIGIT FIVE → DIGIT FIVE #
1D7F1 ; 0035 ; MA # ( 𝟱 → 5 ) MATHEMATICAL SANS-SERIF BOLD DIGIT FIVE → DIGIT FIVE #
1D7FB ; 0035 ; MA # ( 𝟻 → 5 ) MATHEMATICAL MONOSPACE DIGIT FIVE → DIGIT FIVE #
1FBF5 ; 0035 ; MA # ( 🯵 → 5 ) SEGMENTED DIGIT FIVE → DIGIT FIVE #
01BC ; 0035 ; MA # ( Ƽ → 5 ) LATIN CAPITAL LETTER TONE FIVE → DIGIT FIVE #
118BB ; 0035 ; MA # ( 𑢻 → 5 ) WARANG CITI CAPITAL LETTER HORR → DIGIT FIVE #
1FBF5 ; 0035 ; MA # ( 🯵 → 5 ) SEGMENTED DIGIT FIVE → DIGIT FIVE #
2464 ; 2784 ; MA #* ( ⑤ → ➄ ) CIRCLED DIGIT FIVE → DINGBAT CIRCLED SANS-SERIF DIGIT FIVE #
@ -1578,11 +1582,11 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7E8 ; 0036 ; MA # ( 𝟨 → 6 ) MATHEMATICAL SANS-SERIF DIGIT SIX → DIGIT SIX #
1D7F2 ; 0036 ; MA # ( 𝟲 → 6 ) MATHEMATICAL SANS-SERIF BOLD DIGIT SIX → DIGIT SIX #
1D7FC ; 0036 ; MA # ( 𝟼 → 6 ) MATHEMATICAL MONOSPACE DIGIT SIX → DIGIT SIX #
1FBF6 ; 0036 ; MA # ( 🯶 → 6 ) SEGMENTED DIGIT SIX → DIGIT SIX #
2CD2 ; 0036 ; MA # ( → 6 ) COPTIC CAPITAL LETTER OLD COPTIC HEI → DIGIT SIX #
0431 ; 0036 ; MA # ( б → 6 ) CYRILLIC SMALL LETTER BE → DIGIT SIX #
13EE ; 0036 ; MA # ( → 6 ) CHEROKEE LETTER WV → DIGIT SIX #
118D5 ; 0036 ; MA # ( 𑣕 → 6 ) WARANG CITI SMALL LETTER AT → DIGIT SIX #
1FBF6 ; 0036 ; MA # ( 🯶 → 6 ) SEGMENTED DIGIT SIX → DIGIT SIX #
06F6 ; 0666 ; MA # ( ۶ → ‎٦‎ ) EXTENDED ARABIC-INDIC DIGIT SIX → ARABIC-INDIC DIGIT SIX #
@ -1606,9 +1610,9 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7E9 ; 0037 ; MA # ( 𝟩 → 7 ) MATHEMATICAL SANS-SERIF DIGIT SEVEN → DIGIT SEVEN #
1D7F3 ; 0037 ; MA # ( 𝟳 → 7 ) MATHEMATICAL SANS-SERIF BOLD DIGIT SEVEN → DIGIT SEVEN #
1D7FD ; 0037 ; MA # ( 𝟽 → 7 ) MATHEMATICAL MONOSPACE DIGIT SEVEN → DIGIT SEVEN #
1FBF7 ; 0037 ; MA # ( 🯷 → 7 ) SEGMENTED DIGIT SEVEN → DIGIT SEVEN #
104D2 ; 0037 ; MA # ( 𐓒 → 7 ) OSAGE CAPITAL LETTER ZA → DIGIT SEVEN #
118C6 ; 0037 ; MA # ( 𑣆 → 7 ) WARANG CITI SMALL LETTER II → DIGIT SEVEN #
1FBF7 ; 0037 ; MA # ( 🯷 → 7 ) SEGMENTED DIGIT SEVEN → DIGIT SEVEN #
2466 ; 2786 ; MA #* ( ⑦ → ➆ ) CIRCLED DIGIT SEVEN → DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN #
@ -1631,10 +1635,10 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7EA ; 0038 ; MA # ( 𝟪 → 8 ) MATHEMATICAL SANS-SERIF DIGIT EIGHT → DIGIT EIGHT #
1D7F4 ; 0038 ; MA # ( 𝟴 → 8 ) MATHEMATICAL SANS-SERIF BOLD DIGIT EIGHT → DIGIT EIGHT #
1D7FE ; 0038 ; MA # ( 𝟾 → 8 ) MATHEMATICAL MONOSPACE DIGIT EIGHT → DIGIT EIGHT #
1FBF8 ; 0038 ; MA # ( 🯸 → 8 ) SEGMENTED DIGIT EIGHT → DIGIT EIGHT #
0223 ; 0038 ; MA # ( ȣ → 8 ) LATIN SMALL LETTER OU → DIGIT EIGHT #
0222 ; 0038 ; MA # ( Ȣ → 8 ) LATIN CAPITAL LETTER OU → DIGIT EIGHT #
1031A ; 0038 ; MA # ( 𐌚 → 8 ) OLD ITALIC LETTER EF → DIGIT EIGHT #
1FBF8 ; 0038 ; MA # ( 🯸 → 8 ) SEGMENTED DIGIT EIGHT → DIGIT EIGHT #
0AEE ; 096E ; MA # ( ૮ → ८ ) GUJARATI DIGIT EIGHT → DEVANAGARI DIGIT EIGHT #
@ -1659,12 +1663,12 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7EB ; 0039 ; MA # ( 𝟫 → 9 ) MATHEMATICAL SANS-SERIF DIGIT NINE → DIGIT NINE #
1D7F5 ; 0039 ; MA # ( 𝟵 → 9 ) MATHEMATICAL SANS-SERIF BOLD DIGIT NINE → DIGIT NINE #
1D7FF ; 0039 ; MA # ( 𝟿 → 9 ) MATHEMATICAL MONOSPACE DIGIT NINE → DIGIT NINE #
1FBF9 ; 0039 ; MA # ( 🯹 → 9 ) SEGMENTED DIGIT NINE → DIGIT NINE #
A76E ; 0039 ; MA # ( → 9 ) LATIN CAPITAL LETTER CON → DIGIT NINE #
2CCA ; 0039 ; MA # ( → 9 ) COPTIC CAPITAL LETTER DIALECT-P HORI → DIGIT NINE #
118CC ; 0039 ; MA # ( 𑣌 → 9 ) WARANG CITI SMALL LETTER KO → DIGIT NINE #
118AC ; 0039 ; MA # ( 𑢬 → 9 ) WARANG CITI CAPITAL LETTER KO → DIGIT NINE #
118D6 ; 0039 ; MA # ( 𑣖 → 9 ) WARANG CITI SMALL LETTER AM → DIGIT NINE #
1FBF9 ; 0039 ; MA # ( 🯹 → 9 ) SEGMENTED DIGIT NINE → DIGIT NINE #
0967 ; 0669 ; MA # ( १ → ‎٩‎ ) DEVANAGARI DIGIT ONE → ARABIC-INDIC DIGIT NINE #
118E4 ; 0669 ; MA # ( 𑣤 → ‎٩‎ ) WARANG CITI DIGIT FOUR → ARABIC-INDIC DIGIT NINE #
@ -2544,6 +2548,7 @@ FFE8 ; 006C ; MA #* ( → l ) HALFWIDTH FORMS LIGHT VERTICAL → LATIN SMALL
1D7E3 ; 006C ; MA # ( 𝟣 → l ) MATHEMATICAL SANS-SERIF DIGIT ONE → LATIN SMALL LETTER L # →1→
1D7ED ; 006C ; MA # ( 𝟭 → l ) MATHEMATICAL SANS-SERIF BOLD DIGIT ONE → LATIN SMALL LETTER L # →1→
1D7F7 ; 006C ; MA # ( 𝟷 → l ) MATHEMATICAL MONOSPACE DIGIT ONE → LATIN SMALL LETTER L # →1→
1FBF1 ; 006C ; MA # ( 🯱 → l ) SEGMENTED DIGIT ONE → LATIN SMALL LETTER L # →1→
0049 ; 006C ; MA # ( I → l ) LATIN CAPITAL LETTER I → LATIN SMALL LETTER L #
FF29 ; 006C ; MA # ( → l ) FULLWIDTH LATIN CAPITAL LETTER I → LATIN SMALL LETTER L # →Ӏ→
2160 ; 006C ; MA # ( → l ) ROMAN NUMERAL ONE → LATIN SMALL LETTER L # →Ӏ→
@ -2601,7 +2606,6 @@ A4F2 ; 006C ; MA # ( → l ) LISU LETTER I → LATIN SMALL LETTER L # →I
16F28 ; 006C ; MA # ( 𖼨 → l ) MIAO LETTER GHA → LATIN SMALL LETTER L # →I→
1028A ; 006C ; MA # ( 𐊊 → l ) LYCIAN LETTER J → LATIN SMALL LETTER L # →I→
10309 ; 006C ; MA # ( 𐌉 → l ) OLD ITALIC LETTER I → LATIN SMALL LETTER L # →I→
1FBF1 ; 006C ; MA # ( 🯱 → l ) SEGMENTED DIGIT ONE → LATIN SMALL LETTER L # →1→
1D22A ; 004C ; MA #* ( 𝈪 → L ) GREEK INSTRUMENTAL NOTATION SYMBOL-23 → LATIN CAPITAL LETTER L #
216C ; 004C ; MA # ( → L ) ROMAN NUMERAL FIFTY → LATIN CAPITAL LETTER L #
@ -2972,6 +2976,7 @@ FBA6 ; 006F ; MA # ( → o ) ARABIC LETTER HEH GOAL ISOLATED FORM →
1D7E2 ; 004F ; MA # ( 𝟢 → O ) MATHEMATICAL SANS-SERIF DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
1D7EC ; 004F ; MA # ( 𝟬 → O ) MATHEMATICAL SANS-SERIF BOLD DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
1D7F6 ; 004F ; MA # ( 𝟶 → O ) MATHEMATICAL MONOSPACE DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
1FBF0 ; 004F ; MA # ( 🯰 → O ) SEGMENTED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
FF2F ; 004F ; MA # ( → O ) FULLWIDTH LATIN CAPITAL LETTER O → LATIN CAPITAL LETTER O # →О→
1D40E ; 004F ; MA # ( 𝐎 → O ) MATHEMATICAL BOLD CAPITAL O → LATIN CAPITAL LETTER O #
1D442 ; 004F ; MA # ( 𝑂 → O ) MATHEMATICAL ITALIC CAPITAL O → LATIN CAPITAL LETTER O #
@ -3005,7 +3010,6 @@ A4F3 ; 004F ; MA # ( → O ) LISU LETTER O → LATIN CAPITAL LETTER O #
102AB ; 004F ; MA # ( 𐊫 → O ) CARIAN LETTER O → LATIN CAPITAL LETTER O #
10404 ; 004F ; MA # ( 𐐄 → O ) DESERET CAPITAL LETTER LONG O → LATIN CAPITAL LETTER O #
10516 ; 004F ; MA # ( 𐔖 → O ) ELBASAN LETTER O → LATIN CAPITAL LETTER O #
1FBF0 ; 004F ; MA # ( 🯰 → O ) SEGMENTED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
2070 ; 00BA ; MA #* ( ⁰ → º ) SUPERSCRIPT ZERO → MASCULINE ORDINAL INDICATOR #
1D52 ; 00BA ; MA # ( ᵒ → º ) MODIFIER LETTER SMALL O → MASCULINE ORDINAL INDICATOR # →⁰→
@ -8024,8 +8028,6 @@ FA92 ; 6717 ; MA # ( 朗 → 朗 ) CJK COMPATIBILITY IDEOGRAPH-FA92 → CJK UNIF
FA93 ; 671B ; MA # ( 望 → 望 ) CJK COMPATIBILITY IDEOGRAPH-FA93 → CJK UNIFIED IDEOGRAPH-671B #
2F8D9 ; 671B ; MA # ( 望 → 望 ) CJK COMPATIBILITY IDEOGRAPH-2F8D9 → CJK UNIFIED IDEOGRAPH-671B #
2F8DA ; 6721 ; MA # ( 朡 → 朡 ) CJK COMPATIBILITY IDEOGRAPH-2F8DA → CJK UNIFIED IDEOGRAPH-6721 #
5E50 ; 3B3A ; MA # ( 幐 → 㬺 ) CJK UNIFIED IDEOGRAPH-5E50 → CJK UNIFIED IDEOGRAPH-3B3A #
4420 ; 3B3B ; MA # ( 䐠 → 㬻 ) CJK UNIFIED IDEOGRAPH-4420 → CJK UNIFIED IDEOGRAPH-3B3B #
@ -8831,6 +8833,8 @@ F953 ; 808B ; MA # ( 肋 → 肋 ) CJK COMPATIBILITY IDEOGRAPH-F953 → CJK UNIF
2F984 ; 440B ; MA # ( 䐋 → 䐋 ) CJK COMPATIBILITY IDEOGRAPH-2F984 → CJK UNIFIED IDEOGRAPH-440B #
2F8DA ; 6721 ; MA # ( 朡 → 朡 ) CJK COMPATIBILITY IDEOGRAPH-2F8DA → CJK UNIFIED IDEOGRAPH-6721 #
2F987 ; 267A7 ; MA # ( 𦞧 → 𦞧 ) CJK COMPATIBILITY IDEOGRAPH-2F987 → CJK UNIFIED IDEOGRAPH-267A7 #
2F988 ; 267B5 ; MA # ( 𦞵 → 𦞵 ) CJK COMPATIBILITY IDEOGRAPH-2F988 → CJK UNIFIED IDEOGRAPH-267B5 #
@ -9630,9 +9634,5 @@ FACE ; 9F9C ; MA # ( 龜 → 龜 ) CJK COMPATIBILITY IDEOGRAPH-FACE → CJK UNIF
2FD5 ; 9FA0 ; MA #* ( ⿕ → 龠 ) KANGXI RADICAL FLUTE → CJK UNIFIED IDEOGRAPH-9FA0 #
24EA ; 1F10D ; MA #* ( ⓪ → 🄍 ) CIRCLED DIGIT ZERO → CIRCLED ZERO WITH SLASH #
21BA ; 1F10E ; MA #* ( ↺ → 🄎 ) ANTICLOCKWISE OPEN CIRCLE ARROW → CIRCLED ANTICLOCKWISE ARROW #
# total: 6311

View File

@ -357,10 +357,15 @@ public class RBBITestMonkey extends TestFmwk {
fMidNumLetSet = new UnicodeSet("[\\p{Word_Break = MidNumLet}]");
fMidLetterSet = new UnicodeSet("[\\p{Word_Break = MidLetter}]");
fMidNumSet = new UnicodeSet("[\\p{Word_Break = MidNum}]");
fNumericSet = new UnicodeSet("[[\\p{Word_Break = Numeric}][\\uFF10-\\uff19]]");
fNumericSet = new UnicodeSet("[\\p{Word_Break = Numeric}]");
fFormatSet = new UnicodeSet("[\\p{Word_Break = Format}]");
fExtendNumLetSet = new UnicodeSet("[\\p{Word_Break = ExtendNumLet}]");
fExtendSet = new UnicodeSet("[\\p{Word_Break = Extend}]");
// There are some sc=Hani characters with WB=Extend.
// The break rules need to pick one or the other because
// Extend overlapping with something else is messy.
// For Unicode 13, we chose to keep U+16FF0 & U+16FF1
// in $Han (for $dictionary) and out of $Extend.
fExtendSet = new UnicodeSet("[\\p{Word_Break = Extend}-[:Hani:]]");
fWSegSpaceSet = new UnicodeSet("[\\p{Word_Break = WSegSpace}]");
fZWJSet = new UnicodeSet("[\\p{Word_Break = ZWJ}]");
fExtendedPictSet = new UnicodeSet("[:Extended_Pictographic:]");

View File

@ -14,11 +14,12 @@
type = word; # one of grapheme | word | line | sentence
locale = en;
Han = [:Han:];
CR = [\p{Word_Break = CR}];
LF = [\p{Word_Break = LF}];
Newline = [\p{Word_Break = Newline}];
Extend = [\p{Word_Break = Extend}];
Extend = [\p{Word_Break = Extend}-Han];
ZWJ = [\p{Word_Break = ZWJ}];
Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
Format = [\p{Word_Break = Format}];
@ -30,14 +31,13 @@ Double_Quote = [\p{Word_Break = Double_Quote}];
MidNumLet = [\p{Word_Break = MidNumLet}];
MidLetter = [\p{Word_Break = MidLetter}];
MidNum = [\p{Word_Break = MidNum}];
Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079;
Numeric = [\p{Word_Break = Numeric}];
ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
WSegSpace = [\p{Word_Break = WSegSpace}];
Extended_Pict = [:ExtPict:];
#define dictionary, with the effect being that those characters don't appear in test data.
Han = [:Han:];
Hiragana = [:Hiragana:];
Control = [\p{Grapheme_Cluster_Break = Control}];

View File

@ -13,11 +13,12 @@
type = word; # one of grapheme | word | line | sentence
locale = en_US_POSIX;
Han = [:Han:];
CR = [\p{Word_Break = CR}];
LF = [\p{Word_Break = LF}];
Newline = [\p{Word_Break = Newline}];
Extend = [\p{Word_Break = Extend}];
Extend = [\p{Word_Break = Extend}-Han];
ZWJ = [\p{Word_Break = ZWJ}];
Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
Format = [\p{Word_Break = Format}];
@ -29,14 +30,13 @@ Double_Quote = [\p{Word_Break = Double_Quote}];
MidNumLet = [\p{Word_Break = MidNumLet} - [.]];
MidLetter = [\p{Word_Break = MidLetter} - [\:]];
MidNum = [\p{Word_Break = MidNum} [.]];
Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079;
Numeric = [\p{Word_Break = Numeric}];
ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
WSegSpace = [\p{Word_Break = WSegSpace}];
Extended_Pict = [:ExtPict:];
#define dictionary, with the effect being that those characters don't appear in test data.
Han = [:Han:];
Hiragana = [:Hiragana:];
Control = [\p{Grapheme_Cluster_Break = Control}];