ICU-20893 Unicode 13 data 2020feb19
This commit is contained in:
parent
d95621c57f
commit
af9ef2650b
@ -323,7 +323,7 @@ static const uint16_t ubidi_props_trieIndex[12536]={
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
1,1,0xb1,0xb1,0xb1,0xb1,1,0xb1,0xb1,0xb1,0xb1,0xb1,0x81,0x41,0x41,0x41,
|
||||
0x41,0x41,0x81,0x81,0x41,0x81,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,
|
||||
0x81,0x41,1,1,1,0xb1,0xb1,0xb1,1,1,1,1,0x4d,0xd,0x4d,0x4d,
|
||||
0x81,0x41,0x81,0x81,0x81,0xb1,0xb1,0xb1,1,1,1,1,0x4d,0xd,0x4d,0x4d,
|
||||
0x4d,0x4d,0xd,0x8d,0x4d,0x8d,0x8d,0xd,0xd,0xd,0xd,0xd,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,0xb1,0xb1,5,0xb1,
|
||||
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
|
||||
|
@ -304,7 +304,7 @@ static const uint16_t ucase_props_trieIndex[12356]={
|
||||
0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
|
||||
0x92,0xff91,0x92,0xff91,0,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,
|
||||
0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0,
|
||||
0,4,0,0,0,0,0,0,1,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,
|
||||
0,4,0,0,0,0,0,4,1,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,
|
||||
0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,
|
||||
0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0x1719,1,0,0,0,
|
||||
0,0,0,0,0,0x64,0x44,0x44,0x44,0x44,0x64,0x44,0x44,0x44,0x64,0x64,
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -27,10 +27,12 @@
|
||||
# Character Class Definitions.
|
||||
#
|
||||
|
||||
$Han = [:Han:];
|
||||
|
||||
$CR = [\p{Word_Break = CR}];
|
||||
$LF = [\p{Word_Break = LF}];
|
||||
$Newline = [\p{Word_Break = Newline} ];
|
||||
$Extend = [\p{Word_Break = Extend}];
|
||||
$Newline = [\p{Word_Break = Newline}];
|
||||
$Extend = [\p{Word_Break = Extend}-$Han];
|
||||
$ZWJ = [\p{Word_Break = ZWJ}];
|
||||
$Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
|
||||
$Format = [\p{Word_Break = Format}];
|
||||
@ -42,12 +44,11 @@ $Double_Quote = [\p{Word_Break = Double_Quote}];
|
||||
$MidNumLet = [\p{Word_Break = MidNumLet}];
|
||||
$MidLetter = [\p{Word_Break = MidLetter}];
|
||||
$MidNum = [\p{Word_Break = MidNum}];
|
||||
$Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079
|
||||
$Numeric = [\p{Word_Break = Numeric}];
|
||||
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
|
||||
$WSegSpace = [\p{Word_Break = WSegSpace}];
|
||||
$Extended_Pict = [\p{Extended_Pictographic}];
|
||||
|
||||
$Han = [:Han:];
|
||||
$Hiragana = [:Hiragana:];
|
||||
$Ideographic = [\p{Ideographic}];
|
||||
|
||||
|
@ -27,10 +27,12 @@
|
||||
# Character Class Definitions.
|
||||
#
|
||||
|
||||
$Han = [:Han:];
|
||||
|
||||
$CR = [\p{Word_Break = CR}];
|
||||
$LF = [\p{Word_Break = LF}];
|
||||
$Newline = [\p{Word_Break = Newline} ];
|
||||
$Extend = [\p{Word_Break = Extend}];
|
||||
$Newline = [\p{Word_Break = Newline}];
|
||||
$Extend = [\p{Word_Break = Extend}-$Han];
|
||||
$ZWJ = [\p{Word_Break = ZWJ}];
|
||||
$Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
|
||||
$Format = [\p{Word_Break = Format}];
|
||||
@ -42,12 +44,11 @@ $Double_Quote = [\p{Word_Break = Double_Quote}];
|
||||
$MidNumLet = [\p{Word_Break = MidNumLet} - [.]];
|
||||
$MidLetter = [\p{Word_Break = MidLetter} - [\:]];
|
||||
$MidNum = [\p{Word_Break = MidNum} [.]];
|
||||
$Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079
|
||||
$Numeric = [\p{Word_Break = Numeric}];
|
||||
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
|
||||
$WSegSpace = [\p{Word_Break = WSegSpace}];
|
||||
$Extended_Pict = [\p{Extended_Pictographic}];
|
||||
|
||||
$Han = [:Han:];
|
||||
$Hiragana = [:Hiragana:];
|
||||
$Ideographic = [\p{Ideographic}];
|
||||
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,6 +1,6 @@
|
||||
# DerivedCoreProperties-13.0.0.txt
|
||||
# Date: 2019-10-21, 14:30:30 GMT
|
||||
# © 2019 Unicode®, Inc.
|
||||
# Date: 2020-01-22, 00:07:19 GMT
|
||||
# © 2020 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
@ -2873,6 +2873,7 @@ FF41..FF5A ; Cased
|
||||
0483..0487 ; Case_Ignorable
|
||||
0488..0489 ; Case_Ignorable
|
||||
0559 ; Case_Ignorable
|
||||
055F ; Case_Ignorable
|
||||
0591..05BD ; Case_Ignorable
|
||||
05BF ; Case_Ignorable
|
||||
05C1..05C2 ; Case_Ignorable
|
||||
@ -3303,7 +3304,7 @@ E0001 ; Case_Ignorable
|
||||
E0020..E007F ; Case_Ignorable
|
||||
E0100..E01EF ; Case_Ignorable
|
||||
|
||||
# Total code points: 2412
|
||||
# Total code points: 2413
|
||||
|
||||
# ================================================
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
||||
# UCA_Rules_SHORT.txt
|
||||
# Date: 2019-11-08, 22:14:11 GMT
|
||||
# © 2019 Unicode®, Inc.
|
||||
# Date: 2020-02-12, 17:50:33 GMT
|
||||
# © 2020 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# UCA Version: 13.0.0
|
||||
@ -19518,6 +19518,7 @@
|
||||
< ꦱ
|
||||
< ꦲ
|
||||
< ꦴ
|
||||
<<< ꦵ
|
||||
< ꦼ
|
||||
< ꦶ
|
||||
< ꦷ
|
||||
@ -19526,7 +19527,6 @@
|
||||
< ꦽ
|
||||
< ꦺ
|
||||
< ꦻ
|
||||
< ꦵ
|
||||
< ꧀
|
||||
< ᢀ
|
||||
< ᢁ
|
||||
|
@ -62,7 +62,7 @@ https://unicode-org.atlassian.net/browse/ICU-20893
|
||||
|
||||
* Command-line environment setup
|
||||
|
||||
UNICODE_DATA=~/unidata/uni13/20191106
|
||||
UNICODE_DATA=~/unidata/uni13/20200212
|
||||
CLDR_SRC=~/cldr/uni/src
|
||||
ICU_ROOT=~/icu/uni
|
||||
ICU_SRC=$ICU_ROOT/src
|
||||
@ -89,9 +89,12 @@ export LD_LIBRARY_PATH=$ICU_ROOT/dbg/icu4c/lib
|
||||
- download Unicode files into $UNICODE_DATA
|
||||
+ subfolders: emoji, idna, security, ucd, uca
|
||||
+ inside ucd: extract Unihan.zip to "here" (.../ucd/Unihan/*.txt), delete Unihan.zip
|
||||
+ split Unihan into single-property files
|
||||
~/unitools/trunk/src$ py/splitunihan.py $UNICODE_DATA/ucd/Unihan
|
||||
+ get GraphemeBreakTest-cldr.txt from $CLDR_SRC/common/properties/segments/GraphemeBreakTest.txt
|
||||
or from the ucd/cldr/ output folder of the Unicode Tools:
|
||||
Since Unicode 12/CLDR 35/ICU 64 CLDR uses modified break rules.
|
||||
cp $CLDR_SRC/common/properties/segments/GraphemeBreakTest.txt icu4c/source/test/testdata
|
||||
|
||||
* for manual diffs and for Unicode Tools input data updates:
|
||||
remove version suffixes from the file names
|
||||
@ -155,7 +158,7 @@ export LD_LIBRARY_PATH=$ICU_ROOT/dbg/icu4c/lib
|
||||
$ICU_ROOT/dbg/icu4c$ echo;echo; date; make -j7 install &> out.txt ; tail -n 30 out.txt ; date
|
||||
|
||||
* update spoof checker UnicodeSet initializers:
|
||||
inclusionPat & recommendedPat in uspoof.cpp
|
||||
inclusionPat & recommendedPat in i18n/uspoof.cpp
|
||||
INCLUSION & RECOMMENDED in SpoofChecker.java
|
||||
- make sure that the Unicode Tools tree contains the latest security data files
|
||||
- go to Unicode Tools org.unicode.text.tools.RecommendedSetGenerator
|
||||
|
@ -1,6 +1,6 @@
|
||||
# confusables.txt
|
||||
# Date: 2019-10-22, 13:05:29 GMT
|
||||
# © 2019 Unicode®, Inc.
|
||||
# Date: 2020-02-13, 01:38:49 GMT
|
||||
# © 2020 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
@ -1358,6 +1358,10 @@ FFED ; 25AA ; MA #* ( ■ → ▪ ) HALFWIDTH BLACK SQUARE → BLACK SMALL SQUAR
|
||||
|
||||
266A ; 1D158 1D165 1D16E ; MA #* ( ♪ → 𝅘𝅥𝅮 ) EIGHTH NOTE → MUSICAL SYMBOL NOTEHEAD BLACK, MUSICAL SYMBOL COMBINING STEM, MUSICAL SYMBOL COMBINING FLAG-1 #
|
||||
|
||||
24EA ; 1F10D ; MA #* ( ⓪ → 🄍 ) CIRCLED DIGIT ZERO → CIRCLED ZERO WITH SLASH #
|
||||
|
||||
21BA ; 1F10E ; MA #* ( ↺ → 🄎 ) ANTICLOCKWISE OPEN CIRCLE ARROW → CIRCLED ANTICLOCKWISE ARROW #
|
||||
|
||||
02D9 ; 0971 ; MA #* ( ˙ → ॱ ) DOT ABOVE → DEVANAGARI SIGN HIGH SPACING DOT #
|
||||
0D4E ; 0971 ; MA # ( ൎ → ॱ ) MALAYALAM LETTER DOT REPH → DEVANAGARI SIGN HIGH SPACING DOT # →˙→
|
||||
|
||||
@ -1418,13 +1422,13 @@ A9C6 ; A9D0 ; MA #* ( ꧆ → ꧐ ) JAVANESE PADA WINDU → JAVANESE DIGIT ZERO
|
||||
1D7E4 ; 0032 ; MA # ( 𝟤 → 2 ) MATHEMATICAL SANS-SERIF DIGIT TWO → DIGIT TWO #
|
||||
1D7EE ; 0032 ; MA # ( 𝟮 → 2 ) MATHEMATICAL SANS-SERIF BOLD DIGIT TWO → DIGIT TWO #
|
||||
1D7F8 ; 0032 ; MA # ( 𝟸 → 2 ) MATHEMATICAL MONOSPACE DIGIT TWO → DIGIT TWO #
|
||||
1FBF2 ; 0032 ; MA # ( 🯲 → 2 ) SEGMENTED DIGIT TWO → DIGIT TWO #
|
||||
A75A ; 0032 ; MA # ( Ꝛ → 2 ) LATIN CAPITAL LETTER R ROTUNDA → DIGIT TWO #
|
||||
01A7 ; 0032 ; MA # ( Ƨ → 2 ) LATIN CAPITAL LETTER TONE TWO → DIGIT TWO #
|
||||
03E8 ; 0032 ; MA # ( Ϩ → 2 ) COPTIC CAPITAL LETTER HORI → DIGIT TWO # →Ƨ→
|
||||
A644 ; 0032 ; MA # ( Ꙅ → 2 ) CYRILLIC CAPITAL LETTER REVERSED DZE → DIGIT TWO # →Ƨ→
|
||||
14BF ; 0032 ; MA # ( ᒿ → 2 ) CANADIAN SYLLABICS SAYISI M → DIGIT TWO #
|
||||
A6EF ; 0032 ; MA # ( ꛯ → 2 ) BAMUM LETTER KOGHOM → DIGIT TWO # →Ƨ→
|
||||
1FBF2 ; 0032 ; MA # ( 🯲 → 2 ) SEGMENTED DIGIT TWO → DIGIT TWO #
|
||||
|
||||
A9CF ; 0662 ; MA # ( ꧏ → ٢ ) JAVANESE PANGRANGKEP → ARABIC-INDIC DIGIT TWO #
|
||||
06F2 ; 0662 ; MA # ( ۲ → ٢ ) EXTENDED ARABIC-INDIC DIGIT TWO → ARABIC-INDIC DIGIT TWO #
|
||||
@ -1491,6 +1495,7 @@ A9CF ; 0662 ; MA # ( ꧏ → ٢ ) JAVANESE PANGRANGKEP → ARABIC-INDIC DI
|
||||
1D7E5 ; 0033 ; MA # ( 𝟥 → 3 ) MATHEMATICAL SANS-SERIF DIGIT THREE → DIGIT THREE #
|
||||
1D7EF ; 0033 ; MA # ( 𝟯 → 3 ) MATHEMATICAL SANS-SERIF BOLD DIGIT THREE → DIGIT THREE #
|
||||
1D7F9 ; 0033 ; MA # ( 𝟹 → 3 ) MATHEMATICAL MONOSPACE DIGIT THREE → DIGIT THREE #
|
||||
1FBF3 ; 0033 ; MA # ( 🯳 → 3 ) SEGMENTED DIGIT THREE → DIGIT THREE #
|
||||
A7AB ; 0033 ; MA # ( Ɜ → 3 ) LATIN CAPITAL LETTER REVERSED OPEN E → DIGIT THREE #
|
||||
021C ; 0033 ; MA # ( Ȝ → 3 ) LATIN CAPITAL LETTER YOGH → DIGIT THREE # →Ʒ→
|
||||
01B7 ; 0033 ; MA # ( Ʒ → 3 ) LATIN CAPITAL LETTER EZH → DIGIT THREE #
|
||||
@ -1500,7 +1505,6 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
|
||||
04E0 ; 0033 ; MA # ( Ӡ → 3 ) CYRILLIC CAPITAL LETTER ABKHASIAN DZE → DIGIT THREE # →Ʒ→
|
||||
16F3B ; 0033 ; MA # ( 𖼻 → 3 ) MIAO LETTER ZA → DIGIT THREE # →Ʒ→
|
||||
118CA ; 0033 ; MA # ( 𑣊 → 3 ) WARANG CITI SMALL LETTER ANG → DIGIT THREE #
|
||||
1FBF3 ; 0033 ; MA # ( 🯳 → 3 ) SEGMENTED DIGIT THREE → DIGIT THREE #
|
||||
|
||||
06F3 ; 0663 ; MA # ( ۳ → ٣ ) EXTENDED ARABIC-INDIC DIGIT THREE → ARABIC-INDIC DIGIT THREE #
|
||||
1E8C9 ; 0663 ; MA #* ( 𞣉 → ٣ ) MENDE KIKAKUI DIGIT THREE → ARABIC-INDIC DIGIT THREE #
|
||||
@ -1530,9 +1534,9 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
|
||||
1D7E6 ; 0034 ; MA # ( 𝟦 → 4 ) MATHEMATICAL SANS-SERIF DIGIT FOUR → DIGIT FOUR #
|
||||
1D7F0 ; 0034 ; MA # ( 𝟰 → 4 ) MATHEMATICAL SANS-SERIF BOLD DIGIT FOUR → DIGIT FOUR #
|
||||
1D7FA ; 0034 ; MA # ( 𝟺 → 4 ) MATHEMATICAL MONOSPACE DIGIT FOUR → DIGIT FOUR #
|
||||
1FBF4 ; 0034 ; MA # ( 🯴 → 4 ) SEGMENTED DIGIT FOUR → DIGIT FOUR #
|
||||
13CE ; 0034 ; MA # ( Ꮞ → 4 ) CHEROKEE LETTER SE → DIGIT FOUR #
|
||||
118AF ; 0034 ; MA # ( 𑢯 → 4 ) WARANG CITI CAPITAL LETTER UC → DIGIT FOUR #
|
||||
1FBF4 ; 0034 ; MA # ( 🯴 → 4 ) SEGMENTED DIGIT FOUR → DIGIT FOUR #
|
||||
|
||||
06F4 ; 0664 ; MA # ( ۴ → ٤ ) EXTENDED ARABIC-INDIC DIGIT FOUR → ARABIC-INDIC DIGIT FOUR #
|
||||
|
||||
@ -1557,9 +1561,9 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
|
||||
1D7E7 ; 0035 ; MA # ( 𝟧 → 5 ) MATHEMATICAL SANS-SERIF DIGIT FIVE → DIGIT FIVE #
|
||||
1D7F1 ; 0035 ; MA # ( 𝟱 → 5 ) MATHEMATICAL SANS-SERIF BOLD DIGIT FIVE → DIGIT FIVE #
|
||||
1D7FB ; 0035 ; MA # ( 𝟻 → 5 ) MATHEMATICAL MONOSPACE DIGIT FIVE → DIGIT FIVE #
|
||||
1FBF5 ; 0035 ; MA # ( 🯵 → 5 ) SEGMENTED DIGIT FIVE → DIGIT FIVE #
|
||||
01BC ; 0035 ; MA # ( Ƽ → 5 ) LATIN CAPITAL LETTER TONE FIVE → DIGIT FIVE #
|
||||
118BB ; 0035 ; MA # ( 𑢻 → 5 ) WARANG CITI CAPITAL LETTER HORR → DIGIT FIVE #
|
||||
1FBF5 ; 0035 ; MA # ( 🯵 → 5 ) SEGMENTED DIGIT FIVE → DIGIT FIVE #
|
||||
|
||||
2464 ; 2784 ; MA #* ( ⑤ → ➄ ) CIRCLED DIGIT FIVE → DINGBAT CIRCLED SANS-SERIF DIGIT FIVE #
|
||||
|
||||
@ -1578,11 +1582,11 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
|
||||
1D7E8 ; 0036 ; MA # ( 𝟨 → 6 ) MATHEMATICAL SANS-SERIF DIGIT SIX → DIGIT SIX #
|
||||
1D7F2 ; 0036 ; MA # ( 𝟲 → 6 ) MATHEMATICAL SANS-SERIF BOLD DIGIT SIX → DIGIT SIX #
|
||||
1D7FC ; 0036 ; MA # ( 𝟼 → 6 ) MATHEMATICAL MONOSPACE DIGIT SIX → DIGIT SIX #
|
||||
1FBF6 ; 0036 ; MA # ( 🯶 → 6 ) SEGMENTED DIGIT SIX → DIGIT SIX #
|
||||
2CD2 ; 0036 ; MA # ( Ⳓ → 6 ) COPTIC CAPITAL LETTER OLD COPTIC HEI → DIGIT SIX #
|
||||
0431 ; 0036 ; MA # ( б → 6 ) CYRILLIC SMALL LETTER BE → DIGIT SIX #
|
||||
13EE ; 0036 ; MA # ( Ꮾ → 6 ) CHEROKEE LETTER WV → DIGIT SIX #
|
||||
118D5 ; 0036 ; MA # ( 𑣕 → 6 ) WARANG CITI SMALL LETTER AT → DIGIT SIX #
|
||||
1FBF6 ; 0036 ; MA # ( 🯶 → 6 ) SEGMENTED DIGIT SIX → DIGIT SIX #
|
||||
|
||||
06F6 ; 0666 ; MA # ( ۶ → ٦ ) EXTENDED ARABIC-INDIC DIGIT SIX → ARABIC-INDIC DIGIT SIX #
|
||||
|
||||
@ -1606,9 +1610,9 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
|
||||
1D7E9 ; 0037 ; MA # ( 𝟩 → 7 ) MATHEMATICAL SANS-SERIF DIGIT SEVEN → DIGIT SEVEN #
|
||||
1D7F3 ; 0037 ; MA # ( 𝟳 → 7 ) MATHEMATICAL SANS-SERIF BOLD DIGIT SEVEN → DIGIT SEVEN #
|
||||
1D7FD ; 0037 ; MA # ( 𝟽 → 7 ) MATHEMATICAL MONOSPACE DIGIT SEVEN → DIGIT SEVEN #
|
||||
1FBF7 ; 0037 ; MA # ( 🯷 → 7 ) SEGMENTED DIGIT SEVEN → DIGIT SEVEN #
|
||||
104D2 ; 0037 ; MA # ( 𐓒 → 7 ) OSAGE CAPITAL LETTER ZA → DIGIT SEVEN #
|
||||
118C6 ; 0037 ; MA # ( 𑣆 → 7 ) WARANG CITI SMALL LETTER II → DIGIT SEVEN #
|
||||
1FBF7 ; 0037 ; MA # ( 🯷 → 7 ) SEGMENTED DIGIT SEVEN → DIGIT SEVEN #
|
||||
|
||||
2466 ; 2786 ; MA #* ( ⑦ → ➆ ) CIRCLED DIGIT SEVEN → DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN #
|
||||
|
||||
@ -1631,10 +1635,10 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
|
||||
1D7EA ; 0038 ; MA # ( 𝟪 → 8 ) MATHEMATICAL SANS-SERIF DIGIT EIGHT → DIGIT EIGHT #
|
||||
1D7F4 ; 0038 ; MA # ( 𝟴 → 8 ) MATHEMATICAL SANS-SERIF BOLD DIGIT EIGHT → DIGIT EIGHT #
|
||||
1D7FE ; 0038 ; MA # ( 𝟾 → 8 ) MATHEMATICAL MONOSPACE DIGIT EIGHT → DIGIT EIGHT #
|
||||
1FBF8 ; 0038 ; MA # ( 🯸 → 8 ) SEGMENTED DIGIT EIGHT → DIGIT EIGHT #
|
||||
0223 ; 0038 ; MA # ( ȣ → 8 ) LATIN SMALL LETTER OU → DIGIT EIGHT #
|
||||
0222 ; 0038 ; MA # ( Ȣ → 8 ) LATIN CAPITAL LETTER OU → DIGIT EIGHT #
|
||||
1031A ; 0038 ; MA # ( 𐌚 → 8 ) OLD ITALIC LETTER EF → DIGIT EIGHT #
|
||||
1FBF8 ; 0038 ; MA # ( 🯸 → 8 ) SEGMENTED DIGIT EIGHT → DIGIT EIGHT #
|
||||
|
||||
0AEE ; 096E ; MA # ( ૮ → ८ ) GUJARATI DIGIT EIGHT → DEVANAGARI DIGIT EIGHT #
|
||||
|
||||
@ -1659,12 +1663,12 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
|
||||
1D7EB ; 0039 ; MA # ( 𝟫 → 9 ) MATHEMATICAL SANS-SERIF DIGIT NINE → DIGIT NINE #
|
||||
1D7F5 ; 0039 ; MA # ( 𝟵 → 9 ) MATHEMATICAL SANS-SERIF BOLD DIGIT NINE → DIGIT NINE #
|
||||
1D7FF ; 0039 ; MA # ( 𝟿 → 9 ) MATHEMATICAL MONOSPACE DIGIT NINE → DIGIT NINE #
|
||||
1FBF9 ; 0039 ; MA # ( 🯹 → 9 ) SEGMENTED DIGIT NINE → DIGIT NINE #
|
||||
A76E ; 0039 ; MA # ( Ꝯ → 9 ) LATIN CAPITAL LETTER CON → DIGIT NINE #
|
||||
2CCA ; 0039 ; MA # ( Ⳋ → 9 ) COPTIC CAPITAL LETTER DIALECT-P HORI → DIGIT NINE #
|
||||
118CC ; 0039 ; MA # ( 𑣌 → 9 ) WARANG CITI SMALL LETTER KO → DIGIT NINE #
|
||||
118AC ; 0039 ; MA # ( 𑢬 → 9 ) WARANG CITI CAPITAL LETTER KO → DIGIT NINE #
|
||||
118D6 ; 0039 ; MA # ( 𑣖 → 9 ) WARANG CITI SMALL LETTER AM → DIGIT NINE #
|
||||
1FBF9 ; 0039 ; MA # ( 🯹 → 9 ) SEGMENTED DIGIT NINE → DIGIT NINE #
|
||||
|
||||
0967 ; 0669 ; MA # ( १ → ٩ ) DEVANAGARI DIGIT ONE → ARABIC-INDIC DIGIT NINE #
|
||||
118E4 ; 0669 ; MA # ( 𑣤 → ٩ ) WARANG CITI DIGIT FOUR → ARABIC-INDIC DIGIT NINE #
|
||||
@ -2544,6 +2548,7 @@ FFE8 ; 006C ; MA #* ( │ → l ) HALFWIDTH FORMS LIGHT VERTICAL → LATIN SMALL
|
||||
1D7E3 ; 006C ; MA # ( 𝟣 → l ) MATHEMATICAL SANS-SERIF DIGIT ONE → LATIN SMALL LETTER L # →1→
|
||||
1D7ED ; 006C ; MA # ( 𝟭 → l ) MATHEMATICAL SANS-SERIF BOLD DIGIT ONE → LATIN SMALL LETTER L # →1→
|
||||
1D7F7 ; 006C ; MA # ( 𝟷 → l ) MATHEMATICAL MONOSPACE DIGIT ONE → LATIN SMALL LETTER L # →1→
|
||||
1FBF1 ; 006C ; MA # ( 🯱 → l ) SEGMENTED DIGIT ONE → LATIN SMALL LETTER L # →1→
|
||||
0049 ; 006C ; MA # ( I → l ) LATIN CAPITAL LETTER I → LATIN SMALL LETTER L #
|
||||
FF29 ; 006C ; MA # ( I → l ) FULLWIDTH LATIN CAPITAL LETTER I → LATIN SMALL LETTER L # →Ӏ→
|
||||
2160 ; 006C ; MA # ( Ⅰ → l ) ROMAN NUMERAL ONE → LATIN SMALL LETTER L # →Ӏ→
|
||||
@ -2601,7 +2606,6 @@ A4F2 ; 006C ; MA # ( ꓲ → l ) LISU LETTER I → LATIN SMALL LETTER L # →I
|
||||
16F28 ; 006C ; MA # ( 𖼨 → l ) MIAO LETTER GHA → LATIN SMALL LETTER L # →I→
|
||||
1028A ; 006C ; MA # ( 𐊊 → l ) LYCIAN LETTER J → LATIN SMALL LETTER L # →I→
|
||||
10309 ; 006C ; MA # ( 𐌉 → l ) OLD ITALIC LETTER I → LATIN SMALL LETTER L # →I→
|
||||
1FBF1 ; 006C ; MA # ( 🯱 → l ) SEGMENTED DIGIT ONE → LATIN SMALL LETTER L # →1→
|
||||
|
||||
1D22A ; 004C ; MA #* ( 𝈪 → L ) GREEK INSTRUMENTAL NOTATION SYMBOL-23 → LATIN CAPITAL LETTER L #
|
||||
216C ; 004C ; MA # ( Ⅼ → L ) ROMAN NUMERAL FIFTY → LATIN CAPITAL LETTER L #
|
||||
@ -2972,6 +2976,7 @@ FBA6 ; 006F ; MA # ( ﮦ → o ) ARABIC LETTER HEH GOAL ISOLATED FORM →
|
||||
1D7E2 ; 004F ; MA # ( 𝟢 → O ) MATHEMATICAL SANS-SERIF DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
|
||||
1D7EC ; 004F ; MA # ( 𝟬 → O ) MATHEMATICAL SANS-SERIF BOLD DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
|
||||
1D7F6 ; 004F ; MA # ( 𝟶 → O ) MATHEMATICAL MONOSPACE DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
|
||||
1FBF0 ; 004F ; MA # ( 🯰 → O ) SEGMENTED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
|
||||
FF2F ; 004F ; MA # ( O → O ) FULLWIDTH LATIN CAPITAL LETTER O → LATIN CAPITAL LETTER O # →О→
|
||||
1D40E ; 004F ; MA # ( 𝐎 → O ) MATHEMATICAL BOLD CAPITAL O → LATIN CAPITAL LETTER O #
|
||||
1D442 ; 004F ; MA # ( 𝑂 → O ) MATHEMATICAL ITALIC CAPITAL O → LATIN CAPITAL LETTER O #
|
||||
@ -3005,7 +3010,6 @@ A4F3 ; 004F ; MA # ( ꓳ → O ) LISU LETTER O → LATIN CAPITAL LETTER O #
|
||||
102AB ; 004F ; MA # ( 𐊫 → O ) CARIAN LETTER O → LATIN CAPITAL LETTER O #
|
||||
10404 ; 004F ; MA # ( 𐐄 → O ) DESERET CAPITAL LETTER LONG O → LATIN CAPITAL LETTER O #
|
||||
10516 ; 004F ; MA # ( 𐔖 → O ) ELBASAN LETTER O → LATIN CAPITAL LETTER O #
|
||||
1FBF0 ; 004F ; MA # ( 🯰 → O ) SEGMENTED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
|
||||
|
||||
2070 ; 00BA ; MA #* ( ⁰ → º ) SUPERSCRIPT ZERO → MASCULINE ORDINAL INDICATOR #
|
||||
1D52 ; 00BA ; MA # ( ᵒ → º ) MODIFIER LETTER SMALL O → MASCULINE ORDINAL INDICATOR # →⁰→
|
||||
@ -8024,8 +8028,6 @@ FA92 ; 6717 ; MA # ( 朗 → 朗 ) CJK COMPATIBILITY IDEOGRAPH-FA92 → CJK UNIF
|
||||
FA93 ; 671B ; MA # ( 望 → 望 ) CJK COMPATIBILITY IDEOGRAPH-FA93 → CJK UNIFIED IDEOGRAPH-671B #
|
||||
2F8D9 ; 671B ; MA # ( 望 → 望 ) CJK COMPATIBILITY IDEOGRAPH-2F8D9 → CJK UNIFIED IDEOGRAPH-671B #
|
||||
|
||||
2F8DA ; 6721 ; MA # ( 朡 → 朡 ) CJK COMPATIBILITY IDEOGRAPH-2F8DA → CJK UNIFIED IDEOGRAPH-6721 #
|
||||
|
||||
5E50 ; 3B3A ; MA # ( 幐 → 㬺 ) CJK UNIFIED IDEOGRAPH-5E50 → CJK UNIFIED IDEOGRAPH-3B3A #
|
||||
|
||||
4420 ; 3B3B ; MA # ( 䐠 → 㬻 ) CJK UNIFIED IDEOGRAPH-4420 → CJK UNIFIED IDEOGRAPH-3B3B #
|
||||
@ -8831,6 +8833,8 @@ F953 ; 808B ; MA # ( 肋 → 肋 ) CJK COMPATIBILITY IDEOGRAPH-F953 → CJK UNIF
|
||||
|
||||
2F984 ; 440B ; MA # ( 䐋 → 䐋 ) CJK COMPATIBILITY IDEOGRAPH-2F984 → CJK UNIFIED IDEOGRAPH-440B #
|
||||
|
||||
2F8DA ; 6721 ; MA # ( 朡 → 朡 ) CJK COMPATIBILITY IDEOGRAPH-2F8DA → CJK UNIFIED IDEOGRAPH-6721 #
|
||||
|
||||
2F987 ; 267A7 ; MA # ( 𦞧 → 𦞧 ) CJK COMPATIBILITY IDEOGRAPH-2F987 → CJK UNIFIED IDEOGRAPH-267A7 #
|
||||
|
||||
2F988 ; 267B5 ; MA # ( 𦞵 → 𦞵 ) CJK COMPATIBILITY IDEOGRAPH-2F988 → CJK UNIFIED IDEOGRAPH-267B5 #
|
||||
@ -9630,9 +9634,5 @@ FACE ; 9F9C ; MA # ( 龜 → 龜 ) CJK COMPATIBILITY IDEOGRAPH-FACE → CJK UNIF
|
||||
|
||||
2FD5 ; 9FA0 ; MA #* ( ⿕ → 龠 ) KANGXI RADICAL FLUTE → CJK UNIFIED IDEOGRAPH-9FA0 #
|
||||
|
||||
24EA ; 1F10D ; MA #* ( ⓪ → 🄍 ) CIRCLED DIGIT ZERO → CIRCLED ZERO WITH SLASH #
|
||||
|
||||
21BA ; 1F10E ; MA #* ( ↺ → 🄎 ) ANTICLOCKWISE OPEN CIRCLE ARROW → CIRCLED ANTICLOCKWISE ARROW #
|
||||
|
||||
# total: 6311
|
||||
|
||||
|
@ -1718,7 +1718,7 @@ cp;01C0;-Cased;-CWCM;gc=Lo;na=LATIN LETTER DENTAL CLICK;SB=LE
|
||||
cp;01C1;-Cased;-CWCM;gc=Lo;na=LATIN LETTER LATERAL CLICK;SB=LE
|
||||
cp;01C2;-Cased;-CWCM;gc=Lo;na=LATIN LETTER ALVEOLAR CLICK;SB=LE
|
||||
cp;01C3;-Cased;-CWCM;gc=Lo;na=LATIN LETTER RETROFLEX CLICK;SB=LE
|
||||
# Croatian digraphs matching Serbian Cyrillic letters
|
||||
# Latin digraphs matching Serbian Cyrillic letters
|
||||
cp;01C4;cf=01C6;CWCF;CWKCF;CWL;CWT;dm=0044 017D;dt=Com;na=LATIN CAPITAL LETTER DZ WITH CARON;NFKC_CF=0064 017E;NFKC_QC=N;NFKD_QC=N;scf=01C6;slc=01C6;stc=01C5;Upper
|
||||
cp;01C5;cf=01C6;CWCF;CWKCF;CWL;CWU;dm=0044 017E;dt=Com;gc=Lt;na=LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON;NFKC_CF=0064 017E;NFKC_QC=N;NFKD_QC=N;scf=01C6;slc=01C6;stc=01C5;suc=01C4
|
||||
cp;01C6;CWKCF;CWT;CWU;dm=0064 017E;dt=Com;gc=Ll;Lower;na=LATIN SMALL LETTER DZ WITH CARON;NFKC_CF=0064 017E;NFKC_QC=N;NFKD_QC=N;SB=LO;stc=01C5;suc=01C4
|
||||
@ -2034,14 +2034,14 @@ cp;02E2;Alpha;bc=L;Cased;CWKCF;dm=0073;dt=Sup;gc=Lm;IDC;IDS;Lower;na=MODIFIER LE
|
||||
cp;02E3;Alpha;bc=L;Cased;CWKCF;dm=0078;dt=Sup;gc=Lm;IDC;IDS;Lower;na=MODIFIER LETTER SMALL X;NFKC_CF=0078;NFKC_QC=N;NFKD_QC=N;SB=LO;sc=Latn;XIDC;XIDS
|
||||
cp;02E4;Alpha;bc=L;Cased;CWKCF;dm=0295;dt=Sup;gc=Lm;IDC;IDS;Lower;na=MODIFIER LETTER SMALL REVERSED GLOTTAL STOP;NFKC_CF=0295;NFKC_QC=N;NFKD_QC=N;SB=LO;sc=Latn;XIDC;XIDS
|
||||
# Tone letters
|
||||
cp;02E5;na=MODIFIER LETTER EXTRA-HIGH TONE BAR;WB=XX
|
||||
cp;02E6;na=MODIFIER LETTER HIGH TONE BAR;WB=XX
|
||||
cp;02E7;na=MODIFIER LETTER MID TONE BAR;WB=XX
|
||||
cp;02E8;na=MODIFIER LETTER LOW TONE BAR;WB=XX
|
||||
cp;02E9;na=MODIFIER LETTER EXTRA-LOW TONE BAR;WB=XX
|
||||
cp;02E5;na=MODIFIER LETTER EXTRA-HIGH TONE BAR
|
||||
cp;02E6;na=MODIFIER LETTER HIGH TONE BAR
|
||||
cp;02E7;na=MODIFIER LETTER MID TONE BAR
|
||||
cp;02E8;na=MODIFIER LETTER LOW TONE BAR
|
||||
cp;02E9;na=MODIFIER LETTER EXTRA-LOW TONE BAR
|
||||
# Extended Bopomofo tone marks
|
||||
cp;02EA;age=3.0;na=MODIFIER LETTER YIN DEPARTING TONE MARK;sc=Bopo;vo=U;WB=XX
|
||||
cp;02EB;age=3.0;na=MODIFIER LETTER YANG DEPARTING TONE MARK;sc=Bopo;vo=U;WB=XX
|
||||
cp;02EA;age=3.0;na=MODIFIER LETTER YIN DEPARTING TONE MARK;sc=Bopo;vo=U
|
||||
cp;02EB;age=3.0;na=MODIFIER LETTER YANG DEPARTING TONE MARK;sc=Bopo;vo=U
|
||||
# IPA modifiers
|
||||
cp;02EC;age=3.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER VOICING;SB=LE;XIDC;XIDS
|
||||
cp;02ED;age=3.0;na=MODIFIER LETTER UNASPIRATED
|
||||
@ -2734,12 +2734,12 @@ cp;0556;cf=0586;CWCF;CWKCF;CWL;gc=Lu;na=ARMENIAN CAPITAL LETTER FEH;NFKC_CF=0586
|
||||
unassigned;0557..0558
|
||||
# Modifier letters
|
||||
cp;0559;-Cased;CI;-CWCM;Dia;gc=Lm;na=ARMENIAN MODIFIER LETTER LEFT HALF RING;SB=LE
|
||||
cp;055A;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN APOSTROPHE;SB=XX;WB=XX;-XIDC;-XIDS
|
||||
cp;055A;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN APOSTROPHE;SB=XX;-XIDC;-XIDS
|
||||
cp;055B;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN EMPHASIS MARK;SB=XX;-XIDC;-XIDS
|
||||
cp;055C;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN EXCLAMATION MARK;SB=XX;-XIDC;-XIDS
|
||||
cp;055D;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN COMMA;SB=SC;WB=XX;-XIDC;-XIDS
|
||||
cp;055E;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN QUESTION MARK;SB=XX;-XIDC;-XIDS
|
||||
cp;055F;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN ABBREVIATION MARK;SB=XX;WB=XX;-XIDC;-XIDS
|
||||
cp;055F;-Alpha;-Cased;CI;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN ABBREVIATION MARK;SB=XX;WB=ML;-XIDC;-XIDS
|
||||
# Lowercase letters
|
||||
cp;0560;age=11.0;-CWCM;Lower;na=ARMENIAN SMALL LETTER TURNED AYB
|
||||
cp;0561;CWT;CWU;Lower;na=ARMENIAN SMALL LETTER AYB;stc=0531;suc=0531
|
||||
@ -2783,8 +2783,8 @@ cp;0586;CWT;CWU;Lower;na=ARMENIAN SMALL LETTER FEH;stc=0556;suc=0556
|
||||
cp;0587;cf=0565 0582;CWCF;CWKCF;CWT;CWU;dm=0565 0582;dt=Com;lc=0587;Lower;na=ARMENIAN SMALL LIGATURE ECH YIWN;NFKC_CF=0565 0582;NFKC_QC=N;NFKD_QC=N;tc=0535 0582;uc=0535 0552
|
||||
cp;0588;age=11.0;-CWCM;Lower;na=ARMENIAN SMALL LETTER YI WITH STROKE
|
||||
# Punctuation
|
||||
cp;0589;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;lb=IS;na=ARMENIAN FULL STOP;SB=ST;sc=Zyyy;scx=Armn Geor;STerm;Term;WB=MN;-XIDC;-XIDS
|
||||
cp;058A;age=3.0;-Alpha;bc=ON;-Cased;-CWCM;Dash;gc=Pd;Hyphen;-IDC;-IDS;lb=BA;na=ARMENIAN HYPHEN;SB=XX;WB=XX;-XIDC;-XIDS
|
||||
cp;0589;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;lb=IS;na=ARMENIAN FULL STOP;SB=ST;STerm;Term;WB=MN;-XIDC;-XIDS
|
||||
cp;058A;age=3.0;-Alpha;bc=ON;-Cased;-CWCM;Dash;gc=Pd;Hyphen;-IDC;-IDS;lb=BA;na=ARMENIAN HYPHEN;SB=XX;-XIDC;-XIDS
|
||||
unassigned;058B..058C
|
||||
# Religious symbols
|
||||
cp;058D;age=7.0;-Alpha;bc=ON;-Cased;-CWCM;gc=So;-IDC;-IDS;na=RIGHT-FACING ARMENIAN ETERNITY SIGN;SB=XX;WB=XX;-XIDC;-XIDS
|
||||
@ -3567,9 +3567,9 @@ cp;0852;na=MANDAIC LETTER AQ
|
||||
cp;0853;na=MANDAIC LETTER AR
|
||||
cp;0854;jt=R;na=MANDAIC LETTER ASH
|
||||
cp;0855;na=MANDAIC LETTER AT
|
||||
cp;0856;jt=U;na=MANDAIC LETTER DUSHENNA
|
||||
cp;0857;jt=U;na=MANDAIC LETTER KAD
|
||||
cp;0858;jt=U;na=MANDAIC LETTER AIN
|
||||
cp;0856;jt=R;na=MANDAIC LETTER DUSHENNA
|
||||
cp;0857;jt=R;na=MANDAIC LETTER KAD
|
||||
cp;0858;jt=R;na=MANDAIC LETTER AIN
|
||||
# Diacritics
|
||||
cp;0859;-Alpha;bc=NSM;ccc=220;CI;gc=Mn;GCB=EX;-Gr_Base;Gr_Ext;-IDS;jt=T;lb=CM;na=MANDAIC AFFRICATION MARK;SB=EX;WB=Extend;-XIDS
|
||||
cp;085A;-Alpha;bc=NSM;ccc=220;CI;gc=Mn;GCB=EX;-Gr_Base;Gr_Ext;-IDS;jt=T;lb=CM;na=MANDAIC VOCALIZATION MARK;SB=EX;WB=Extend;-XIDS
|
||||
@ -9102,10 +9102,10 @@ cp;1DF5;age=7.0;Dia;na=COMBINING UP TACK ABOVE
|
||||
# Typicon marks
|
||||
cp;1DF6;age=10.0;ccc=232;Dia;na=COMBINING KAVYKA ABOVE RIGHT
|
||||
cp;1DF7;age=10.0;ccc=228;Dia;na=COMBINING KAVYKA ABOVE LEFT
|
||||
cp;1DF8;age=10.0;ccc=228;Dia;na=COMBINING DOT ABOVE LEFT
|
||||
# Miscellaneous marks
|
||||
cp;1DF8;age=10.0;ccc=228;Dia;na=COMBINING DOT ABOVE LEFT;scx=Cyrl Syrc
|
||||
cp;1DF9;age=10.0;ccc=220;Dia;na=COMBINING WIDE INVERTED BRIDGE BELOW
|
||||
unassigned;1DFA
|
||||
# Miscellaneous mark
|
||||
cp;1DFB;age=9.0;InPC=Top;InSC=Syllable_Modifier;na=COMBINING DELETION MARK
|
||||
# Double diacritic mark for UPA
|
||||
cp;1DFC;age=6.0;ccc=233;na=COMBINING DOUBLE INVERTED BREVE BELOW
|
||||
@ -9682,9 +9682,10 @@ cp;2024;CI;CWKCF;dm=002E;dt=Com;ea=A;lb=IN;na=ONE DOT LEADER;NFKC_CF=002E;NFKC_Q
|
||||
cp;2025;CWKCF;dm=002E 002E;dt=Com;ea=A;lb=IN;na=TWO DOT LEADER;NFKC_CF=002E 002E;NFKC_QC=N;NFKD_QC=N
|
||||
cp;2026;CWKCF;dm=002E 002E 002E;dt=Com;ea=A;lb=IN;na=HORIZONTAL ELLIPSIS;NFKC_CF=002E 002E 002E;NFKC_QC=N;NFKD_QC=N
|
||||
cp;2027;CI;ea=A;lb=BA;na=HYPHENATION POINT;WB=ML
|
||||
# Format characters
|
||||
# Separators
|
||||
cp;2028;bc=WS;gc=Zl;GCB=CN;-Gr_Base;lb=BK;na=LINE SEPARATOR;-Pat_Syn;Pat_WS;SB=SE;WB=NL;WSpace
|
||||
cp;2029;bc=B;gc=Zp;GCB=CN;-Gr_Base;lb=BK;na=PARAGRAPH SEPARATOR;-Pat_Syn;Pat_WS;SB=SE;WB=NL;WSpace
|
||||
# Format characters
|
||||
cp;202A;bc=LRE;Bidi_C;CI;CWKCF;DI;gc=Cf;GCB=CN;-Gr_Base;jt=T;lb=CM;na=LEFT-TO-RIGHT EMBEDDING;Name_Alias=abbreviation=LRE;NFKC_CF=;-Pat_Syn;SB=FO;WB=FO
|
||||
cp;202B;bc=RLE;Bidi_C;CI;CWKCF;DI;gc=Cf;GCB=CN;-Gr_Base;jt=T;lb=CM;na=RIGHT-TO-LEFT EMBEDDING;Name_Alias=abbreviation=RLE;NFKC_CF=;-Pat_Syn;SB=FO;WB=FO
|
||||
cp;202C;bc=PDF;Bidi_C;CI;CWKCF;DI;gc=Cf;GCB=CN;-Gr_Base;jt=T;lb=CM;na=POP DIRECTIONAL FORMATTING;Name_Alias=abbreviation=PDF;NFKC_CF=;-Pat_Syn;SB=FO;WB=FO
|
||||
@ -11837,8 +11838,9 @@ cp;27C4;age=4.1;bmg=27C3;na=OPEN SUPERSET
|
||||
# Paired punctuation
|
||||
cp;27C5;age=4.1;bmg=27C6;bpb=27C6;bpt=o;gc=Ps;lb=OP;na=LEFT S-SHAPED BAG DELIMITER;SB=CL
|
||||
cp;27C6;age=4.1;bmg=27C5;bpb=27C5;bpt=c;gc=Pe;lb=CL;na=RIGHT S-SHAPED BAG DELIMITER;SB=CL
|
||||
# Miscellaneous symbols
|
||||
# Operator
|
||||
cp;27C7;age=5.0;-Bidi_M;na=OR WITH DOT INSIDE
|
||||
# Miscellaneous symbols
|
||||
cp;27C8;age=5.0;bmg=27C9;na=REVERSE SOLIDUS PRECEDING SUBSET
|
||||
cp;27C9;age=5.0;bmg=27C8;na=SUPERSET PRECEDING SOLIDUS
|
||||
# Vertical line operator
|
||||
@ -17107,17 +17109,17 @@ cp;A6F6;-Alpha;gc=Po;-IDC;-IDS;lb=BA;na=BAMUM SEMICOLON;SB=XX;Term;WB=XX;-XIDC;-
|
||||
cp;A6F7;-Alpha;gc=Po;-IDC;-IDS;lb=BA;na=BAMUM QUESTION MARK;SB=ST;STerm;Term;WB=XX;-XIDC;-XIDS
|
||||
unassigned;A6F8..A6FF
|
||||
|
||||
block;A700..A71F;age=4.1;bc=ON;blk=Modifier_Tone_Letters;CI;Dia;gc=Sk;Gr_Base;lb=AL;sc=Zyyy
|
||||
block;A700..A71F;age=4.1;bc=ON;blk=Modifier_Tone_Letters;CI;Dia;gc=Sk;Gr_Base;lb=AL;sc=Zyyy;WB=LE
|
||||
# A700..A71F Modifier Tone Letters
|
||||
# Corner tone marks for Chinese
|
||||
cp;A700;na=MODIFIER LETTER CHINESE TONE YIN PING
|
||||
cp;A701;na=MODIFIER LETTER CHINESE TONE YANG PING
|
||||
cp;A702;na=MODIFIER LETTER CHINESE TONE YIN SHANG
|
||||
cp;A703;na=MODIFIER LETTER CHINESE TONE YANG SHANG
|
||||
cp;A704;na=MODIFIER LETTER CHINESE TONE YIN QU
|
||||
cp;A705;na=MODIFIER LETTER CHINESE TONE YANG QU
|
||||
cp;A706;na=MODIFIER LETTER CHINESE TONE YIN RU
|
||||
cp;A707;na=MODIFIER LETTER CHINESE TONE YANG RU
|
||||
cp;A700;na=MODIFIER LETTER CHINESE TONE YIN PING;scx=Hani Latn;WB=XX
|
||||
cp;A701;na=MODIFIER LETTER CHINESE TONE YANG PING;scx=Hani Latn;WB=XX
|
||||
cp;A702;na=MODIFIER LETTER CHINESE TONE YIN SHANG;scx=Hani Latn;WB=XX
|
||||
cp;A703;na=MODIFIER LETTER CHINESE TONE YANG SHANG;scx=Hani Latn;WB=XX
|
||||
cp;A704;na=MODIFIER LETTER CHINESE TONE YIN QU;scx=Hani Latn;WB=XX
|
||||
cp;A705;na=MODIFIER LETTER CHINESE TONE YANG QU;scx=Hani Latn;WB=XX
|
||||
cp;A706;na=MODIFIER LETTER CHINESE TONE YIN RU;scx=Hani Latn;WB=XX
|
||||
cp;A707;na=MODIFIER LETTER CHINESE TONE YANG RU;scx=Hani Latn;WB=XX
|
||||
# Dotted tone letters
|
||||
cp;A708;na=MODIFIER LETTER EXTRA-HIGH DOTTED TONE BAR
|
||||
cp;A709;na=MODIFIER LETTER HIGH DOTTED TONE BAR
|
||||
@ -17136,16 +17138,16 @@ cp;A714;na=MODIFIER LETTER MID LEFT-STEM TONE BAR
|
||||
cp;A715;na=MODIFIER LETTER LOW LEFT-STEM TONE BAR
|
||||
cp;A716;na=MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR
|
||||
# Chinantec tone marks
|
||||
cp;A717;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER DOT VERTICAL BAR;SB=LE;WB=LE;XIDC;XIDS
|
||||
cp;A718;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER DOT SLASH;SB=LE;WB=LE;XIDC;XIDS
|
||||
cp;A719;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER DOT HORIZONTAL BAR;SB=LE;WB=LE;XIDC;XIDS
|
||||
cp;A71A;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER LOWER RIGHT CORNER ANGLE;SB=LE;WB=LE;XIDC;XIDS
|
||||
cp;A717;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER DOT VERTICAL BAR;SB=LE;XIDC;XIDS
|
||||
cp;A718;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER DOT SLASH;SB=LE;XIDC;XIDS
|
||||
cp;A719;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER DOT HORIZONTAL BAR;SB=LE;XIDC;XIDS
|
||||
cp;A71A;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER LOWER RIGHT CORNER ANGLE;SB=LE;XIDC;XIDS
|
||||
# Africanist tone letters
|
||||
cp;A71B;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED UP ARROW;SB=LE;WB=LE;XIDC;XIDS
|
||||
cp;A71C;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED DOWN ARROW;SB=LE;WB=LE;XIDC;XIDS
|
||||
cp;A71D;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED EXCLAMATION MARK;SB=LE;WB=LE;XIDC;XIDS
|
||||
cp;A71E;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED INVERTED EXCLAMATION MARK;SB=LE;WB=LE;XIDC;XIDS
|
||||
cp;A71F;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER LOW INVERTED EXCLAMATION MARK;SB=LE;WB=LE;XIDC;XIDS
|
||||
cp;A71B;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED UP ARROW;SB=LE;XIDC;XIDS
|
||||
cp;A71C;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED DOWN ARROW;SB=LE;XIDC;XIDS
|
||||
cp;A71D;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED EXCLAMATION MARK;SB=LE;XIDC;XIDS
|
||||
cp;A71E;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED INVERTED EXCLAMATION MARK;SB=LE;XIDC;XIDS
|
||||
cp;A71F;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER LOW INVERTED EXCLAMATION MARK;SB=LE;XIDC;XIDS
|
||||
|
||||
block;A720..A7FF;age=5.1;Alpha;blk=Latin_Ext_D;Cased;CWCM;gc=Ll;Gr_Base;IDC;IDS;lb=AL;SB=LO;sc=Latn;WB=LE;XIDC;XIDS
|
||||
# A720..A7FF Latin Extended-D
|
||||
@ -30483,21 +30485,21 @@ cp;16F9D;CI;Dia;gc=Lm;na=MIAO LETTER REFORMED TONE-5;WB=LE
|
||||
cp;16F9E;CI;Dia;gc=Lm;na=MIAO LETTER REFORMED TONE-6;WB=LE
|
||||
cp;16F9F;CI;Dia;gc=Lm;na=MIAO LETTER REFORMED TONE-8;WB=LE
|
||||
|
||||
block;16FE0..16FFF;age=13.0;Alpha;blk=Ideographic_Symbols;ea=W;gc=Lm;Gr_Base;IDC;lb=NS;SB=LE;sc=Zyyy;vo=U;WB=Extend;XIDC
|
||||
block;16FE0..16FFF;age=13.0;Alpha;blk=Ideographic_Symbols;ea=W;gc=Lm;Gr_Base;IDC;lb=NS;SB=LE;sc=Hani;vo=U;WB=Extend;XIDC
|
||||
# 16FE0..16FFF Ideographic Symbols and Punctuation
|
||||
# Tangut mark
|
||||
cp;16FE0;age=9.0;CI;Ext;IDS;na=TANGUT ITERATION MARK;sc=Tang;WB=LE;XIDS
|
||||
# Nushu mark
|
||||
cp;16FE1;age=10.0;CI;Ext;IDS;na=NUSHU ITERATION MARK;sc=Nshu;WB=LE;XIDS
|
||||
# Marks used in ancient Chinese texts
|
||||
cp;16FE2;age=12.0;-Alpha;bc=ON;gc=Po;-IDC;na=OLD CHINESE HOOK MARK;SB=XX;WB=XX;-XIDC
|
||||
cp;16FE3;age=12.0;CI;Ext;IDS;na=OLD CHINESE ITERATION MARK;WB=LE;XIDS
|
||||
cp;16FE2;age=12.0;-Alpha;bc=ON;gc=Po;-IDC;na=OLD CHINESE HOOK MARK;SB=XX;sc=Zyyy;WB=XX;-XIDC
|
||||
cp;16FE3;age=12.0;CI;Ext;IDS;na=OLD CHINESE ITERATION MARK;sc=Zyyy;WB=LE;XIDS
|
||||
# Small Khitan format character
|
||||
cp;16FE4;-Alpha;bc=NSM;CI;gc=Mn;GCB=EX;-Gr_Base;Gr_Ext;Ideo;jt=T;lb=GL;na=KHITAN SMALL SCRIPT FILLER;SB=EX;sc=Kits
|
||||
unassigned;16FE5..16FEF;vo=U
|
||||
# Combining diacritics for CJK ideographs
|
||||
cp;16FF0;ccc=6;Dia;gc=Mc;GCB=SM;lb=CM;na=VIETNAMESE ALTERNATE READING MARK CA;SB=EX;sc=Zinh
|
||||
cp;16FF1;ccc=6;Dia;gc=Mc;GCB=SM;lb=CM;na=VIETNAMESE ALTERNATE READING MARK NHAY;SB=EX;sc=Zinh
|
||||
cp;16FF0;ccc=6;Dia;gc=Mc;GCB=SM;lb=CM;na=VIETNAMESE ALTERNATE READING MARK CA;SB=EX
|
||||
cp;16FF1;ccc=6;Dia;gc=Mc;GCB=SM;lb=CM;na=VIETNAMESE ALTERNATE READING MARK NHAY;SB=EX
|
||||
unassigned;16FF2..16FFF;vo=U
|
||||
|
||||
block;17000..187FF;Alpha;blk=Tangut;ea=W;gc=Lo;Gr_Base;IDC;Ideo;IDS;lb=ID;SB=LE;sc=Tang;vo=U;XIDC;XIDS
|
||||
@ -31294,7 +31296,7 @@ cp;18AFD;age=13.0;na=TANGUT COMPONENT-766
|
||||
cp;18AFE;age=13.0;na=TANGUT COMPONENT-767
|
||||
cp;18AFF;age=13.0;na=TANGUT COMPONENT-768
|
||||
|
||||
block;18B00..18CFF;age=13.0;Alpha;blk=Khitan_Small_Script;ea=W;gc=Lo;Gr_Base;IDC;Ideo;IDS;lb=ID;SB=LE;sc=Kits;vo=U;XIDC;XIDS
|
||||
block;18B00..18CFF;age=13.0;Alpha;blk=Khitan_Small_Script;ea=W;gc=Lo;Gr_Base;IDC;Ideo;IDS;lb=AL;SB=LE;sc=Kits;vo=U;XIDC;XIDS
|
||||
# 18B00..18CFF Khitan Small Script
|
||||
# Iteration mark
|
||||
cp;18B00;na=KHITAN SMALL SCRIPT CHARACTER-18B00
|
||||
@ -38273,7 +38275,7 @@ cp;1F909;ea=N;-Emoji;-EPres;-ExtPict;lb=AL;na=DOWNWARD FACING NOTCHED HOOK
|
||||
cp;1F90A;ea=N;-Emoji;-EPres;-ExtPict;lb=AL;na=DOWNWARD FACING HOOK WITH DOT
|
||||
cp;1F90B;ea=N;-Emoji;-EPres;-ExtPict;lb=AL;na=DOWNWARD FACING NOTCHED HOOK WITH DOT
|
||||
# Hand symbol
|
||||
cp;1F90C;age=13.0;EBase;na=PINCHED FINGERS
|
||||
cp;1F90C;age=13.0;EBase;lb=EB;na=PINCHED FINGERS
|
||||
# Colored heart symbols
|
||||
cp;1F90D;age=12.0;na=WHITE HEART
|
||||
cp;1F90E;age=12.0;na=BROWN HEART
|
||||
@ -38390,7 +38392,7 @@ cp;1F973;age=11.0;na=FACE WITH PARTY HORN AND PARTY HAT
|
||||
cp;1F974;age=11.0;na=FACE WITH UNEVEN EYES AND WAVY MOUTH
|
||||
cp;1F975;age=11.0;na=OVERHEATED FACE
|
||||
cp;1F976;age=11.0;na=FREEZING FACE
|
||||
cp;1F977;age=13.0;na=NINJA
|
||||
cp;1F977;age=13.0;EBase;lb=EB;na=NINJA
|
||||
cp;1F978;age=13.0;na=DISGUISED FACE
|
||||
unassigned;1F979;ExtPict;lb=ID;vo=U
|
||||
cp;1F97A;age=11.0;na=FACE WITH PLEADING EYES
|
||||
@ -38732,7 +38734,7 @@ cp;1FAD5;na=FONDUE
|
||||
cp;1FAD6;na=TEAPOT
|
||||
unassigned;1FAD7..1FAFF;ExtPict;lb=ID;vo=U
|
||||
|
||||
block;1FB00..1FBFF;age=13.0;bc=ON;blk=Symbols_For_Legacy_Computing;ExtPict;gc=So;Gr_Base;lb=ID;sc=Zyyy
|
||||
block;1FB00..1FBFF;age=13.0;bc=ON;blk=Symbols_For_Legacy_Computing;gc=So;Gr_Base;lb=AL;sc=Zyyy
|
||||
# 1FB00..1FBFF Symbols for Legacy Computing
|
||||
# Block mosaic terminal graphic characters
|
||||
cp;1FB00;na=BLOCK SEXTANT-1
|
||||
@ -38885,7 +38887,7 @@ cp;1FB8F;na=LOWER HALF MEDIUM SHADE
|
||||
cp;1FB90;na=INVERSE MEDIUM SHADE
|
||||
cp;1FB91;na=UPPER HALF BLOCK AND LOWER HALF INVERSE MEDIUM SHADE
|
||||
cp;1FB92;na=UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
|
||||
unassigned;1FB93;ExtPict;lb=ID
|
||||
unassigned;1FB93
|
||||
cp;1FB94;na=LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK
|
||||
# Fill characters
|
||||
cp;1FB95;na=CHECKER BOARD FILL
|
||||
@ -38951,19 +38953,19 @@ cp;1FBC7;na=STICK FIGURE LEANING LEFT
|
||||
cp;1FBC8;na=STICK FIGURE LEANING RIGHT
|
||||
cp;1FBC9;na=STICK FIGURE WITH DRESS
|
||||
cp;1FBCA;na=WHITE UP-POINTING CHEVRON
|
||||
unassigned;1FBCB..1FBEF;ExtPict;lb=ID
|
||||
unassigned;1FBCB..1FBEF
|
||||
# Segmented digits
|
||||
cp;1FBF0;bc=EN;CWKCF;dm=0030;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT ZERO;NFKC_CF=0030;NFKC_QC=N;NFKD_QC=N;nt=De;nv=0;XIDC
|
||||
cp;1FBF1;bc=EN;CWKCF;dm=0031;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT ONE;NFKC_CF=0031;NFKC_QC=N;NFKD_QC=N;nt=De;nv=1;XIDC
|
||||
cp;1FBF2;bc=EN;CWKCF;dm=0032;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT TWO;NFKC_CF=0032;NFKC_QC=N;NFKD_QC=N;nt=De;nv=2;XIDC
|
||||
cp;1FBF3;bc=EN;CWKCF;dm=0033;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT THREE;NFKC_CF=0033;NFKC_QC=N;NFKD_QC=N;nt=De;nv=3;XIDC
|
||||
cp;1FBF4;bc=EN;CWKCF;dm=0034;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT FOUR;NFKC_CF=0034;NFKC_QC=N;NFKD_QC=N;nt=De;nv=4;XIDC
|
||||
cp;1FBF5;bc=EN;CWKCF;dm=0035;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT FIVE;NFKC_CF=0035;NFKC_QC=N;NFKD_QC=N;nt=De;nv=5;XIDC
|
||||
cp;1FBF6;bc=EN;CWKCF;dm=0036;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT SIX;NFKC_CF=0036;NFKC_QC=N;NFKD_QC=N;nt=De;nv=6;XIDC
|
||||
cp;1FBF7;bc=EN;CWKCF;dm=0037;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT SEVEN;NFKC_CF=0037;NFKC_QC=N;NFKD_QC=N;nt=De;nv=7;XIDC
|
||||
cp;1FBF8;bc=EN;CWKCF;dm=0038;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT EIGHT;NFKC_CF=0038;NFKC_QC=N;NFKD_QC=N;nt=De;nv=8;XIDC
|
||||
cp;1FBF9;bc=EN;CWKCF;dm=0039;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT NINE;NFKC_CF=0039;NFKC_QC=N;NFKD_QC=N;nt=De;nv=9;XIDC
|
||||
unassigned;1FBFA..1FBFF;ExtPict;lb=ID
|
||||
cp;1FBF0;bc=EN;CWKCF;dm=0030;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT ZERO;NFKC_CF=0030;NFKC_QC=N;NFKD_QC=N;nt=De;nv=0;SB=NU;WB=NU;XIDC
|
||||
cp;1FBF1;bc=EN;CWKCF;dm=0031;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT ONE;NFKC_CF=0031;NFKC_QC=N;NFKD_QC=N;nt=De;nv=1;SB=NU;WB=NU;XIDC
|
||||
cp;1FBF2;bc=EN;CWKCF;dm=0032;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT TWO;NFKC_CF=0032;NFKC_QC=N;NFKD_QC=N;nt=De;nv=2;SB=NU;WB=NU;XIDC
|
||||
cp;1FBF3;bc=EN;CWKCF;dm=0033;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT THREE;NFKC_CF=0033;NFKC_QC=N;NFKD_QC=N;nt=De;nv=3;SB=NU;WB=NU;XIDC
|
||||
cp;1FBF4;bc=EN;CWKCF;dm=0034;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT FOUR;NFKC_CF=0034;NFKC_QC=N;NFKD_QC=N;nt=De;nv=4;SB=NU;WB=NU;XIDC
|
||||
cp;1FBF5;bc=EN;CWKCF;dm=0035;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT FIVE;NFKC_CF=0035;NFKC_QC=N;NFKD_QC=N;nt=De;nv=5;SB=NU;WB=NU;XIDC
|
||||
cp;1FBF6;bc=EN;CWKCF;dm=0036;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT SIX;NFKC_CF=0036;NFKC_QC=N;NFKD_QC=N;nt=De;nv=6;SB=NU;WB=NU;XIDC
|
||||
cp;1FBF7;bc=EN;CWKCF;dm=0037;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT SEVEN;NFKC_CF=0037;NFKC_QC=N;NFKD_QC=N;nt=De;nv=7;SB=NU;WB=NU;XIDC
|
||||
cp;1FBF8;bc=EN;CWKCF;dm=0038;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT EIGHT;NFKC_CF=0038;NFKC_QC=N;NFKD_QC=N;nt=De;nv=8;SB=NU;WB=NU;XIDC
|
||||
cp;1FBF9;bc=EN;CWKCF;dm=0039;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT NINE;NFKC_CF=0039;NFKC_QC=N;NFKD_QC=N;nt=De;nv=9;SB=NU;WB=NU;XIDC
|
||||
unassigned;1FBFA..1FBFF
|
||||
|
||||
# No block
|
||||
unassigned;1FC00..1FF7F;ExtPict;lb=ID
|
||||
|
@ -102,25 +102,25 @@ void U_CALLCONV initializeStatics(UErrorCode &status) {
|
||||
u"\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55\\u0C56\\u0C60\\u0C61\\u0C66-"
|
||||
u"\\u0C6F\\u0C80\\u0C82\\u0C83\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8"
|
||||
u"\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CBC-\\u0CC4\\u0CC6-\\u0CC8\\u0CCA-\\u0CCD"
|
||||
u"\\u0CD5\\u0CD6\\u0CE0-\\u0CE3\\u0CE6-\\u0CEF\\u0CF1\\u0CF2\\u0D00\\u0D02-"
|
||||
u"\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D3A\\u0D3D-\\u0D43\\u0D46-\\u0D48\\u0D4A-"
|
||||
u"\\u0D4E\\u0D54-\\u0D57\\u0D60\\u0D61\\u0D66-\\u0D6F\\u0D7A-\\u0D7F\\u0D81-"
|
||||
u"\\u0D83\\u0D85-\\u0D8E\\u0D91-\\u0D96\\u0D9A-\\u0DA5\\u0DA7-\\u0DB1\\u0DB3-"
|
||||
u"\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0DCA\\u0DCF-\\u0DD4\\u0DD6\\u0DD8-\\u0DDE"
|
||||
u"\\u0DF2\\u0E01-\\u0E32\\u0E34-\\u0E3A\\u0E40-\\u0E4E\\u0E50-\\u0E59\\u0E81"
|
||||
u"\\u0E82\\u0E84\\u0E86-\\u0E8A\\u0E8C-\\u0EA3\\u0EA5\\u0EA7-\\u0EB2\\u0EB4-"
|
||||
u"\\u0EBD\\u0EC0-\\u0EC4\\u0EC6\\u0EC8-\\u0ECD\\u0ED0-\\u0ED9\\u0EDE\\u0EDF"
|
||||
u"\\u0F00\\u0F20-\\u0F29\\u0F35\\u0F37\\u0F3E-\\u0F42\\u0F44-\\u0F47\\u0F49-"
|
||||
u"\\u0F4C\\u0F4E-\\u0F51\\u0F53-\\u0F56\\u0F58-\\u0F5B\\u0F5D-\\u0F68\\u0F6A-"
|
||||
u"\\u0F6C\\u0F71\\u0F72\\u0F74\\u0F7A-\\u0F80\\u0F82-\\u0F84\\u0F86-\\u0F92"
|
||||
u"\\u0F94-\\u0F97\\u0F99-\\u0F9C\\u0F9E-\\u0FA1\\u0FA3-\\u0FA6\\u0FA8-\\u0FAB"
|
||||
u"\\u0FAD-\\u0FB8\\u0FBA-\\u0FBC\\u0FC6\\u1000-\\u1049\\u1050-\\u109D\\u10C7"
|
||||
u"\\u10CD\\u10D0-\\u10F0\\u10F7-\\u10FA\\u10FD-\\u10FF\\u1200-\\u1248\\u124A-"
|
||||
u"\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D"
|
||||
u"\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-"
|
||||
u"\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u135D-\\u135F\\u1380-"
|
||||
u"\\u138F\\u1780-\\u17A2\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-\\u17CA\\u17D2"
|
||||
u"\\u17D7\\u17DC\\u17E0-\\u17E9\\u1ABF\\u1AC0\\u1C90-\\u1CBA\\u1CBD-\\u1CBF"
|
||||
u"\\u0CD5\\u0CD6\\u0CE0-\\u0CE3\\u0CE6-\\u0CEF\\u0CF1\\u0CF2\\u0D00\\u0D02"
|
||||
u"\\u0D03\\u0D05-\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D3A\\u0D3D-\\u0D43\\u0D46-"
|
||||
u"\\u0D48\\u0D4A-\\u0D4E\\u0D54-\\u0D57\\u0D60\\u0D61\\u0D66-\\u0D6F\\u0D7A-"
|
||||
u"\\u0D7F\\u0D82\\u0D83\\u0D85-\\u0D8E\\u0D91-\\u0D96\\u0D9A-\\u0DA5\\u0DA7-"
|
||||
u"\\u0DB1\\u0DB3-\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0DCA\\u0DCF-\\u0DD4\\u0DD6"
|
||||
u"\\u0DD8-\\u0DDE\\u0DF2\\u0E01-\\u0E32\\u0E34-\\u0E3A\\u0E40-\\u0E4E\\u0E50-"
|
||||
u"\\u0E59\\u0E81\\u0E82\\u0E84\\u0E86-\\u0E8A\\u0E8C-\\u0EA3\\u0EA5\\u0EA7-"
|
||||
u"\\u0EB2\\u0EB4-\\u0EBD\\u0EC0-\\u0EC4\\u0EC6\\u0EC8-\\u0ECD\\u0ED0-\\u0ED9"
|
||||
u"\\u0EDE\\u0EDF\\u0F00\\u0F20-\\u0F29\\u0F35\\u0F37\\u0F3E-\\u0F42\\u0F44-"
|
||||
u"\\u0F47\\u0F49-\\u0F4C\\u0F4E-\\u0F51\\u0F53-\\u0F56\\u0F58-\\u0F5B\\u0F5D-"
|
||||
u"\\u0F68\\u0F6A-\\u0F6C\\u0F71\\u0F72\\u0F74\\u0F7A-\\u0F80\\u0F82-\\u0F84"
|
||||
u"\\u0F86-\\u0F92\\u0F94-\\u0F97\\u0F99-\\u0F9C\\u0F9E-\\u0FA1\\u0FA3-\\u0FA6"
|
||||
u"\\u0FA8-\\u0FAB\\u0FAD-\\u0FB8\\u0FBA-\\u0FBC\\u0FC6\\u1000-\\u1049\\u1050-"
|
||||
u"\\u109D\\u10C7\\u10CD\\u10D0-\\u10F0\\u10F7-\\u10FA\\u10FD-\\u10FF\\u1200-"
|
||||
u"\\u1248\\u124A-\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288"
|
||||
u"\\u128A-\\u128D\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-"
|
||||
u"\\u12C5\\u12C8-\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u135D-"
|
||||
u"\\u135F\\u1380-\\u138F\\u1780-\\u17A2\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-"
|
||||
u"\\u17CA\\u17D2\\u17D7\\u17DC\\u17E0-\\u17E9\\u1C90-\\u1CBA\\u1CBD-\\u1CBF"
|
||||
u"\\u1E00-\\u1E99\\u1E9E\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-"
|
||||
u"\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F70"
|
||||
u"\\u1F72\\u1F74\\u1F76\\u1F78\\u1F7A\\u1F7C\\u1F80-\\u1FB4\\u1FB6-\\u1FBA"
|
||||
@ -131,14 +131,14 @@ void U_CALLCONV initializeStatics(UErrorCode &status) {
|
||||
u"\\u2DD8-\\u2DDE\\u3005-\\u3007\\u3041-\\u3096\\u3099\\u309A\\u309D\\u309E"
|
||||
u"\\u30A1-\\u30FA\\u30FC-\\u30FE\\u3105-\\u312D\\u312F\\u31A0-\\u31BF\\u3400-"
|
||||
u"\\u4DBF\\u4E00-\\u9FFC\\uA67F\\uA717-\\uA71F\\uA788\\uA78D\\uA792\\uA793"
|
||||
u"\\uA7AA\\uA7AE\\uA7B8\\uA7B9\\uA7C2-\\uA7CA\\uA7F5\\uA7F6\\uA9E7-\\uA9FE"
|
||||
u"\\uAA60-\\uAA76\\uAA7A-\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16"
|
||||
u"\\uAB20-\\uAB26\\uAB28-\\uAB2E\\uAB66-\\uAB68\\uAC00-\\uD7A3\\uFA0E\\uFA0F"
|
||||
u"\\uFA11\\uFA13\\uFA14\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00011301"
|
||||
u"\\U00011303\\U0001133B\\U0001133C\\U00016FF0\\U00016FF1\\U0001B150-"
|
||||
u"\\U0001B152\\U0001B164-\\U0001B167\\U00020000-\\U0002A6DD\\U0002A700-"
|
||||
u"\\U0002B734\\U0002B740-\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-"
|
||||
u"\\U0002EBE0\\U00030000-\\U0003134A]";
|
||||
u"\\uA7AA\\uA7AE\\uA7B8\\uA7B9\\uA7C2-\\uA7CA\\uA9E7-\\uA9FE\\uAA60-\\uAA76"
|
||||
u"\\uAA7A-\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16\\uAB20-\\uAB26"
|
||||
u"\\uAB28-\\uAB2E\\uAB66\\uAB67\\uAC00-\\uD7A3\\uFA0E\\uFA0F\\uFA11\\uFA13"
|
||||
u"\\uFA14\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00011301\\U00011303"
|
||||
u"\\U0001133B\\U0001133C\\U00016FF0\\U00016FF1\\U0001B150-\\U0001B152"
|
||||
u"\\U0001B164-\\U0001B167\\U00020000-\\U0002A6DD\\U0002A700-\\U0002B734"
|
||||
u"\\U0002B740-\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-\\U0002EBE0"
|
||||
u"\\U00030000-\\U0003134A]";
|
||||
|
||||
gRecommendedSet = new UnicodeSet(UnicodeString(recommendedPat), status);
|
||||
if (gRecommendedSet == NULL) {
|
||||
|
@ -1991,10 +1991,15 @@ RBBIWordMonkey::RBBIWordMonkey()
|
||||
fMidNumLetSet = new UnicodeSet(u"[\\p{Word_Break = MidNumLet}]", status);
|
||||
fMidLetterSet = new UnicodeSet(u"[\\p{Word_Break = MidLetter}]", status);
|
||||
fMidNumSet = new UnicodeSet(u"[\\p{Word_Break = MidNum}]", status);
|
||||
fNumericSet = new UnicodeSet(u"[[\\p{Word_Break = Numeric}][\\uff10-\\uff19]]", status);
|
||||
fNumericSet = new UnicodeSet(u"[\\p{Word_Break = Numeric}]", status);
|
||||
fFormatSet = new UnicodeSet(u"[\\p{Word_Break = Format}]", status);
|
||||
fExtendNumLetSet = new UnicodeSet(u"[\\p{Word_Break = ExtendNumLet}]", status);
|
||||
fExtendSet = new UnicodeSet(u"[\\p{Word_Break = Extend}]", status);
|
||||
// There are some sc=Hani characters with WB=Extend.
|
||||
// The break rules need to pick one or the other because
|
||||
// Extend overlapping with something else is messy.
|
||||
// For Unicode 13, we chose to keep U+16FF0 & U+16FF1
|
||||
// in $Han (for $dictionary) and out of $Extend.
|
||||
fExtendSet = new UnicodeSet(u"[\\p{Word_Break = Extend}-[:Hani:]]", status);
|
||||
fWSegSpaceSet = new UnicodeSet(u"[\\p{Word_Break = WSegSpace}]", status);
|
||||
|
||||
fZWJSet = new UnicodeSet(u"[\\p{Word_Break = ZWJ}]", status);
|
||||
|
@ -1,6 +1,6 @@
|
||||
# CollationTest_CLDR_NON_IGNORABLE_SHORT.txt
|
||||
# Date: 2019-11-08, 22:14:17 GMT
|
||||
# © 2019 Unicode®, Inc.
|
||||
# Date: 2020-02-12, 17:50:40 GMT
|
||||
# © 2020 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# UCA Version: 13.0.0
|
||||
@ -945,9 +945,11 @@ FB1E 0334
|
||||
0652 0334
|
||||
0334 0653
|
||||
0653 0334
|
||||
0334 10EAC
|
||||
10EAC 0334
|
||||
0334 0654
|
||||
0654 0334
|
||||
0334 10EAB
|
||||
10EAB 0334
|
||||
0334 0655
|
||||
0655 0334
|
||||
@ -1111,6 +1113,7 @@ A9B3 0334
|
||||
116B7 0334
|
||||
0334 1183A
|
||||
1183A 0334
|
||||
0334 11943
|
||||
11943 0334
|
||||
0334 11D42
|
||||
11D42 0334
|
||||
@ -1232,7 +1235,9 @@ A92D 0334
|
||||
302E 0334
|
||||
0334 302F
|
||||
302F 0334
|
||||
0334 16FF0
|
||||
16FF0 0334
|
||||
0334 16FF1
|
||||
16FF1 0334
|
||||
0334 20D0
|
||||
20D0 0334
|
||||
@ -66995,6 +67000,7 @@ A75E 0062
|
||||
0057 0323 0334
|
||||
0057 0334 0323
|
||||
1E88 0334
|
||||
0334 1ABF
|
||||
1ABF 0334
|
||||
0334 1DF1
|
||||
1DF1 0334
|
||||
@ -67303,6 +67309,7 @@ A7C2 0062
|
||||
2C72 0041
|
||||
2C73 0062
|
||||
2C72 0062
|
||||
0334 1AC0
|
||||
1AC0 0334
|
||||
028D 0021
|
||||
1AC0 0021
|
||||
@ -92154,6 +92161,7 @@ A806 003F
|
||||
A806 0061
|
||||
A806 0041
|
||||
A806 0062
|
||||
0334 A82C
|
||||
A82C 0334
|
||||
A82C 0021
|
||||
A82C 003F
|
||||
@ -96505,12 +96513,14 @@ A8C4 0062
|
||||
11938 0041
|
||||
11935 11930 0062
|
||||
11938 0062
|
||||
0334 1193D
|
||||
1193D 0334
|
||||
1193D 0021
|
||||
1193D 003F
|
||||
1193D 0061
|
||||
1193D 0041
|
||||
1193D 0062
|
||||
0334 1193E
|
||||
1193E 0334
|
||||
1193E 0021
|
||||
1193E 003F
|
||||
@ -114560,10 +114570,15 @@ A9B2 0061
|
||||
A9B2 0041
|
||||
A9B2 0062
|
||||
A9B4 0021
|
||||
A9B5 0021
|
||||
A9B4 003F
|
||||
A9B5 003F
|
||||
A9B4 0061
|
||||
A9B4 0041
|
||||
A9B5 0061
|
||||
A9B5 0041
|
||||
A9B4 0062
|
||||
A9B5 0062
|
||||
A9BC 0021
|
||||
A9BC 003F
|
||||
A9BC 0061
|
||||
@ -114604,11 +114619,6 @@ A9BB 003F
|
||||
A9BB 0061
|
||||
A9BB 0041
|
||||
A9BB 0062
|
||||
A9B5 0021
|
||||
A9B5 003F
|
||||
A9B5 0061
|
||||
A9B5 0041
|
||||
A9B5 0062
|
||||
0334 A9C0
|
||||
A9C0 0334
|
||||
A9C0 0021
|
||||
|
@ -1,6 +1,6 @@
|
||||
# CollationTest_CLDR_SHIFTED_SHORT.txt
|
||||
# Date: 2019-11-08, 22:14:19 GMT
|
||||
# © 2019 Unicode®, Inc.
|
||||
# Date: 2020-02-12, 17:50:42 GMT
|
||||
# © 2020 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# UCA Version: 13.0.0
|
||||
@ -5371,9 +5371,11 @@ FB1E 0334
|
||||
0652 0334
|
||||
0334 0653
|
||||
0653 0334
|
||||
0334 10EAC
|
||||
10EAC 0334
|
||||
0334 0654
|
||||
0654 0334
|
||||
0334 10EAB
|
||||
10EAB 0334
|
||||
0334 0655
|
||||
0655 0334
|
||||
@ -5537,6 +5539,7 @@ A9B3 0334
|
||||
116B7 0334
|
||||
0334 1183A
|
||||
1183A 0334
|
||||
0334 11943
|
||||
11943 0334
|
||||
0334 11D42
|
||||
11D42 0334
|
||||
@ -5658,7 +5661,9 @@ A92D 0334
|
||||
302E 0334
|
||||
0334 302F
|
||||
302F 0334
|
||||
0334 16FF0
|
||||
16FF0 0334
|
||||
0334 16FF1
|
||||
16FF1 0334
|
||||
0334 20D0
|
||||
20D0 0334
|
||||
@ -72709,6 +72714,7 @@ FF37 003F
|
||||
0057 0323 0334
|
||||
0057 0334 0323
|
||||
1E88 0334
|
||||
0334 1ABF
|
||||
1ABF 0334
|
||||
0334 1DF1
|
||||
1DF1 0334
|
||||
@ -72928,6 +72934,7 @@ A7C2 0062
|
||||
1AC0 003F
|
||||
AB69 0021
|
||||
AB69 003F
|
||||
0334 1AC0
|
||||
1AC0 0334
|
||||
028D 0061
|
||||
028D 0041
|
||||
@ -98376,6 +98383,7 @@ A806 0041
|
||||
A806 0062
|
||||
A82C 0021
|
||||
A82C 003F
|
||||
0334 A82C
|
||||
A82C 0334
|
||||
A82C 0061
|
||||
A82C 0041
|
||||
@ -103473,12 +103481,14 @@ A8C4 0062
|
||||
11938 0062
|
||||
1193D 0021
|
||||
1193D 003F
|
||||
0334 1193D
|
||||
1193D 0334
|
||||
1193D 0061
|
||||
1193D 0041
|
||||
1193D 0062
|
||||
1193E 0021
|
||||
1193E 003F
|
||||
0334 1193E
|
||||
1193E 0334
|
||||
1193E 0061
|
||||
1193E 0041
|
||||
@ -122266,9 +122276,14 @@ A9B2 0041
|
||||
A9B2 0062
|
||||
A9B4 0021
|
||||
A9B4 003F
|
||||
A9B5 0021
|
||||
A9B5 003F
|
||||
A9B4 0061
|
||||
A9B4 0041
|
||||
A9B5 0061
|
||||
A9B5 0041
|
||||
A9B4 0062
|
||||
A9B5 0062
|
||||
A9BC 0021
|
||||
A9BC 003F
|
||||
A9BC 0061
|
||||
@ -122309,11 +122324,6 @@ A9BB 003F
|
||||
A9BB 0061
|
||||
A9BB 0041
|
||||
A9BB 0062
|
||||
A9B5 0021
|
||||
A9B5 003F
|
||||
A9B5 0061
|
||||
A9B5 0041
|
||||
A9B5 0062
|
||||
A9C0 0021
|
||||
A9C0 003F
|
||||
0334 A9C0
|
||||
|
@ -1,6 +1,6 @@
|
||||
# GraphemeBreakTest-13.0.0.txt
|
||||
# Date: 2019-11-20, 22:53:31 GMT
|
||||
# © 2019 Unicode®, Inc.
|
||||
# GraphemeBreakTest-cldr-13.0.0.txt
|
||||
# Date: 2020-02-07, 21:43:46 GMT
|
||||
# © 2020 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
|
@ -14,11 +14,12 @@
|
||||
type = word; # one of grapheme | word | line | sentence
|
||||
locale = en;
|
||||
|
||||
Han = [:Han:];
|
||||
|
||||
CR = [\p{Word_Break = CR}];
|
||||
LF = [\p{Word_Break = LF}];
|
||||
Newline = [\p{Word_Break = Newline}];
|
||||
Extend = [\p{Word_Break = Extend}];
|
||||
Extend = [\p{Word_Break = Extend}-Han];
|
||||
ZWJ = [\p{Word_Break = ZWJ}];
|
||||
Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
|
||||
Format = [\p{Word_Break = Format}];
|
||||
@ -30,14 +31,13 @@ Double_Quote = [\p{Word_Break = Double_Quote}];
|
||||
MidNumLet = [\p{Word_Break = MidNumLet}];
|
||||
MidLetter = [\p{Word_Break = MidLetter}];
|
||||
MidNum = [\p{Word_Break = MidNum}];
|
||||
Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079;
|
||||
Numeric = [\p{Word_Break = Numeric}];
|
||||
ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
|
||||
WSegSpace = [\p{Word_Break = WSegSpace}];
|
||||
Extended_Pict = [:ExtPict:];
|
||||
|
||||
#define dictionary, with the effect being that those characters don't appear in test data.
|
||||
|
||||
Han = [:Han:];
|
||||
Hiragana = [:Hiragana:];
|
||||
|
||||
Control = [\p{Grapheme_Cluster_Break = Control}];
|
||||
|
@ -13,11 +13,12 @@
|
||||
type = word; # one of grapheme | word | line | sentence
|
||||
locale = en_US_POSIX;
|
||||
|
||||
Han = [:Han:];
|
||||
|
||||
CR = [\p{Word_Break = CR}];
|
||||
LF = [\p{Word_Break = LF}];
|
||||
Newline = [\p{Word_Break = Newline}];
|
||||
Extend = [\p{Word_Break = Extend}];
|
||||
Extend = [\p{Word_Break = Extend}-Han];
|
||||
ZWJ = [\p{Word_Break = ZWJ}];
|
||||
Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
|
||||
Format = [\p{Word_Break = Format}];
|
||||
@ -29,14 +30,13 @@ Double_Quote = [\p{Word_Break = Double_Quote}];
|
||||
MidNumLet = [\p{Word_Break = MidNumLet} - [.]];
|
||||
MidLetter = [\p{Word_Break = MidLetter} - [\:]];
|
||||
MidNum = [\p{Word_Break = MidNum} [.]];
|
||||
Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079;
|
||||
Numeric = [\p{Word_Break = Numeric}];
|
||||
ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
|
||||
WSegSpace = [\p{Word_Break = WSegSpace}];
|
||||
Extended_Pict = [:ExtPict:];
|
||||
|
||||
#define dictionary, with the effect being that those characters don't appear in test data.
|
||||
|
||||
Han = [:Han:];
|
||||
Hiragana = [:Hiragana:];
|
||||
|
||||
Control = [\p{Grapheme_Cluster_Break = Control}];
|
||||
|
@ -317,25 +317,25 @@ public class SpoofChecker {
|
||||
+ "\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55\\u0C56\\u0C60\\u0C61\\u0C66-"
|
||||
+ "\\u0C6F\\u0C80\\u0C82\\u0C83\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8"
|
||||
+ "\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CBC-\\u0CC4\\u0CC6-\\u0CC8\\u0CCA-\\u0CCD"
|
||||
+ "\\u0CD5\\u0CD6\\u0CE0-\\u0CE3\\u0CE6-\\u0CEF\\u0CF1\\u0CF2\\u0D00\\u0D02-"
|
||||
+ "\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D3A\\u0D3D-\\u0D43\\u0D46-\\u0D48\\u0D4A-"
|
||||
+ "\\u0D4E\\u0D54-\\u0D57\\u0D60\\u0D61\\u0D66-\\u0D6F\\u0D7A-\\u0D7F\\u0D81-"
|
||||
+ "\\u0D83\\u0D85-\\u0D8E\\u0D91-\\u0D96\\u0D9A-\\u0DA5\\u0DA7-\\u0DB1\\u0DB3-"
|
||||
+ "\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0DCA\\u0DCF-\\u0DD4\\u0DD6\\u0DD8-\\u0DDE"
|
||||
+ "\\u0DF2\\u0E01-\\u0E32\\u0E34-\\u0E3A\\u0E40-\\u0E4E\\u0E50-\\u0E59\\u0E81"
|
||||
+ "\\u0E82\\u0E84\\u0E86-\\u0E8A\\u0E8C-\\u0EA3\\u0EA5\\u0EA7-\\u0EB2\\u0EB4-"
|
||||
+ "\\u0EBD\\u0EC0-\\u0EC4\\u0EC6\\u0EC8-\\u0ECD\\u0ED0-\\u0ED9\\u0EDE\\u0EDF"
|
||||
+ "\\u0F00\\u0F20-\\u0F29\\u0F35\\u0F37\\u0F3E-\\u0F42\\u0F44-\\u0F47\\u0F49-"
|
||||
+ "\\u0F4C\\u0F4E-\\u0F51\\u0F53-\\u0F56\\u0F58-\\u0F5B\\u0F5D-\\u0F68\\u0F6A-"
|
||||
+ "\\u0F6C\\u0F71\\u0F72\\u0F74\\u0F7A-\\u0F80\\u0F82-\\u0F84\\u0F86-\\u0F92"
|
||||
+ "\\u0F94-\\u0F97\\u0F99-\\u0F9C\\u0F9E-\\u0FA1\\u0FA3-\\u0FA6\\u0FA8-\\u0FAB"
|
||||
+ "\\u0FAD-\\u0FB8\\u0FBA-\\u0FBC\\u0FC6\\u1000-\\u1049\\u1050-\\u109D\\u10C7"
|
||||
+ "\\u10CD\\u10D0-\\u10F0\\u10F7-\\u10FA\\u10FD-\\u10FF\\u1200-\\u1248\\u124A-"
|
||||
+ "\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D"
|
||||
+ "\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-"
|
||||
+ "\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u135D-\\u135F\\u1380-"
|
||||
+ "\\u138F\\u1780-\\u17A2\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-\\u17CA\\u17D2"
|
||||
+ "\\u17D7\\u17DC\\u17E0-\\u17E9\\u1ABF\\u1AC0\\u1C90-\\u1CBA\\u1CBD-\\u1CBF"
|
||||
+ "\\u0CD5\\u0CD6\\u0CE0-\\u0CE3\\u0CE6-\\u0CEF\\u0CF1\\u0CF2\\u0D00\\u0D02"
|
||||
+ "\\u0D03\\u0D05-\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D3A\\u0D3D-\\u0D43\\u0D46-"
|
||||
+ "\\u0D48\\u0D4A-\\u0D4E\\u0D54-\\u0D57\\u0D60\\u0D61\\u0D66-\\u0D6F\\u0D7A-"
|
||||
+ "\\u0D7F\\u0D82\\u0D83\\u0D85-\\u0D8E\\u0D91-\\u0D96\\u0D9A-\\u0DA5\\u0DA7-"
|
||||
+ "\\u0DB1\\u0DB3-\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0DCA\\u0DCF-\\u0DD4\\u0DD6"
|
||||
+ "\\u0DD8-\\u0DDE\\u0DF2\\u0E01-\\u0E32\\u0E34-\\u0E3A\\u0E40-\\u0E4E\\u0E50-"
|
||||
+ "\\u0E59\\u0E81\\u0E82\\u0E84\\u0E86-\\u0E8A\\u0E8C-\\u0EA3\\u0EA5\\u0EA7-"
|
||||
+ "\\u0EB2\\u0EB4-\\u0EBD\\u0EC0-\\u0EC4\\u0EC6\\u0EC8-\\u0ECD\\u0ED0-\\u0ED9"
|
||||
+ "\\u0EDE\\u0EDF\\u0F00\\u0F20-\\u0F29\\u0F35\\u0F37\\u0F3E-\\u0F42\\u0F44-"
|
||||
+ "\\u0F47\\u0F49-\\u0F4C\\u0F4E-\\u0F51\\u0F53-\\u0F56\\u0F58-\\u0F5B\\u0F5D-"
|
||||
+ "\\u0F68\\u0F6A-\\u0F6C\\u0F71\\u0F72\\u0F74\\u0F7A-\\u0F80\\u0F82-\\u0F84"
|
||||
+ "\\u0F86-\\u0F92\\u0F94-\\u0F97\\u0F99-\\u0F9C\\u0F9E-\\u0FA1\\u0FA3-\\u0FA6"
|
||||
+ "\\u0FA8-\\u0FAB\\u0FAD-\\u0FB8\\u0FBA-\\u0FBC\\u0FC6\\u1000-\\u1049\\u1050-"
|
||||
+ "\\u109D\\u10C7\\u10CD\\u10D0-\\u10F0\\u10F7-\\u10FA\\u10FD-\\u10FF\\u1200-"
|
||||
+ "\\u1248\\u124A-\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288"
|
||||
+ "\\u128A-\\u128D\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-"
|
||||
+ "\\u12C5\\u12C8-\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u135D-"
|
||||
+ "\\u135F\\u1380-\\u138F\\u1780-\\u17A2\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-"
|
||||
+ "\\u17CA\\u17D2\\u17D7\\u17DC\\u17E0-\\u17E9\\u1C90-\\u1CBA\\u1CBD-\\u1CBF"
|
||||
+ "\\u1E00-\\u1E99\\u1E9E\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-"
|
||||
+ "\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F70"
|
||||
+ "\\u1F72\\u1F74\\u1F76\\u1F78\\u1F7A\\u1F7C\\u1F80-\\u1FB4\\u1FB6-\\u1FBA"
|
||||
@ -346,14 +346,14 @@ public class SpoofChecker {
|
||||
+ "\\u2DD8-\\u2DDE\\u3005-\\u3007\\u3041-\\u3096\\u3099\\u309A\\u309D\\u309E"
|
||||
+ "\\u30A1-\\u30FA\\u30FC-\\u30FE\\u3105-\\u312D\\u312F\\u31A0-\\u31BF\\u3400-"
|
||||
+ "\\u4DBF\\u4E00-\\u9FFC\\uA67F\\uA717-\\uA71F\\uA788\\uA78D\\uA792\\uA793"
|
||||
+ "\\uA7AA\\uA7AE\\uA7B8\\uA7B9\\uA7C2-\\uA7CA\\uA7F5\\uA7F6\\uA9E7-\\uA9FE"
|
||||
+ "\\uAA60-\\uAA76\\uAA7A-\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16"
|
||||
+ "\\uAB20-\\uAB26\\uAB28-\\uAB2E\\uAB66-\\uAB68\\uAC00-\\uD7A3\\uFA0E\\uFA0F"
|
||||
+ "\\uFA11\\uFA13\\uFA14\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00011301"
|
||||
+ "\\U00011303\\U0001133B\\U0001133C\\U00016FF0\\U00016FF1\\U0001B150-"
|
||||
+ "\\U0001B152\\U0001B164-\\U0001B167\\U00020000-\\U0002A6DD\\U0002A700-"
|
||||
+ "\\U0002B734\\U0002B740-\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-"
|
||||
+ "\\U0002EBE0\\U00030000-\\U0003134A]"
|
||||
+ "\\uA7AA\\uA7AE\\uA7B8\\uA7B9\\uA7C2-\\uA7CA\\uA9E7-\\uA9FE\\uAA60-\\uAA76"
|
||||
+ "\\uAA7A-\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16\\uAB20-\\uAB26"
|
||||
+ "\\uAB28-\\uAB2E\\uAB66\\uAB67\\uAC00-\\uD7A3\\uFA0E\\uFA0F\\uFA11\\uFA13"
|
||||
+ "\\uFA14\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00011301\\U00011303"
|
||||
+ "\\U0001133B\\U0001133C\\U00016FF0\\U00016FF1\\U0001B150-\\U0001B152"
|
||||
+ "\\U0001B164-\\U0001B167\\U00020000-\\U0002A6DD\\U0002A700-\\U0002B734"
|
||||
+ "\\U0002B740-\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-\\U0002EBE0"
|
||||
+ "\\U00030000-\\U0003134A]"
|
||||
).freeze();
|
||||
// Note: data from IdentifierStatus.txt & IdentifierType.txt
|
||||
// There is tooling to generate this constant in the unicodetools project:
|
||||
|
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8614c8ffed62a613452938a27f3b5398bc6fea93ccb5799c7a540d992b6d22c5
|
||||
size 12999230
|
||||
oid sha256:7e641819877ea4d794fa878ed139748a4d60c0ea164e1e7663727c9ae930192c
|
||||
size 12999311
|
||||
|
@ -1,6 +1,6 @@
|
||||
# CollationTest_CLDR_NON_IGNORABLE_SHORT.txt
|
||||
# Date: 2019-11-08, 22:14:17 GMT
|
||||
# © 2019 Unicode®, Inc.
|
||||
# Date: 2020-02-12, 17:50:40 GMT
|
||||
# © 2020 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# UCA Version: 13.0.0
|
||||
@ -945,9 +945,11 @@ FB1E 0334
|
||||
0652 0334
|
||||
0334 0653
|
||||
0653 0334
|
||||
0334 10EAC
|
||||
10EAC 0334
|
||||
0334 0654
|
||||
0654 0334
|
||||
0334 10EAB
|
||||
10EAB 0334
|
||||
0334 0655
|
||||
0655 0334
|
||||
@ -1111,6 +1113,7 @@ A9B3 0334
|
||||
116B7 0334
|
||||
0334 1183A
|
||||
1183A 0334
|
||||
0334 11943
|
||||
11943 0334
|
||||
0334 11D42
|
||||
11D42 0334
|
||||
@ -1232,7 +1235,9 @@ A92D 0334
|
||||
302E 0334
|
||||
0334 302F
|
||||
302F 0334
|
||||
0334 16FF0
|
||||
16FF0 0334
|
||||
0334 16FF1
|
||||
16FF1 0334
|
||||
0334 20D0
|
||||
20D0 0334
|
||||
@ -66995,6 +67000,7 @@ A75E 0062
|
||||
0057 0323 0334
|
||||
0057 0334 0323
|
||||
1E88 0334
|
||||
0334 1ABF
|
||||
1ABF 0334
|
||||
0334 1DF1
|
||||
1DF1 0334
|
||||
@ -67303,6 +67309,7 @@ A7C2 0062
|
||||
2C72 0041
|
||||
2C73 0062
|
||||
2C72 0062
|
||||
0334 1AC0
|
||||
1AC0 0334
|
||||
028D 0021
|
||||
1AC0 0021
|
||||
@ -92154,6 +92161,7 @@ A806 003F
|
||||
A806 0061
|
||||
A806 0041
|
||||
A806 0062
|
||||
0334 A82C
|
||||
A82C 0334
|
||||
A82C 0021
|
||||
A82C 003F
|
||||
@ -96505,12 +96513,14 @@ A8C4 0062
|
||||
11938 0041
|
||||
11935 11930 0062
|
||||
11938 0062
|
||||
0334 1193D
|
||||
1193D 0334
|
||||
1193D 0021
|
||||
1193D 003F
|
||||
1193D 0061
|
||||
1193D 0041
|
||||
1193D 0062
|
||||
0334 1193E
|
||||
1193E 0334
|
||||
1193E 0021
|
||||
1193E 003F
|
||||
@ -114560,10 +114570,15 @@ A9B2 0061
|
||||
A9B2 0041
|
||||
A9B2 0062
|
||||
A9B4 0021
|
||||
A9B5 0021
|
||||
A9B4 003F
|
||||
A9B5 003F
|
||||
A9B4 0061
|
||||
A9B4 0041
|
||||
A9B5 0061
|
||||
A9B5 0041
|
||||
A9B4 0062
|
||||
A9B5 0062
|
||||
A9BC 0021
|
||||
A9BC 003F
|
||||
A9BC 0061
|
||||
@ -114604,11 +114619,6 @@ A9BB 003F
|
||||
A9BB 0061
|
||||
A9BB 0041
|
||||
A9BB 0062
|
||||
A9B5 0021
|
||||
A9B5 003F
|
||||
A9B5 0061
|
||||
A9B5 0041
|
||||
A9B5 0062
|
||||
0334 A9C0
|
||||
A9C0 0334
|
||||
A9C0 0021
|
||||
|
@ -1,6 +1,6 @@
|
||||
# CollationTest_CLDR_SHIFTED_SHORT.txt
|
||||
# Date: 2019-11-08, 22:14:19 GMT
|
||||
# © 2019 Unicode®, Inc.
|
||||
# Date: 2020-02-12, 17:50:42 GMT
|
||||
# © 2020 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# UCA Version: 13.0.0
|
||||
@ -5371,9 +5371,11 @@ FB1E 0334
|
||||
0652 0334
|
||||
0334 0653
|
||||
0653 0334
|
||||
0334 10EAC
|
||||
10EAC 0334
|
||||
0334 0654
|
||||
0654 0334
|
||||
0334 10EAB
|
||||
10EAB 0334
|
||||
0334 0655
|
||||
0655 0334
|
||||
@ -5537,6 +5539,7 @@ A9B3 0334
|
||||
116B7 0334
|
||||
0334 1183A
|
||||
1183A 0334
|
||||
0334 11943
|
||||
11943 0334
|
||||
0334 11D42
|
||||
11D42 0334
|
||||
@ -5658,7 +5661,9 @@ A92D 0334
|
||||
302E 0334
|
||||
0334 302F
|
||||
302F 0334
|
||||
0334 16FF0
|
||||
16FF0 0334
|
||||
0334 16FF1
|
||||
16FF1 0334
|
||||
0334 20D0
|
||||
20D0 0334
|
||||
@ -72709,6 +72714,7 @@ FF37 003F
|
||||
0057 0323 0334
|
||||
0057 0334 0323
|
||||
1E88 0334
|
||||
0334 1ABF
|
||||
1ABF 0334
|
||||
0334 1DF1
|
||||
1DF1 0334
|
||||
@ -72928,6 +72934,7 @@ A7C2 0062
|
||||
1AC0 003F
|
||||
AB69 0021
|
||||
AB69 003F
|
||||
0334 1AC0
|
||||
1AC0 0334
|
||||
028D 0061
|
||||
028D 0041
|
||||
@ -98376,6 +98383,7 @@ A806 0041
|
||||
A806 0062
|
||||
A82C 0021
|
||||
A82C 003F
|
||||
0334 A82C
|
||||
A82C 0334
|
||||
A82C 0061
|
||||
A82C 0041
|
||||
@ -103473,12 +103481,14 @@ A8C4 0062
|
||||
11938 0062
|
||||
1193D 0021
|
||||
1193D 003F
|
||||
0334 1193D
|
||||
1193D 0334
|
||||
1193D 0061
|
||||
1193D 0041
|
||||
1193D 0062
|
||||
1193E 0021
|
||||
1193E 003F
|
||||
0334 1193E
|
||||
1193E 0334
|
||||
1193E 0061
|
||||
1193E 0041
|
||||
@ -122266,9 +122276,14 @@ A9B2 0041
|
||||
A9B2 0062
|
||||
A9B4 0021
|
||||
A9B4 003F
|
||||
A9B5 0021
|
||||
A9B5 003F
|
||||
A9B4 0061
|
||||
A9B4 0041
|
||||
A9B5 0061
|
||||
A9B5 0041
|
||||
A9B4 0062
|
||||
A9B5 0062
|
||||
A9BC 0021
|
||||
A9BC 003F
|
||||
A9BC 0061
|
||||
@ -122309,11 +122324,6 @@ A9BB 003F
|
||||
A9BB 0061
|
||||
A9BB 0041
|
||||
A9BB 0062
|
||||
A9B5 0021
|
||||
A9B5 003F
|
||||
A9B5 0061
|
||||
A9B5 0041
|
||||
A9B5 0062
|
||||
A9C0 0021
|
||||
A9C0 003F
|
||||
0334 A9C0
|
||||
|
@ -1,6 +1,6 @@
|
||||
# confusables.txt
|
||||
# Date: 2019-10-22, 13:05:29 GMT
|
||||
# © 2019 Unicode®, Inc.
|
||||
# Date: 2020-02-13, 01:38:49 GMT
|
||||
# © 2020 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
@ -1358,6 +1358,10 @@ FFED ; 25AA ; MA #* ( ■ → ▪ ) HALFWIDTH BLACK SQUARE → BLACK SMALL SQUAR
|
||||
|
||||
266A ; 1D158 1D165 1D16E ; MA #* ( ♪ → 𝅘𝅥𝅮 ) EIGHTH NOTE → MUSICAL SYMBOL NOTEHEAD BLACK, MUSICAL SYMBOL COMBINING STEM, MUSICAL SYMBOL COMBINING FLAG-1 #
|
||||
|
||||
24EA ; 1F10D ; MA #* ( ⓪ → 🄍 ) CIRCLED DIGIT ZERO → CIRCLED ZERO WITH SLASH #
|
||||
|
||||
21BA ; 1F10E ; MA #* ( ↺ → 🄎 ) ANTICLOCKWISE OPEN CIRCLE ARROW → CIRCLED ANTICLOCKWISE ARROW #
|
||||
|
||||
02D9 ; 0971 ; MA #* ( ˙ → ॱ ) DOT ABOVE → DEVANAGARI SIGN HIGH SPACING DOT #
|
||||
0D4E ; 0971 ; MA # ( ൎ → ॱ ) MALAYALAM LETTER DOT REPH → DEVANAGARI SIGN HIGH SPACING DOT # →˙→
|
||||
|
||||
@ -1418,13 +1422,13 @@ A9C6 ; A9D0 ; MA #* ( ꧆ → ꧐ ) JAVANESE PADA WINDU → JAVANESE DIGIT ZERO
|
||||
1D7E4 ; 0032 ; MA # ( 𝟤 → 2 ) MATHEMATICAL SANS-SERIF DIGIT TWO → DIGIT TWO #
|
||||
1D7EE ; 0032 ; MA # ( 𝟮 → 2 ) MATHEMATICAL SANS-SERIF BOLD DIGIT TWO → DIGIT TWO #
|
||||
1D7F8 ; 0032 ; MA # ( 𝟸 → 2 ) MATHEMATICAL MONOSPACE DIGIT TWO → DIGIT TWO #
|
||||
1FBF2 ; 0032 ; MA # ( 🯲 → 2 ) SEGMENTED DIGIT TWO → DIGIT TWO #
|
||||
A75A ; 0032 ; MA # ( Ꝛ → 2 ) LATIN CAPITAL LETTER R ROTUNDA → DIGIT TWO #
|
||||
01A7 ; 0032 ; MA # ( Ƨ → 2 ) LATIN CAPITAL LETTER TONE TWO → DIGIT TWO #
|
||||
03E8 ; 0032 ; MA # ( Ϩ → 2 ) COPTIC CAPITAL LETTER HORI → DIGIT TWO # →Ƨ→
|
||||
A644 ; 0032 ; MA # ( Ꙅ → 2 ) CYRILLIC CAPITAL LETTER REVERSED DZE → DIGIT TWO # →Ƨ→
|
||||
14BF ; 0032 ; MA # ( ᒿ → 2 ) CANADIAN SYLLABICS SAYISI M → DIGIT TWO #
|
||||
A6EF ; 0032 ; MA # ( ꛯ → 2 ) BAMUM LETTER KOGHOM → DIGIT TWO # →Ƨ→
|
||||
1FBF2 ; 0032 ; MA # ( 🯲 → 2 ) SEGMENTED DIGIT TWO → DIGIT TWO #
|
||||
|
||||
A9CF ; 0662 ; MA # ( ꧏ → ٢ ) JAVANESE PANGRANGKEP → ARABIC-INDIC DIGIT TWO #
|
||||
06F2 ; 0662 ; MA # ( ۲ → ٢ ) EXTENDED ARABIC-INDIC DIGIT TWO → ARABIC-INDIC DIGIT TWO #
|
||||
@ -1491,6 +1495,7 @@ A9CF ; 0662 ; MA # ( ꧏ → ٢ ) JAVANESE PANGRANGKEP → ARABIC-INDIC DI
|
||||
1D7E5 ; 0033 ; MA # ( 𝟥 → 3 ) MATHEMATICAL SANS-SERIF DIGIT THREE → DIGIT THREE #
|
||||
1D7EF ; 0033 ; MA # ( 𝟯 → 3 ) MATHEMATICAL SANS-SERIF BOLD DIGIT THREE → DIGIT THREE #
|
||||
1D7F9 ; 0033 ; MA # ( 𝟹 → 3 ) MATHEMATICAL MONOSPACE DIGIT THREE → DIGIT THREE #
|
||||
1FBF3 ; 0033 ; MA # ( 🯳 → 3 ) SEGMENTED DIGIT THREE → DIGIT THREE #
|
||||
A7AB ; 0033 ; MA # ( Ɜ → 3 ) LATIN CAPITAL LETTER REVERSED OPEN E → DIGIT THREE #
|
||||
021C ; 0033 ; MA # ( Ȝ → 3 ) LATIN CAPITAL LETTER YOGH → DIGIT THREE # →Ʒ→
|
||||
01B7 ; 0033 ; MA # ( Ʒ → 3 ) LATIN CAPITAL LETTER EZH → DIGIT THREE #
|
||||
@ -1500,7 +1505,6 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
|
||||
04E0 ; 0033 ; MA # ( Ӡ → 3 ) CYRILLIC CAPITAL LETTER ABKHASIAN DZE → DIGIT THREE # →Ʒ→
|
||||
16F3B ; 0033 ; MA # ( 𖼻 → 3 ) MIAO LETTER ZA → DIGIT THREE # →Ʒ→
|
||||
118CA ; 0033 ; MA # ( 𑣊 → 3 ) WARANG CITI SMALL LETTER ANG → DIGIT THREE #
|
||||
1FBF3 ; 0033 ; MA # ( 🯳 → 3 ) SEGMENTED DIGIT THREE → DIGIT THREE #
|
||||
|
||||
06F3 ; 0663 ; MA # ( ۳ → ٣ ) EXTENDED ARABIC-INDIC DIGIT THREE → ARABIC-INDIC DIGIT THREE #
|
||||
1E8C9 ; 0663 ; MA #* ( 𞣉 → ٣ ) MENDE KIKAKUI DIGIT THREE → ARABIC-INDIC DIGIT THREE #
|
||||
@ -1530,9 +1534,9 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
|
||||
1D7E6 ; 0034 ; MA # ( 𝟦 → 4 ) MATHEMATICAL SANS-SERIF DIGIT FOUR → DIGIT FOUR #
|
||||
1D7F0 ; 0034 ; MA # ( 𝟰 → 4 ) MATHEMATICAL SANS-SERIF BOLD DIGIT FOUR → DIGIT FOUR #
|
||||
1D7FA ; 0034 ; MA # ( 𝟺 → 4 ) MATHEMATICAL MONOSPACE DIGIT FOUR → DIGIT FOUR #
|
||||
1FBF4 ; 0034 ; MA # ( 🯴 → 4 ) SEGMENTED DIGIT FOUR → DIGIT FOUR #
|
||||
13CE ; 0034 ; MA # ( Ꮞ → 4 ) CHEROKEE LETTER SE → DIGIT FOUR #
|
||||
118AF ; 0034 ; MA # ( 𑢯 → 4 ) WARANG CITI CAPITAL LETTER UC → DIGIT FOUR #
|
||||
1FBF4 ; 0034 ; MA # ( 🯴 → 4 ) SEGMENTED DIGIT FOUR → DIGIT FOUR #
|
||||
|
||||
06F4 ; 0664 ; MA # ( ۴ → ٤ ) EXTENDED ARABIC-INDIC DIGIT FOUR → ARABIC-INDIC DIGIT FOUR #
|
||||
|
||||
@ -1557,9 +1561,9 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
|
||||
1D7E7 ; 0035 ; MA # ( 𝟧 → 5 ) MATHEMATICAL SANS-SERIF DIGIT FIVE → DIGIT FIVE #
|
||||
1D7F1 ; 0035 ; MA # ( 𝟱 → 5 ) MATHEMATICAL SANS-SERIF BOLD DIGIT FIVE → DIGIT FIVE #
|
||||
1D7FB ; 0035 ; MA # ( 𝟻 → 5 ) MATHEMATICAL MONOSPACE DIGIT FIVE → DIGIT FIVE #
|
||||
1FBF5 ; 0035 ; MA # ( 🯵 → 5 ) SEGMENTED DIGIT FIVE → DIGIT FIVE #
|
||||
01BC ; 0035 ; MA # ( Ƽ → 5 ) LATIN CAPITAL LETTER TONE FIVE → DIGIT FIVE #
|
||||
118BB ; 0035 ; MA # ( 𑢻 → 5 ) WARANG CITI CAPITAL LETTER HORR → DIGIT FIVE #
|
||||
1FBF5 ; 0035 ; MA # ( 🯵 → 5 ) SEGMENTED DIGIT FIVE → DIGIT FIVE #
|
||||
|
||||
2464 ; 2784 ; MA #* ( ⑤ → ➄ ) CIRCLED DIGIT FIVE → DINGBAT CIRCLED SANS-SERIF DIGIT FIVE #
|
||||
|
||||
@ -1578,11 +1582,11 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
|
||||
1D7E8 ; 0036 ; MA # ( 𝟨 → 6 ) MATHEMATICAL SANS-SERIF DIGIT SIX → DIGIT SIX #
|
||||
1D7F2 ; 0036 ; MA # ( 𝟲 → 6 ) MATHEMATICAL SANS-SERIF BOLD DIGIT SIX → DIGIT SIX #
|
||||
1D7FC ; 0036 ; MA # ( 𝟼 → 6 ) MATHEMATICAL MONOSPACE DIGIT SIX → DIGIT SIX #
|
||||
1FBF6 ; 0036 ; MA # ( 🯶 → 6 ) SEGMENTED DIGIT SIX → DIGIT SIX #
|
||||
2CD2 ; 0036 ; MA # ( Ⳓ → 6 ) COPTIC CAPITAL LETTER OLD COPTIC HEI → DIGIT SIX #
|
||||
0431 ; 0036 ; MA # ( б → 6 ) CYRILLIC SMALL LETTER BE → DIGIT SIX #
|
||||
13EE ; 0036 ; MA # ( Ꮾ → 6 ) CHEROKEE LETTER WV → DIGIT SIX #
|
||||
118D5 ; 0036 ; MA # ( 𑣕 → 6 ) WARANG CITI SMALL LETTER AT → DIGIT SIX #
|
||||
1FBF6 ; 0036 ; MA # ( 🯶 → 6 ) SEGMENTED DIGIT SIX → DIGIT SIX #
|
||||
|
||||
06F6 ; 0666 ; MA # ( ۶ → ٦ ) EXTENDED ARABIC-INDIC DIGIT SIX → ARABIC-INDIC DIGIT SIX #
|
||||
|
||||
@ -1606,9 +1610,9 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
|
||||
1D7E9 ; 0037 ; MA # ( 𝟩 → 7 ) MATHEMATICAL SANS-SERIF DIGIT SEVEN → DIGIT SEVEN #
|
||||
1D7F3 ; 0037 ; MA # ( 𝟳 → 7 ) MATHEMATICAL SANS-SERIF BOLD DIGIT SEVEN → DIGIT SEVEN #
|
||||
1D7FD ; 0037 ; MA # ( 𝟽 → 7 ) MATHEMATICAL MONOSPACE DIGIT SEVEN → DIGIT SEVEN #
|
||||
1FBF7 ; 0037 ; MA # ( 🯷 → 7 ) SEGMENTED DIGIT SEVEN → DIGIT SEVEN #
|
||||
104D2 ; 0037 ; MA # ( 𐓒 → 7 ) OSAGE CAPITAL LETTER ZA → DIGIT SEVEN #
|
||||
118C6 ; 0037 ; MA # ( 𑣆 → 7 ) WARANG CITI SMALL LETTER II → DIGIT SEVEN #
|
||||
1FBF7 ; 0037 ; MA # ( 🯷 → 7 ) SEGMENTED DIGIT SEVEN → DIGIT SEVEN #
|
||||
|
||||
2466 ; 2786 ; MA #* ( ⑦ → ➆ ) CIRCLED DIGIT SEVEN → DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN #
|
||||
|
||||
@ -1631,10 +1635,10 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
|
||||
1D7EA ; 0038 ; MA # ( 𝟪 → 8 ) MATHEMATICAL SANS-SERIF DIGIT EIGHT → DIGIT EIGHT #
|
||||
1D7F4 ; 0038 ; MA # ( 𝟴 → 8 ) MATHEMATICAL SANS-SERIF BOLD DIGIT EIGHT → DIGIT EIGHT #
|
||||
1D7FE ; 0038 ; MA # ( 𝟾 → 8 ) MATHEMATICAL MONOSPACE DIGIT EIGHT → DIGIT EIGHT #
|
||||
1FBF8 ; 0038 ; MA # ( 🯸 → 8 ) SEGMENTED DIGIT EIGHT → DIGIT EIGHT #
|
||||
0223 ; 0038 ; MA # ( ȣ → 8 ) LATIN SMALL LETTER OU → DIGIT EIGHT #
|
||||
0222 ; 0038 ; MA # ( Ȣ → 8 ) LATIN CAPITAL LETTER OU → DIGIT EIGHT #
|
||||
1031A ; 0038 ; MA # ( 𐌚 → 8 ) OLD ITALIC LETTER EF → DIGIT EIGHT #
|
||||
1FBF8 ; 0038 ; MA # ( 🯸 → 8 ) SEGMENTED DIGIT EIGHT → DIGIT EIGHT #
|
||||
|
||||
0AEE ; 096E ; MA # ( ૮ → ८ ) GUJARATI DIGIT EIGHT → DEVANAGARI DIGIT EIGHT #
|
||||
|
||||
@ -1659,12 +1663,12 @@ A76A ; 0033 ; MA # ( Ꝫ → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
|
||||
1D7EB ; 0039 ; MA # ( 𝟫 → 9 ) MATHEMATICAL SANS-SERIF DIGIT NINE → DIGIT NINE #
|
||||
1D7F5 ; 0039 ; MA # ( 𝟵 → 9 ) MATHEMATICAL SANS-SERIF BOLD DIGIT NINE → DIGIT NINE #
|
||||
1D7FF ; 0039 ; MA # ( 𝟿 → 9 ) MATHEMATICAL MONOSPACE DIGIT NINE → DIGIT NINE #
|
||||
1FBF9 ; 0039 ; MA # ( 🯹 → 9 ) SEGMENTED DIGIT NINE → DIGIT NINE #
|
||||
A76E ; 0039 ; MA # ( Ꝯ → 9 ) LATIN CAPITAL LETTER CON → DIGIT NINE #
|
||||
2CCA ; 0039 ; MA # ( Ⳋ → 9 ) COPTIC CAPITAL LETTER DIALECT-P HORI → DIGIT NINE #
|
||||
118CC ; 0039 ; MA # ( 𑣌 → 9 ) WARANG CITI SMALL LETTER KO → DIGIT NINE #
|
||||
118AC ; 0039 ; MA # ( 𑢬 → 9 ) WARANG CITI CAPITAL LETTER KO → DIGIT NINE #
|
||||
118D6 ; 0039 ; MA # ( 𑣖 → 9 ) WARANG CITI SMALL LETTER AM → DIGIT NINE #
|
||||
1FBF9 ; 0039 ; MA # ( 🯹 → 9 ) SEGMENTED DIGIT NINE → DIGIT NINE #
|
||||
|
||||
0967 ; 0669 ; MA # ( १ → ٩ ) DEVANAGARI DIGIT ONE → ARABIC-INDIC DIGIT NINE #
|
||||
118E4 ; 0669 ; MA # ( 𑣤 → ٩ ) WARANG CITI DIGIT FOUR → ARABIC-INDIC DIGIT NINE #
|
||||
@ -2544,6 +2548,7 @@ FFE8 ; 006C ; MA #* ( │ → l ) HALFWIDTH FORMS LIGHT VERTICAL → LATIN SMALL
|
||||
1D7E3 ; 006C ; MA # ( 𝟣 → l ) MATHEMATICAL SANS-SERIF DIGIT ONE → LATIN SMALL LETTER L # →1→
|
||||
1D7ED ; 006C ; MA # ( 𝟭 → l ) MATHEMATICAL SANS-SERIF BOLD DIGIT ONE → LATIN SMALL LETTER L # →1→
|
||||
1D7F7 ; 006C ; MA # ( 𝟷 → l ) MATHEMATICAL MONOSPACE DIGIT ONE → LATIN SMALL LETTER L # →1→
|
||||
1FBF1 ; 006C ; MA # ( 🯱 → l ) SEGMENTED DIGIT ONE → LATIN SMALL LETTER L # →1→
|
||||
0049 ; 006C ; MA # ( I → l ) LATIN CAPITAL LETTER I → LATIN SMALL LETTER L #
|
||||
FF29 ; 006C ; MA # ( I → l ) FULLWIDTH LATIN CAPITAL LETTER I → LATIN SMALL LETTER L # →Ӏ→
|
||||
2160 ; 006C ; MA # ( Ⅰ → l ) ROMAN NUMERAL ONE → LATIN SMALL LETTER L # →Ӏ→
|
||||
@ -2601,7 +2606,6 @@ A4F2 ; 006C ; MA # ( ꓲ → l ) LISU LETTER I → LATIN SMALL LETTER L # →I
|
||||
16F28 ; 006C ; MA # ( 𖼨 → l ) MIAO LETTER GHA → LATIN SMALL LETTER L # →I→
|
||||
1028A ; 006C ; MA # ( 𐊊 → l ) LYCIAN LETTER J → LATIN SMALL LETTER L # →I→
|
||||
10309 ; 006C ; MA # ( 𐌉 → l ) OLD ITALIC LETTER I → LATIN SMALL LETTER L # →I→
|
||||
1FBF1 ; 006C ; MA # ( 🯱 → l ) SEGMENTED DIGIT ONE → LATIN SMALL LETTER L # →1→
|
||||
|
||||
1D22A ; 004C ; MA #* ( 𝈪 → L ) GREEK INSTRUMENTAL NOTATION SYMBOL-23 → LATIN CAPITAL LETTER L #
|
||||
216C ; 004C ; MA # ( Ⅼ → L ) ROMAN NUMERAL FIFTY → LATIN CAPITAL LETTER L #
|
||||
@ -2972,6 +2976,7 @@ FBA6 ; 006F ; MA # ( ﮦ → o ) ARABIC LETTER HEH GOAL ISOLATED FORM →
|
||||
1D7E2 ; 004F ; MA # ( 𝟢 → O ) MATHEMATICAL SANS-SERIF DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
|
||||
1D7EC ; 004F ; MA # ( 𝟬 → O ) MATHEMATICAL SANS-SERIF BOLD DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
|
||||
1D7F6 ; 004F ; MA # ( 𝟶 → O ) MATHEMATICAL MONOSPACE DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
|
||||
1FBF0 ; 004F ; MA # ( 🯰 → O ) SEGMENTED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
|
||||
FF2F ; 004F ; MA # ( O → O ) FULLWIDTH LATIN CAPITAL LETTER O → LATIN CAPITAL LETTER O # →О→
|
||||
1D40E ; 004F ; MA # ( 𝐎 → O ) MATHEMATICAL BOLD CAPITAL O → LATIN CAPITAL LETTER O #
|
||||
1D442 ; 004F ; MA # ( 𝑂 → O ) MATHEMATICAL ITALIC CAPITAL O → LATIN CAPITAL LETTER O #
|
||||
@ -3005,7 +3010,6 @@ A4F3 ; 004F ; MA # ( ꓳ → O ) LISU LETTER O → LATIN CAPITAL LETTER O #
|
||||
102AB ; 004F ; MA # ( 𐊫 → O ) CARIAN LETTER O → LATIN CAPITAL LETTER O #
|
||||
10404 ; 004F ; MA # ( 𐐄 → O ) DESERET CAPITAL LETTER LONG O → LATIN CAPITAL LETTER O #
|
||||
10516 ; 004F ; MA # ( 𐔖 → O ) ELBASAN LETTER O → LATIN CAPITAL LETTER O #
|
||||
1FBF0 ; 004F ; MA # ( 🯰 → O ) SEGMENTED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
|
||||
|
||||
2070 ; 00BA ; MA #* ( ⁰ → º ) SUPERSCRIPT ZERO → MASCULINE ORDINAL INDICATOR #
|
||||
1D52 ; 00BA ; MA # ( ᵒ → º ) MODIFIER LETTER SMALL O → MASCULINE ORDINAL INDICATOR # →⁰→
|
||||
@ -8024,8 +8028,6 @@ FA92 ; 6717 ; MA # ( 朗 → 朗 ) CJK COMPATIBILITY IDEOGRAPH-FA92 → CJK UNIF
|
||||
FA93 ; 671B ; MA # ( 望 → 望 ) CJK COMPATIBILITY IDEOGRAPH-FA93 → CJK UNIFIED IDEOGRAPH-671B #
|
||||
2F8D9 ; 671B ; MA # ( 望 → 望 ) CJK COMPATIBILITY IDEOGRAPH-2F8D9 → CJK UNIFIED IDEOGRAPH-671B #
|
||||
|
||||
2F8DA ; 6721 ; MA # ( 朡 → 朡 ) CJK COMPATIBILITY IDEOGRAPH-2F8DA → CJK UNIFIED IDEOGRAPH-6721 #
|
||||
|
||||
5E50 ; 3B3A ; MA # ( 幐 → 㬺 ) CJK UNIFIED IDEOGRAPH-5E50 → CJK UNIFIED IDEOGRAPH-3B3A #
|
||||
|
||||
4420 ; 3B3B ; MA # ( 䐠 → 㬻 ) CJK UNIFIED IDEOGRAPH-4420 → CJK UNIFIED IDEOGRAPH-3B3B #
|
||||
@ -8831,6 +8833,8 @@ F953 ; 808B ; MA # ( 肋 → 肋 ) CJK COMPATIBILITY IDEOGRAPH-F953 → CJK UNIF
|
||||
|
||||
2F984 ; 440B ; MA # ( 䐋 → 䐋 ) CJK COMPATIBILITY IDEOGRAPH-2F984 → CJK UNIFIED IDEOGRAPH-440B #
|
||||
|
||||
2F8DA ; 6721 ; MA # ( 朡 → 朡 ) CJK COMPATIBILITY IDEOGRAPH-2F8DA → CJK UNIFIED IDEOGRAPH-6721 #
|
||||
|
||||
2F987 ; 267A7 ; MA # ( 𦞧 → 𦞧 ) CJK COMPATIBILITY IDEOGRAPH-2F987 → CJK UNIFIED IDEOGRAPH-267A7 #
|
||||
|
||||
2F988 ; 267B5 ; MA # ( 𦞵 → 𦞵 ) CJK COMPATIBILITY IDEOGRAPH-2F988 → CJK UNIFIED IDEOGRAPH-267B5 #
|
||||
@ -9630,9 +9634,5 @@ FACE ; 9F9C ; MA # ( 龜 → 龜 ) CJK COMPATIBILITY IDEOGRAPH-FACE → CJK UNIF
|
||||
|
||||
2FD5 ; 9FA0 ; MA #* ( ⿕ → 龠 ) KANGXI RADICAL FLUTE → CJK UNIFIED IDEOGRAPH-9FA0 #
|
||||
|
||||
24EA ; 1F10D ; MA #* ( ⓪ → 🄍 ) CIRCLED DIGIT ZERO → CIRCLED ZERO WITH SLASH #
|
||||
|
||||
21BA ; 1F10E ; MA #* ( ↺ → 🄎 ) ANTICLOCKWISE OPEN CIRCLE ARROW → CIRCLED ANTICLOCKWISE ARROW #
|
||||
|
||||
# total: 6311
|
||||
|
||||
|
@ -357,10 +357,15 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
fMidNumLetSet = new UnicodeSet("[\\p{Word_Break = MidNumLet}]");
|
||||
fMidLetterSet = new UnicodeSet("[\\p{Word_Break = MidLetter}]");
|
||||
fMidNumSet = new UnicodeSet("[\\p{Word_Break = MidNum}]");
|
||||
fNumericSet = new UnicodeSet("[[\\p{Word_Break = Numeric}][\\uFF10-\\uff19]]");
|
||||
fNumericSet = new UnicodeSet("[\\p{Word_Break = Numeric}]");
|
||||
fFormatSet = new UnicodeSet("[\\p{Word_Break = Format}]");
|
||||
fExtendNumLetSet = new UnicodeSet("[\\p{Word_Break = ExtendNumLet}]");
|
||||
fExtendSet = new UnicodeSet("[\\p{Word_Break = Extend}]");
|
||||
// There are some sc=Hani characters with WB=Extend.
|
||||
// The break rules need to pick one or the other because
|
||||
// Extend overlapping with something else is messy.
|
||||
// For Unicode 13, we chose to keep U+16FF0 & U+16FF1
|
||||
// in $Han (for $dictionary) and out of $Extend.
|
||||
fExtendSet = new UnicodeSet("[\\p{Word_Break = Extend}-[:Hani:]]");
|
||||
fWSegSpaceSet = new UnicodeSet("[\\p{Word_Break = WSegSpace}]");
|
||||
fZWJSet = new UnicodeSet("[\\p{Word_Break = ZWJ}]");
|
||||
fExtendedPictSet = new UnicodeSet("[:Extended_Pictographic:]");
|
||||
|
@ -14,11 +14,12 @@
|
||||
type = word; # one of grapheme | word | line | sentence
|
||||
locale = en;
|
||||
|
||||
Han = [:Han:];
|
||||
|
||||
CR = [\p{Word_Break = CR}];
|
||||
LF = [\p{Word_Break = LF}];
|
||||
Newline = [\p{Word_Break = Newline}];
|
||||
Extend = [\p{Word_Break = Extend}];
|
||||
Extend = [\p{Word_Break = Extend}-Han];
|
||||
ZWJ = [\p{Word_Break = ZWJ}];
|
||||
Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
|
||||
Format = [\p{Word_Break = Format}];
|
||||
@ -30,14 +31,13 @@ Double_Quote = [\p{Word_Break = Double_Quote}];
|
||||
MidNumLet = [\p{Word_Break = MidNumLet}];
|
||||
MidLetter = [\p{Word_Break = MidLetter}];
|
||||
MidNum = [\p{Word_Break = MidNum}];
|
||||
Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079;
|
||||
Numeric = [\p{Word_Break = Numeric}];
|
||||
ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
|
||||
WSegSpace = [\p{Word_Break = WSegSpace}];
|
||||
Extended_Pict = [:ExtPict:];
|
||||
|
||||
#define dictionary, with the effect being that those characters don't appear in test data.
|
||||
|
||||
Han = [:Han:];
|
||||
Hiragana = [:Hiragana:];
|
||||
|
||||
Control = [\p{Grapheme_Cluster_Break = Control}];
|
||||
|
@ -13,11 +13,12 @@
|
||||
type = word; # one of grapheme | word | line | sentence
|
||||
locale = en_US_POSIX;
|
||||
|
||||
Han = [:Han:];
|
||||
|
||||
CR = [\p{Word_Break = CR}];
|
||||
LF = [\p{Word_Break = LF}];
|
||||
Newline = [\p{Word_Break = Newline}];
|
||||
Extend = [\p{Word_Break = Extend}];
|
||||
Extend = [\p{Word_Break = Extend}-Han];
|
||||
ZWJ = [\p{Word_Break = ZWJ}];
|
||||
Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
|
||||
Format = [\p{Word_Break = Format}];
|
||||
@ -29,14 +30,13 @@ Double_Quote = [\p{Word_Break = Double_Quote}];
|
||||
MidNumLet = [\p{Word_Break = MidNumLet} - [.]];
|
||||
MidLetter = [\p{Word_Break = MidLetter} - [\:]];
|
||||
MidNum = [\p{Word_Break = MidNum} [.]];
|
||||
Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079;
|
||||
Numeric = [\p{Word_Break = Numeric}];
|
||||
ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
|
||||
WSegSpace = [\p{Word_Break = WSegSpace}];
|
||||
Extended_Pict = [:ExtPict:];
|
||||
|
||||
#define dictionary, with the effect being that those characters don't appear in test data.
|
||||
|
||||
Han = [:Han:];
|
||||
Hiragana = [:Hiragana:];
|
||||
|
||||
Control = [\p{Grapheme_Cluster_Break = Control}];
|
||||
|
Loading…
Reference in New Issue
Block a user