ICU-20919 Merge branch 'maint/maint-66' into maint-66-merge

Conflicts:
	icu4j/main/shared/data/icudata.jar
This commit is contained in:
Shane Carr 2020-02-20 13:16:24 -08:00
commit bb1f00efb8
57 changed files with 2408 additions and 2342 deletions

View File

@ -5,12 +5,12 @@
-->
<head>
<META http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>ICU4C API Comparison: ICU 65 with ICU 66 (preview)</title>
<title>ICU4C API Comparison: ICU 65 with ICU 66</title>
<link type="text/css" href="icu4c.css" rel="stylesheet">
</head>
<body>
<a name="#_top"></a>
<h1>ICU4C API Comparison: ICU 65 with ICU 66 (preview)</h1>
<h1>ICU4C API Comparison: ICU 65 with ICU 66</h1>
<div id="toc">
<ul>
<li>
@ -1501,7 +1501,7 @@
<a href="#_top">(jump back to top)</a>
<hr>
<p>
<i><font size="-1">Contents generated by StableAPI tool on Tue Dec 03 15:47:14 PST 2019<br>
<i><font size="-1">Contents generated by StableAPI tool on Wed Feb 19 10:40:34 PST 2020<br>
Copyright &copy; 2017 and later: Unicode, Inc. and others.<br>
License &amp; terms of use: http://www.unicode.org/copyright.html
</font></i>

View File

@ -5,7 +5,7 @@
License & terms of use: http://www.unicode.org/copyright.html
-->
# ICU4C API Comparison: ICU 65 with ICU 66 (preview)
# ICU4C API Comparison: ICU 65 with ICU 66
> _Note_ Markdown format of this document is new for ICU 65.
@ -520,7 +520,7 @@ This section shows cases where the signature was "simplified" for the sake of co
## Colophon
Contents generated by StableAPI tool on Tue Dec 03 15:19:41 PST 2019
Contents generated by StableAPI tool on Wed Feb 19 10:40:39 PST 2020
Copyright © 2019 and later: Unicode, Inc. and others.
License & terms of use: http://www.unicode.org/copyright.html

View File

@ -1,6 +1,6 @@
COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later)
Copyright © 1991-2019 Unicode, Inc. All rights reserved.
Copyright © 1991-2020 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining

View File

@ -3,7 +3,7 @@
<html lang="en-US" xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US">
<head>
<title>ReadMe for ICU 66.0.1</title>
<title>ReadMe for ICU 66.1</title>
<meta name="COPYRIGHT" content=
"Copyright (C) 2016 and later: Unicode, Inc. and others. License &amp; terms of use: http://www.unicode.org/copyright.html"/>
<!-- meta name="COPYRIGHT" content=
@ -24,7 +24,7 @@
-->
<!-- <body> -->
<body class="milestone">
<body class="rc">
<p class="only-draft"><b>Note:</b> This is a draft readme.</p>
<h1>
@ -33,7 +33,7 @@
<span class="only-rc">Release Candidate</span>
<!-- <span class="only-milestone">(Milestone Release)</span> -->
<span class="only-milestone">(Preview Release)</span>
<abbr title="International Components for Unicode">ICU</abbr> 66.0.1 ReadMe
<abbr title="International Components for Unicode">ICU</abbr> 66.1 ReadMe
</h1>
<!-- Most of the time we shouldn't need to comment/uncomment this paragraph, just change the body class -->
@ -47,7 +47,7 @@
<p class="note only-rc">This is a release candidate version of ICU4C.
It is not recommended for production use.</p>
<p>Last updated: 2019-Nov-27<br/>
<p>Last updated: 2020-Feb-12<br/>
Copyright &copy; 2016 and later: Unicode, Inc. and others. License &amp; terms of use:
<a href="http://www.unicode.org/copyright.html">http://www.unicode.org/copyright.html</a><br/>
Copyright &copy; 1997-2016 International Business Machines Corporation and others.

View File

@ -323,7 +323,7 @@ static const uint16_t ubidi_props_trieIndex[12536]={
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,0xb1,0xb1,0xb1,0xb1,1,0xb1,0xb1,0xb1,0xb1,0xb1,0x81,0x41,0x41,0x41,
0x41,0x41,0x81,0x81,0x41,0x81,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,
0x81,0x41,1,1,1,0xb1,0xb1,0xb1,1,1,1,1,0x4d,0xd,0x4d,0x4d,
0x81,0x41,0x81,0x81,0x81,0xb1,0xb1,0xb1,1,1,1,1,0x4d,0xd,0x4d,0x4d,
0x4d,0x4d,0xd,0x8d,0x4d,0x8d,0x8d,0xd,0xd,0xd,0xd,0xd,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,0xb1,0xb1,5,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,

View File

@ -304,7 +304,7 @@ static const uint16_t ucase_props_trieIndex[12356]={
0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
0x92,0xff91,0x92,0xff91,0,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,
0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0,
0,4,0,0,0,0,0,0,1,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,
0,4,0,0,0,0,0,4,1,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,
0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,
0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0x1719,1,0,0,0,
0,0,0,0,0,0x64,0x44,0x44,0x44,0x44,0x64,0x44,0x44,0x44,0x64,0x64,

View File

@ -687,13 +687,13 @@ void toUpper(uint32_t options,
if (change) {
ByteSinkUtil::appendTwoBytes(upper, sink);
if ((data & HAS_EITHER_DIALYTIKA) != 0) {
sink.Append(u8"\u0308", 2); // restore or add a dialytika
sink.Append(reinterpret_cast<const char*>(u8"\u0308"), 2); // restore or add a dialytika
}
if (addTonos) {
sink.Append(u8"\u0301", 2);
sink.Append(reinterpret_cast<const char*>(u8"\u0301"), 2);
}
while (numYpogegrammeni > 0) {
sink.Append(u8"\u0399", 2);
sink.Append(reinterpret_cast<const char*>(u8"\u0399"), 2);
--numYpogegrammeni;
}
}

File diff suppressed because it is too large Load Diff

View File

@ -66,13 +66,13 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
#define U_ICU_VERSION_MINOR_NUM 0
#define U_ICU_VERSION_MINOR_NUM 1
/** The current ICU patchlevel version as an integer.
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION_PATCHLEVEL_NUM 1
#define U_ICU_VERSION_PATCHLEVEL_NUM 0
/** The current ICU build level version as an integer.
* This value is for use by ICU clients. It defaults to 0.
@ -139,7 +139,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION "66.0.1"
#define U_ICU_VERSION "66.1"
/**
* The current ICU library major version number as a string, for library name suffixes.
@ -158,7 +158,7 @@
/** Data version in ICU4C.
* @internal ICU 4.4 Internal Use Only
**/
#define U_ICU_DATA_VERSION "66.0.1"
#define U_ICU_DATA_VERSION "66.1"
#endif /* U_HIDE_INTERNAL_API */
/*===========================================================================

View File

@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for ICU 66.0.1.
# Generated by GNU Autoconf 2.69 for ICU 66.1.
#
# Report bugs to <http://icu-project.org/bugs>.
#
@ -582,8 +582,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='ICU'
PACKAGE_TARNAME='International Components for Unicode'
PACKAGE_VERSION='66.0.1'
PACKAGE_STRING='ICU 66.0.1'
PACKAGE_VERSION='66.1'
PACKAGE_STRING='ICU 66.1'
PACKAGE_BUGREPORT='http://icu-project.org/bugs'
PACKAGE_URL='http://icu-project.org'
@ -1362,7 +1362,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures ICU 66.0.1 to adapt to many kinds of systems.
\`configure' configures ICU 66.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1428,7 +1428,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of ICU 66.0.1:";;
short | recursive ) echo "Configuration of ICU 66.1:";;
esac
cat <<\_ACEOF
@ -1566,7 +1566,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
ICU configure 66.0.1
ICU configure 66.1
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@ -2312,7 +2312,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by ICU $as_me 66.0.1, which was
It was created by ICU $as_me 66.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@ -8532,7 +8532,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by ICU $as_me 66.0.1, which was
This file was extended by ICU $as_me 66.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@ -8586,7 +8586,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
ICU config.status 66.0.1
ICU config.status 66.1
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@ -27,10 +27,12 @@
# Character Class Definitions.
#
$Han = [:Han:];
$CR = [\p{Word_Break = CR}];
$LF = [\p{Word_Break = LF}];
$Newline = [\p{Word_Break = Newline} ];
$Extend = [\p{Word_Break = Extend}];
$Newline = [\p{Word_Break = Newline}];
$Extend = [\p{Word_Break = Extend}-$Han];
$ZWJ = [\p{Word_Break = ZWJ}];
$Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
$Format = [\p{Word_Break = Format}];
@ -42,12 +44,11 @@ $Double_Quote = [\p{Word_Break = Double_Quote}];
$MidNumLet = [\p{Word_Break = MidNumLet}];
$MidLetter = [\p{Word_Break = MidLetter}];
$MidNum = [\p{Word_Break = MidNum}];
$Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079
$Numeric = [\p{Word_Break = Numeric}];
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
$WSegSpace = [\p{Word_Break = WSegSpace}];
$Extended_Pict = [\p{Extended_Pictographic}];
$Han = [:Han:];
$Hiragana = [:Hiragana:];
$Ideographic = [\p{Ideographic}];

View File

@ -27,10 +27,12 @@
# Character Class Definitions.
#
$Han = [:Han:];
$CR = [\p{Word_Break = CR}];
$LF = [\p{Word_Break = LF}];
$Newline = [\p{Word_Break = Newline} ];
$Extend = [\p{Word_Break = Extend}];
$Newline = [\p{Word_Break = Newline}];
$Extend = [\p{Word_Break = Extend}-$Han];
$ZWJ = [\p{Word_Break = ZWJ}];
$Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
$Format = [\p{Word_Break = Format}];
@ -42,12 +44,11 @@ $Double_Quote = [\p{Word_Break = Double_Quote}];
$MidNumLet = [\p{Word_Break = MidNumLet} - [.]];
$MidLetter = [\p{Word_Break = MidLetter} - [\:]];
$MidNum = [\p{Word_Break = MidNum} [.]];
$Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079
$Numeric = [\p{Word_Break = Numeric}];
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
$WSegSpace = [\p{Word_Break = WSegSpace}];
$Extended_Pict = [\p{Extended_Pictographic}];
$Han = [:Han:];
$Hiragana = [:Hiragana:];
$Ideographic = [\p{Ideographic}];

View File

@ -1349,8 +1349,7 @@ zh{
"<*宅檡"
"<*窄鉙"
"<*债砦債寨瘵"
"<*枬沾毡旃栴粘蛅飦惉詀趈詹閚谵噡嶦薝邅霑氈氊瞻鹯旜譫饘鳣驙魙鱣鸇"
"<*讝"
"<*枬沾毡旃栴粘蛅飦惉詀趈詹閚谵噡嶦薝邅霑氈氊瞻鹯旜譫饘鳣驙魙鱣鸇讝"
"<*斩飐展盏崭斬琖搌盞嶃嶄榐颭嫸醆橏輾黵"
"<*占佔战栈桟站偡绽菚棧湛戦綻嶘輚戰虥虦覱轏譧蘸驏"
"<*张張章傽鄣嫜彰慞漳獐粻蔁遧暲樟璋餦蟑騿鱆麞"

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -8,6 +8,6 @@
// ***************************************************************************
icuver:table(nofallback){
DataVersion { "66.0.1.0" }
ICUVersion { "66.0.1.0" }
DataVersion { "66.1.0.0" }
ICUVersion { "66.1.0.0" }
}

View File

@ -72,6 +72,11 @@ numberingSystems:table(nofallback){
desc{"०१२३४५६७८९"}
radix:int{10}
}
diak{
algorithmic:int{0}
desc{"𑥐𑥑𑥒𑥓𑥔𑥕𑥖𑥗𑥘𑥙"}
radix:int{10}
}
ethi{
algorithmic:int{1}
desc{"%ethiopic"}
@ -337,6 +342,11 @@ numberingSystems:table(nofallback){
desc{"꣐꣑꣒꣓꣔꣕꣖꣗꣘꣙"}
radix:int{10}
}
segment{
algorithmic:int{0}
desc{"🯰🯱🯲🯳🯴🯵🯶🯷🯸🯹"}
radix:int{10}
}
shrd{
algorithmic:int{0}
desc{"𑇐𑇑𑇒𑇓𑇔𑇕𑇖𑇗𑇘𑇙"}

View File

@ -75,6 +75,7 @@
[㦚䁵匾惼扁碥稨窆糄萹藊褊貶贬鴘𠓫𠪂𡈯𡬯𡬲𡬸𢴂𤀫𥣝𥣰𦟣𦽟𨖠𪖯𱉡]→biǎn;
[㝸㣐㭓㲢㳎㳒㴜㵷㺹䉸䒪䛒䡢䪻便卞变変峅弁徧忭抃昪汳汴玣緶缏艑苄覍變辡辧-辩辫辮辯遍釆閞𠭹𠯴𠷖𢭥𣈠𣝜𣪭𣸇𤀲𤺇𤻶𥍚𦉙𧩰𨚕𨧕𨳲𩩯𩰍𪉱𫔰𬸸]→biàn;
[炞]→bian;
[𰻝𰻞]→biáng;
[⺣㶾䁃䁭䅺䙳䮽儦墂幖彪摽杓标標淲滮瀌灬熛爂猋瘭磦穮脿膘臕蔈藨謤贆鏢鑣镖镳颩颮颷飆-飈飊飑飙飚驃驫骉骠髟𠔂𠚠𢒯𣄠𤂆𤆀𤐫𥲦𦔗𦔩𦠎𦾑𧥍𨭚𩙪𩪊𩴩𩽁𬭺𬴍𰷫𱃔𱃠]→biāo;
[㟽㠒㯹䔸婊檦表裱褾諘錶𢅚𥘤𧝪𰾍]→biǎo;
[㧼䞄俵鰾鳔𠬪𢿏𧳀𧴎𧴕]→biào;
@ -1388,8 +1389,7 @@
[㡯宅檡𦑱𩏪𩏽𪀥𰗛]→zhái;
[䍉窄鉙𠏰𤢒𥞅𧲻𧻍𩬫𰽨]→zhǎi;
[㩟䐱债債寨瘵砦𠑞𡍥𢯌𣩭𥍪𥰾𦤧𨝋𪑽]→zhài;
[㣶㮵䦓䩇䱳䶨噡嶦惉旃旜枬栴毡氈氊沾瞻粘薝蛅詀詹譫谵趈邅閚霑飦饘驙魙鱣鳣鸇鹯𠌲𠟧𡅹𡕁𡭞𢧗𣢤𣮿𤘇𥙡𥶕𦧚𦪣𧋱𧒝𧮪𧽆𧾍𨊈𩉗𩔣𩼼𪃋𪉜𪏉𪡏𫗞𫗴𫘰𬸵𱂷𱌵]→zhān;
[讝𰵨]→zhán;
[㣶㮵䦓䩇䱳䶨噡嶦惉旃旜枬栴毡氈氊沾瞻粘薝蛅詀詹譫讝谵趈邅閚霑飦饘驙魙鱣鳣鸇鹯𠌲𠟧𡅹𡕁𡭞𢧗𣢤𣮿𤘇𥙡𥶕𦧚𦪣𧋱𧒝𧮪𧽆𧾍𨊈𩉗𩔣𩼼𪃋𪉜𪏉𪡏𫗞𫗴𫘰𬸵𰵨𱂷𱌵]→zhān;
[㔊㜊㞡㠭䁪䁴䆄䎒䟋䡀䩅䩆䱼嫸展崭嶃嶄搌斩斬榐橏琖盏盞輾醆颭飐黵𠟉𡽻𢅺𣀁𣛷𥇢𥴐𥿜𦈻𦗢𧎰𧔡𧖉𧬆𧲮𨣁𨣚𨫀𨭖𨺿𩕊𫔑𬍙𬪨𬭫𬱱]→zhǎn;
[㟞㺘㻵䋎䗃䘺䪌䱠佔偡占嶘战戦戰栈桟棧湛站綻绽菚蘸虥虦覱譧輚轏驏𡁳𡓦𢈽𢤚𢧐𣳤𤖆𤜇𧀡𧂁𧙭𧝑𧮺𧸪𨇩𨼈𨼮𩆯𩥇𩨍𩰃𪗦𪘪𬘜𬥿𰊅𰲠𰲳𰹼𰺞]→zhàn;
[䛫傽嫜张張彰慞暲樟漳獐璋章粻蔁蟑遧鄣餦騿鱆麞𡈠𢕎𢕔𢷢𣌞𤍤𧐊𧽣𨄰𩌬𪅂𫗠𫜂𫠒𬦵𰪭]→zhāng;

View File

@ -1,6 +1,6 @@
# DerivedCoreProperties-13.0.0.txt
# Date: 2019-10-21, 14:30:30 GMT
# © 2019 Unicode®, Inc.
# Date: 2020-01-22, 00:07:19 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@ -2873,6 +2873,7 @@ FF41..FF5A ; Cased
0483..0487 ; Case_Ignorable
0488..0489 ; Case_Ignorable
0559 ; Case_Ignorable
055F ; Case_Ignorable
0591..05BD ; Case_Ignorable
05BF ; Case_Ignorable
05C1..05C2 ; Case_Ignorable
@ -3303,7 +3304,7 @@ E0001 ; Case_Ignorable
E0020..E007F ; Case_Ignorable
E0100..E01EF ; Case_Ignorable
# Total code points: 2412
# Total code points: 2413
# ================================================

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
# UCA_Rules_SHORT.txt
# Date: 2019-11-08, 22:14:11 GMT
# © 2019 Unicode®, Inc.
# Date: 2020-02-12, 17:50:33 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# UCA Version: 13.0.0
@ -19518,6 +19518,7 @@
< ꦱ
< ꦲ
< ꦴ
<<< ꦵ
< ꦼ
< ꦶ
< ꦷ
@ -19526,7 +19527,6 @@
< ꦽ
< ꦺ
< ꦻ
< ꦵ
< ꧀
< ᢀ
< ᢁ

View File

@ -62,7 +62,7 @@ https://unicode-org.atlassian.net/browse/ICU-20893
* Command-line environment setup
UNICODE_DATA=~/unidata/uni13/20191106
UNICODE_DATA=~/unidata/uni13/20200212
CLDR_SRC=~/cldr/uni/src
ICU_ROOT=~/icu/uni
ICU_SRC=$ICU_ROOT/src
@ -89,9 +89,12 @@ export LD_LIBRARY_PATH=$ICU_ROOT/dbg/icu4c/lib
- download Unicode files into $UNICODE_DATA
+ subfolders: emoji, idna, security, ucd, uca
+ inside ucd: extract Unihan.zip to "here" (.../ucd/Unihan/*.txt), delete Unihan.zip
+ split Unihan into single-property files
~/unitools/trunk/src$ py/splitunihan.py $UNICODE_DATA/ucd/Unihan
+ get GraphemeBreakTest-cldr.txt from $CLDR_SRC/common/properties/segments/GraphemeBreakTest.txt
or from the ucd/cldr/ output folder of the Unicode Tools:
Since Unicode 12/CLDR 35/ICU 64 CLDR uses modified break rules.
cp $CLDR_SRC/common/properties/segments/GraphemeBreakTest.txt icu4c/source/test/testdata
* for manual diffs and for Unicode Tools input data updates:
remove version suffixes from the file names
@ -155,7 +158,7 @@ export LD_LIBRARY_PATH=$ICU_ROOT/dbg/icu4c/lib
$ICU_ROOT/dbg/icu4c$ echo;echo; date; make -j7 install &> out.txt ; tail -n 30 out.txt ; date
* update spoof checker UnicodeSet initializers:
inclusionPat & recommendedPat in uspoof.cpp
inclusionPat & recommendedPat in i18n/uspoof.cpp
INCLUSION & RECOMMENDED in SpoofChecker.java
- make sure that the Unicode Tools tree contains the latest security data files
- go to Unicode Tools org.unicode.text.tools.RecommendedSetGenerator

View File

@ -1,6 +1,6 @@
# confusables.txt
# Date: 2019-10-22, 13:05:29 GMT
# © 2019 Unicode®, Inc.
# Date: 2020-02-13, 01:38:49 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@ -1358,6 +1358,10 @@ FFED ; 25AA ; MA #* ( ■ → ▪ ) HALFWIDTH BLACK SQUARE → BLACK SMALL SQUAR
266A ; 1D158 1D165 1D16E ; MA #* ( ♪ → 𝅘𝅥𝅮 ) EIGHTH NOTE → MUSICAL SYMBOL NOTEHEAD BLACK, MUSICAL SYMBOL COMBINING STEM, MUSICAL SYMBOL COMBINING FLAG-1 #
24EA ; 1F10D ; MA #* ( ⓪ → 🄍 ) CIRCLED DIGIT ZERO → CIRCLED ZERO WITH SLASH #
21BA ; 1F10E ; MA #* ( ↺ → 🄎 ) ANTICLOCKWISE OPEN CIRCLE ARROW → CIRCLED ANTICLOCKWISE ARROW #
02D9 ; 0971 ; MA #* ( ˙ → ॱ ) DOT ABOVE → DEVANAGARI SIGN HIGH SPACING DOT #
0D4E ; 0971 ; MA # ( ൎ → ॱ ) MALAYALAM LETTER DOT REPH → DEVANAGARI SIGN HIGH SPACING DOT # →˙→
@ -1418,13 +1422,13 @@ A9C6 ; A9D0 ; MA #* ( ꧆ → ꧐ ) JAVANESE PADA WINDU → JAVANESE DIGIT ZERO
1D7E4 ; 0032 ; MA # ( 𝟤 → 2 ) MATHEMATICAL SANS-SERIF DIGIT TWO → DIGIT TWO #
1D7EE ; 0032 ; MA # ( 𝟮 → 2 ) MATHEMATICAL SANS-SERIF BOLD DIGIT TWO → DIGIT TWO #
1D7F8 ; 0032 ; MA # ( 𝟸 → 2 ) MATHEMATICAL MONOSPACE DIGIT TWO → DIGIT TWO #
1FBF2 ; 0032 ; MA # ( 🯲 → 2 ) SEGMENTED DIGIT TWO → DIGIT TWO #
A75A ; 0032 ; MA # ( → 2 ) LATIN CAPITAL LETTER R ROTUNDA → DIGIT TWO #
01A7 ; 0032 ; MA # ( Ƨ → 2 ) LATIN CAPITAL LETTER TONE TWO → DIGIT TWO #
03E8 ; 0032 ; MA # ( Ϩ → 2 ) COPTIC CAPITAL LETTER HORI → DIGIT TWO # →Ƨ→
A644 ; 0032 ; MA # ( → 2 ) CYRILLIC CAPITAL LETTER REVERSED DZE → DIGIT TWO # →Ƨ→
14BF ; 0032 ; MA # ( → 2 ) CANADIAN SYLLABICS SAYISI M → DIGIT TWO #
A6EF ; 0032 ; MA # ( → 2 ) BAMUM LETTER KOGHOM → DIGIT TWO # →Ƨ→
1FBF2 ; 0032 ; MA # ( 🯲 → 2 ) SEGMENTED DIGIT TWO → DIGIT TWO #
A9CF ; 0662 ; MA # ( ꧏ → ‎٢‎ ) JAVANESE PANGRANGKEP → ARABIC-INDIC DIGIT TWO #
06F2 ; 0662 ; MA # ( ۲ → ‎٢‎ ) EXTENDED ARABIC-INDIC DIGIT TWO → ARABIC-INDIC DIGIT TWO #
@ -1491,6 +1495,7 @@ A9CF ; 0662 ; MA # ( ꧏ → ‎٢‎ ) JAVANESE PANGRANGKEP → ARABIC-INDIC DI
1D7E5 ; 0033 ; MA # ( 𝟥 → 3 ) MATHEMATICAL SANS-SERIF DIGIT THREE → DIGIT THREE #
1D7EF ; 0033 ; MA # ( 𝟯 → 3 ) MATHEMATICAL SANS-SERIF BOLD DIGIT THREE → DIGIT THREE #
1D7F9 ; 0033 ; MA # ( 𝟹 → 3 ) MATHEMATICAL MONOSPACE DIGIT THREE → DIGIT THREE #
1FBF3 ; 0033 ; MA # ( 🯳 → 3 ) SEGMENTED DIGIT THREE → DIGIT THREE #
A7AB ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER REVERSED OPEN E → DIGIT THREE #
021C ; 0033 ; MA # ( Ȝ → 3 ) LATIN CAPITAL LETTER YOGH → DIGIT THREE # →Ʒ→
01B7 ; 0033 ; MA # ( Ʒ → 3 ) LATIN CAPITAL LETTER EZH → DIGIT THREE #
@ -1500,7 +1505,6 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
04E0 ; 0033 ; MA # ( Ӡ → 3 ) CYRILLIC CAPITAL LETTER ABKHASIAN DZE → DIGIT THREE # →Ʒ→
16F3B ; 0033 ; MA # ( 𖼻 → 3 ) MIAO LETTER ZA → DIGIT THREE # →Ʒ→
118CA ; 0033 ; MA # ( 𑣊 → 3 ) WARANG CITI SMALL LETTER ANG → DIGIT THREE #
1FBF3 ; 0033 ; MA # ( 🯳 → 3 ) SEGMENTED DIGIT THREE → DIGIT THREE #
06F3 ; 0663 ; MA # ( ۳ → ‎٣‎ ) EXTENDED ARABIC-INDIC DIGIT THREE → ARABIC-INDIC DIGIT THREE #
1E8C9 ; 0663 ; MA #* ( ‎𞣉‎ → ‎٣‎ ) MENDE KIKAKUI DIGIT THREE → ARABIC-INDIC DIGIT THREE #
@ -1530,9 +1534,9 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7E6 ; 0034 ; MA # ( 𝟦 → 4 ) MATHEMATICAL SANS-SERIF DIGIT FOUR → DIGIT FOUR #
1D7F0 ; 0034 ; MA # ( 𝟰 → 4 ) MATHEMATICAL SANS-SERIF BOLD DIGIT FOUR → DIGIT FOUR #
1D7FA ; 0034 ; MA # ( 𝟺 → 4 ) MATHEMATICAL MONOSPACE DIGIT FOUR → DIGIT FOUR #
1FBF4 ; 0034 ; MA # ( 🯴 → 4 ) SEGMENTED DIGIT FOUR → DIGIT FOUR #
13CE ; 0034 ; MA # ( → 4 ) CHEROKEE LETTER SE → DIGIT FOUR #
118AF ; 0034 ; MA # ( 𑢯 → 4 ) WARANG CITI CAPITAL LETTER UC → DIGIT FOUR #
1FBF4 ; 0034 ; MA # ( 🯴 → 4 ) SEGMENTED DIGIT FOUR → DIGIT FOUR #
06F4 ; 0664 ; MA # ( ۴ → ‎٤‎ ) EXTENDED ARABIC-INDIC DIGIT FOUR → ARABIC-INDIC DIGIT FOUR #
@ -1557,9 +1561,9 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7E7 ; 0035 ; MA # ( 𝟧 → 5 ) MATHEMATICAL SANS-SERIF DIGIT FIVE → DIGIT FIVE #
1D7F1 ; 0035 ; MA # ( 𝟱 → 5 ) MATHEMATICAL SANS-SERIF BOLD DIGIT FIVE → DIGIT FIVE #
1D7FB ; 0035 ; MA # ( 𝟻 → 5 ) MATHEMATICAL MONOSPACE DIGIT FIVE → DIGIT FIVE #
1FBF5 ; 0035 ; MA # ( 🯵 → 5 ) SEGMENTED DIGIT FIVE → DIGIT FIVE #
01BC ; 0035 ; MA # ( Ƽ → 5 ) LATIN CAPITAL LETTER TONE FIVE → DIGIT FIVE #
118BB ; 0035 ; MA # ( 𑢻 → 5 ) WARANG CITI CAPITAL LETTER HORR → DIGIT FIVE #
1FBF5 ; 0035 ; MA # ( 🯵 → 5 ) SEGMENTED DIGIT FIVE → DIGIT FIVE #
2464 ; 2784 ; MA #* ( ⑤ → ➄ ) CIRCLED DIGIT FIVE → DINGBAT CIRCLED SANS-SERIF DIGIT FIVE #
@ -1578,11 +1582,11 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7E8 ; 0036 ; MA # ( 𝟨 → 6 ) MATHEMATICAL SANS-SERIF DIGIT SIX → DIGIT SIX #
1D7F2 ; 0036 ; MA # ( 𝟲 → 6 ) MATHEMATICAL SANS-SERIF BOLD DIGIT SIX → DIGIT SIX #
1D7FC ; 0036 ; MA # ( 𝟼 → 6 ) MATHEMATICAL MONOSPACE DIGIT SIX → DIGIT SIX #
1FBF6 ; 0036 ; MA # ( 🯶 → 6 ) SEGMENTED DIGIT SIX → DIGIT SIX #
2CD2 ; 0036 ; MA # ( → 6 ) COPTIC CAPITAL LETTER OLD COPTIC HEI → DIGIT SIX #
0431 ; 0036 ; MA # ( б → 6 ) CYRILLIC SMALL LETTER BE → DIGIT SIX #
13EE ; 0036 ; MA # ( → 6 ) CHEROKEE LETTER WV → DIGIT SIX #
118D5 ; 0036 ; MA # ( 𑣕 → 6 ) WARANG CITI SMALL LETTER AT → DIGIT SIX #
1FBF6 ; 0036 ; MA # ( 🯶 → 6 ) SEGMENTED DIGIT SIX → DIGIT SIX #
06F6 ; 0666 ; MA # ( ۶ → ‎٦‎ ) EXTENDED ARABIC-INDIC DIGIT SIX → ARABIC-INDIC DIGIT SIX #
@ -1606,9 +1610,9 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7E9 ; 0037 ; MA # ( 𝟩 → 7 ) MATHEMATICAL SANS-SERIF DIGIT SEVEN → DIGIT SEVEN #
1D7F3 ; 0037 ; MA # ( 𝟳 → 7 ) MATHEMATICAL SANS-SERIF BOLD DIGIT SEVEN → DIGIT SEVEN #
1D7FD ; 0037 ; MA # ( 𝟽 → 7 ) MATHEMATICAL MONOSPACE DIGIT SEVEN → DIGIT SEVEN #
1FBF7 ; 0037 ; MA # ( 🯷 → 7 ) SEGMENTED DIGIT SEVEN → DIGIT SEVEN #
104D2 ; 0037 ; MA # ( 𐓒 → 7 ) OSAGE CAPITAL LETTER ZA → DIGIT SEVEN #
118C6 ; 0037 ; MA # ( 𑣆 → 7 ) WARANG CITI SMALL LETTER II → DIGIT SEVEN #
1FBF7 ; 0037 ; MA # ( 🯷 → 7 ) SEGMENTED DIGIT SEVEN → DIGIT SEVEN #
2466 ; 2786 ; MA #* ( ⑦ → ➆ ) CIRCLED DIGIT SEVEN → DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN #
@ -1631,10 +1635,10 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7EA ; 0038 ; MA # ( 𝟪 → 8 ) MATHEMATICAL SANS-SERIF DIGIT EIGHT → DIGIT EIGHT #
1D7F4 ; 0038 ; MA # ( 𝟴 → 8 ) MATHEMATICAL SANS-SERIF BOLD DIGIT EIGHT → DIGIT EIGHT #
1D7FE ; 0038 ; MA # ( 𝟾 → 8 ) MATHEMATICAL MONOSPACE DIGIT EIGHT → DIGIT EIGHT #
1FBF8 ; 0038 ; MA # ( 🯸 → 8 ) SEGMENTED DIGIT EIGHT → DIGIT EIGHT #
0223 ; 0038 ; MA # ( ȣ → 8 ) LATIN SMALL LETTER OU → DIGIT EIGHT #
0222 ; 0038 ; MA # ( Ȣ → 8 ) LATIN CAPITAL LETTER OU → DIGIT EIGHT #
1031A ; 0038 ; MA # ( 𐌚 → 8 ) OLD ITALIC LETTER EF → DIGIT EIGHT #
1FBF8 ; 0038 ; MA # ( 🯸 → 8 ) SEGMENTED DIGIT EIGHT → DIGIT EIGHT #
0AEE ; 096E ; MA # ( ૮ → ८ ) GUJARATI DIGIT EIGHT → DEVANAGARI DIGIT EIGHT #
@ -1659,12 +1663,12 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7EB ; 0039 ; MA # ( 𝟫 → 9 ) MATHEMATICAL SANS-SERIF DIGIT NINE → DIGIT NINE #
1D7F5 ; 0039 ; MA # ( 𝟵 → 9 ) MATHEMATICAL SANS-SERIF BOLD DIGIT NINE → DIGIT NINE #
1D7FF ; 0039 ; MA # ( 𝟿 → 9 ) MATHEMATICAL MONOSPACE DIGIT NINE → DIGIT NINE #
1FBF9 ; 0039 ; MA # ( 🯹 → 9 ) SEGMENTED DIGIT NINE → DIGIT NINE #
A76E ; 0039 ; MA # ( → 9 ) LATIN CAPITAL LETTER CON → DIGIT NINE #
2CCA ; 0039 ; MA # ( → 9 ) COPTIC CAPITAL LETTER DIALECT-P HORI → DIGIT NINE #
118CC ; 0039 ; MA # ( 𑣌 → 9 ) WARANG CITI SMALL LETTER KO → DIGIT NINE #
118AC ; 0039 ; MA # ( 𑢬 → 9 ) WARANG CITI CAPITAL LETTER KO → DIGIT NINE #
118D6 ; 0039 ; MA # ( 𑣖 → 9 ) WARANG CITI SMALL LETTER AM → DIGIT NINE #
1FBF9 ; 0039 ; MA # ( 🯹 → 9 ) SEGMENTED DIGIT NINE → DIGIT NINE #
0967 ; 0669 ; MA # ( १ → ‎٩‎ ) DEVANAGARI DIGIT ONE → ARABIC-INDIC DIGIT NINE #
118E4 ; 0669 ; MA # ( 𑣤 → ‎٩‎ ) WARANG CITI DIGIT FOUR → ARABIC-INDIC DIGIT NINE #
@ -2544,6 +2548,7 @@ FFE8 ; 006C ; MA #* ( → l ) HALFWIDTH FORMS LIGHT VERTICAL → LATIN SMALL
1D7E3 ; 006C ; MA # ( 𝟣 → l ) MATHEMATICAL SANS-SERIF DIGIT ONE → LATIN SMALL LETTER L # →1→
1D7ED ; 006C ; MA # ( 𝟭 → l ) MATHEMATICAL SANS-SERIF BOLD DIGIT ONE → LATIN SMALL LETTER L # →1→
1D7F7 ; 006C ; MA # ( 𝟷 → l ) MATHEMATICAL MONOSPACE DIGIT ONE → LATIN SMALL LETTER L # →1→
1FBF1 ; 006C ; MA # ( 🯱 → l ) SEGMENTED DIGIT ONE → LATIN SMALL LETTER L # →1→
0049 ; 006C ; MA # ( I → l ) LATIN CAPITAL LETTER I → LATIN SMALL LETTER L #
FF29 ; 006C ; MA # ( → l ) FULLWIDTH LATIN CAPITAL LETTER I → LATIN SMALL LETTER L # →Ӏ→
2160 ; 006C ; MA # ( → l ) ROMAN NUMERAL ONE → LATIN SMALL LETTER L # →Ӏ→
@ -2601,7 +2606,6 @@ A4F2 ; 006C ; MA # ( → l ) LISU LETTER I → LATIN SMALL LETTER L # →I
16F28 ; 006C ; MA # ( 𖼨 → l ) MIAO LETTER GHA → LATIN SMALL LETTER L # →I→
1028A ; 006C ; MA # ( 𐊊 → l ) LYCIAN LETTER J → LATIN SMALL LETTER L # →I→
10309 ; 006C ; MA # ( 𐌉 → l ) OLD ITALIC LETTER I → LATIN SMALL LETTER L # →I→
1FBF1 ; 006C ; MA # ( 🯱 → l ) SEGMENTED DIGIT ONE → LATIN SMALL LETTER L # →1→
1D22A ; 004C ; MA #* ( 𝈪 → L ) GREEK INSTRUMENTAL NOTATION SYMBOL-23 → LATIN CAPITAL LETTER L #
216C ; 004C ; MA # ( → L ) ROMAN NUMERAL FIFTY → LATIN CAPITAL LETTER L #
@ -2972,6 +2976,7 @@ FBA6 ; 006F ; MA # ( → o ) ARABIC LETTER HEH GOAL ISOLATED FORM →
1D7E2 ; 004F ; MA # ( 𝟢 → O ) MATHEMATICAL SANS-SERIF DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
1D7EC ; 004F ; MA # ( 𝟬 → O ) MATHEMATICAL SANS-SERIF BOLD DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
1D7F6 ; 004F ; MA # ( 𝟶 → O ) MATHEMATICAL MONOSPACE DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
1FBF0 ; 004F ; MA # ( 🯰 → O ) SEGMENTED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
FF2F ; 004F ; MA # ( → O ) FULLWIDTH LATIN CAPITAL LETTER O → LATIN CAPITAL LETTER O # →О→
1D40E ; 004F ; MA # ( 𝐎 → O ) MATHEMATICAL BOLD CAPITAL O → LATIN CAPITAL LETTER O #
1D442 ; 004F ; MA # ( 𝑂 → O ) MATHEMATICAL ITALIC CAPITAL O → LATIN CAPITAL LETTER O #
@ -3005,7 +3010,6 @@ A4F3 ; 004F ; MA # ( → O ) LISU LETTER O → LATIN CAPITAL LETTER O #
102AB ; 004F ; MA # ( 𐊫 → O ) CARIAN LETTER O → LATIN CAPITAL LETTER O #
10404 ; 004F ; MA # ( 𐐄 → O ) DESERET CAPITAL LETTER LONG O → LATIN CAPITAL LETTER O #
10516 ; 004F ; MA # ( 𐔖 → O ) ELBASAN LETTER O → LATIN CAPITAL LETTER O #
1FBF0 ; 004F ; MA # ( 🯰 → O ) SEGMENTED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
2070 ; 00BA ; MA #* ( ⁰ → º ) SUPERSCRIPT ZERO → MASCULINE ORDINAL INDICATOR #
1D52 ; 00BA ; MA # ( ᵒ → º ) MODIFIER LETTER SMALL O → MASCULINE ORDINAL INDICATOR # →⁰→
@ -8024,8 +8028,6 @@ FA92 ; 6717 ; MA # ( 朗 → 朗 ) CJK COMPATIBILITY IDEOGRAPH-FA92 → CJK UNIF
FA93 ; 671B ; MA # ( 望 → 望 ) CJK COMPATIBILITY IDEOGRAPH-FA93 → CJK UNIFIED IDEOGRAPH-671B #
2F8D9 ; 671B ; MA # ( 望 → 望 ) CJK COMPATIBILITY IDEOGRAPH-2F8D9 → CJK UNIFIED IDEOGRAPH-671B #
2F8DA ; 6721 ; MA # ( 朡 → 朡 ) CJK COMPATIBILITY IDEOGRAPH-2F8DA → CJK UNIFIED IDEOGRAPH-6721 #
5E50 ; 3B3A ; MA # ( 幐 → 㬺 ) CJK UNIFIED IDEOGRAPH-5E50 → CJK UNIFIED IDEOGRAPH-3B3A #
4420 ; 3B3B ; MA # ( 䐠 → 㬻 ) CJK UNIFIED IDEOGRAPH-4420 → CJK UNIFIED IDEOGRAPH-3B3B #
@ -8831,6 +8833,8 @@ F953 ; 808B ; MA # ( 肋 → 肋 ) CJK COMPATIBILITY IDEOGRAPH-F953 → CJK UNIF
2F984 ; 440B ; MA # ( 䐋 → 䐋 ) CJK COMPATIBILITY IDEOGRAPH-2F984 → CJK UNIFIED IDEOGRAPH-440B #
2F8DA ; 6721 ; MA # ( 朡 → 朡 ) CJK COMPATIBILITY IDEOGRAPH-2F8DA → CJK UNIFIED IDEOGRAPH-6721 #
2F987 ; 267A7 ; MA # ( 𦞧 → 𦞧 ) CJK COMPATIBILITY IDEOGRAPH-2F987 → CJK UNIFIED IDEOGRAPH-267A7 #
2F988 ; 267B5 ; MA # ( 𦞵 → 𦞵 ) CJK COMPATIBILITY IDEOGRAPH-2F988 → CJK UNIFIED IDEOGRAPH-267B5 #
@ -9630,9 +9634,5 @@ FACE ; 9F9C ; MA # ( 龜 → 龜 ) CJK COMPATIBILITY IDEOGRAPH-FACE → CJK UNIF
2FD5 ; 9FA0 ; MA #* ( ⿕ → 龠 ) KANGXI RADICAL FLUTE → CJK UNIFIED IDEOGRAPH-9FA0 #
24EA ; 1F10D ; MA #* ( ⓪ → 🄍 ) CIRCLED DIGIT ZERO → CIRCLED ZERO WITH SLASH #
21BA ; 1F10E ; MA #* ( ↺ → 🄎 ) ANTICLOCKWISE OPEN CIRCLE ARROW → CIRCLED ANTICLOCKWISE ARROW #
# total: 6311

View File

@ -1718,7 +1718,7 @@ cp;01C0;-Cased;-CWCM;gc=Lo;na=LATIN LETTER DENTAL CLICK;SB=LE
cp;01C1;-Cased;-CWCM;gc=Lo;na=LATIN LETTER LATERAL CLICK;SB=LE
cp;01C2;-Cased;-CWCM;gc=Lo;na=LATIN LETTER ALVEOLAR CLICK;SB=LE
cp;01C3;-Cased;-CWCM;gc=Lo;na=LATIN LETTER RETROFLEX CLICK;SB=LE
# Croatian digraphs matching Serbian Cyrillic letters
# Latin digraphs matching Serbian Cyrillic letters
cp;01C4;cf=01C6;CWCF;CWKCF;CWL;CWT;dm=0044 017D;dt=Com;na=LATIN CAPITAL LETTER DZ WITH CARON;NFKC_CF=0064 017E;NFKC_QC=N;NFKD_QC=N;scf=01C6;slc=01C6;stc=01C5;Upper
cp;01C5;cf=01C6;CWCF;CWKCF;CWL;CWU;dm=0044 017E;dt=Com;gc=Lt;na=LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON;NFKC_CF=0064 017E;NFKC_QC=N;NFKD_QC=N;scf=01C6;slc=01C6;stc=01C5;suc=01C4
cp;01C6;CWKCF;CWT;CWU;dm=0064 017E;dt=Com;gc=Ll;Lower;na=LATIN SMALL LETTER DZ WITH CARON;NFKC_CF=0064 017E;NFKC_QC=N;NFKD_QC=N;SB=LO;stc=01C5;suc=01C4
@ -2034,14 +2034,14 @@ cp;02E2;Alpha;bc=L;Cased;CWKCF;dm=0073;dt=Sup;gc=Lm;IDC;IDS;Lower;na=MODIFIER LE
cp;02E3;Alpha;bc=L;Cased;CWKCF;dm=0078;dt=Sup;gc=Lm;IDC;IDS;Lower;na=MODIFIER LETTER SMALL X;NFKC_CF=0078;NFKC_QC=N;NFKD_QC=N;SB=LO;sc=Latn;XIDC;XIDS
cp;02E4;Alpha;bc=L;Cased;CWKCF;dm=0295;dt=Sup;gc=Lm;IDC;IDS;Lower;na=MODIFIER LETTER SMALL REVERSED GLOTTAL STOP;NFKC_CF=0295;NFKC_QC=N;NFKD_QC=N;SB=LO;sc=Latn;XIDC;XIDS
# Tone letters
cp;02E5;na=MODIFIER LETTER EXTRA-HIGH TONE BAR;WB=XX
cp;02E6;na=MODIFIER LETTER HIGH TONE BAR;WB=XX
cp;02E7;na=MODIFIER LETTER MID TONE BAR;WB=XX
cp;02E8;na=MODIFIER LETTER LOW TONE BAR;WB=XX
cp;02E9;na=MODIFIER LETTER EXTRA-LOW TONE BAR;WB=XX
cp;02E5;na=MODIFIER LETTER EXTRA-HIGH TONE BAR
cp;02E6;na=MODIFIER LETTER HIGH TONE BAR
cp;02E7;na=MODIFIER LETTER MID TONE BAR
cp;02E8;na=MODIFIER LETTER LOW TONE BAR
cp;02E9;na=MODIFIER LETTER EXTRA-LOW TONE BAR
# Extended Bopomofo tone marks
cp;02EA;age=3.0;na=MODIFIER LETTER YIN DEPARTING TONE MARK;sc=Bopo;vo=U;WB=XX
cp;02EB;age=3.0;na=MODIFIER LETTER YANG DEPARTING TONE MARK;sc=Bopo;vo=U;WB=XX
cp;02EA;age=3.0;na=MODIFIER LETTER YIN DEPARTING TONE MARK;sc=Bopo;vo=U
cp;02EB;age=3.0;na=MODIFIER LETTER YANG DEPARTING TONE MARK;sc=Bopo;vo=U
# IPA modifiers
cp;02EC;age=3.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER VOICING;SB=LE;XIDC;XIDS
cp;02ED;age=3.0;na=MODIFIER LETTER UNASPIRATED
@ -2734,12 +2734,12 @@ cp;0556;cf=0586;CWCF;CWKCF;CWL;gc=Lu;na=ARMENIAN CAPITAL LETTER FEH;NFKC_CF=0586
unassigned;0557..0558
# Modifier letters
cp;0559;-Cased;CI;-CWCM;Dia;gc=Lm;na=ARMENIAN MODIFIER LETTER LEFT HALF RING;SB=LE
cp;055A;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN APOSTROPHE;SB=XX;WB=XX;-XIDC;-XIDS
cp;055A;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN APOSTROPHE;SB=XX;-XIDC;-XIDS
cp;055B;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN EMPHASIS MARK;SB=XX;-XIDC;-XIDS
cp;055C;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN EXCLAMATION MARK;SB=XX;-XIDC;-XIDS
cp;055D;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN COMMA;SB=SC;WB=XX;-XIDC;-XIDS
cp;055E;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN QUESTION MARK;SB=XX;-XIDC;-XIDS
cp;055F;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN ABBREVIATION MARK;SB=XX;WB=XX;-XIDC;-XIDS
cp;055F;-Alpha;-Cased;CI;-CWCM;gc=Po;-IDC;-IDS;na=ARMENIAN ABBREVIATION MARK;SB=XX;WB=ML;-XIDC;-XIDS
# Lowercase letters
cp;0560;age=11.0;-CWCM;Lower;na=ARMENIAN SMALL LETTER TURNED AYB
cp;0561;CWT;CWU;Lower;na=ARMENIAN SMALL LETTER AYB;stc=0531;suc=0531
@ -2783,8 +2783,8 @@ cp;0586;CWT;CWU;Lower;na=ARMENIAN SMALL LETTER FEH;stc=0556;suc=0556
cp;0587;cf=0565 0582;CWCF;CWKCF;CWT;CWU;dm=0565 0582;dt=Com;lc=0587;Lower;na=ARMENIAN SMALL LIGATURE ECH YIWN;NFKC_CF=0565 0582;NFKC_QC=N;NFKD_QC=N;tc=0535 0582;uc=0535 0552
cp;0588;age=11.0;-CWCM;Lower;na=ARMENIAN SMALL LETTER YI WITH STROKE
# Punctuation
cp;0589;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;lb=IS;na=ARMENIAN FULL STOP;SB=ST;sc=Zyyy;scx=Armn Geor;STerm;Term;WB=MN;-XIDC;-XIDS
cp;058A;age=3.0;-Alpha;bc=ON;-Cased;-CWCM;Dash;gc=Pd;Hyphen;-IDC;-IDS;lb=BA;na=ARMENIAN HYPHEN;SB=XX;WB=XX;-XIDC;-XIDS
cp;0589;-Alpha;-Cased;-CWCM;gc=Po;-IDC;-IDS;lb=IS;na=ARMENIAN FULL STOP;SB=ST;STerm;Term;WB=MN;-XIDC;-XIDS
cp;058A;age=3.0;-Alpha;bc=ON;-Cased;-CWCM;Dash;gc=Pd;Hyphen;-IDC;-IDS;lb=BA;na=ARMENIAN HYPHEN;SB=XX;-XIDC;-XIDS
unassigned;058B..058C
# Religious symbols
cp;058D;age=7.0;-Alpha;bc=ON;-Cased;-CWCM;gc=So;-IDC;-IDS;na=RIGHT-FACING ARMENIAN ETERNITY SIGN;SB=XX;WB=XX;-XIDC;-XIDS
@ -3567,9 +3567,9 @@ cp;0852;na=MANDAIC LETTER AQ
cp;0853;na=MANDAIC LETTER AR
cp;0854;jt=R;na=MANDAIC LETTER ASH
cp;0855;na=MANDAIC LETTER AT
cp;0856;jt=U;na=MANDAIC LETTER DUSHENNA
cp;0857;jt=U;na=MANDAIC LETTER KAD
cp;0858;jt=U;na=MANDAIC LETTER AIN
cp;0856;jt=R;na=MANDAIC LETTER DUSHENNA
cp;0857;jt=R;na=MANDAIC LETTER KAD
cp;0858;jt=R;na=MANDAIC LETTER AIN
# Diacritics
cp;0859;-Alpha;bc=NSM;ccc=220;CI;gc=Mn;GCB=EX;-Gr_Base;Gr_Ext;-IDS;jt=T;lb=CM;na=MANDAIC AFFRICATION MARK;SB=EX;WB=Extend;-XIDS
cp;085A;-Alpha;bc=NSM;ccc=220;CI;gc=Mn;GCB=EX;-Gr_Base;Gr_Ext;-IDS;jt=T;lb=CM;na=MANDAIC VOCALIZATION MARK;SB=EX;WB=Extend;-XIDS
@ -9102,10 +9102,10 @@ cp;1DF5;age=7.0;Dia;na=COMBINING UP TACK ABOVE
# Typicon marks
cp;1DF6;age=10.0;ccc=232;Dia;na=COMBINING KAVYKA ABOVE RIGHT
cp;1DF7;age=10.0;ccc=228;Dia;na=COMBINING KAVYKA ABOVE LEFT
cp;1DF8;age=10.0;ccc=228;Dia;na=COMBINING DOT ABOVE LEFT
# Miscellaneous marks
cp;1DF8;age=10.0;ccc=228;Dia;na=COMBINING DOT ABOVE LEFT;scx=Cyrl Syrc
cp;1DF9;age=10.0;ccc=220;Dia;na=COMBINING WIDE INVERTED BRIDGE BELOW
unassigned;1DFA
# Miscellaneous mark
cp;1DFB;age=9.0;InPC=Top;InSC=Syllable_Modifier;na=COMBINING DELETION MARK
# Double diacritic mark for UPA
cp;1DFC;age=6.0;ccc=233;na=COMBINING DOUBLE INVERTED BREVE BELOW
@ -9682,9 +9682,10 @@ cp;2024;CI;CWKCF;dm=002E;dt=Com;ea=A;lb=IN;na=ONE DOT LEADER;NFKC_CF=002E;NFKC_Q
cp;2025;CWKCF;dm=002E 002E;dt=Com;ea=A;lb=IN;na=TWO DOT LEADER;NFKC_CF=002E 002E;NFKC_QC=N;NFKD_QC=N
cp;2026;CWKCF;dm=002E 002E 002E;dt=Com;ea=A;lb=IN;na=HORIZONTAL ELLIPSIS;NFKC_CF=002E 002E 002E;NFKC_QC=N;NFKD_QC=N
cp;2027;CI;ea=A;lb=BA;na=HYPHENATION POINT;WB=ML
# Format characters
# Separators
cp;2028;bc=WS;gc=Zl;GCB=CN;-Gr_Base;lb=BK;na=LINE SEPARATOR;-Pat_Syn;Pat_WS;SB=SE;WB=NL;WSpace
cp;2029;bc=B;gc=Zp;GCB=CN;-Gr_Base;lb=BK;na=PARAGRAPH SEPARATOR;-Pat_Syn;Pat_WS;SB=SE;WB=NL;WSpace
# Format characters
cp;202A;bc=LRE;Bidi_C;CI;CWKCF;DI;gc=Cf;GCB=CN;-Gr_Base;jt=T;lb=CM;na=LEFT-TO-RIGHT EMBEDDING;Name_Alias=abbreviation=LRE;NFKC_CF=;-Pat_Syn;SB=FO;WB=FO
cp;202B;bc=RLE;Bidi_C;CI;CWKCF;DI;gc=Cf;GCB=CN;-Gr_Base;jt=T;lb=CM;na=RIGHT-TO-LEFT EMBEDDING;Name_Alias=abbreviation=RLE;NFKC_CF=;-Pat_Syn;SB=FO;WB=FO
cp;202C;bc=PDF;Bidi_C;CI;CWKCF;DI;gc=Cf;GCB=CN;-Gr_Base;jt=T;lb=CM;na=POP DIRECTIONAL FORMATTING;Name_Alias=abbreviation=PDF;NFKC_CF=;-Pat_Syn;SB=FO;WB=FO
@ -11837,8 +11838,9 @@ cp;27C4;age=4.1;bmg=27C3;na=OPEN SUPERSET
# Paired punctuation
cp;27C5;age=4.1;bmg=27C6;bpb=27C6;bpt=o;gc=Ps;lb=OP;na=LEFT S-SHAPED BAG DELIMITER;SB=CL
cp;27C6;age=4.1;bmg=27C5;bpb=27C5;bpt=c;gc=Pe;lb=CL;na=RIGHT S-SHAPED BAG DELIMITER;SB=CL
# Miscellaneous symbols
# Operator
cp;27C7;age=5.0;-Bidi_M;na=OR WITH DOT INSIDE
# Miscellaneous symbols
cp;27C8;age=5.0;bmg=27C9;na=REVERSE SOLIDUS PRECEDING SUBSET
cp;27C9;age=5.0;bmg=27C8;na=SUPERSET PRECEDING SOLIDUS
# Vertical line operator
@ -17107,17 +17109,17 @@ cp;A6F6;-Alpha;gc=Po;-IDC;-IDS;lb=BA;na=BAMUM SEMICOLON;SB=XX;Term;WB=XX;-XIDC;-
cp;A6F7;-Alpha;gc=Po;-IDC;-IDS;lb=BA;na=BAMUM QUESTION MARK;SB=ST;STerm;Term;WB=XX;-XIDC;-XIDS
unassigned;A6F8..A6FF
block;A700..A71F;age=4.1;bc=ON;blk=Modifier_Tone_Letters;CI;Dia;gc=Sk;Gr_Base;lb=AL;sc=Zyyy
block;A700..A71F;age=4.1;bc=ON;blk=Modifier_Tone_Letters;CI;Dia;gc=Sk;Gr_Base;lb=AL;sc=Zyyy;WB=LE
# A700..A71F Modifier Tone Letters
# Corner tone marks for Chinese
cp;A700;na=MODIFIER LETTER CHINESE TONE YIN PING
cp;A701;na=MODIFIER LETTER CHINESE TONE YANG PING
cp;A702;na=MODIFIER LETTER CHINESE TONE YIN SHANG
cp;A703;na=MODIFIER LETTER CHINESE TONE YANG SHANG
cp;A704;na=MODIFIER LETTER CHINESE TONE YIN QU
cp;A705;na=MODIFIER LETTER CHINESE TONE YANG QU
cp;A706;na=MODIFIER LETTER CHINESE TONE YIN RU
cp;A707;na=MODIFIER LETTER CHINESE TONE YANG RU
cp;A700;na=MODIFIER LETTER CHINESE TONE YIN PING;scx=Hani Latn;WB=XX
cp;A701;na=MODIFIER LETTER CHINESE TONE YANG PING;scx=Hani Latn;WB=XX
cp;A702;na=MODIFIER LETTER CHINESE TONE YIN SHANG;scx=Hani Latn;WB=XX
cp;A703;na=MODIFIER LETTER CHINESE TONE YANG SHANG;scx=Hani Latn;WB=XX
cp;A704;na=MODIFIER LETTER CHINESE TONE YIN QU;scx=Hani Latn;WB=XX
cp;A705;na=MODIFIER LETTER CHINESE TONE YANG QU;scx=Hani Latn;WB=XX
cp;A706;na=MODIFIER LETTER CHINESE TONE YIN RU;scx=Hani Latn;WB=XX
cp;A707;na=MODIFIER LETTER CHINESE TONE YANG RU;scx=Hani Latn;WB=XX
# Dotted tone letters
cp;A708;na=MODIFIER LETTER EXTRA-HIGH DOTTED TONE BAR
cp;A709;na=MODIFIER LETTER HIGH DOTTED TONE BAR
@ -17136,16 +17138,16 @@ cp;A714;na=MODIFIER LETTER MID LEFT-STEM TONE BAR
cp;A715;na=MODIFIER LETTER LOW LEFT-STEM TONE BAR
cp;A716;na=MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR
# Chinantec tone marks
cp;A717;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER DOT VERTICAL BAR;SB=LE;WB=LE;XIDC;XIDS
cp;A718;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER DOT SLASH;SB=LE;WB=LE;XIDC;XIDS
cp;A719;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER DOT HORIZONTAL BAR;SB=LE;WB=LE;XIDC;XIDS
cp;A71A;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER LOWER RIGHT CORNER ANGLE;SB=LE;WB=LE;XIDC;XIDS
cp;A717;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER DOT VERTICAL BAR;SB=LE;XIDC;XIDS
cp;A718;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER DOT SLASH;SB=LE;XIDC;XIDS
cp;A719;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER DOT HORIZONTAL BAR;SB=LE;XIDC;XIDS
cp;A71A;age=5.0;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER LOWER RIGHT CORNER ANGLE;SB=LE;XIDC;XIDS
# Africanist tone letters
cp;A71B;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED UP ARROW;SB=LE;WB=LE;XIDC;XIDS
cp;A71C;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED DOWN ARROW;SB=LE;WB=LE;XIDC;XIDS
cp;A71D;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED EXCLAMATION MARK;SB=LE;WB=LE;XIDC;XIDS
cp;A71E;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED INVERTED EXCLAMATION MARK;SB=LE;WB=LE;XIDC;XIDS
cp;A71F;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER LOW INVERTED EXCLAMATION MARK;SB=LE;WB=LE;XIDC;XIDS
cp;A71B;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED UP ARROW;SB=LE;XIDC;XIDS
cp;A71C;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED DOWN ARROW;SB=LE;XIDC;XIDS
cp;A71D;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED EXCLAMATION MARK;SB=LE;XIDC;XIDS
cp;A71E;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER RAISED INVERTED EXCLAMATION MARK;SB=LE;XIDC;XIDS
cp;A71F;age=5.1;Alpha;gc=Lm;IDC;IDS;na=MODIFIER LETTER LOW INVERTED EXCLAMATION MARK;SB=LE;XIDC;XIDS
block;A720..A7FF;age=5.1;Alpha;blk=Latin_Ext_D;Cased;CWCM;gc=Ll;Gr_Base;IDC;IDS;lb=AL;SB=LO;sc=Latn;WB=LE;XIDC;XIDS
# A720..A7FF Latin Extended-D
@ -30483,21 +30485,21 @@ cp;16F9D;CI;Dia;gc=Lm;na=MIAO LETTER REFORMED TONE-5;WB=LE
cp;16F9E;CI;Dia;gc=Lm;na=MIAO LETTER REFORMED TONE-6;WB=LE
cp;16F9F;CI;Dia;gc=Lm;na=MIAO LETTER REFORMED TONE-8;WB=LE
block;16FE0..16FFF;age=13.0;Alpha;blk=Ideographic_Symbols;ea=W;gc=Lm;Gr_Base;IDC;lb=NS;SB=LE;sc=Zyyy;vo=U;WB=Extend;XIDC
block;16FE0..16FFF;age=13.0;Alpha;blk=Ideographic_Symbols;ea=W;gc=Lm;Gr_Base;IDC;lb=NS;SB=LE;sc=Hani;vo=U;WB=Extend;XIDC
# 16FE0..16FFF Ideographic Symbols and Punctuation
# Tangut mark
cp;16FE0;age=9.0;CI;Ext;IDS;na=TANGUT ITERATION MARK;sc=Tang;WB=LE;XIDS
# Nushu mark
cp;16FE1;age=10.0;CI;Ext;IDS;na=NUSHU ITERATION MARK;sc=Nshu;WB=LE;XIDS
# Marks used in ancient Chinese texts
cp;16FE2;age=12.0;-Alpha;bc=ON;gc=Po;-IDC;na=OLD CHINESE HOOK MARK;SB=XX;WB=XX;-XIDC
cp;16FE3;age=12.0;CI;Ext;IDS;na=OLD CHINESE ITERATION MARK;WB=LE;XIDS
cp;16FE2;age=12.0;-Alpha;bc=ON;gc=Po;-IDC;na=OLD CHINESE HOOK MARK;SB=XX;sc=Zyyy;WB=XX;-XIDC
cp;16FE3;age=12.0;CI;Ext;IDS;na=OLD CHINESE ITERATION MARK;sc=Zyyy;WB=LE;XIDS
# Small Khitan format character
cp;16FE4;-Alpha;bc=NSM;CI;gc=Mn;GCB=EX;-Gr_Base;Gr_Ext;Ideo;jt=T;lb=GL;na=KHITAN SMALL SCRIPT FILLER;SB=EX;sc=Kits
unassigned;16FE5..16FEF;vo=U
# Combining diacritics for CJK ideographs
cp;16FF0;ccc=6;Dia;gc=Mc;GCB=SM;lb=CM;na=VIETNAMESE ALTERNATE READING MARK CA;SB=EX;sc=Zinh
cp;16FF1;ccc=6;Dia;gc=Mc;GCB=SM;lb=CM;na=VIETNAMESE ALTERNATE READING MARK NHAY;SB=EX;sc=Zinh
cp;16FF0;ccc=6;Dia;gc=Mc;GCB=SM;lb=CM;na=VIETNAMESE ALTERNATE READING MARK CA;SB=EX
cp;16FF1;ccc=6;Dia;gc=Mc;GCB=SM;lb=CM;na=VIETNAMESE ALTERNATE READING MARK NHAY;SB=EX
unassigned;16FF2..16FFF;vo=U
block;17000..187FF;Alpha;blk=Tangut;ea=W;gc=Lo;Gr_Base;IDC;Ideo;IDS;lb=ID;SB=LE;sc=Tang;vo=U;XIDC;XIDS
@ -31294,7 +31296,7 @@ cp;18AFD;age=13.0;na=TANGUT COMPONENT-766
cp;18AFE;age=13.0;na=TANGUT COMPONENT-767
cp;18AFF;age=13.0;na=TANGUT COMPONENT-768
block;18B00..18CFF;age=13.0;Alpha;blk=Khitan_Small_Script;ea=W;gc=Lo;Gr_Base;IDC;Ideo;IDS;lb=ID;SB=LE;sc=Kits;vo=U;XIDC;XIDS
block;18B00..18CFF;age=13.0;Alpha;blk=Khitan_Small_Script;ea=W;gc=Lo;Gr_Base;IDC;Ideo;IDS;lb=AL;SB=LE;sc=Kits;vo=U;XIDC;XIDS
# 18B00..18CFF Khitan Small Script
# Iteration mark
cp;18B00;na=KHITAN SMALL SCRIPT CHARACTER-18B00
@ -38273,7 +38275,7 @@ cp;1F909;ea=N;-Emoji;-EPres;-ExtPict;lb=AL;na=DOWNWARD FACING NOTCHED HOOK
cp;1F90A;ea=N;-Emoji;-EPres;-ExtPict;lb=AL;na=DOWNWARD FACING HOOK WITH DOT
cp;1F90B;ea=N;-Emoji;-EPres;-ExtPict;lb=AL;na=DOWNWARD FACING NOTCHED HOOK WITH DOT
# Hand symbol
cp;1F90C;age=13.0;EBase;na=PINCHED FINGERS
cp;1F90C;age=13.0;EBase;lb=EB;na=PINCHED FINGERS
# Colored heart symbols
cp;1F90D;age=12.0;na=WHITE HEART
cp;1F90E;age=12.0;na=BROWN HEART
@ -38390,7 +38392,7 @@ cp;1F973;age=11.0;na=FACE WITH PARTY HORN AND PARTY HAT
cp;1F974;age=11.0;na=FACE WITH UNEVEN EYES AND WAVY MOUTH
cp;1F975;age=11.0;na=OVERHEATED FACE
cp;1F976;age=11.0;na=FREEZING FACE
cp;1F977;age=13.0;na=NINJA
cp;1F977;age=13.0;EBase;lb=EB;na=NINJA
cp;1F978;age=13.0;na=DISGUISED FACE
unassigned;1F979;ExtPict;lb=ID;vo=U
cp;1F97A;age=11.0;na=FACE WITH PLEADING EYES
@ -38732,7 +38734,7 @@ cp;1FAD5;na=FONDUE
cp;1FAD6;na=TEAPOT
unassigned;1FAD7..1FAFF;ExtPict;lb=ID;vo=U
block;1FB00..1FBFF;age=13.0;bc=ON;blk=Symbols_For_Legacy_Computing;ExtPict;gc=So;Gr_Base;lb=ID;sc=Zyyy
block;1FB00..1FBFF;age=13.0;bc=ON;blk=Symbols_For_Legacy_Computing;gc=So;Gr_Base;lb=AL;sc=Zyyy
# 1FB00..1FBFF Symbols for Legacy Computing
# Block mosaic terminal graphic characters
cp;1FB00;na=BLOCK SEXTANT-1
@ -38885,7 +38887,7 @@ cp;1FB8F;na=LOWER HALF MEDIUM SHADE
cp;1FB90;na=INVERSE MEDIUM SHADE
cp;1FB91;na=UPPER HALF BLOCK AND LOWER HALF INVERSE MEDIUM SHADE
cp;1FB92;na=UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
unassigned;1FB93;ExtPict;lb=ID
unassigned;1FB93
cp;1FB94;na=LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK
# Fill characters
cp;1FB95;na=CHECKER BOARD FILL
@ -38951,19 +38953,19 @@ cp;1FBC7;na=STICK FIGURE LEANING LEFT
cp;1FBC8;na=STICK FIGURE LEANING RIGHT
cp;1FBC9;na=STICK FIGURE WITH DRESS
cp;1FBCA;na=WHITE UP-POINTING CHEVRON
unassigned;1FBCB..1FBEF;ExtPict;lb=ID
unassigned;1FBCB..1FBEF
# Segmented digits
cp;1FBF0;bc=EN;CWKCF;dm=0030;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT ZERO;NFKC_CF=0030;NFKC_QC=N;NFKD_QC=N;nt=De;nv=0;XIDC
cp;1FBF1;bc=EN;CWKCF;dm=0031;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT ONE;NFKC_CF=0031;NFKC_QC=N;NFKD_QC=N;nt=De;nv=1;XIDC
cp;1FBF2;bc=EN;CWKCF;dm=0032;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT TWO;NFKC_CF=0032;NFKC_QC=N;NFKD_QC=N;nt=De;nv=2;XIDC
cp;1FBF3;bc=EN;CWKCF;dm=0033;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT THREE;NFKC_CF=0033;NFKC_QC=N;NFKD_QC=N;nt=De;nv=3;XIDC
cp;1FBF4;bc=EN;CWKCF;dm=0034;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT FOUR;NFKC_CF=0034;NFKC_QC=N;NFKD_QC=N;nt=De;nv=4;XIDC
cp;1FBF5;bc=EN;CWKCF;dm=0035;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT FIVE;NFKC_CF=0035;NFKC_QC=N;NFKD_QC=N;nt=De;nv=5;XIDC
cp;1FBF6;bc=EN;CWKCF;dm=0036;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT SIX;NFKC_CF=0036;NFKC_QC=N;NFKD_QC=N;nt=De;nv=6;XIDC
cp;1FBF7;bc=EN;CWKCF;dm=0037;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT SEVEN;NFKC_CF=0037;NFKC_QC=N;NFKD_QC=N;nt=De;nv=7;XIDC
cp;1FBF8;bc=EN;CWKCF;dm=0038;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT EIGHT;NFKC_CF=0038;NFKC_QC=N;NFKD_QC=N;nt=De;nv=8;XIDC
cp;1FBF9;bc=EN;CWKCF;dm=0039;dt=Font;gc=Nd;IDC;na=SEGMENTED DIGIT NINE;NFKC_CF=0039;NFKC_QC=N;NFKD_QC=N;nt=De;nv=9;XIDC
unassigned;1FBFA..1FBFF;ExtPict;lb=ID
cp;1FBF0;bc=EN;CWKCF;dm=0030;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT ZERO;NFKC_CF=0030;NFKC_QC=N;NFKD_QC=N;nt=De;nv=0;SB=NU;WB=NU;XIDC
cp;1FBF1;bc=EN;CWKCF;dm=0031;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT ONE;NFKC_CF=0031;NFKC_QC=N;NFKD_QC=N;nt=De;nv=1;SB=NU;WB=NU;XIDC
cp;1FBF2;bc=EN;CWKCF;dm=0032;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT TWO;NFKC_CF=0032;NFKC_QC=N;NFKD_QC=N;nt=De;nv=2;SB=NU;WB=NU;XIDC
cp;1FBF3;bc=EN;CWKCF;dm=0033;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT THREE;NFKC_CF=0033;NFKC_QC=N;NFKD_QC=N;nt=De;nv=3;SB=NU;WB=NU;XIDC
cp;1FBF4;bc=EN;CWKCF;dm=0034;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT FOUR;NFKC_CF=0034;NFKC_QC=N;NFKD_QC=N;nt=De;nv=4;SB=NU;WB=NU;XIDC
cp;1FBF5;bc=EN;CWKCF;dm=0035;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT FIVE;NFKC_CF=0035;NFKC_QC=N;NFKD_QC=N;nt=De;nv=5;SB=NU;WB=NU;XIDC
cp;1FBF6;bc=EN;CWKCF;dm=0036;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT SIX;NFKC_CF=0036;NFKC_QC=N;NFKD_QC=N;nt=De;nv=6;SB=NU;WB=NU;XIDC
cp;1FBF7;bc=EN;CWKCF;dm=0037;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT SEVEN;NFKC_CF=0037;NFKC_QC=N;NFKD_QC=N;nt=De;nv=7;SB=NU;WB=NU;XIDC
cp;1FBF8;bc=EN;CWKCF;dm=0038;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT EIGHT;NFKC_CF=0038;NFKC_QC=N;NFKD_QC=N;nt=De;nv=8;SB=NU;WB=NU;XIDC
cp;1FBF9;bc=EN;CWKCF;dm=0039;dt=Font;gc=Nd;IDC;lb=NU;na=SEGMENTED DIGIT NINE;NFKC_CF=0039;NFKC_QC=N;NFKD_QC=N;nt=De;nv=9;SB=NU;WB=NU;XIDC
unassigned;1FBFA..1FBFF
# No block
unassigned;1FC00..1FF7F;ExtPict;lb=ID

View File

@ -102,25 +102,25 @@ void U_CALLCONV initializeStatics(UErrorCode &status) {
u"\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55\\u0C56\\u0C60\\u0C61\\u0C66-"
u"\\u0C6F\\u0C80\\u0C82\\u0C83\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8"
u"\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CBC-\\u0CC4\\u0CC6-\\u0CC8\\u0CCA-\\u0CCD"
u"\\u0CD5\\u0CD6\\u0CE0-\\u0CE3\\u0CE6-\\u0CEF\\u0CF1\\u0CF2\\u0D00\\u0D02-"
u"\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D3A\\u0D3D-\\u0D43\\u0D46-\\u0D48\\u0D4A-"
u"\\u0D4E\\u0D54-\\u0D57\\u0D60\\u0D61\\u0D66-\\u0D6F\\u0D7A-\\u0D7F\\u0D81-"
u"\\u0D83\\u0D85-\\u0D8E\\u0D91-\\u0D96\\u0D9A-\\u0DA5\\u0DA7-\\u0DB1\\u0DB3-"
u"\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0DCA\\u0DCF-\\u0DD4\\u0DD6\\u0DD8-\\u0DDE"
u"\\u0DF2\\u0E01-\\u0E32\\u0E34-\\u0E3A\\u0E40-\\u0E4E\\u0E50-\\u0E59\\u0E81"
u"\\u0E82\\u0E84\\u0E86-\\u0E8A\\u0E8C-\\u0EA3\\u0EA5\\u0EA7-\\u0EB2\\u0EB4-"
u"\\u0EBD\\u0EC0-\\u0EC4\\u0EC6\\u0EC8-\\u0ECD\\u0ED0-\\u0ED9\\u0EDE\\u0EDF"
u"\\u0F00\\u0F20-\\u0F29\\u0F35\\u0F37\\u0F3E-\\u0F42\\u0F44-\\u0F47\\u0F49-"
u"\\u0F4C\\u0F4E-\\u0F51\\u0F53-\\u0F56\\u0F58-\\u0F5B\\u0F5D-\\u0F68\\u0F6A-"
u"\\u0F6C\\u0F71\\u0F72\\u0F74\\u0F7A-\\u0F80\\u0F82-\\u0F84\\u0F86-\\u0F92"
u"\\u0F94-\\u0F97\\u0F99-\\u0F9C\\u0F9E-\\u0FA1\\u0FA3-\\u0FA6\\u0FA8-\\u0FAB"
u"\\u0FAD-\\u0FB8\\u0FBA-\\u0FBC\\u0FC6\\u1000-\\u1049\\u1050-\\u109D\\u10C7"
u"\\u10CD\\u10D0-\\u10F0\\u10F7-\\u10FA\\u10FD-\\u10FF\\u1200-\\u1248\\u124A-"
u"\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D"
u"\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-"
u"\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u135D-\\u135F\\u1380-"
u"\\u138F\\u1780-\\u17A2\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-\\u17CA\\u17D2"
u"\\u17D7\\u17DC\\u17E0-\\u17E9\\u1ABF\\u1AC0\\u1C90-\\u1CBA\\u1CBD-\\u1CBF"
u"\\u0CD5\\u0CD6\\u0CE0-\\u0CE3\\u0CE6-\\u0CEF\\u0CF1\\u0CF2\\u0D00\\u0D02"
u"\\u0D03\\u0D05-\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D3A\\u0D3D-\\u0D43\\u0D46-"
u"\\u0D48\\u0D4A-\\u0D4E\\u0D54-\\u0D57\\u0D60\\u0D61\\u0D66-\\u0D6F\\u0D7A-"
u"\\u0D7F\\u0D82\\u0D83\\u0D85-\\u0D8E\\u0D91-\\u0D96\\u0D9A-\\u0DA5\\u0DA7-"
u"\\u0DB1\\u0DB3-\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0DCA\\u0DCF-\\u0DD4\\u0DD6"
u"\\u0DD8-\\u0DDE\\u0DF2\\u0E01-\\u0E32\\u0E34-\\u0E3A\\u0E40-\\u0E4E\\u0E50-"
u"\\u0E59\\u0E81\\u0E82\\u0E84\\u0E86-\\u0E8A\\u0E8C-\\u0EA3\\u0EA5\\u0EA7-"
u"\\u0EB2\\u0EB4-\\u0EBD\\u0EC0-\\u0EC4\\u0EC6\\u0EC8-\\u0ECD\\u0ED0-\\u0ED9"
u"\\u0EDE\\u0EDF\\u0F00\\u0F20-\\u0F29\\u0F35\\u0F37\\u0F3E-\\u0F42\\u0F44-"
u"\\u0F47\\u0F49-\\u0F4C\\u0F4E-\\u0F51\\u0F53-\\u0F56\\u0F58-\\u0F5B\\u0F5D-"
u"\\u0F68\\u0F6A-\\u0F6C\\u0F71\\u0F72\\u0F74\\u0F7A-\\u0F80\\u0F82-\\u0F84"
u"\\u0F86-\\u0F92\\u0F94-\\u0F97\\u0F99-\\u0F9C\\u0F9E-\\u0FA1\\u0FA3-\\u0FA6"
u"\\u0FA8-\\u0FAB\\u0FAD-\\u0FB8\\u0FBA-\\u0FBC\\u0FC6\\u1000-\\u1049\\u1050-"
u"\\u109D\\u10C7\\u10CD\\u10D0-\\u10F0\\u10F7-\\u10FA\\u10FD-\\u10FF\\u1200-"
u"\\u1248\\u124A-\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288"
u"\\u128A-\\u128D\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-"
u"\\u12C5\\u12C8-\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u135D-"
u"\\u135F\\u1380-\\u138F\\u1780-\\u17A2\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-"
u"\\u17CA\\u17D2\\u17D7\\u17DC\\u17E0-\\u17E9\\u1C90-\\u1CBA\\u1CBD-\\u1CBF"
u"\\u1E00-\\u1E99\\u1E9E\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-"
u"\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F70"
u"\\u1F72\\u1F74\\u1F76\\u1F78\\u1F7A\\u1F7C\\u1F80-\\u1FB4\\u1FB6-\\u1FBA"
@ -131,14 +131,14 @@ void U_CALLCONV initializeStatics(UErrorCode &status) {
u"\\u2DD8-\\u2DDE\\u3005-\\u3007\\u3041-\\u3096\\u3099\\u309A\\u309D\\u309E"
u"\\u30A1-\\u30FA\\u30FC-\\u30FE\\u3105-\\u312D\\u312F\\u31A0-\\u31BF\\u3400-"
u"\\u4DBF\\u4E00-\\u9FFC\\uA67F\\uA717-\\uA71F\\uA788\\uA78D\\uA792\\uA793"
u"\\uA7AA\\uA7AE\\uA7B8\\uA7B9\\uA7C2-\\uA7CA\\uA7F5\\uA7F6\\uA9E7-\\uA9FE"
u"\\uAA60-\\uAA76\\uAA7A-\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16"
u"\\uAB20-\\uAB26\\uAB28-\\uAB2E\\uAB66-\\uAB68\\uAC00-\\uD7A3\\uFA0E\\uFA0F"
u"\\uFA11\\uFA13\\uFA14\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00011301"
u"\\U00011303\\U0001133B\\U0001133C\\U00016FF0\\U00016FF1\\U0001B150-"
u"\\U0001B152\\U0001B164-\\U0001B167\\U00020000-\\U0002A6DD\\U0002A700-"
u"\\U0002B734\\U0002B740-\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-"
u"\\U0002EBE0\\U00030000-\\U0003134A]";
u"\\uA7AA\\uA7AE\\uA7B8\\uA7B9\\uA7C2-\\uA7CA\\uA9E7-\\uA9FE\\uAA60-\\uAA76"
u"\\uAA7A-\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16\\uAB20-\\uAB26"
u"\\uAB28-\\uAB2E\\uAB66\\uAB67\\uAC00-\\uD7A3\\uFA0E\\uFA0F\\uFA11\\uFA13"
u"\\uFA14\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00011301\\U00011303"
u"\\U0001133B\\U0001133C\\U00016FF0\\U00016FF1\\U0001B150-\\U0001B152"
u"\\U0001B164-\\U0001B167\\U00020000-\\U0002A6DD\\U0002A700-\\U0002B734"
u"\\U0002B740-\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-\\U0002EBE0"
u"\\U00030000-\\U0003134A]";
gRecommendedSet = new UnicodeSet(UnicodeString(recommendedPat), status);
if (gRecommendedSet == NULL) {

View File

@ -295,13 +295,13 @@ void CollationTest::TestIllegalUTF8() {
static const char *strings[] = {
// string with U+FFFD == illegal byte sequence
u8"a\uFFFDz", "a\x80z", // trail byte
u8"a\uFFFD\uFFFDz", "a\xc1\x81z", // non-shortest form
u8"a\uFFFD\uFFFD\uFFFDz", "a\xe0\x82\x83z", // non-shortest form
u8"a\uFFFD\uFFFD\uFFFDz", "a\xed\xa0\x80z", // lead surrogate: would be U+D800
u8"a\uFFFD\uFFFD\uFFFDz", "a\xed\xbf\xbfz", // trail surrogate: would be U+DFFF
u8"a\uFFFD\uFFFD\uFFFD\uFFFDz", "a\xf0\x8f\xbf\xbfz", // non-shortest form
u8"a\uFFFD\uFFFD\uFFFD\uFFFDz", "a\xf4\x90\x80\x80z" // out of range: would be U+110000
reinterpret_cast<const char*>(u8"a\uFFFDz"), reinterpret_cast<const char*>("a\x80z"), // trail byte
reinterpret_cast<const char*>(u8"a\uFFFD\uFFFDz"), reinterpret_cast<const char*>("a\xc1\x81z"), // non-shortest form
reinterpret_cast<const char*>(u8"a\uFFFD\uFFFD\uFFFDz"), reinterpret_cast<const char*>("a\xe0\x82\x83z"), // non-shortest form
reinterpret_cast<const char*>(u8"a\uFFFD\uFFFD\uFFFDz"), reinterpret_cast<const char*>("a\xed\xa0\x80z"), // lead surrogate: would be U+D800
reinterpret_cast<const char*>(u8"a\uFFFD\uFFFD\uFFFDz"), reinterpret_cast<const char*>("a\xed\xbf\xbfz"), // trail surrogate: would be U+DFFF
reinterpret_cast<const char*>(u8"a\uFFFD\uFFFD\uFFFD\uFFFDz"), reinterpret_cast<const char*>("a\xf0\x8f\xbf\xbfz"), // non-shortest form
reinterpret_cast<const char*>(u8"a\uFFFD\uFFFD\uFFFD\uFFFDz"), reinterpret_cast<const char*>("a\xf4\x90\x80\x80z") // out of range: would be U+110000
};
for(int32_t i = 0; i < UPRV_LENGTHOF(strings); i += 2) {

View File

@ -185,38 +185,38 @@ static ExpectedResult kChineseCurrencyTestData[] = {
{123456789012345.0, "\\u00A5120\\u4E07\\u4EBF"},
};
static ExpectedResult kGermanCurrencyTestData[] = {
{1.0, u8"1\\u00A0\\u20AC"},
{12.0, u8"12\\u00A0\\u20AC"},
{123.0, u8"120\\u00A0\\u20AC"},
{1234.0, u8"1200\\u00A0\\u20AC"},
{12345.0, u8"12.000\\u00A0\\u20AC"},
{123456.0, u8"120.000\\u00A0\\u20AC"},
{1234567.0, u8"1,2\\u00A0Mio.\\u00A0\\u20AC"},
{12345678.0, u8"12\\u00A0Mio.\\u00A0\\u20AC"},
{123456789.0, u8"120\\u00A0Mio.\\u00A0\\u20AC"},
{1234567890.0, u8"1,2\\u00A0Mrd.\\u00A0\\u20AC"},
{12345678901.0, u8"12\\u00A0Mrd.\\u00A0\\u20AC"},
{123456789012.0, u8"120\\u00A0Mrd.\\u00A0\\u20AC"},
{1234567890123.0, u8"1,2\\u00A0Bio.\\u00A0\\u20AC"},
{12345678901234.0, u8"12\\u00A0Bio.\\u00A0\\u20AC"},
{123456789012345.0, u8"120\\u00A0Bio.\\u00A0\\u20AC"},
{1.0, reinterpret_cast<const char*>(u8"1\\u00A0\\u20AC")},
{12.0, reinterpret_cast<const char*>(u8"12\\u00A0\\u20AC")},
{123.0, reinterpret_cast<const char*>(u8"120\\u00A0\\u20AC")},
{1234.0, reinterpret_cast<const char*>(u8"1200\\u00A0\\u20AC")},
{12345.0, reinterpret_cast<const char*>(u8"12.000\\u00A0\\u20AC")},
{123456.0, reinterpret_cast<const char*>(u8"120.000\\u00A0\\u20AC")},
{1234567.0, reinterpret_cast<const char*>(u8"1,2\\u00A0Mio.\\u00A0\\u20AC")},
{12345678.0, reinterpret_cast<const char*>(u8"12\\u00A0Mio.\\u00A0\\u20AC")},
{123456789.0, reinterpret_cast<const char*>(u8"120\\u00A0Mio.\\u00A0\\u20AC")},
{1234567890.0, reinterpret_cast<const char*>(u8"1,2\\u00A0Mrd.\\u00A0\\u20AC")},
{12345678901.0, reinterpret_cast<const char*>(u8"12\\u00A0Mrd.\\u00A0\\u20AC")},
{123456789012.0, reinterpret_cast<const char*>(u8"120\\u00A0Mrd.\\u00A0\\u20AC")},
{1234567890123.0, reinterpret_cast<const char*>(u8"1,2\\u00A0Bio.\\u00A0\\u20AC")},
{12345678901234.0, reinterpret_cast<const char*>(u8"12\\u00A0Bio.\\u00A0\\u20AC")},
{123456789012345.0, reinterpret_cast<const char*>(u8"120\\u00A0Bio.\\u00A0\\u20AC")},
};
static ExpectedResult kEnglishCurrencyTestData[] = {
{1.0, u8"$1"},
{12.0, u8"$12"},
{123.0, u8"$120"},
{1234.0, u8"$1.2K"},
{12345.0, u8"$12K"},
{123456.0, u8"$120K"},
{1234567.0, u8"$1.2M"},
{12345678.0, u8"$12M"},
{123456789.0, u8"$120M"},
{1234567890.0, u8"$1.2B"},
{12345678901.0, u8"$12B"},
{123456789012.0, u8"$120B"},
{1234567890123.0, u8"$1.2T"},
{12345678901234.0, u8"$12T"},
{123456789012345.0, u8"$120T"},
{1.0, reinterpret_cast<const char*>(u8"$1")},
{12.0, reinterpret_cast<const char*>(u8"$12")},
{123.0, reinterpret_cast<const char*>(u8"$120")},
{1234.0, reinterpret_cast<const char*>(u8"$1.2K")},
{12345.0, reinterpret_cast<const char*>(u8"$12K")},
{123456.0, reinterpret_cast<const char*>(u8"$120K")},
{1234567.0, reinterpret_cast<const char*>(u8"$1.2M")},
{12345678.0, reinterpret_cast<const char*>(u8"$12M")},
{123456789.0, reinterpret_cast<const char*>(u8"$120M")},
{1234567890.0, reinterpret_cast<const char*>(u8"$1.2B")},
{12345678901.0, reinterpret_cast<const char*>(u8"$12B")},
{123456789012.0, reinterpret_cast<const char*>(u8"$120B")},
{1234567890123.0, reinterpret_cast<const char*>(u8"$1.2T")},
{12345678901234.0, reinterpret_cast<const char*>(u8"$12T")},
{123456789012345.0, reinterpret_cast<const char*>(u8"$120T")},
};

View File

@ -1992,10 +1992,15 @@ RBBIWordMonkey::RBBIWordMonkey()
fMidNumLetSet = new UnicodeSet(u"[\\p{Word_Break = MidNumLet}]", status);
fMidLetterSet = new UnicodeSet(u"[\\p{Word_Break = MidLetter}]", status);
fMidNumSet = new UnicodeSet(u"[\\p{Word_Break = MidNum}]", status);
fNumericSet = new UnicodeSet(u"[[\\p{Word_Break = Numeric}][\\uff10-\\uff19]]", status);
fNumericSet = new UnicodeSet(u"[\\p{Word_Break = Numeric}]", status);
fFormatSet = new UnicodeSet(u"[\\p{Word_Break = Format}]", status);
fExtendNumLetSet = new UnicodeSet(u"[\\p{Word_Break = ExtendNumLet}]", status);
fExtendSet = new UnicodeSet(u"[\\p{Word_Break = Extend}]", status);
// There are some sc=Hani characters with WB=Extend.
// The break rules need to pick one or the other because
// Extend overlapping with something else is messy.
// For Unicode 13, we chose to keep U+16FF0 & U+16FF1
// in $Han (for $dictionary) and out of $Extend.
fExtendSet = new UnicodeSet(u"[\\p{Word_Break = Extend}-[:Hani:]]", status);
fWSegSpaceSet = new UnicodeSet(u"[\\p{Word_Break = WSegSpace}]", status);
fZWJSet = new UnicodeSet(u"[\\p{Word_Break = ZWJ}]", status);

View File

@ -5838,9 +5838,9 @@ void RegexTest::TestBug12884() {
REGEX_ASSERT(status == U_REGEX_TIME_OUT);
// UText, wrapping non-UTF-16 text, also takes a different execution path.
const char *text8 = u8"¿Qué es Unicode? Unicode proporciona un número único para cada"
const char *text8 = reinterpret_cast<const char*>(u8"¿Qué es Unicode? Unicode proporciona un número único para cada"
"carácter, sin importar la plataforma, sin importar el programa,"
"sin importar el idioma.";
"sin importar el idioma.");
status = U_ZERO_ERROR;
LocalUTextPointer ut(utext_openUTF8(NULL, text8, -1, &status));
REGEX_CHECK_STATUS;

View File

@ -1314,7 +1314,7 @@ void StringCaseTest::TestCaseMapUTF8WithEdits() {
Edits edits;
int32_t length = CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT,
u8"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
reinterpret_cast<const char*>(u8"IstanBul"), 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"),
UnicodeString::fromUTF8(StringPiece(dest, length)));
static const EditChange lowerExpectedChanges[] = {
@ -1330,7 +1330,7 @@ void StringCaseTest::TestCaseMapUTF8WithEdits() {
edits.reset();
length = CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT,
u8"Πατάτα", 6 * 2, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
reinterpret_cast<const char*>(u8"Πατάτα"), 6 * 2, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"),
UnicodeString::fromUTF8(StringPiece(dest, length)));
static const EditChange upperExpectedChanges[] = {
@ -1352,7 +1352,7 @@ void StringCaseTest::TestCaseMapUTF8WithEdits() {
U_OMIT_UNCHANGED_TEXT |
U_TITLECASE_NO_BREAK_ADJUSTMENT |
U_TITLECASE_NO_LOWERCASE,
nullptr, u8"IjssEL IglOo", 12,
nullptr, reinterpret_cast<const char*>(u8"IjssEL IglOo"), 12,
dest, UPRV_LENGTHOF(dest), &edits, errorCode);
assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
UnicodeString::fromUTF8(StringPiece(dest, length)));
@ -1370,7 +1370,7 @@ void StringCaseTest::TestCaseMapUTF8WithEdits() {
// No explicit nor automatic edits.reset(). Edits should be appended.
length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET |
U_FOLD_CASE_EXCLUDE_SPECIAL_I,
u8"IßtanBul", 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
reinterpret_cast<const char*>(u8"IßtanBul"), 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
UnicodeString::fromUTF8(StringPiece(dest, length)));
static const EditChange foldExpectedChanges[] = {
@ -1454,44 +1454,44 @@ void StringCaseTest::TestCaseMapUTF8ToString() {
StringByteSink<std::string> sink(&dest);
// Omit unchanged text.
CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT, u8"IstanBul", sink, nullptr, errorCode);
CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT, reinterpret_cast<const char*>(u8"IstanBul"), sink, nullptr, errorCode);
assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), UnicodeString::fromUTF8(dest));
dest.clear();
CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT, u8"Πατάτα", sink, nullptr, errorCode);
CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT, reinterpret_cast<const char*>(u8"Πατάτα"), sink, nullptr, errorCode);
assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"),
UnicodeString::fromUTF8(dest));
#if !UCONFIG_NO_BREAK_ITERATION
dest.clear();
CaseMap::utf8ToTitle(
"nl", U_OMIT_UNCHANGED_TEXT | U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE,
nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode);
nullptr, reinterpret_cast<const char*>(u8"IjssEL IglOo"), sink, nullptr, errorCode);
assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
UnicodeString::fromUTF8(dest));
#endif
dest.clear();
CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
u8"IßtanBul", sink, nullptr, errorCode);
reinterpret_cast<const char*>(u8"IßtanBul"), sink, nullptr, errorCode);
assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
UnicodeString::fromUTF8(dest));
// Return the whole result string.
dest.clear();
CaseMap::utf8ToLower("tr", 0, u8"IstanBul", sink, nullptr, errorCode);
CaseMap::utf8ToLower("tr", 0, reinterpret_cast<const char*>(u8"IstanBul"), sink, nullptr, errorCode);
assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıstanbul"),
UnicodeString::fromUTF8(dest));
dest.clear();
CaseMap::utf8ToUpper("el", 0, u8"Πατάτα", sink, nullptr, errorCode);
CaseMap::utf8ToUpper("el", 0, reinterpret_cast<const char*>(u8"Πατάτα"), sink, nullptr, errorCode);
assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΠΑΤΑΤΑ"),
UnicodeString::fromUTF8(dest));
#if !UCONFIG_NO_BREAK_ITERATION
dest.clear();
CaseMap::utf8ToTitle("nl", U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE,
nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode);
nullptr, reinterpret_cast<const char*>(u8"IjssEL IglOo"), sink, nullptr, errorCode);
assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"IJssEL IglOo"),
UnicodeString::fromUTF8(dest));
#endif
dest.clear();
CaseMap::utf8Fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I, u8"IßtanBul", sink, nullptr, errorCode);
CaseMap::utf8Fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I, reinterpret_cast<const char*>(u8"IßtanBul"), sink, nullptr, errorCode);
assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ısstanbul"),
UnicodeString::fromUTF8(dest));
}

View File

@ -1574,8 +1574,8 @@ BasicNormalizerTest::TestNormalizeUTF8WithEdits() {
return;
}
static const char *const src =
u8" AÄA\u0308A\u0308\u00ad\u0323Ä\u0323,\u00ad\u1100\u1161\u11A8\u3133 ";
std::string expected = u8" aääạ\u0308\u0308,가각갃 ";
reinterpret_cast<const char*>(u8" AÄA\u0308A\u0308\u00ad\u0323Ä\u0323,\u00ad\u1100\u1161\u11A8\u3133 ");
std::string expected = reinterpret_cast<const char*>(u8" aääạ\u0308\u0308,가각갃 ");
std::string result;
StringByteSink<std::string> sink(&result, static_cast<int32_t>(expected.length()));
Edits edits;
@ -1607,7 +1607,7 @@ BasicNormalizerTest::TestNormalizeUTF8WithEdits() {
assertTrue("isNormalizedUTF8(normalized)", nfkc_cf->isNormalizedUTF8(result, errorCode));
// Omit unchanged text.
expected = u8"aääạ\u0308\u0308가각갃";
expected = reinterpret_cast<const char*>(u8"aääạ\u0308\u0308가각갃");
result.clear();
edits.reset();
nfkc_cf->normalizeUTF8(U_OMIT_UNCHANGED_TEXT, src, sink, &edits, errorCode);
@ -1623,7 +1623,7 @@ BasicNormalizerTest::TestNormalizeUTF8WithEdits() {
// With filter: The normalization code does not see the "A" substrings.
UnicodeSet filter(u"[^A]", errorCode);
FilteredNormalizer2 fn2(*nfkc_cf, filter);
expected = u8" AäA\u0308A\u0323\u0308\u0308,가각갃 ";
expected = reinterpret_cast<const char*>(u8" AäA\u0308A\u0323\u0308\u0308,가각갃 ");
result.clear();
edits.reset();
fn2.normalizeUTF8(0, src, sink, &edits, errorCode);
@ -1655,7 +1655,7 @@ BasicNormalizerTest::TestNormalizeUTF8WithEdits() {
// Omit unchanged text.
// Note that the result is not normalized because the inner normalizer
// does not see text across filter spans.
expected = u8"ä\u0323\u0308\u0308가각갃";
expected = reinterpret_cast<const char*>(u8"ä\u0323\u0308\u0308가각갃");
result.clear();
edits.reset();
fn2.normalizeUTF8(U_OMIT_UNCHANGED_TEXT, src, sink, &edits, errorCode);
@ -1743,16 +1743,16 @@ BasicNormalizerTest::TestNormalizeIllFormedText() {
assertSuccess("normalize", errorCode.get());
assertEquals("normalize", expected, result);
std::string src8(u8" A");
src8.append("\x80").append(u8"ÄA\u0308").append("\xC0\x80").
append(u8"A\u0308\u00ad\u0323").append("\xED\xA0\x80").
append(u8"Ä\u0323,\u00ad").append("\xF4\x90\x80\x80").
append(u8"\u1100\u1161\u11A8\u3133 ").append("\xF0");
std::string expected8(u8" a");
expected8.append("\x80").append(u8"ää").append("\xC0\x80").
append(u8"\u0308").append("\xED\xA0\x80").
append(u8"\u0308,").append("\xF4\x90\x80\x80").
append(u8"가각갃 ").append("\xF0");
std::string src8(reinterpret_cast<const char*>(u8" A"));
src8.append("\x80").append(reinterpret_cast<const char*>(u8"ÄA\u0308")).append("\xC0\x80").
append(reinterpret_cast<const char*>(u8"A\u0308\u00ad\u0323")).append("\xED\xA0\x80").
append(reinterpret_cast<const char*>(u8"Ä\u0323,\u00ad")).append("\xF4\x90\x80\x80").
append(reinterpret_cast<const char*>(u8"\u1100\u1161\u11A8\u3133 ")).append("\xF0");
std::string expected8(reinterpret_cast<const char*>(u8" a"));
expected8.append("\x80").append(reinterpret_cast<const char*>(u8"ää")).append("\xC0\x80").
append(reinterpret_cast<const char*>(u8"\u0308")).append("\xED\xA0\x80").
append(reinterpret_cast<const char*>(u8"\u0308,")).append("\xF4\x90\x80\x80").
append(reinterpret_cast<const char*>(u8"가각갃 ")).append("\xF0");
std::string result8;
StringByteSink<std::string> sink(&result8);
nfkc_cf->normalizeUTF8(0, src8, sink, nullptr, errorCode);
@ -1777,8 +1777,8 @@ BasicNormalizerTest::TestComposeJamoTBase() {
assertFalse("isNormalized(LV+11A7)", nfkc->isNormalized(s, errorCode));
assertTrue("isNormalized(normalized)", nfkc->isNormalized(result, errorCode));
std::string s8(u8"\u1100\u1161\u11A7\u1100\u314F\u11A7\u11A7");
std::string expected8(u8"\u11A7\u11A7\u11A7");
std::string s8(reinterpret_cast<const char*>(u8"\u1100\u1161\u11A7\u1100\u314F\u11A7\u11A7"));
std::string expected8(reinterpret_cast<const char*>(u8"\u11A7\u11A7\u11A7"));
std::string result8;
StringByteSink<std::string> sink(&result8, static_cast<int32_t>(expected8.length()));
nfkc->normalizeUTF8(0, s8, sink, nullptr, errorCode);

View File

@ -935,7 +935,7 @@ void UTS46Test::checkIdnaTestResult(const char *line, const char *type,
if (*status != u'[') {
errln("%s status field does not start with '[': %s\n %s", type, status, line);
}
if (strcmp(status, u8"[]") != 0) {
if (strcmp(status, reinterpret_cast<const char*>(u8"[]")) != 0) {
expectedHasErrors = TRUE;
}
}

View File

@ -1,6 +1,6 @@
# CollationTest_CLDR_NON_IGNORABLE_SHORT.txt
# Date: 2019-11-08, 22:14:17 GMT
# © 2019 Unicode®, Inc.
# Date: 2020-02-12, 17:50:40 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# UCA Version: 13.0.0
@ -945,9 +945,11 @@ FB1E 0334
0652 0334
0334 0653
0653 0334
0334 10EAC
10EAC 0334
0334 0654
0654 0334
0334 10EAB
10EAB 0334
0334 0655
0655 0334
@ -1111,6 +1113,7 @@ A9B3 0334
116B7 0334
0334 1183A
1183A 0334
0334 11943
11943 0334
0334 11D42
11D42 0334
@ -1232,7 +1235,9 @@ A92D 0334
302E 0334
0334 302F
302F 0334
0334 16FF0
16FF0 0334
0334 16FF1
16FF1 0334
0334 20D0
20D0 0334
@ -66995,6 +67000,7 @@ A75E 0062
0057 0323 0334
0057 0334 0323
1E88 0334
0334 1ABF
1ABF 0334
0334 1DF1
1DF1 0334
@ -67303,6 +67309,7 @@ A7C2 0062
2C72 0041
2C73 0062
2C72 0062
0334 1AC0
1AC0 0334
028D 0021
1AC0 0021
@ -92154,6 +92161,7 @@ A806 003F
A806 0061
A806 0041
A806 0062
0334 A82C
A82C 0334
A82C 0021
A82C 003F
@ -96505,12 +96513,14 @@ A8C4 0062
11938 0041
11935 11930 0062
11938 0062
0334 1193D
1193D 0334
1193D 0021
1193D 003F
1193D 0061
1193D 0041
1193D 0062
0334 1193E
1193E 0334
1193E 0021
1193E 003F
@ -114560,10 +114570,15 @@ A9B2 0061
A9B2 0041
A9B2 0062
A9B4 0021
A9B5 0021
A9B4 003F
A9B5 003F
A9B4 0061
A9B4 0041
A9B5 0061
A9B5 0041
A9B4 0062
A9B5 0062
A9BC 0021
A9BC 003F
A9BC 0061
@ -114604,11 +114619,6 @@ A9BB 003F
A9BB 0061
A9BB 0041
A9BB 0062
A9B5 0021
A9B5 003F
A9B5 0061
A9B5 0041
A9B5 0062
0334 A9C0
A9C0 0334
A9C0 0021

View File

@ -1,6 +1,6 @@
# CollationTest_CLDR_SHIFTED_SHORT.txt
# Date: 2019-11-08, 22:14:19 GMT
# © 2019 Unicode®, Inc.
# Date: 2020-02-12, 17:50:42 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# UCA Version: 13.0.0
@ -5371,9 +5371,11 @@ FB1E 0334
0652 0334
0334 0653
0653 0334
0334 10EAC
10EAC 0334
0334 0654
0654 0334
0334 10EAB
10EAB 0334
0334 0655
0655 0334
@ -5537,6 +5539,7 @@ A9B3 0334
116B7 0334
0334 1183A
1183A 0334
0334 11943
11943 0334
0334 11D42
11D42 0334
@ -5658,7 +5661,9 @@ A92D 0334
302E 0334
0334 302F
302F 0334
0334 16FF0
16FF0 0334
0334 16FF1
16FF1 0334
0334 20D0
20D0 0334
@ -72709,6 +72714,7 @@ FF37 003F
0057 0323 0334
0057 0334 0323
1E88 0334
0334 1ABF
1ABF 0334
0334 1DF1
1DF1 0334
@ -72928,6 +72934,7 @@ A7C2 0062
1AC0 003F
AB69 0021
AB69 003F
0334 1AC0
1AC0 0334
028D 0061
028D 0041
@ -98376,6 +98383,7 @@ A806 0041
A806 0062
A82C 0021
A82C 003F
0334 A82C
A82C 0334
A82C 0061
A82C 0041
@ -103473,12 +103481,14 @@ A8C4 0062
11938 0062
1193D 0021
1193D 003F
0334 1193D
1193D 0334
1193D 0061
1193D 0041
1193D 0062
1193E 0021
1193E 003F
0334 1193E
1193E 0334
1193E 0061
1193E 0041
@ -122266,9 +122276,14 @@ A9B2 0041
A9B2 0062
A9B4 0021
A9B4 003F
A9B5 0021
A9B5 003F
A9B4 0061
A9B4 0041
A9B5 0061
A9B5 0041
A9B4 0062
A9B5 0062
A9BC 0021
A9BC 003F
A9BC 0061
@ -122309,11 +122324,6 @@ A9BB 003F
A9BB 0061
A9BB 0041
A9BB 0062
A9B5 0021
A9B5 003F
A9B5 0061
A9B5 0041
A9B5 0062
A9C0 0021
A9C0 003F
0334 A9C0

View File

@ -1,6 +1,6 @@
# GraphemeBreakTest-13.0.0.txt
# Date: 2019-11-20, 22:53:31 GMT
# © 2019 Unicode®, Inc.
# GraphemeBreakTest-cldr-13.0.0.txt
# Date: 2020-02-07, 21:43:46 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#

View File

@ -14,11 +14,12 @@
type = word; # one of grapheme | word | line | sentence
locale = en;
Han = [:Han:];
CR = [\p{Word_Break = CR}];
LF = [\p{Word_Break = LF}];
Newline = [\p{Word_Break = Newline}];
Extend = [\p{Word_Break = Extend}];
Extend = [\p{Word_Break = Extend}-Han];
ZWJ = [\p{Word_Break = ZWJ}];
Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
Format = [\p{Word_Break = Format}];
@ -30,14 +31,13 @@ Double_Quote = [\p{Word_Break = Double_Quote}];
MidNumLet = [\p{Word_Break = MidNumLet}];
MidLetter = [\p{Word_Break = MidLetter}];
MidNum = [\p{Word_Break = MidNum}];
Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079;
Numeric = [\p{Word_Break = Numeric}];
ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
WSegSpace = [\p{Word_Break = WSegSpace}];
Extended_Pict = [:ExtPict:];
#define dictionary, with the effect being that those characters don't appear in test data.
Han = [:Han:];
Hiragana = [:Hiragana:];
Control = [\p{Grapheme_Cluster_Break = Control}];

View File

@ -13,11 +13,12 @@
type = word; # one of grapheme | word | line | sentence
locale = en_US_POSIX;
Han = [:Han:];
CR = [\p{Word_Break = CR}];
LF = [\p{Word_Break = LF}];
Newline = [\p{Word_Break = Newline}];
Extend = [\p{Word_Break = Extend}];
Extend = [\p{Word_Break = Extend}-Han];
ZWJ = [\p{Word_Break = ZWJ}];
Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
Format = [\p{Word_Break = Format}];
@ -29,14 +30,13 @@ Double_Quote = [\p{Word_Break = Double_Quote}];
MidNumLet = [\p{Word_Break = MidNumLet} - [.]];
MidLetter = [\p{Word_Break = MidLetter} - [\:]];
MidNum = [\p{Word_Break = MidNum} [.]];
Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079;
Numeric = [\p{Word_Break = Numeric}];
ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
WSegSpace = [\p{Word_Break = WSegSpace}];
Extended_Pict = [:ExtPict:];
#define dictionary, with the effect being that those characters don't appear in test data.
Han = [:Han:];
Hiragana = [:Hiragana:];
Control = [\p{Grapheme_Cluster_Break = Control}];

View File

@ -8,5 +8,5 @@ api.report.version = 66
api.report.prev.version = 65
release.file.ver = 66rc
api.doc.version = 66 Release Candidate
maven.pom.ver = 66.0.1-SNAPSHOT
maven.pom.ver = 66.1-SNAPSHOT

View File

@ -4,6 +4,6 @@
#* Copyright (C) 2010-2016, International Business Machines Corporation and *
#* others. All Rights Reserved. *
#*******************************************************************************
icu4j.plugin.impl.version.string=66.0.1
icu4j.plugin.impl.version.string=66.1.0
copyright.eclipse=(C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html#License
icu4j.data.version.number=66

View File

@ -317,25 +317,25 @@ public class SpoofChecker {
+ "\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55\\u0C56\\u0C60\\u0C61\\u0C66-"
+ "\\u0C6F\\u0C80\\u0C82\\u0C83\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8"
+ "\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CBC-\\u0CC4\\u0CC6-\\u0CC8\\u0CCA-\\u0CCD"
+ "\\u0CD5\\u0CD6\\u0CE0-\\u0CE3\\u0CE6-\\u0CEF\\u0CF1\\u0CF2\\u0D00\\u0D02-"
+ "\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D3A\\u0D3D-\\u0D43\\u0D46-\\u0D48\\u0D4A-"
+ "\\u0D4E\\u0D54-\\u0D57\\u0D60\\u0D61\\u0D66-\\u0D6F\\u0D7A-\\u0D7F\\u0D81-"
+ "\\u0D83\\u0D85-\\u0D8E\\u0D91-\\u0D96\\u0D9A-\\u0DA5\\u0DA7-\\u0DB1\\u0DB3-"
+ "\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0DCA\\u0DCF-\\u0DD4\\u0DD6\\u0DD8-\\u0DDE"
+ "\\u0DF2\\u0E01-\\u0E32\\u0E34-\\u0E3A\\u0E40-\\u0E4E\\u0E50-\\u0E59\\u0E81"
+ "\\u0E82\\u0E84\\u0E86-\\u0E8A\\u0E8C-\\u0EA3\\u0EA5\\u0EA7-\\u0EB2\\u0EB4-"
+ "\\u0EBD\\u0EC0-\\u0EC4\\u0EC6\\u0EC8-\\u0ECD\\u0ED0-\\u0ED9\\u0EDE\\u0EDF"
+ "\\u0F00\\u0F20-\\u0F29\\u0F35\\u0F37\\u0F3E-\\u0F42\\u0F44-\\u0F47\\u0F49-"
+ "\\u0F4C\\u0F4E-\\u0F51\\u0F53-\\u0F56\\u0F58-\\u0F5B\\u0F5D-\\u0F68\\u0F6A-"
+ "\\u0F6C\\u0F71\\u0F72\\u0F74\\u0F7A-\\u0F80\\u0F82-\\u0F84\\u0F86-\\u0F92"
+ "\\u0F94-\\u0F97\\u0F99-\\u0F9C\\u0F9E-\\u0FA1\\u0FA3-\\u0FA6\\u0FA8-\\u0FAB"
+ "\\u0FAD-\\u0FB8\\u0FBA-\\u0FBC\\u0FC6\\u1000-\\u1049\\u1050-\\u109D\\u10C7"
+ "\\u10CD\\u10D0-\\u10F0\\u10F7-\\u10FA\\u10FD-\\u10FF\\u1200-\\u1248\\u124A-"
+ "\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D"
+ "\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-"
+ "\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u135D-\\u135F\\u1380-"
+ "\\u138F\\u1780-\\u17A2\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-\\u17CA\\u17D2"
+ "\\u17D7\\u17DC\\u17E0-\\u17E9\\u1ABF\\u1AC0\\u1C90-\\u1CBA\\u1CBD-\\u1CBF"
+ "\\u0CD5\\u0CD6\\u0CE0-\\u0CE3\\u0CE6-\\u0CEF\\u0CF1\\u0CF2\\u0D00\\u0D02"
+ "\\u0D03\\u0D05-\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D3A\\u0D3D-\\u0D43\\u0D46-"
+ "\\u0D48\\u0D4A-\\u0D4E\\u0D54-\\u0D57\\u0D60\\u0D61\\u0D66-\\u0D6F\\u0D7A-"
+ "\\u0D7F\\u0D82\\u0D83\\u0D85-\\u0D8E\\u0D91-\\u0D96\\u0D9A-\\u0DA5\\u0DA7-"
+ "\\u0DB1\\u0DB3-\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0DCA\\u0DCF-\\u0DD4\\u0DD6"
+ "\\u0DD8-\\u0DDE\\u0DF2\\u0E01-\\u0E32\\u0E34-\\u0E3A\\u0E40-\\u0E4E\\u0E50-"
+ "\\u0E59\\u0E81\\u0E82\\u0E84\\u0E86-\\u0E8A\\u0E8C-\\u0EA3\\u0EA5\\u0EA7-"
+ "\\u0EB2\\u0EB4-\\u0EBD\\u0EC0-\\u0EC4\\u0EC6\\u0EC8-\\u0ECD\\u0ED0-\\u0ED9"
+ "\\u0EDE\\u0EDF\\u0F00\\u0F20-\\u0F29\\u0F35\\u0F37\\u0F3E-\\u0F42\\u0F44-"
+ "\\u0F47\\u0F49-\\u0F4C\\u0F4E-\\u0F51\\u0F53-\\u0F56\\u0F58-\\u0F5B\\u0F5D-"
+ "\\u0F68\\u0F6A-\\u0F6C\\u0F71\\u0F72\\u0F74\\u0F7A-\\u0F80\\u0F82-\\u0F84"
+ "\\u0F86-\\u0F92\\u0F94-\\u0F97\\u0F99-\\u0F9C\\u0F9E-\\u0FA1\\u0FA3-\\u0FA6"
+ "\\u0FA8-\\u0FAB\\u0FAD-\\u0FB8\\u0FBA-\\u0FBC\\u0FC6\\u1000-\\u1049\\u1050-"
+ "\\u109D\\u10C7\\u10CD\\u10D0-\\u10F0\\u10F7-\\u10FA\\u10FD-\\u10FF\\u1200-"
+ "\\u1248\\u124A-\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288"
+ "\\u128A-\\u128D\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-"
+ "\\u12C5\\u12C8-\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u135D-"
+ "\\u135F\\u1380-\\u138F\\u1780-\\u17A2\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-"
+ "\\u17CA\\u17D2\\u17D7\\u17DC\\u17E0-\\u17E9\\u1C90-\\u1CBA\\u1CBD-\\u1CBF"
+ "\\u1E00-\\u1E99\\u1E9E\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-"
+ "\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F70"
+ "\\u1F72\\u1F74\\u1F76\\u1F78\\u1F7A\\u1F7C\\u1F80-\\u1FB4\\u1FB6-\\u1FBA"
@ -346,14 +346,14 @@ public class SpoofChecker {
+ "\\u2DD8-\\u2DDE\\u3005-\\u3007\\u3041-\\u3096\\u3099\\u309A\\u309D\\u309E"
+ "\\u30A1-\\u30FA\\u30FC-\\u30FE\\u3105-\\u312D\\u312F\\u31A0-\\u31BF\\u3400-"
+ "\\u4DBF\\u4E00-\\u9FFC\\uA67F\\uA717-\\uA71F\\uA788\\uA78D\\uA792\\uA793"
+ "\\uA7AA\\uA7AE\\uA7B8\\uA7B9\\uA7C2-\\uA7CA\\uA7F5\\uA7F6\\uA9E7-\\uA9FE"
+ "\\uAA60-\\uAA76\\uAA7A-\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16"
+ "\\uAB20-\\uAB26\\uAB28-\\uAB2E\\uAB66-\\uAB68\\uAC00-\\uD7A3\\uFA0E\\uFA0F"
+ "\\uFA11\\uFA13\\uFA14\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00011301"
+ "\\U00011303\\U0001133B\\U0001133C\\U00016FF0\\U00016FF1\\U0001B150-"
+ "\\U0001B152\\U0001B164-\\U0001B167\\U00020000-\\U0002A6DD\\U0002A700-"
+ "\\U0002B734\\U0002B740-\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-"
+ "\\U0002EBE0\\U00030000-\\U0003134A]"
+ "\\uA7AA\\uA7AE\\uA7B8\\uA7B9\\uA7C2-\\uA7CA\\uA9E7-\\uA9FE\\uAA60-\\uAA76"
+ "\\uAA7A-\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16\\uAB20-\\uAB26"
+ "\\uAB28-\\uAB2E\\uAB66\\uAB67\\uAC00-\\uD7A3\\uFA0E\\uFA0F\\uFA11\\uFA13"
+ "\\uFA14\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00011301\\U00011303"
+ "\\U0001133B\\U0001133C\\U00016FF0\\U00016FF1\\U0001B150-\\U0001B152"
+ "\\U0001B164-\\U0001B167\\U00020000-\\U0002A6DD\\U0002A700-\\U0002B734"
+ "\\U0002B740-\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-\\U0002EBE0"
+ "\\U00030000-\\U0003134A]"
).freeze();
// Note: data from IdentifierStatus.txt & IdentifierType.txt
// There is tooling to generate this constant in the unicodetools project:

View File

@ -595,7 +595,7 @@ public final class VersionInfo implements Comparable<VersionInfo>
UNICODE_12_1 = getInstance(12, 1, 0, 0);
UNICODE_13_0 = getInstance(13, 0, 0, 0);
ICU_VERSION = getInstance(66, 0, 1, 0);
ICU_VERSION = getInstance(66, 1, 0, 0);
ICU_DATA_VERSION = ICU_VERSION;
UNICODE_VERSION = UNICODE_13_0;

View File

@ -7,7 +7,7 @@
# Version numbers, etc.
icu4j.spec.version = 66
icu4j.impl.version = 66.0.1
icu4j.impl.version = 66.1
icu4j.data.version = 66
default.exec.env = JavaSE-1.7

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6b14ef66277d196e8b01365e9426a1585ea25a9cc346b4a454db2ecc157aed41
size 12995957
oid sha256:ba8a127c85144f3e2b531d1a7ea73a4c9e4e319b976643884a32d4d522c51c70
size 12996272

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5891f9acf588285d8d39c00b82c923cd7e887541e2ac6b8341865ec548f6d0cd
size 94279
oid sha256:584eeb39ff67d1b1c2e774e1650b31b2caccf022ee57770bbc97e6b1c9320947
size 94268

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:057e776b2340a9d0bb1742fcbd153bfe33970cf6b7f8d88761dd1e262390bda4
size 723418
oid sha256:6f4e2d5516ef6d4890ee5fa9fb552f204360c6371be87ba8fb36b0d04c4f4d20
size 726380

View File

@ -1,6 +1,6 @@
COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later)
Copyright © 1991-2019 Unicode, Inc. All rights reserved.
Copyright © 1991-2020 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining

View File

@ -1,6 +1,6 @@
# CollationTest_CLDR_NON_IGNORABLE_SHORT.txt
# Date: 2019-11-08, 22:14:17 GMT
# © 2019 Unicode®, Inc.
# Date: 2020-02-12, 17:50:40 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# UCA Version: 13.0.0
@ -945,9 +945,11 @@ FB1E 0334
0652 0334
0334 0653
0653 0334
0334 10EAC
10EAC 0334
0334 0654
0654 0334
0334 10EAB
10EAB 0334
0334 0655
0655 0334
@ -1111,6 +1113,7 @@ A9B3 0334
116B7 0334
0334 1183A
1183A 0334
0334 11943
11943 0334
0334 11D42
11D42 0334
@ -1232,7 +1235,9 @@ A92D 0334
302E 0334
0334 302F
302F 0334
0334 16FF0
16FF0 0334
0334 16FF1
16FF1 0334
0334 20D0
20D0 0334
@ -66995,6 +67000,7 @@ A75E 0062
0057 0323 0334
0057 0334 0323
1E88 0334
0334 1ABF
1ABF 0334
0334 1DF1
1DF1 0334
@ -67303,6 +67309,7 @@ A7C2 0062
2C72 0041
2C73 0062
2C72 0062
0334 1AC0
1AC0 0334
028D 0021
1AC0 0021
@ -92154,6 +92161,7 @@ A806 003F
A806 0061
A806 0041
A806 0062
0334 A82C
A82C 0334
A82C 0021
A82C 003F
@ -96505,12 +96513,14 @@ A8C4 0062
11938 0041
11935 11930 0062
11938 0062
0334 1193D
1193D 0334
1193D 0021
1193D 003F
1193D 0061
1193D 0041
1193D 0062
0334 1193E
1193E 0334
1193E 0021
1193E 003F
@ -114560,10 +114570,15 @@ A9B2 0061
A9B2 0041
A9B2 0062
A9B4 0021
A9B5 0021
A9B4 003F
A9B5 003F
A9B4 0061
A9B4 0041
A9B5 0061
A9B5 0041
A9B4 0062
A9B5 0062
A9BC 0021
A9BC 003F
A9BC 0061
@ -114604,11 +114619,6 @@ A9BB 003F
A9BB 0061
A9BB 0041
A9BB 0062
A9B5 0021
A9B5 003F
A9B5 0061
A9B5 0041
A9B5 0062
0334 A9C0
A9C0 0334
A9C0 0021

View File

@ -1,6 +1,6 @@
# CollationTest_CLDR_SHIFTED_SHORT.txt
# Date: 2019-11-08, 22:14:19 GMT
# © 2019 Unicode®, Inc.
# Date: 2020-02-12, 17:50:42 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# UCA Version: 13.0.0
@ -5371,9 +5371,11 @@ FB1E 0334
0652 0334
0334 0653
0653 0334
0334 10EAC
10EAC 0334
0334 0654
0654 0334
0334 10EAB
10EAB 0334
0334 0655
0655 0334
@ -5537,6 +5539,7 @@ A9B3 0334
116B7 0334
0334 1183A
1183A 0334
0334 11943
11943 0334
0334 11D42
11D42 0334
@ -5658,7 +5661,9 @@ A92D 0334
302E 0334
0334 302F
302F 0334
0334 16FF0
16FF0 0334
0334 16FF1
16FF1 0334
0334 20D0
20D0 0334
@ -72709,6 +72714,7 @@ FF37 003F
0057 0323 0334
0057 0334 0323
1E88 0334
0334 1ABF
1ABF 0334
0334 1DF1
1DF1 0334
@ -72928,6 +72934,7 @@ A7C2 0062
1AC0 003F
AB69 0021
AB69 003F
0334 1AC0
1AC0 0334
028D 0061
028D 0041
@ -98376,6 +98383,7 @@ A806 0041
A806 0062
A82C 0021
A82C 003F
0334 A82C
A82C 0334
A82C 0061
A82C 0041
@ -103473,12 +103481,14 @@ A8C4 0062
11938 0062
1193D 0021
1193D 003F
0334 1193D
1193D 0334
1193D 0061
1193D 0041
1193D 0062
1193E 0021
1193E 003F
0334 1193E
1193E 0334
1193E 0061
1193E 0041
@ -122266,9 +122276,14 @@ A9B2 0041
A9B2 0062
A9B4 0021
A9B4 003F
A9B5 0021
A9B5 003F
A9B4 0061
A9B4 0041
A9B5 0061
A9B5 0041
A9B4 0062
A9B5 0062
A9BC 0021
A9BC 003F
A9BC 0061
@ -122309,11 +122324,6 @@ A9BB 003F
A9BB 0061
A9BB 0041
A9BB 0062
A9B5 0021
A9B5 003F
A9B5 0061
A9B5 0041
A9B5 0062
A9C0 0021
A9C0 003F
0334 A9C0

View File

@ -1,6 +1,6 @@
# confusables.txt
# Date: 2019-10-22, 13:05:29 GMT
# © 2019 Unicode®, Inc.
# Date: 2020-02-13, 01:38:49 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@ -1358,6 +1358,10 @@ FFED ; 25AA ; MA #* ( ■ → ▪ ) HALFWIDTH BLACK SQUARE → BLACK SMALL SQUAR
266A ; 1D158 1D165 1D16E ; MA #* ( ♪ → 𝅘𝅥𝅮 ) EIGHTH NOTE → MUSICAL SYMBOL NOTEHEAD BLACK, MUSICAL SYMBOL COMBINING STEM, MUSICAL SYMBOL COMBINING FLAG-1 #
24EA ; 1F10D ; MA #* ( ⓪ → 🄍 ) CIRCLED DIGIT ZERO → CIRCLED ZERO WITH SLASH #
21BA ; 1F10E ; MA #* ( ↺ → 🄎 ) ANTICLOCKWISE OPEN CIRCLE ARROW → CIRCLED ANTICLOCKWISE ARROW #
02D9 ; 0971 ; MA #* ( ˙ → ॱ ) DOT ABOVE → DEVANAGARI SIGN HIGH SPACING DOT #
0D4E ; 0971 ; MA # ( ൎ → ॱ ) MALAYALAM LETTER DOT REPH → DEVANAGARI SIGN HIGH SPACING DOT # →˙→
@ -1418,13 +1422,13 @@ A9C6 ; A9D0 ; MA #* ( ꧆ → ꧐ ) JAVANESE PADA WINDU → JAVANESE DIGIT ZERO
1D7E4 ; 0032 ; MA # ( 𝟤 → 2 ) MATHEMATICAL SANS-SERIF DIGIT TWO → DIGIT TWO #
1D7EE ; 0032 ; MA # ( 𝟮 → 2 ) MATHEMATICAL SANS-SERIF BOLD DIGIT TWO → DIGIT TWO #
1D7F8 ; 0032 ; MA # ( 𝟸 → 2 ) MATHEMATICAL MONOSPACE DIGIT TWO → DIGIT TWO #
1FBF2 ; 0032 ; MA # ( 🯲 → 2 ) SEGMENTED DIGIT TWO → DIGIT TWO #
A75A ; 0032 ; MA # ( → 2 ) LATIN CAPITAL LETTER R ROTUNDA → DIGIT TWO #
01A7 ; 0032 ; MA # ( Ƨ → 2 ) LATIN CAPITAL LETTER TONE TWO → DIGIT TWO #
03E8 ; 0032 ; MA # ( Ϩ → 2 ) COPTIC CAPITAL LETTER HORI → DIGIT TWO # →Ƨ→
A644 ; 0032 ; MA # ( → 2 ) CYRILLIC CAPITAL LETTER REVERSED DZE → DIGIT TWO # →Ƨ→
14BF ; 0032 ; MA # ( → 2 ) CANADIAN SYLLABICS SAYISI M → DIGIT TWO #
A6EF ; 0032 ; MA # ( → 2 ) BAMUM LETTER KOGHOM → DIGIT TWO # →Ƨ→
1FBF2 ; 0032 ; MA # ( 🯲 → 2 ) SEGMENTED DIGIT TWO → DIGIT TWO #
A9CF ; 0662 ; MA # ( ꧏ → ‎٢‎ ) JAVANESE PANGRANGKEP → ARABIC-INDIC DIGIT TWO #
06F2 ; 0662 ; MA # ( ۲ → ‎٢‎ ) EXTENDED ARABIC-INDIC DIGIT TWO → ARABIC-INDIC DIGIT TWO #
@ -1491,6 +1495,7 @@ A9CF ; 0662 ; MA # ( ꧏ → ‎٢‎ ) JAVANESE PANGRANGKEP → ARABIC-INDIC DI
1D7E5 ; 0033 ; MA # ( 𝟥 → 3 ) MATHEMATICAL SANS-SERIF DIGIT THREE → DIGIT THREE #
1D7EF ; 0033 ; MA # ( 𝟯 → 3 ) MATHEMATICAL SANS-SERIF BOLD DIGIT THREE → DIGIT THREE #
1D7F9 ; 0033 ; MA # ( 𝟹 → 3 ) MATHEMATICAL MONOSPACE DIGIT THREE → DIGIT THREE #
1FBF3 ; 0033 ; MA # ( 🯳 → 3 ) SEGMENTED DIGIT THREE → DIGIT THREE #
A7AB ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER REVERSED OPEN E → DIGIT THREE #
021C ; 0033 ; MA # ( Ȝ → 3 ) LATIN CAPITAL LETTER YOGH → DIGIT THREE # →Ʒ→
01B7 ; 0033 ; MA # ( Ʒ → 3 ) LATIN CAPITAL LETTER EZH → DIGIT THREE #
@ -1500,7 +1505,6 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
04E0 ; 0033 ; MA # ( Ӡ → 3 ) CYRILLIC CAPITAL LETTER ABKHASIAN DZE → DIGIT THREE # →Ʒ→
16F3B ; 0033 ; MA # ( 𖼻 → 3 ) MIAO LETTER ZA → DIGIT THREE # →Ʒ→
118CA ; 0033 ; MA # ( 𑣊 → 3 ) WARANG CITI SMALL LETTER ANG → DIGIT THREE #
1FBF3 ; 0033 ; MA # ( 🯳 → 3 ) SEGMENTED DIGIT THREE → DIGIT THREE #
06F3 ; 0663 ; MA # ( ۳ → ‎٣‎ ) EXTENDED ARABIC-INDIC DIGIT THREE → ARABIC-INDIC DIGIT THREE #
1E8C9 ; 0663 ; MA #* ( ‎𞣉‎ → ‎٣‎ ) MENDE KIKAKUI DIGIT THREE → ARABIC-INDIC DIGIT THREE #
@ -1530,9 +1534,9 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7E6 ; 0034 ; MA # ( 𝟦 → 4 ) MATHEMATICAL SANS-SERIF DIGIT FOUR → DIGIT FOUR #
1D7F0 ; 0034 ; MA # ( 𝟰 → 4 ) MATHEMATICAL SANS-SERIF BOLD DIGIT FOUR → DIGIT FOUR #
1D7FA ; 0034 ; MA # ( 𝟺 → 4 ) MATHEMATICAL MONOSPACE DIGIT FOUR → DIGIT FOUR #
1FBF4 ; 0034 ; MA # ( 🯴 → 4 ) SEGMENTED DIGIT FOUR → DIGIT FOUR #
13CE ; 0034 ; MA # ( → 4 ) CHEROKEE LETTER SE → DIGIT FOUR #
118AF ; 0034 ; MA # ( 𑢯 → 4 ) WARANG CITI CAPITAL LETTER UC → DIGIT FOUR #
1FBF4 ; 0034 ; MA # ( 🯴 → 4 ) SEGMENTED DIGIT FOUR → DIGIT FOUR #
06F4 ; 0664 ; MA # ( ۴ → ‎٤‎ ) EXTENDED ARABIC-INDIC DIGIT FOUR → ARABIC-INDIC DIGIT FOUR #
@ -1557,9 +1561,9 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7E7 ; 0035 ; MA # ( 𝟧 → 5 ) MATHEMATICAL SANS-SERIF DIGIT FIVE → DIGIT FIVE #
1D7F1 ; 0035 ; MA # ( 𝟱 → 5 ) MATHEMATICAL SANS-SERIF BOLD DIGIT FIVE → DIGIT FIVE #
1D7FB ; 0035 ; MA # ( 𝟻 → 5 ) MATHEMATICAL MONOSPACE DIGIT FIVE → DIGIT FIVE #
1FBF5 ; 0035 ; MA # ( 🯵 → 5 ) SEGMENTED DIGIT FIVE → DIGIT FIVE #
01BC ; 0035 ; MA # ( Ƽ → 5 ) LATIN CAPITAL LETTER TONE FIVE → DIGIT FIVE #
118BB ; 0035 ; MA # ( 𑢻 → 5 ) WARANG CITI CAPITAL LETTER HORR → DIGIT FIVE #
1FBF5 ; 0035 ; MA # ( 🯵 → 5 ) SEGMENTED DIGIT FIVE → DIGIT FIVE #
2464 ; 2784 ; MA #* ( ⑤ → ➄ ) CIRCLED DIGIT FIVE → DINGBAT CIRCLED SANS-SERIF DIGIT FIVE #
@ -1578,11 +1582,11 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7E8 ; 0036 ; MA # ( 𝟨 → 6 ) MATHEMATICAL SANS-SERIF DIGIT SIX → DIGIT SIX #
1D7F2 ; 0036 ; MA # ( 𝟲 → 6 ) MATHEMATICAL SANS-SERIF BOLD DIGIT SIX → DIGIT SIX #
1D7FC ; 0036 ; MA # ( 𝟼 → 6 ) MATHEMATICAL MONOSPACE DIGIT SIX → DIGIT SIX #
1FBF6 ; 0036 ; MA # ( 🯶 → 6 ) SEGMENTED DIGIT SIX → DIGIT SIX #
2CD2 ; 0036 ; MA # ( → 6 ) COPTIC CAPITAL LETTER OLD COPTIC HEI → DIGIT SIX #
0431 ; 0036 ; MA # ( б → 6 ) CYRILLIC SMALL LETTER BE → DIGIT SIX #
13EE ; 0036 ; MA # ( → 6 ) CHEROKEE LETTER WV → DIGIT SIX #
118D5 ; 0036 ; MA # ( 𑣕 → 6 ) WARANG CITI SMALL LETTER AT → DIGIT SIX #
1FBF6 ; 0036 ; MA # ( 🯶 → 6 ) SEGMENTED DIGIT SIX → DIGIT SIX #
06F6 ; 0666 ; MA # ( ۶ → ‎٦‎ ) EXTENDED ARABIC-INDIC DIGIT SIX → ARABIC-INDIC DIGIT SIX #
@ -1606,9 +1610,9 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7E9 ; 0037 ; MA # ( 𝟩 → 7 ) MATHEMATICAL SANS-SERIF DIGIT SEVEN → DIGIT SEVEN #
1D7F3 ; 0037 ; MA # ( 𝟳 → 7 ) MATHEMATICAL SANS-SERIF BOLD DIGIT SEVEN → DIGIT SEVEN #
1D7FD ; 0037 ; MA # ( 𝟽 → 7 ) MATHEMATICAL MONOSPACE DIGIT SEVEN → DIGIT SEVEN #
1FBF7 ; 0037 ; MA # ( 🯷 → 7 ) SEGMENTED DIGIT SEVEN → DIGIT SEVEN #
104D2 ; 0037 ; MA # ( 𐓒 → 7 ) OSAGE CAPITAL LETTER ZA → DIGIT SEVEN #
118C6 ; 0037 ; MA # ( 𑣆 → 7 ) WARANG CITI SMALL LETTER II → DIGIT SEVEN #
1FBF7 ; 0037 ; MA # ( 🯷 → 7 ) SEGMENTED DIGIT SEVEN → DIGIT SEVEN #
2466 ; 2786 ; MA #* ( ⑦ → ➆ ) CIRCLED DIGIT SEVEN → DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN #
@ -1631,10 +1635,10 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7EA ; 0038 ; MA # ( 𝟪 → 8 ) MATHEMATICAL SANS-SERIF DIGIT EIGHT → DIGIT EIGHT #
1D7F4 ; 0038 ; MA # ( 𝟴 → 8 ) MATHEMATICAL SANS-SERIF BOLD DIGIT EIGHT → DIGIT EIGHT #
1D7FE ; 0038 ; MA # ( 𝟾 → 8 ) MATHEMATICAL MONOSPACE DIGIT EIGHT → DIGIT EIGHT #
1FBF8 ; 0038 ; MA # ( 🯸 → 8 ) SEGMENTED DIGIT EIGHT → DIGIT EIGHT #
0223 ; 0038 ; MA # ( ȣ → 8 ) LATIN SMALL LETTER OU → DIGIT EIGHT #
0222 ; 0038 ; MA # ( Ȣ → 8 ) LATIN CAPITAL LETTER OU → DIGIT EIGHT #
1031A ; 0038 ; MA # ( 𐌚 → 8 ) OLD ITALIC LETTER EF → DIGIT EIGHT #
1FBF8 ; 0038 ; MA # ( 🯸 → 8 ) SEGMENTED DIGIT EIGHT → DIGIT EIGHT #
0AEE ; 096E ; MA # ( ૮ → ८ ) GUJARATI DIGIT EIGHT → DEVANAGARI DIGIT EIGHT #
@ -1659,12 +1663,12 @@ A76A ; 0033 ; MA # ( → 3 ) LATIN CAPITAL LETTER ET → DIGIT THREE #
1D7EB ; 0039 ; MA # ( 𝟫 → 9 ) MATHEMATICAL SANS-SERIF DIGIT NINE → DIGIT NINE #
1D7F5 ; 0039 ; MA # ( 𝟵 → 9 ) MATHEMATICAL SANS-SERIF BOLD DIGIT NINE → DIGIT NINE #
1D7FF ; 0039 ; MA # ( 𝟿 → 9 ) MATHEMATICAL MONOSPACE DIGIT NINE → DIGIT NINE #
1FBF9 ; 0039 ; MA # ( 🯹 → 9 ) SEGMENTED DIGIT NINE → DIGIT NINE #
A76E ; 0039 ; MA # ( → 9 ) LATIN CAPITAL LETTER CON → DIGIT NINE #
2CCA ; 0039 ; MA # ( → 9 ) COPTIC CAPITAL LETTER DIALECT-P HORI → DIGIT NINE #
118CC ; 0039 ; MA # ( 𑣌 → 9 ) WARANG CITI SMALL LETTER KO → DIGIT NINE #
118AC ; 0039 ; MA # ( 𑢬 → 9 ) WARANG CITI CAPITAL LETTER KO → DIGIT NINE #
118D6 ; 0039 ; MA # ( 𑣖 → 9 ) WARANG CITI SMALL LETTER AM → DIGIT NINE #
1FBF9 ; 0039 ; MA # ( 🯹 → 9 ) SEGMENTED DIGIT NINE → DIGIT NINE #
0967 ; 0669 ; MA # ( १ → ‎٩‎ ) DEVANAGARI DIGIT ONE → ARABIC-INDIC DIGIT NINE #
118E4 ; 0669 ; MA # ( 𑣤 → ‎٩‎ ) WARANG CITI DIGIT FOUR → ARABIC-INDIC DIGIT NINE #
@ -2544,6 +2548,7 @@ FFE8 ; 006C ; MA #* ( → l ) HALFWIDTH FORMS LIGHT VERTICAL → LATIN SMALL
1D7E3 ; 006C ; MA # ( 𝟣 → l ) MATHEMATICAL SANS-SERIF DIGIT ONE → LATIN SMALL LETTER L # →1→
1D7ED ; 006C ; MA # ( 𝟭 → l ) MATHEMATICAL SANS-SERIF BOLD DIGIT ONE → LATIN SMALL LETTER L # →1→
1D7F7 ; 006C ; MA # ( 𝟷 → l ) MATHEMATICAL MONOSPACE DIGIT ONE → LATIN SMALL LETTER L # →1→
1FBF1 ; 006C ; MA # ( 🯱 → l ) SEGMENTED DIGIT ONE → LATIN SMALL LETTER L # →1→
0049 ; 006C ; MA # ( I → l ) LATIN CAPITAL LETTER I → LATIN SMALL LETTER L #
FF29 ; 006C ; MA # ( → l ) FULLWIDTH LATIN CAPITAL LETTER I → LATIN SMALL LETTER L # →Ӏ→
2160 ; 006C ; MA # ( → l ) ROMAN NUMERAL ONE → LATIN SMALL LETTER L # →Ӏ→
@ -2601,7 +2606,6 @@ A4F2 ; 006C ; MA # ( → l ) LISU LETTER I → LATIN SMALL LETTER L # →I
16F28 ; 006C ; MA # ( 𖼨 → l ) MIAO LETTER GHA → LATIN SMALL LETTER L # →I→
1028A ; 006C ; MA # ( 𐊊 → l ) LYCIAN LETTER J → LATIN SMALL LETTER L # →I→
10309 ; 006C ; MA # ( 𐌉 → l ) OLD ITALIC LETTER I → LATIN SMALL LETTER L # →I→
1FBF1 ; 006C ; MA # ( 🯱 → l ) SEGMENTED DIGIT ONE → LATIN SMALL LETTER L # →1→
1D22A ; 004C ; MA #* ( 𝈪 → L ) GREEK INSTRUMENTAL NOTATION SYMBOL-23 → LATIN CAPITAL LETTER L #
216C ; 004C ; MA # ( → L ) ROMAN NUMERAL FIFTY → LATIN CAPITAL LETTER L #
@ -2972,6 +2976,7 @@ FBA6 ; 006F ; MA # ( → o ) ARABIC LETTER HEH GOAL ISOLATED FORM →
1D7E2 ; 004F ; MA # ( 𝟢 → O ) MATHEMATICAL SANS-SERIF DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
1D7EC ; 004F ; MA # ( 𝟬 → O ) MATHEMATICAL SANS-SERIF BOLD DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
1D7F6 ; 004F ; MA # ( 𝟶 → O ) MATHEMATICAL MONOSPACE DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
1FBF0 ; 004F ; MA # ( 🯰 → O ) SEGMENTED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
FF2F ; 004F ; MA # ( → O ) FULLWIDTH LATIN CAPITAL LETTER O → LATIN CAPITAL LETTER O # →О→
1D40E ; 004F ; MA # ( 𝐎 → O ) MATHEMATICAL BOLD CAPITAL O → LATIN CAPITAL LETTER O #
1D442 ; 004F ; MA # ( 𝑂 → O ) MATHEMATICAL ITALIC CAPITAL O → LATIN CAPITAL LETTER O #
@ -3005,7 +3010,6 @@ A4F3 ; 004F ; MA # ( → O ) LISU LETTER O → LATIN CAPITAL LETTER O #
102AB ; 004F ; MA # ( 𐊫 → O ) CARIAN LETTER O → LATIN CAPITAL LETTER O #
10404 ; 004F ; MA # ( 𐐄 → O ) DESERET CAPITAL LETTER LONG O → LATIN CAPITAL LETTER O #
10516 ; 004F ; MA # ( 𐔖 → O ) ELBASAN LETTER O → LATIN CAPITAL LETTER O #
1FBF0 ; 004F ; MA # ( 🯰 → O ) SEGMENTED DIGIT ZERO → LATIN CAPITAL LETTER O # →0→
2070 ; 00BA ; MA #* ( ⁰ → º ) SUPERSCRIPT ZERO → MASCULINE ORDINAL INDICATOR #
1D52 ; 00BA ; MA # ( ᵒ → º ) MODIFIER LETTER SMALL O → MASCULINE ORDINAL INDICATOR # →⁰→
@ -8024,8 +8028,6 @@ FA92 ; 6717 ; MA # ( 朗 → 朗 ) CJK COMPATIBILITY IDEOGRAPH-FA92 → CJK UNIF
FA93 ; 671B ; MA # ( 望 → 望 ) CJK COMPATIBILITY IDEOGRAPH-FA93 → CJK UNIFIED IDEOGRAPH-671B #
2F8D9 ; 671B ; MA # ( 望 → 望 ) CJK COMPATIBILITY IDEOGRAPH-2F8D9 → CJK UNIFIED IDEOGRAPH-671B #
2F8DA ; 6721 ; MA # ( 朡 → 朡 ) CJK COMPATIBILITY IDEOGRAPH-2F8DA → CJK UNIFIED IDEOGRAPH-6721 #
5E50 ; 3B3A ; MA # ( 幐 → 㬺 ) CJK UNIFIED IDEOGRAPH-5E50 → CJK UNIFIED IDEOGRAPH-3B3A #
4420 ; 3B3B ; MA # ( 䐠 → 㬻 ) CJK UNIFIED IDEOGRAPH-4420 → CJK UNIFIED IDEOGRAPH-3B3B #
@ -8831,6 +8833,8 @@ F953 ; 808B ; MA # ( 肋 → 肋 ) CJK COMPATIBILITY IDEOGRAPH-F953 → CJK UNIF
2F984 ; 440B ; MA # ( 䐋 → 䐋 ) CJK COMPATIBILITY IDEOGRAPH-2F984 → CJK UNIFIED IDEOGRAPH-440B #
2F8DA ; 6721 ; MA # ( 朡 → 朡 ) CJK COMPATIBILITY IDEOGRAPH-2F8DA → CJK UNIFIED IDEOGRAPH-6721 #
2F987 ; 267A7 ; MA # ( 𦞧 → 𦞧 ) CJK COMPATIBILITY IDEOGRAPH-2F987 → CJK UNIFIED IDEOGRAPH-267A7 #
2F988 ; 267B5 ; MA # ( 𦞵 → 𦞵 ) CJK COMPATIBILITY IDEOGRAPH-2F988 → CJK UNIFIED IDEOGRAPH-267B5 #
@ -9630,9 +9634,5 @@ FACE ; 9F9C ; MA # ( 龜 → 龜 ) CJK COMPATIBILITY IDEOGRAPH-FACE → CJK UNIF
2FD5 ; 9FA0 ; MA #* ( ⿕ → 龠 ) KANGXI RADICAL FLUTE → CJK UNIFIED IDEOGRAPH-9FA0 #
24EA ; 1F10D ; MA #* ( ⓪ → 🄍 ) CIRCLED DIGIT ZERO → CIRCLED ZERO WITH SLASH #
21BA ; 1F10E ; MA #* ( ↺ → 🄎 ) ANTICLOCKWISE OPEN CIRCLE ARROW → CIRCLED ANTICLOCKWISE ARROW #
# total: 6311

View File

@ -357,10 +357,15 @@ public class RBBITestMonkey extends TestFmwk {
fMidNumLetSet = new UnicodeSet("[\\p{Word_Break = MidNumLet}]");
fMidLetterSet = new UnicodeSet("[\\p{Word_Break = MidLetter}]");
fMidNumSet = new UnicodeSet("[\\p{Word_Break = MidNum}]");
fNumericSet = new UnicodeSet("[[\\p{Word_Break = Numeric}][\\uFF10-\\uff19]]");
fNumericSet = new UnicodeSet("[\\p{Word_Break = Numeric}]");
fFormatSet = new UnicodeSet("[\\p{Word_Break = Format}]");
fExtendNumLetSet = new UnicodeSet("[\\p{Word_Break = ExtendNumLet}]");
fExtendSet = new UnicodeSet("[\\p{Word_Break = Extend}]");
// There are some sc=Hani characters with WB=Extend.
// The break rules need to pick one or the other because
// Extend overlapping with something else is messy.
// For Unicode 13, we chose to keep U+16FF0 & U+16FF1
// in $Han (for $dictionary) and out of $Extend.
fExtendSet = new UnicodeSet("[\\p{Word_Break = Extend}-[:Hani:]]");
fWSegSpaceSet = new UnicodeSet("[\\p{Word_Break = WSegSpace}]");
fZWJSet = new UnicodeSet("[\\p{Word_Break = ZWJ}]");
fExtendedPictSet = new UnicodeSet("[:Extended_Pictographic:]");

View File

@ -14,11 +14,12 @@
type = word; # one of grapheme | word | line | sentence
locale = en;
Han = [:Han:];
CR = [\p{Word_Break = CR}];
LF = [\p{Word_Break = LF}];
Newline = [\p{Word_Break = Newline}];
Extend = [\p{Word_Break = Extend}];
Extend = [\p{Word_Break = Extend}-Han];
ZWJ = [\p{Word_Break = ZWJ}];
Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
Format = [\p{Word_Break = Format}];
@ -30,14 +31,13 @@ Double_Quote = [\p{Word_Break = Double_Quote}];
MidNumLet = [\p{Word_Break = MidNumLet}];
MidLetter = [\p{Word_Break = MidLetter}];
MidNum = [\p{Word_Break = MidNum}];
Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079;
Numeric = [\p{Word_Break = Numeric}];
ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
WSegSpace = [\p{Word_Break = WSegSpace}];
Extended_Pict = [:ExtPict:];
#define dictionary, with the effect being that those characters don't appear in test data.
Han = [:Han:];
Hiragana = [:Hiragana:];
Control = [\p{Grapheme_Cluster_Break = Control}];

View File

@ -13,11 +13,12 @@
type = word; # one of grapheme | word | line | sentence
locale = en_US_POSIX;
Han = [:Han:];
CR = [\p{Word_Break = CR}];
LF = [\p{Word_Break = LF}];
Newline = [\p{Word_Break = Newline}];
Extend = [\p{Word_Break = Extend}];
Extend = [\p{Word_Break = Extend}-Han];
ZWJ = [\p{Word_Break = ZWJ}];
Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
Format = [\p{Word_Break = Format}];
@ -29,14 +30,13 @@ Double_Quote = [\p{Word_Break = Double_Quote}];
MidNumLet = [\p{Word_Break = MidNumLet} - [.]];
MidLetter = [\p{Word_Break = MidLetter} - [\:]];
MidNum = [\p{Word_Break = MidNum} [.]];
Numeric = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]]; # Patch for ICU-12079;
Numeric = [\p{Word_Break = Numeric}];
ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
WSegSpace = [\p{Word_Break = WSegSpace}];
Extended_Pict = [:ExtPict:];
#define dictionary, with the effect being that those characters don't appear in test data.
Han = [:Han:];
Hiragana = [:Hiragana:];
Control = [\p{Grapheme_Cluster_Break = Control}];

View File

@ -10,7 +10,7 @@
package com.ibm.icu.dev.test.util;
public class DebugUtilitiesData extends Object {
public static final String ICU4C_VERSION="66.0.1";
public static final String ICU4C_VERSION="66.1";
public static final int UDebugEnumType = 0;
public static final int UCalendarDateFields = 1;
public static final int UCalendarMonths = 2;

View File

@ -14,24 +14,24 @@ h3.doc { text-decoration: underline }
<body style="background-color: rgb(255, 255, 255);" lang="EN-US"
link="#0000ff" vlink="#800080">
<h1>International Components for Unicode for Java (ICU4J)</h1>
<h2>Read Me for ICU4J 66.0.1</h2>
(Last Update: 2019-Nov-27)
<h2>Read Me for ICU4J 66.1</h2>
(Last Update: 2020-Feb-12)
<hr size="2" width="100%">
<p>
<!-- <b>Note:</b> This is a major release of ICU4J. It contains bug fixes and adds implementations
of inherited API and introduces new API or functionality. -->
<b>Note:</b> This is a preview release of ICU4J 66.
<!-- <b>Note:</b> This is a preview release of ICU4J 66.
The contents of this document may not reflect the recent changes done
for ICU 66 development. It is not recommended for production use.
for ICU 66 development. It is not recommended for production use. -->
<!-- <b>Note:</b> This is a development milestone of ICU4J 66.
The contents of this document may not reflect the recent changes done
for ICU 66 development. It is not recommended for production use. -->
<!--<b>Note:</b> This is a release candidate of ICU4J 66.
<b>Note:</b> This is a release candidate of ICU4J 66.
The contents of this document may not reflect the recent changes done
for ICU 66 development. This release candidate is intended for those
wishing to verify ICU 66 integration before final release. It is not
recommended for production use. -->
recommended for production use.
</p>
<p>For the most recent release, see the <a
href="http://www.icu-project.org/download/"> ICU4J
@ -186,19 +186,19 @@ builds, and they are packaged in jar files for convenient download.
&lt;dependency&gt;
&lt;groupId&gt;com.ibm.icu&lt;/groupId&gt;
&lt;artifactId&gt;icu4j&lt;/artifactId&gt;
&lt;version&gt;66.0.1&lt;/version&gt;
&lt;version&gt;66.1&lt;/version&gt;
&lt;/dependency&gt;
&lt;dependency&gt;
&lt;groupId&gt;com.ibm.icu&lt;/groupId&gt;
&lt;artifactId&gt;icu4j-charset&lt;/artifactId&gt;
&lt;version&gt;66.0.1&lt;/version&gt;
&lt;version&gt;66.1&lt;/version&gt;
&lt;/dependency&gt;
&lt;dependency&gt;
&lt;groupId&gt;com.ibm.icu&lt;/groupId&gt;
&lt;artifactId&gt;icu4j-localespi&lt;/artifactId&gt;
&lt;version&gt;66.0.1&lt;/version&gt;
&lt;version&gt;66.1&lt;/version&gt;
&lt;/dependency&gt;
</pre>
</ul>
@ -879,7 +879,7 @@ ICU4J data is built by ICU4C tools. Please see "icu4j-readme.txt" in icu4c/sourc
<I> Note: This procedure assumes that all 3 sources are present</I>
<ol>
<li>Check out or download CLDR version 'release-36'</li>
<li>Check out ICU with tag 'release-66-0-1'</li>
<li>Check out ICU with tag 'release-66-1'</li>
<li>cd to icu4c/source/data directory</li>
<li>Follow the instructions in icu4c/source/data/cldr-icu-readme.txt</li>
<li>Rebuild ICU4C with the newly generated data.</li>