Update UCD to Revision 26

Include WordBreakTest.html, since a test uses sample strings from it,
albeit without actually reading the file.

Had to comment out more of the new tests, as at Revision 24, pending
an update to harfbuzz and the text boundary detection code.

Task-number: QTBUG-79631
Task-number: QTBUG-79418
Task-number: QTBUG-82747
Change-Id: I0082294b09d67ffdc6a9b5c15acf77ad3b86f65f
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
This commit is contained in:
Edward Welbourne 2020-03-13 17:26:53 +01:00
parent 7ddbd179a1
commit 54f8be6cc0
31 changed files with 11032 additions and 8380 deletions

View File

@ -192,6 +192,7 @@ QT_BEGIN_NAMESPACE
\value Unicode_11_0 Version 11.0 Since Qt 5.15
\value Unicode_12_0 Version 12.0 Since Qt 5.15
\value Unicode_12_1 Version 12.1 Since Qt 5.15
\value Unicode_13_0 Version 13.0 Since Qt 5.15
\value Unicode_Unassigned The value is not assigned to any character
in version 13.0 of Unicode.
@ -314,12 +315,14 @@ QT_BEGIN_NAMESPACE
\value Script_Chakma
\value Script_Cham
\value Script_Cherokee
\value Script_Chorasmian Since Qt 5.15
\value Script_Coptic
\value Script_Cuneiform
\value Script_Cypriot
\value Script_Cyrillic
\value Script_Deseret
\value Script_Devanagari
\value Script_DivesAkuru Since Qt 5.15
\value Script_Dogra Since Qt 5.15
\value Script_Duployan Since Qt 5.5
\value Script_EgyptianHieroglyphs
@ -350,6 +353,7 @@ QT_BEGIN_NAMESPACE
\value Script_Katakana
\value Script_KayahLi
\value Script_Kharoshthi
\value Script_KhitanSmallScript Since Qt 5.15
\value Script_Khmer
\value Script_Khojki Since Qt 5.5
\value Script_Khudawadi Since Qt 5.5
@ -439,6 +443,7 @@ QT_BEGIN_NAMESPACE
\value Script_Vai
\value Script_Wancho Since Qt 5.15
\value Script_WarangCiti Since Qt 5.5
\value Script_Yezidi Since Qt 5.15
\value Script_Yi
\value Script_ZanabazarSquare Since Qt 5.11

View File

@ -341,6 +341,12 @@ public:
Script_NyiakengPuachueHmong,
Script_Wancho,
// Unicode 13.0 additions
Script_Chorasmian,
Script_DivesAkuru,
Script_KhitanSmallScript,
Script_Yezidi,
ScriptCount
};
@ -437,7 +443,8 @@ public:
Unicode_10_0,
Unicode_11_0,
Unicode_12_0,
Unicode_12_1
Unicode_12_1,
Unicode_13_0
};
// ****** WHEN ADDING FUNCTIONS, CONSIDER ADDING TO QCharRef TOO

View File

@ -10,8 +10,9 @@
"Description": "The Unicode Character Database (UCD) is a set of files that
define the Unicode character properties and internal mappings.",
"Homepage": "https://www.unicode.org/ucd/",
"Version": "Don't use the Unicode standard version; UCD has its own 'Revision' numbers",
"Version": "24",
"Version": "Don't use the Unicode standard version;
UCD has its own 'Revision' numbers, see the 'UAX #44, UCD' page",
"Version": "26",
"License": "Unicode License Agreement - Data Files and Software (2016)",
"LicenseId": "Unicode-DFS-2016",
"LicenseFile": "UNICODE_LICENSE.txt",

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
/****************************************************************************
**
** Copyright (C) 2019 The Qt Company Ltd.
** Copyright (C) 2020 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
@ -37,7 +37,7 @@
**
****************************************************************************/
/* This file is autogenerated from the Unicode 12.1 database. Do not edit */
/* This file is autogenerated from the Unicode 13.0 database. Do not edit */
//
// W A R N I N G
@ -59,7 +59,7 @@
QT_BEGIN_NAMESPACE
#define UNICODE_DATA_VERSION QChar::Unicode_12_1
#define UNICODE_DATA_VERSION QChar::Unicode_13_0
namespace QUnicodeTables {

View File

@ -230,6 +230,12 @@ static const hb_script_t _qtscript_to_hbscript[] = {
hb_script_t(HB_TAG('N', 'a', 'n', 'd')), // Script_Nandinagari
hb_script_t(HB_TAG('H', 'm', 'n', 'p')), // Script_NyiakengPuachueHmong
hb_script_t(HB_TAG('W', 'c', 'h', 'o')), // Script_Wancho
// Unicode 13.0 additions (as above)
hb_script_t(HB_TAG('C', 'h', 'o', 'r')), // Script_Chorasmian
hb_script_t(HB_TAG('D', 'i', 'v', 'e')), // Script_DivesAkuru
hb_script_t(HB_TAG('K', 'h', 'i', 't')), // Script_KhitanSmallScript
hb_script_t(HB_TAG('Y', 'e', 'z', 'i')), // Script_Yezidi
};
// Compile-time guard: the number of entries in _qtscript_to_hbscript must equal
// QChar::ScriptCount, so adding a script to the enum without extending this
// table breaks the build.
Q_STATIC_ASSERT(QChar::ScriptCount == sizeof(_qtscript_to_hbscript) / sizeof(_qtscript_to_hbscript[0]));

View File

@ -273,7 +273,11 @@ static const char specialLanguages[][6] = {
"", // Elymaic
"", // Nandinagari
"", // NyiakengPuachueHmong
"" // Wancho
"", // Wancho
"", // Chorasmian
"", // DivesAkuru
"", // KhitanSmallScript
"" // Yezidi
};
// Compile-time guard: the number of entries in specialLanguages must equal
// QChar::ScriptCount, so adding a script to the enum without extending this
// table breaks the build.
Q_STATIC_ASSERT(sizeof specialLanguages / sizeof *specialLanguages == QChar::ScriptCount);

View File

@ -1,5 +1,5 @@
# GraphemeBreakTest-12.1.0.txt
# Date: 2019-03-10, 10:53:12 GMT
# GraphemeBreakTest-13.0.0.txt
# Date: 2019-11-15, 19:49:10 GMT
# © 2019 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html

View File

@ -1,5 +1,5 @@
# GraphemeBreakTest-12.1.0.txt
# Date: 2019-03-10, 10:53:12 GMT
# GraphemeBreakTest-13.0.0.txt
# Date: 2019-11-15, 19:49:10 GMT
# © 2019 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html

View File

@ -1,4 +1,4 @@
Temporary kludge at UCD Revision 24 until code can be fixed up.
Temporary kludge at UCD Revision 24--26 until code can be fixed up.
57 of the tests defined by the UCD data are here commented out.
57+53 of the tests defined by the UCD data are here commented out.
The raw upstream files are provided as *.txt.full where this was needed.

View File

@ -1,5 +1,5 @@
# SentenceBreakTest-12.1.0.txt
# Date: 2019-03-10, 10:53:28 GMT
# SentenceBreakTest-13.0.0.txt
# Date: 2019-11-20, 22:27:22 GMT
# © 2019 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html

View File

@ -0,0 +1,241 @@
<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN' 'http://www.w3.org/TR/html4/loose.dtd'>
<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>
<title>Word Break Chart</title>
<style type='text/css'>
td, th { vertical-align: top }
</style></head>
<body bgcolor='#FFFFFF'>
<h2>Word_Break Chart</h2>
<p><b>Unicode Version:</b> 13.0.0</p>
<p><b>Date:</b> 2019-11-20, 22:27:23 GMT</p>
<p>This page illustrates the application of the Word_Break specification. The material here is informative, not normative.</p> <p>The first chart shows where breaks would appear between different sample characters or strings. The sample characters are chosen mechanically to represent the different properties used by the specification.</p><p>Each cell shows the break-status for the position between the character(s) in its row header and the character(s) in its column header. The × symbol indicates no break, while the ÷ symbol indicates a break. The cells with × are also shaded to make it easier to scan the table. For example, in the cell at the intersection of the row headed by “CR” and the column headed by “LF”, there is a × symbol, indicating that there is no break between CR and LF.</p>
<p>After the heavy blue line in the table are additional rows, either with different sample characters or for sequences, such as “ALetter MidLetter”. Some column headers may be composed, reflecting “treat as” or “ignore” rules.</p>
<p>If your browser handles titles (tooltips), then hovering the mouse over the row header will show a sample character of that type. Hovering over a column header will show the sample character, plus its abbreviated general category and script. Hovering over the intersected cells shows the rule number that produces the break-status. For example, hovering over the cell at the intersection of ExtendNumLet and ALetter shows ×, with the rule 13.2. Checking below the table, rule 13.2 is “ExtendNumLet × (AHLetter | Numeric | Katakana)”, which is the one that applies to that case. Note that a rule is invoked only when no lower-numbered rules have applied.</p>
<h3><a href='#table' name='table'>Table</a></h3>
<table border='1' cellspacing='0' width='100%'><tr><th width='4%'></th><th width='4%' class='lbclass' title='U+0001 <START OF HEADING>, gc=Cc, sc=Zyyy'>Other</th><th width='4%' class='lbclass' title='U+000D <CARRIAGE RETURN (CR)>, gc=Cc, sc=Zyyy'>CR</th><th width='4%' class='lbclass' title='U+000A <LINE FEED (LF)>, gc=Cc, sc=Zyyy'>LF</th><th width='4%' class='lbclass' title='U+000B <LINE TABULATION>, gc=Cc, sc=Zyyy'>Newline</th><th width='4%' class='lbclass' title='U+3031 VERTICAL KANA REPEAT MARK, gc=Lm, sc=Zyyy'>Katakana</th><th width='4%' class='lbclass' title='U+0041 LATIN CAPITAL LETTER A, gc=Lu, sc=Latn'>ALetter</th><th width='4%' class='lbclass' title='U+003A COLON, gc=Po, sc=Zyyy'>MidLetter</th><th width='4%' class='lbclass' title='U+002C COMMA, gc=Po, sc=Zyyy'>MidNum</th><th width='4%' class='lbclass' title='U+002E FULL STOP, gc=Po, sc=Zyyy'>MidNumLet</th><th width='4%' class='lbclass' title='U+0030 DIGIT ZERO, gc=Nd, sc=Zyyy'>Numeric</th><th width='4%' class='lbclass' title='U+005F LOW LINE, gc=Pc, sc=Zyyy'>ExtendNumLet</th><th width='4%' class='lbclass' title='U+1F1E6 REGIONAL INDICATOR SYMBOL LETTER A, gc=So, sc=Zyyy'>RI</th><th width='4%' class='lbclass' title='U+05D0 HEBREW LETTER ALEF, gc=Lo, sc=Hebr'>Hebrew_Letter</th><th width='4%' class='lbclass' title='U+0022 QUOTATION MARK, gc=Po, sc=Zyyy'>Double_Quote</th><th width='4%' class='lbclass' title='U+0027 APOSTROPHE, gc=Po, sc=Zyyy'>Single_Quote</th><th width='4%' class='lbclass' title='U+231A WATCH, gc=So, sc=Zyyy'>ExtPict</th><th width='4%' class='lbclass' title='U+0020 SPACE, gc=Zs, sc=Zyyy'>WSegSpace</th><th width='4%' class='lbclass' title='U+00AD SOFT HYPHEN, gc=Cf, sc=Zyyy'>Format_FE</th><th width='4%' class='lbclass' title='U+0300 COMBINING GRAVE ACCENT, gc=Mn, sc=Zinh'>Extend_FE</th><th width='4%' class='lbclass' title='U+200D ZERO WIDTH JOINER, gc=Cf, sc=Zinh'>ZWJ_FE</th></tr>
<tr><th class='lbclass' title='U+0001 <START OF HEADING>'>Other</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+000D <CARRIAGE RETURN (CR)>'>CR</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th></tr>
<tr><th class='lbclass' title='U+000A <LINE FEED (LF)>'>LF</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th></tr>
<tr><th class='lbclass' title='U+000B <LINE TABULATION>'>Newline</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th><th title='3.1' class='pairItem'>÷</th></tr>
<tr><th class='lbclass' title='U+3031 VERTICAL KANA REPEAT MARK'>Katakana</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='13.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='13.1' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+0041 LATIN CAPITAL LETTER A'>ALetter</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='5.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='9.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='13.1' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='5.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+003A COLON'>MidLetter</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+002C COMMA'>MidNum</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+002E FULL STOP'>MidNumLet</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+0030 DIGIT ZERO'>Numeric</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='10.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='8.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='13.1' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='10.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+005F LOW LINE'>ExtendNumLet</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='13.2' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='13.2' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='13.2' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='13.1' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='13.2' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+1F1E6 REGIONAL INDICATOR SYMBOL LETTER A'>RI</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='15.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+05D0 HEBREW LETTER ALEF'>Hebrew_Letter</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='5.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='9.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='13.1' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='5.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='7.1' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+0022 QUOTATION MARK'>Double_Quote</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+0027 APOSTROPHE'>Single_Quote</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+231A WATCH'>ExtPict</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+0020 SPACE'>WSegSpace</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='3.4' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+00AD SOFT HYPHEN'>Format_FE</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+0300 COMBINING GRAVE ACCENT'>Extend_FE</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+200D ZERO WIDTH JOINER'>ZWJ_FE</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='3.3' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><td bgcolor='#0000FF' colSpan='21' style='font-size: 1px'>&nbsp;</td></tr>
<tr><th class='lbclass' title='U+0061 LATIN SMALL LETTER A, U+2060 WORD JOINER'>ALetter Format_FE</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='5.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='9.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='13.1' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='5.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+0061 LATIN SMALL LETTER A, U+003A COLON'>ALetter MidLetter</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='7.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='7.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+0061 LATIN SMALL LETTER A, U+0027 APOSTROPHE'>ALetter Single_Quote</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='7.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='7.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+0061 LATIN SMALL LETTER A, U+0027 APOSTROPHE, U+2060 WORD JOINER'>ALetter Single_Quote Format_FE</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='7.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='7.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+0061 LATIN SMALL LETTER A, U+002C COMMA'>ALetter MidNum</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+0031 DIGIT ONE, U+003A COLON'>Numeric MidLetter</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+0031 DIGIT ONE, U+0027 APOSTROPHE'>Numeric Single_Quote</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='11.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+0031 DIGIT ONE, U+002C COMMA'>Numeric MidNum</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='11.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
<tr><th class='lbclass' title='U+0031 DIGIT ONE, U+002E FULL STOP, U+2060 WORD JOINER'>Numeric MidNumLet Format_FE</th><th title='999.0' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='3.2' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='11.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='999.0' class='pairItem'>÷</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th><th title='4.0' bgcolor='#CCCCFF' class='pairItem'>×</th></tr>
</table>
<h3><a href='#rules' name='rules'>Rules</a></h3>
<p>This section shows the rules. They are mechanically modified for programmatic generation of the tables and test code, and thus do not match the UAX rules precisely. In particular:</p><ol><li>The rules are cast into a form that is more like regular expressions.</li><li>The rules “sot ÷”, “÷ eot”, and “÷ Any” are added mechanically, and have artificial numbers.</li><li>The rules are given decimal numbers using tenths, and are written without prefix. For example, rule WB13a is given the number 13.1.</li><li>Any “treat as” or “ignore” rules are handled as discussed in UAX #29, and thus reflected in a transformation of the rules usually not visible here. In addition, final rules like “Any ÷ Any” may be recast as the equivalent expression “÷ Any”.</li><li>In some cases, the numbering and form of a rule are changed due to “treat as” rules.</li></ol><p>For the original rules and the macro values they use, see UAX #29.</p>
<table>
<tr><th style='text-align:right'><a href='#r0.2' name='r0.2'>0.2</a></th><td style='text-align:right'>sot </td><td>÷</td><td></td></tr>
<tr><th style='text-align:right'><a href='#r0.3' name='r0.3'>0.3</a></th><td style='text-align:right'></td><td>÷</td><td> eot</td></tr>
<tr><th style='text-align:right'><a href='#r3.0' name='r3.0'>3.0</a></th><td style='text-align:right'>CR </td><td>×</td><td> LF</td></tr>
<tr><th style='text-align:right'><a href='#r3.1' name='r3.1'>3.1</a></th><td style='text-align:right'>(Newline | CR | LF) </td><td>÷</td><td></td></tr>
<tr><th style='text-align:right'><a href='#r3.2' name='r3.2'>3.2</a></th><td style='text-align:right'></td><td>÷</td><td> (Newline | CR | LF)</td></tr>
<tr><th style='text-align:right'><a href='#r3.3' name='r3.3'>3.3</a></th><td style='text-align:right'>ZWJ </td><td>×</td><td> ExtPict</td></tr>
<tr><th style='text-align:right'><a href='#r3.4' name='r3.4'>3.4</a></th><td style='text-align:right'>WSegSpace </td><td>×</td><td> WSegSpace</td></tr>
<tr><th style='text-align:right'><a href='#r4.0' name='r4.0'>4.0</a></th><td style='text-align:right'>[^ Newline CR LF ] </td><td>×</td><td> [Format Extend ZWJ]</td></tr>
<tr><th style='text-align:right'><a href='#r5.0' name='r5.0'>5.0</a></th><td style='text-align:right'>AHLetter </td><td>×</td><td> AHLetter</td></tr>
<tr><th style='text-align:right'><a href='#r6.0' name='r6.0'>6.0</a></th><td style='text-align:right'>AHLetter </td><td>×</td><td> (MidLetter | MidNumLetQ) AHLetter</td></tr>
<tr><th style='text-align:right'><a href='#r7.0' name='r7.0'>7.0</a></th><td style='text-align:right'>AHLetter (MidLetter | MidNumLetQ) </td><td>×</td><td> AHLetter</td></tr>
<tr><th style='text-align:right'><a href='#r7.1' name='r7.1'>7.1</a></th><td style='text-align:right'>Hebrew_Letter </td><td>×</td><td> Single_Quote</td></tr>
<tr><th style='text-align:right'><a href='#r7.2' name='r7.2'>7.2</a></th><td style='text-align:right'>Hebrew_Letter </td><td>×</td><td> Double_Quote Hebrew_Letter</td></tr>
<tr><th style='text-align:right'><a href='#r7.3' name='r7.3'>7.3</a></th><td style='text-align:right'>Hebrew_Letter Double_Quote </td><td>×</td><td> Hebrew_Letter</td></tr>
<tr><th style='text-align:right'><a href='#r8.0' name='r8.0'>8.0</a></th><td style='text-align:right'>Numeric </td><td>×</td><td> Numeric</td></tr>
<tr><th style='text-align:right'><a href='#r9.0' name='r9.0'>9.0</a></th><td style='text-align:right'>AHLetter </td><td>×</td><td> Numeric</td></tr>
<tr><th style='text-align:right'><a href='#r10.0' name='r10.0'>10.0</a></th><td style='text-align:right'>Numeric </td><td>×</td><td> AHLetter</td></tr>
<tr><th style='text-align:right'><a href='#r11.0' name='r11.0'>11.0</a></th><td style='text-align:right'>Numeric (MidNum | MidNumLetQ) </td><td>×</td><td> Numeric</td></tr>
<tr><th style='text-align:right'><a href='#r12.0' name='r12.0'>12.0</a></th><td style='text-align:right'>Numeric </td><td>×</td><td> (MidNum | MidNumLetQ) Numeric</td></tr>
<tr><th style='text-align:right'><a href='#r13.0' name='r13.0'>13.0</a></th><td style='text-align:right'>Katakana </td><td>×</td><td> Katakana</td></tr>
<tr><th style='text-align:right'><a href='#r13.1' name='r13.1'>13.1</a></th><td style='text-align:right'>(AHLetter | Numeric | Katakana | ExtendNumLet) </td><td>×</td><td> ExtendNumLet</td></tr>
<tr><th style='text-align:right'><a href='#r13.2' name='r13.2'>13.2</a></th><td style='text-align:right'>ExtendNumLet </td><td>×</td><td> (AHLetter | Numeric | Katakana)</td></tr>
<tr><th style='text-align:right'><a href='#r15.0' name='r15.0'>15.0</a></th><td style='text-align:right'>^ (RI RI)* RI </td><td>×</td><td> RI</td></tr>
<tr><th style='text-align:right'><a href='#r16.0' name='r16.0'>16.0</a></th><td style='text-align:right'>[^RI] (RI RI)* RI </td><td>×</td><td> RI</td></tr>
<tr><th style='text-align:right'><a href='#r999.0' name='r999.0'>999.0</a></th><td style='text-align:right'></td><td>÷</td><td> Any</td></tr>
</table>
<h3><a href='#samples' name='samples'>Sample Strings</a></h3>
<p>The following samples illustrate the application of the rules. The blue lines indicate possible break points. If your browser supports titles (tooltips), then positioning the mouse over each character will show its name, while positioning between characters shows the number of the rule responsible for the break-status.</p>
<table>
<tr><th style='text-align:right'><a href='#s1' name='s1'>1</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+000D &lt;CARRIAGE RETURN (CR)&gt; (CR)'>&#x25A1;</span><span title='3.0'><span>&nbsp;</span>&nbsp;</span><span title='U+000A &lt;LINE FEED (LF)&gt; (LF)'>&#x25A1;</span><span title='3.1'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0061 LATIN SMALL LETTER A (ALetter)'>a</span><span title='3.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+000A &lt;LINE FEED (LF)&gt; (LF)'>&#x25A1;</span><span title='3.1'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0308 COMBINING DIAERESIS (Extend_FE)'>&#x25CC;&#x308;</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s2' name='s2'>2</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0061 LATIN SMALL LETTER A (ALetter)'>a</span><span title='4.0'><span>&nbsp;</span>&nbsp;</span><span title='U+0308 COMBINING DIAERESIS (Extend_FE)'>&#x25CC;&#x308;</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s3' name='s3'>3</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0020 SPACE (WSegSpace)'> </span><span title='4.0'><span>&nbsp;</span>&nbsp;</span><span title='U+200D ZERO WIDTH JOINER (ZWJ_FE)'>&#x25A1;</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0646 ARABIC LETTER NOON (ALetter)'>&#x646;</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s4' name='s4'>4</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0646 ARABIC LETTER NOON (ALetter)'>&#x646;</span><span title='4.0'><span>&nbsp;</span>&nbsp;</span><span title='U+200D ZERO WIDTH JOINER (ZWJ_FE)'>&#x25A1;</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0020 SPACE (WSegSpace)'> </span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s5' name='s5'>5</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0041 LATIN CAPITAL LETTER A (ALetter)'>A</span><span title='5.0'><span>&nbsp;</span>&nbsp;</span><span title='U+0041 LATIN CAPITAL LETTER A (ALetter)'>A</span><span title='5.0'><span>&nbsp;</span>&nbsp;</span><span title='U+0041 LATIN CAPITAL LETTER A (ALetter)'>A</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s6' name='s6'>6</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0041 LATIN CAPITAL LETTER A (ALetter)'>A</span><span title='6.0'><span>&nbsp;</span>&nbsp;</span><span title='U+003A COLON (MidLetter)'>:</span><span title='7.0'><span>&nbsp;</span>&nbsp;</span><span title='U+0041 LATIN CAPITAL LETTER A (ALetter)'>A</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s7' name='s7'>7</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0041 LATIN CAPITAL LETTER A (ALetter)'>A</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+003A COLON (MidLetter)'>:</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+003A COLON (MidLetter)'>:</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0041 LATIN CAPITAL LETTER A (ALetter)'>A</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s8' name='s8'>8</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+05D0 HEBREW LETTER ALEF (Hebrew_Letter)'>&#x5D0;</span><span title='7.1'><span>&nbsp;</span>&nbsp;</span><span title='U+0027 APOSTROPHE (Single_Quote)'>'</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s9' name='s9'>9</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+05D0 HEBREW LETTER ALEF (Hebrew_Letter)'>&#x5D0;</span><span title='7.2'><span>&nbsp;</span>&nbsp;</span><span title='U+0022 QUOTATION MARK (Double_Quote)'>&quot;</span><span title='7.3'><span>&nbsp;</span>&nbsp;</span><span title='U+05D0 HEBREW LETTER ALEF (Hebrew_Letter)'>&#x5D0;</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s10' name='s10'>10</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0041 LATIN CAPITAL LETTER A (ALetter)'>A</span><span title='9.0'><span>&nbsp;</span>&nbsp;</span><span title='U+0030 DIGIT ZERO (Numeric)'>0</span><span title='8.0'><span>&nbsp;</span>&nbsp;</span><span title='U+0030 DIGIT ZERO (Numeric)'>0</span><span title='10.0'><span>&nbsp;</span>&nbsp;</span><span title='U+0041 LATIN CAPITAL LETTER A (ALetter)'>A</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s11' name='s11'>11</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0030 DIGIT ZERO (Numeric)'>0</span><span title='12.0'><span>&nbsp;</span>&nbsp;</span><span title='U+002C COMMA (MidNum)'>,</span><span title='11.0'><span>&nbsp;</span>&nbsp;</span><span title='U+0030 DIGIT ZERO (Numeric)'>0</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s12' name='s12'>12</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0030 DIGIT ZERO (Numeric)'>0</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+002C COMMA (MidNum)'>,</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+002C COMMA (MidNum)'>,</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0030 DIGIT ZERO (Numeric)'>0</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s13' name='s13'>13</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+3031 VERTICAL KANA REPEAT MARK (Katakana)'>&#x3031;</span><span title='13.0'><span>&nbsp;</span>&nbsp;</span><span title='U+3031 VERTICAL KANA REPEAT MARK (Katakana)'>&#x3031;</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s14' name='s14'>14</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0041 LATIN CAPITAL LETTER A (ALetter)'>A</span><span title='13.1'><span>&nbsp;</span>&nbsp;</span><span title='U+005F LOW LINE (ExtendNumLet)'>_</span><span title='13.2'><span>&nbsp;</span>&nbsp;</span><span title='U+0030 DIGIT ZERO (Numeric)'>0</span><span title='13.1'><span>&nbsp;</span>&nbsp;</span><span title='U+005F LOW LINE (ExtendNumLet)'>_</span><span title='13.2'><span>&nbsp;</span>&nbsp;</span><span title='U+3031 VERTICAL KANA REPEAT MARK (Katakana)'>&#x3031;</span><span title='13.1'><span>&nbsp;</span>&nbsp;</span><span title='U+005F LOW LINE (ExtendNumLet)'>_</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s15' name='s15'>15</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0041 LATIN CAPITAL LETTER A (ALetter)'>A</span><span title='13.1'><span>&nbsp;</span>&nbsp;</span><span title='U+005F LOW LINE (ExtendNumLet)'>_</span><span title='13.1'><span>&nbsp;</span>&nbsp;</span><span title='U+005F LOW LINE (ExtendNumLet)'>_</span><span title='13.2'><span>&nbsp;</span>&nbsp;</span><span title='U+0041 LATIN CAPITAL LETTER A (ALetter)'>A</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s16' name='s16'>16</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+1F1E6 REGIONAL INDICATOR SYMBOL LETTER A (RI)'>&#x1F1E6;</span><span title='15.0'><span>&nbsp;</span>&nbsp;</span><span title='U+1F1E7 REGIONAL INDICATOR SYMBOL LETTER B (RI)'>&#x1F1E7;</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+1F1E8 REGIONAL INDICATOR SYMBOL LETTER C (RI)'>&#x1F1E8;</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0062 LATIN SMALL LETTER B (ALetter)'>b</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s17' name='s17'>17</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0061 LATIN SMALL LETTER A (ALetter)'>a</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+1F1E6 REGIONAL INDICATOR SYMBOL LETTER A (RI)'>&#x1F1E6;</span><span title='16.0'><span>&nbsp;</span>&nbsp;</span><span title='U+1F1E7 REGIONAL INDICATOR SYMBOL LETTER B (RI)'>&#x1F1E7;</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+1F1E8 REGIONAL INDICATOR SYMBOL LETTER C (RI)'>&#x1F1E8;</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0062 LATIN SMALL LETTER B (ALetter)'>b</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s18' name='s18'>18</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0061 LATIN SMALL LETTER A (ALetter)'>a</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+1F1E6 REGIONAL INDICATOR SYMBOL LETTER A (RI)'>&#x1F1E6;</span><span title='16.0'><span>&nbsp;</span>&nbsp;</span><span title='U+1F1E7 REGIONAL INDICATOR SYMBOL LETTER B (RI)'>&#x1F1E7;</span><span title='4.0'><span>&nbsp;</span>&nbsp;</span><span title='U+200D ZERO WIDTH JOINER (ZWJ_FE)'>&#x25A1;</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+1F1E8 REGIONAL INDICATOR SYMBOL LETTER C (RI)'>&#x1F1E8;</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0062 LATIN SMALL LETTER B (ALetter)'>b</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s19' name='s19'>19</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0061 LATIN SMALL LETTER A (ALetter)'>a</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+1F1E6 REGIONAL INDICATOR SYMBOL LETTER A (RI)'>&#x1F1E6;</span><span title='4.0'><span>&nbsp;</span>&nbsp;</span><span title='U+200D ZERO WIDTH JOINER (ZWJ_FE)'>&#x25A1;</span><span title='16.0'><span>&nbsp;</span>&nbsp;</span><span title='U+1F1E7 REGIONAL INDICATOR SYMBOL LETTER B (RI)'>&#x1F1E7;</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+1F1E8 REGIONAL INDICATOR SYMBOL LETTER C (RI)'>&#x1F1E8;</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0062 LATIN SMALL LETTER B (ALetter)'>b</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s20' name='s20'>20</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0061 LATIN SMALL LETTER A (ALetter)'>a</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+1F1E6 REGIONAL INDICATOR SYMBOL LETTER A (RI)'>&#x1F1E6;</span><span title='16.0'><span>&nbsp;</span>&nbsp;</span><span title='U+1F1E7 REGIONAL INDICATOR SYMBOL LETTER B (RI)'>&#x1F1E7;</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+1F1E8 REGIONAL INDICATOR SYMBOL LETTER C (RI)'>&#x1F1E8;</span><span title='16.0'><span>&nbsp;</span>&nbsp;</span><span title='U+1F1E9 REGIONAL INDICATOR SYMBOL LETTER D (RI)'>&#x1F1E9;</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0062 LATIN SMALL LETTER B (ALetter)'>b</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s21' name='s21'>21</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+1F476 BABY (ExtPict)'>&#x1F476;</span><span title='4.0'><span>&nbsp;</span>&nbsp;</span><span title='U+1F3FF EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE)'>&#x1F3FF;</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+1F476 BABY (ExtPict)'>&#x1F476;</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s22' name='s22'>22</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+1F6D1 OCTAGONAL SIGN (ExtPict)'>&#x1F6D1;</span><span title='4.0'><span>&nbsp;</span>&nbsp;</span><span title='U+200D ZERO WIDTH JOINER (ZWJ_FE)'>&#x25A1;</span><span title='3.3'><span>&nbsp;</span>&nbsp;</span><span title='U+1F6D1 OCTAGONAL SIGN (ExtPict)'>&#x1F6D1;</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s23' name='s23'>23</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0061 LATIN SMALL LETTER A (ALetter)'>a</span><span title='4.0'><span>&nbsp;</span>&nbsp;</span><span title='U+200D ZERO WIDTH JOINER (ZWJ_FE)'>&#x25A1;</span><span title='3.3'><span>&nbsp;</span>&nbsp;</span><span title='U+1F6D1 OCTAGONAL SIGN (ExtPict)'>&#x1F6D1;</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s24' name='s24'>24</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+2701 UPPER BLADE SCISSORS (Other)'>&#x2701;</span><span title='4.0'><span>&nbsp;</span>&nbsp;</span><span title='U+200D ZERO WIDTH JOINER (ZWJ_FE)'>&#x25A1;</span><span title='3.3'><span>&nbsp;</span>&nbsp;</span><span title='U+2701 UPPER BLADE SCISSORS (Other)'>&#x2701;</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s25' name='s25'>25</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0061 LATIN SMALL LETTER A (ALetter)'>a</span><span title='4.0'><span>&nbsp;</span>&nbsp;</span><span title='U+200D ZERO WIDTH JOINER (ZWJ_FE)'>&#x25A1;</span><span title='3.3'><span>&nbsp;</span>&nbsp;</span><span title='U+2701 UPPER BLADE SCISSORS (Other)'>&#x2701;</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s26' name='s26'>26</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+1F476 BABY (ExtPict)'>&#x1F476;</span><span title='4.0'><span>&nbsp;</span>&nbsp;</span><span title='U+1F3FF EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE)'>&#x1F3FF;</span><span title='4.0'><span>&nbsp;</span>&nbsp;</span><span title='U+0308 COMBINING DIAERESIS (Extend_FE)'>&#x25CC;&#x308;</span><span title='4.0'><span>&nbsp;</span>&nbsp;</span><span title='U+200D ZERO WIDTH JOINER (ZWJ_FE)'>&#x25A1;</span><span title='3.3'><span>&nbsp;</span>&nbsp;</span><span title='U+1F476 BABY (ExtPict)'>&#x1F476;</span><span title='4.0'><span>&nbsp;</span>&nbsp;</span><span title='U+1F3FF EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE)'>&#x1F3FF;</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s27' name='s27'>27</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+1F6D1 OCTAGONAL SIGN (ExtPict)'>&#x1F6D1;</span><span title='4.0'><span>&nbsp;</span>&nbsp;</span><span title='U+1F3FF EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE)'>&#x1F3FF;</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s28' name='s28'>28</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+200D ZERO WIDTH JOINER (ZWJ_FE)'>&#x25A1;</span><span title='3.3'><span>&nbsp;</span>&nbsp;</span><span title='U+1F6D1 OCTAGONAL SIGN (ExtPict)'>&#x1F6D1;</span><span title='4.0'><span>&nbsp;</span>&nbsp;</span><span title='U+1F3FF EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE)'>&#x1F3FF;</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s29' name='s29'>29</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+200D ZERO WIDTH JOINER (ZWJ_FE)'>&#x25A1;</span><span title='3.3'><span>&nbsp;</span>&nbsp;</span><span title='U+1F6D1 OCTAGONAL SIGN (ExtPict)'>&#x1F6D1;</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s30' name='s30'>30</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+200D ZERO WIDTH JOINER (ZWJ_FE)'>&#x25A1;</span><span title='3.3'><span>&nbsp;</span>&nbsp;</span><span title='U+1F6D1 OCTAGONAL SIGN (ExtPict)'>&#x1F6D1;</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s31' name='s31'>31</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+1F6D1 OCTAGONAL SIGN (ExtPict)'>&#x1F6D1;</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+1F6D1 OCTAGONAL SIGN (ExtPict)'>&#x1F6D1;</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s32' name='s32'>32</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0061 LATIN SMALL LETTER A (ALetter)'>a</span><span title='4.0'><span>&nbsp;</span>&nbsp;</span><span title='U+0308 COMBINING DIAERESIS (Extend_FE)'>&#x25CC;&#x308;</span><span title='4.0'><span>&nbsp;</span>&nbsp;</span><span title='U+200D ZERO WIDTH JOINER (ZWJ_FE)'>&#x25A1;</span><span title='4.0'><span>&nbsp;</span>&nbsp;</span><span title='U+0308 COMBINING DIAERESIS (Extend_FE)'>&#x25CC;&#x308;</span><span title='5.0'><span>&nbsp;</span>&nbsp;</span><span title='U+0062 LATIN SMALL LETTER B (ALetter)'>b</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
<tr><th style='text-align:right'><a href='#s33' name='s33'>33</a></th><td><font size='5'>
<span title='0.2'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0061 LATIN SMALL LETTER A (ALetter)'>a</span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0020 SPACE (WSegSpace)'> </span><span title='3.4'><span>&nbsp;</span>&nbsp;</span><span title='U+0020 SPACE (WSegSpace)'> </span><span title='999.0'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span><span title='U+0062 LATIN SMALL LETTER B (ALetter)'>b</span><span title='0.3'><span style='border-right: 1px solid blue'>&nbsp;</span>&nbsp;</span>
</font></td></tr>
</table>
<hr width='50%'>
<div align='center'>
<center>
<table cellspacing='0' cellpadding='0' border='0'>
<tr>
<td><a href='http://www.unicode.org/unicode/copyright.html'>
<img src='http://www.unicode.org/img/hb_notice.gif' border='0' alt='Access to Copyright and terms of use' width='216' height='50'></a></td>
</tr>
</table>
<script language='Javascript' type='text/javascript' src='http://www.unicode.org/webscripts/lastModified.js'>
</script>
</center>
</div>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>

View File

@ -1,5 +1,5 @@
# WordBreakTest-12.1.0.txt
# Date: 2019-03-10, 10:53:29 GMT
# WordBreakTest-13.0.0.txt
# Date: 2019-11-20, 22:27:23 GMT
# © 2019 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html

View File

@ -32,11 +32,13 @@ To update:
cases QChar may need additions to some of its enums.
* Build with the modified code, fix any compilation issues, make check
in suitable directories, including tst_QTextBoundaryFinder.
* That may have updated qtbase/src/corelib/text/qunicodetables.cpp;
if so the update matters; be sure to commit the changes to data/ at
the same time and update text/qt_attribution.json to match; use the
UCD Revision number, rather than the Unicode standard number, as the
Version, for all that qunicodetables.cpp uses the latter.
* That may have updated qtbase/src/corelib/text/qunicodetables.cpp; if
so, the update matters: be sure to commit the changes to data/ at the
same time and update text/qt_attribution.json to match. Use the UCD
Revision number, rather than the Unicode standard number, as the
Version, even though qunicodetables.cpp uses the latter (see the
'UAX #44, UCD' page linked from https://www.unicode.org/ucd/ for the
table that gives both numbers).
* If there are enum additions in qchar.h (public API), be sure to also
update the documentation in qchar.cpp for each affected enum,
respecting the existing ordering.

View File

@ -1,6 +1,6 @@
# ArabicShaping-12.1.0.txt
# Date: 2019-03-08, 23:59:00 GMT [KW, RP]
# © 2019 Unicode®, Inc.
# ArabicShaping-13.0.0.txt
# Date: 2020-01-31, 23:55:00 GMT [KW, RP]
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@ -8,24 +8,22 @@
# Unicode Character Database.
#
# This file defines the Joining_Type and Joining_Group property
# values for Arabic, Syriac, N'Ko, Mandaic, Manichaean,
# Hanifi Rohingya, and Sogdian positional
# values for Arabic, Syriac, N'Ko, Mandaic, and Manichaean positional
# shaping, repeating in machine readable form the information
# exemplified in Tables 9-3, 9-8, 9-9, 9-10, 9-14, 9-15, 9-16, 9-19,
# 9-20, 10-4, 10-5, 10-6, 10-7, 14-10, 16-16, and 19-5 of The Unicode Standard core
# 9-20, 10-4, 10-5, 10-6, 10-7, and 19-5 of The Unicode Standard core
# specification. This file also defines Joining_Type values for
# Mongolian, Phags-pa, Psalter Pahlavi, and Adlam positional shaping,
# Mongolian, Phags-pa, Psalter Pahlavi, Sogdian, Chorasmian, and Adlam positional shaping,
# and Joining_Type and Joining_Group values for Hanifi Rohingya positional shaping,
# which are not listed in tables in the standard.
#
# See Sections 9.2, 9.3, 9.5, 10.5, 10.6, 13.4, 14.3, 14.10, 16.13, 19.4, and 19.9
# See Sections 9.2, 9.3, 9.5, 10.5, 10.6, 13.4, 14.3, 14.10, 16.14, 19.4, and 19.9
# of The Unicode Standard core specification for more information.
#
# Each line contains four fields, separated by a semicolon.
#
# Field 0: the code point, in 4-digit hexadecimal
# form, of an Arabic, Syriac, N'Ko, Mandaic, Mongolian,
# Phags-pa, Manichaean, Psalter Pahlavi, Hanifi Rohingya, Sogdian,
# or other character.
# form, of a character.
#
# Field 1: gives a short schematic name for that character.
# The schematic name is descriptive of the shape, based as
@ -81,7 +79,7 @@
# joining group values will be defined only if an explicit proposal
# to define those values exactly has been approved by the UTC. This
# is the convention exemplified by the N'Ko, Mandaic, Mongolian,
# Phags-pa, Psalter Pahlavi, and Sogdian scripts.
# Phags-pa, Psalter Pahlavi, Sogdian, Chorasmian, and Adlam scripts.
# Only the Arabic, Manichaean, and Syriac scripts currently have
# explicit joining group values defined for all characters, including
# those which have only a single character in a particular Joining_Group
@ -416,9 +414,9 @@
0853; MANDAIC AR; D; No_Joining_Group
0854; MANDAIC ASH; R; No_Joining_Group
0855; MANDAIC AT; D; No_Joining_Group
0856; MANDAIC DUSHENNA; U; No_Joining_Group
0857; MANDAIC KAD; U; No_Joining_Group
0858; MANDAIC AIN; U; No_Joining_Group
0856; MANDAIC DUSHENNA; R; No_Joining_Group
0857; MANDAIC KAD; R; No_Joining_Group
0858; MANDAIC AIN; R; No_Joining_Group
# Syriac Supplement Characters
@ -465,6 +463,16 @@
08BB; AFRICAN FEH; D; AFRICAN FEH
08BC; AFRICAN QAF; D; AFRICAN QAF
08BD; AFRICAN NOON; D; AFRICAN NOON
08BE; DOTLESS BEH WITH 3 DOTS BELOW AND V ABOVE; D; BEH
08BF; DOTLESS BEH WITH 2 DOTS AND V ABOVE; D; BEH
08C0; DOTLESS BEH WITH TAH AND V ABOVE; D; BEH
08C1; HAH WITH 3 DOTS BELOW AND V ABOVE; D; HAH
08C2; KEHEH WITH V ABOVE; D; GAF
08C3; AIN WITH DIAMOND 4 DOTS ABOVE; D; AIN
08C4; AFRICAN QAF WITH 3 DOTS ABOVE; D; AFRICAN QAF
08C5; HAH WITH DOT BELOW AND 3 DOTS ABOVE; D; HAH
08C6; HAH WITH DIAMOND 4 DOTS BELOW; D; HAH
08C7; LAM WITH TAH ABOVE; D; LAM
08E2; ARABIC DISPUTED END OF AYAH; U; No_Joining_Group
# Mongolian Characters
@ -811,6 +819,37 @@ A873; PHAGS-PA CANDRABINDU; U; No_Joining_Group
10F53; SOGDIAN TWENTY; D; No_Joining_Group
10F54; SOGDIAN ONE HUNDRED; R; No_Joining_Group
# Chorasmian Characters
10FB0; CHORASMIAN ALEPH; D; No_Joining_Group
10FB1; CHORASMIAN SMALL ALEPH; U; No_Joining_Group
10FB2; CHORASMIAN BETH; D; No_Joining_Group
10FB3; CHORASMIAN GIMEL; D; No_Joining_Group
10FB4; CHORASMIAN DALETH; R; No_Joining_Group
10FB5; CHORASMIAN HE; R; No_Joining_Group
10FB6; CHORASMIAN WAW; R; No_Joining_Group
10FB7; CHORASMIAN CURLED WAW; U; No_Joining_Group
10FB8; CHORASMIAN ZAYIN; D; No_Joining_Group
10FB9; CHORASMIAN HETH; R; No_Joining_Group
10FBA; CHORASMIAN YODH; R; No_Joining_Group
10FBB; CHORASMIAN KAPH; D; No_Joining_Group
10FBC; CHORASMIAN LAMEDH; D; No_Joining_Group
10FBD; CHORASMIAN MEM; R; No_Joining_Group
10FBE; CHORASMIAN NUN; D; No_Joining_Group
10FBF; CHORASMIAN SAMEKH; D; No_Joining_Group
10FC0; CHORASMIAN AYIN; U; No_Joining_Group
10FC1; CHORASMIAN PE; D; No_Joining_Group
10FC2; CHORASMIAN RESH; R; No_Joining_Group
10FC3; CHORASMIAN SHIN; R; No_Joining_Group
10FC4; CHORASMIAN TAW; D; No_Joining_Group
10FC5; CHORASMIAN ONE; U; No_Joining_Group
10FC6; CHORASMIAN TWO; U; No_Joining_Group
10FC7; CHORASMIAN THREE; U; No_Joining_Group
10FC8; CHORASMIAN FOUR; U; No_Joining_Group
10FC9; CHORASMIAN TEN; R; No_Joining_Group
10FCA; CHORASMIAN TWENTY; D; No_Joining_Group
10FCB; CHORASMIAN ONE HUNDRED; L; No_Joining_Group
# Kaithi Number Signs
# These are prepended concatenation marks, comparable
# to the number signs in the Arabic script.

View File

@ -1,5 +1,5 @@
# BidiMirroring-12.1.0.txt
# Date: 2019-03-08, 23:59:00 GMT [KW, LI, RP]
# BidiMirroring-13.0.0.txt
# Date: 2019-09-09, 19:34:00 GMT [KW, LI, RP]
# © 2019 Unicode®, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@ -15,7 +15,7 @@
# value, for which there is another Unicode character that typically has a glyph
# that is the mirror image of the original character's glyph.
#
# The repertoire covered by the file is Unicode 12.1.0.
# The repertoire covered by the file is Unicode 13.0.0.
#
# The file contains a list of lines with mappings from one code point
# to another one for character-based mirroring.

View File

@ -1,5 +1,5 @@
# Blocks-12.1.0.txt
# Date: 2019-03-08, 23:59:00 GMT [KW]
# Blocks-13.0.0.txt
# Date: 2019-07-10, 19:06:00 GMT [KW]
# © 2019 Unicode®, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@ -237,8 +237,10 @@ FFF0..FFFF; Specials
10C80..10CFF; Old Hungarian
10D00..10D3F; Hanifi Rohingya
10E60..10E7F; Rumi Numeral Symbols
10E80..10EBF; Yezidi
10F00..10F2F; Old Sogdian
10F30..10F6F; Sogdian
10FB0..10FDF; Chorasmian
10FE0..10FFF; Elymaic
11000..1107F; Brahmi
11080..110CF; Kaithi
@ -260,6 +262,7 @@ FFF0..FFFF; Specials
11700..1173F; Ahom
11800..1184F; Dogra
118A0..118FF; Warang Citi
11900..1195F; Dives Akuru
119A0..119FF; Nandinagari
11A00..11A4F; Zanabazar Square
11A50..11AAF; Soyombo
@ -269,6 +272,7 @@ FFF0..FFFF; Specials
11D00..11D5F; Masaram Gondi
11D60..11DAF; Gunjala Gondi
11EE0..11EFF; Makasar
11FB0..11FBF; Lisu Supplement
11FC0..11FFF; Tamil Supplement
12000..123FF; Cuneiform
12400..1247F; Cuneiform Numbers and Punctuation
@ -285,6 +289,8 @@ FFF0..FFFF; Specials
16FE0..16FFF; Ideographic Symbols and Punctuation
17000..187FF; Tangut
18800..18AFF; Tangut Components
18B00..18CFF; Khitan Small Script
18D00..18D8F; Tangut Supplement
1B000..1B0FF; Kana Supplement
1B100..1B12F; Kana Extended-A
1B130..1B16F; Small Kana Extension
@ -322,12 +328,14 @@ FFF0..FFFF; Specials
1F900..1F9FF; Supplemental Symbols and Pictographs
1FA00..1FA6F; Chess Symbols
1FA70..1FAFF; Symbols and Pictographs Extended-A
1FB00..1FBFF; Symbols for Legacy Computing
20000..2A6DF; CJK Unified Ideographs Extension B
2A700..2B73F; CJK Unified Ideographs Extension C
2B740..2B81F; CJK Unified Ideographs Extension D
2B820..2CEAF; CJK Unified Ideographs Extension E
2CEB0..2EBEF; CJK Unified Ideographs Extension F
2F800..2FA1F; CJK Compatibility Ideographs Supplement
30000..3134F; CJK Unified Ideographs Extension G
E0000..E007F; Tags
E0100..E01EF; Variation Selectors Supplement
F0000..FFFFF; Supplementary Private Use Area-A

View File

@ -1,5 +1,5 @@
# CaseFolding-12.1.0.txt
# Date: 2019-03-10, 10:53:00 GMT
# CaseFolding-13.0.0.txt
# Date: 2019-09-08, 23:30:59 GMT
# © 2019 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
@ -1234,6 +1234,9 @@ A7C2; C; A7C3; # LATIN CAPITAL LETTER ANGLICANA W
A7C4; C; A794; # LATIN CAPITAL LETTER C WITH PALATAL HOOK
A7C5; C; 0282; # LATIN CAPITAL LETTER S WITH HOOK
A7C6; C; 1D8E; # LATIN CAPITAL LETTER Z WITH PALATAL HOOK
A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
A7F5; C; A7F6; # LATIN CAPITAL LETTER REVERSED HALF H
AB70; C; 13A0; # CHEROKEE SMALL LETTER A
AB71; C; 13A1; # CHEROKEE SMALL LETTER E
AB72; C; 13A2; # CHEROKEE SMALL LETTER I

View File

@ -1,5 +1,5 @@
# DerivedAge-12.1.0.txt
# Date: 2019-04-01, 09:10:08 GMT
# DerivedAge-13.0.0.txt
# Date: 2019-09-08, 23:30:59 GMT
# © 2019 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
@ -1794,4 +1794,72 @@ AB66..AB67 ; 12.0 # [2] LATIN SMALL LETTER DZ DIGRAPH WITH RETROFLEX HOOK..
# Total code points: 1
# ================================================
# Age=V13_0
# Newly assigned in Unicode 13.0.0 (March, 2020)
08BE..08C7 ; 13.0 # [10] ARABIC LETTER PEH WITH SMALL V..ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE
0B55 ; 13.0 # ORIYA SIGN OVERLINE
0D04 ; 13.0 # MALAYALAM LETTER VEDIC ANUSVARA
0D81 ; 13.0 # SINHALA SIGN CANDRABINDU
1ABF..1AC0 ; 13.0 # [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
2B97 ; 13.0 # SYMBOL FOR TYPE A ELECTRONICS
2E50..2E52 ; 13.0 # [3] CROSS PATTY WITH RIGHT CROSSBAR..TIRONIAN SIGN CAPITAL ET
31BB..31BF ; 13.0 # [5] BOPOMOFO FINAL LETTER G..BOPOMOFO LETTER AH
4DB6..4DBF ; 13.0 # [10] CJK UNIFIED IDEOGRAPH-4DB6..CJK UNIFIED IDEOGRAPH-4DBF
9FF0..9FFC ; 13.0 # [13] CJK UNIFIED IDEOGRAPH-9FF0..CJK UNIFIED IDEOGRAPH-9FFC
A7C7..A7CA ; 13.0 # [4] LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
A7F5..A7F6 ; 13.0 # [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
A82C ; 13.0 # SYLOTI NAGRI SIGN ALTERNATE HASANTA
AB68..AB6B ; 13.0 # [4] LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE..MODIFIER LETTER RIGHT TACK
1019C ; 13.0 # ASCIA SYMBOL
10E80..10EA9 ; 13.0 # [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EAB..10EAD ; 13.0 # [3] YEZIDI COMBINING HAMZA MARK..YEZIDI HYPHENATION MARK
10EB0..10EB1 ; 13.0 # [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
10FB0..10FCB ; 13.0 # [28] CHORASMIAN LETTER ALEPH..CHORASMIAN NUMBER ONE HUNDRED
11147 ; 13.0 # CHAKMA LETTER VAA
111CE..111CF ; 13.0 # [2] SHARADA VOWEL SIGN PRISHTHAMATRA E..SHARADA SIGN INVERTED CANDRABINDU
1145A ; 13.0 # NEWA DOUBLE COMMA
11460..11461 ; 13.0 # [2] NEWA SIGN JIHVAMULIYA..NEWA SIGN UPADHMANIYA
11900..11906 ; 13.0 # [7] DIVES AKURU LETTER A..DIVES AKURU LETTER E
11909 ; 13.0 # DIVES AKURU LETTER O
1190C..11913 ; 13.0 # [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA
11915..11916 ; 13.0 # [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA
11918..11935 ; 13.0 # [30] DIVES AKURU LETTER DDA..DIVES AKURU VOWEL SIGN E
11937..11938 ; 13.0 # [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O
1193B..11946 ; 13.0 # [12] DIVES AKURU SIGN ANUSVARA..DIVES AKURU END OF TEXT MARK
11950..11959 ; 13.0 # [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE
11FB0 ; 13.0 # LISU LETTER YHA
16FE4 ; 13.0 # KHITAN SMALL SCRIPT FILLER
16FF0..16FF1 ; 13.0 # [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
18AF3..18CD5 ; 13.0 # [483] TANGUT COMPONENT-756..KHITAN SMALL SCRIPT CHARACTER-18CD5
18D00..18D08 ; 13.0 # [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
1F10D..1F10F ; 13.0 # [3] CIRCLED ZERO WITH SLASH..CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH
1F16D..1F16F ; 13.0 # [3] CIRCLED CC..CIRCLED HUMAN FIGURE
1F1AD ; 13.0 # MASK WORK SYMBOL
1F6D6..1F6D7 ; 13.0 # [2] HUT..ELEVATOR
1F6FB..1F6FC ; 13.0 # [2] PICKUP TRUCK..ROLLER SKATE
1F8B0..1F8B1 ; 13.0 # [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
1F90C ; 13.0 # PINCHED FINGERS
1F972 ; 13.0 # SMILING FACE WITH TEAR
1F977..1F978 ; 13.0 # [2] NINJA..DISGUISED FACE
1F9A3..1F9A4 ; 13.0 # [2] MAMMOTH..DODO
1F9AB..1F9AD ; 13.0 # [3] BEAVER..SEAL
1F9CB ; 13.0 # BUBBLE TEA
1FA74 ; 13.0 # THONG SANDAL
1FA83..1FA86 ; 13.0 # [4] BOOMERANG..NESTING DOLLS
1FA96..1FAA8 ; 13.0 # [19] MILITARY HELMET..ROCK
1FAB0..1FAB6 ; 13.0 # [7] FLY..FEATHER
1FAC0..1FAC2 ; 13.0 # [3] ANATOMICAL HEART..PEOPLE HUGGING
1FAD0..1FAD6 ; 13.0 # [7] BLUEBERRIES..TEAPOT
1FB00..1FB92 ; 13.0 # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
1FB94..1FBCA ; 13.0 # [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
1FBF0..1FBF9 ; 13.0 # [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
2A6D7..2A6DD ; 13.0 # [7] CJK UNIFIED IDEOGRAPH-2A6D7..CJK UNIFIED IDEOGRAPH-2A6DD
30000..3134A ; 13.0 # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
# Total code points: 5930
# EOF

View File

@ -1,5 +1,5 @@
# DerivedNormalizationProps-12.1.0.txt
# Date: 2019-04-01, 09:10:23 GMT
# DerivedNormalizationProps-13.0.0.txt
# Date: 2019-09-08, 23:31:08 GMT
# © 2019 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
@ -986,11 +986,12 @@ FB46..FB4E ; NFD_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET
114BB..114BC ; NFD_QC; N # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O
114BE ; NFD_QC; N # Mc TIRHUTA VOWEL SIGN AU
115BA..115BB ; NFD_QC; N # Mc [2] SIDDHAM VOWEL SIGN O..SIDDHAM VOWEL SIGN AU
11938 ; NFD_QC; N # Mc DIVES AKURU VOWEL SIGN O
1D15E..1D164 ; NFD_QC; N # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
1D1BB..1D1C0 ; NFD_QC; N # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK
2F800..2FA1D ; NFD_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
# Total code points: 13232
# Total code points: 13233
# ================================================
@ -1128,8 +1129,9 @@ FB46..FB4E ; NFC_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET
114BA ; NFC_QC; M # Mn TIRHUTA VOWEL SIGN SHORT E
114BD ; NFC_QC; M # Mc TIRHUTA VOWEL SIGN SHORT O
115AF ; NFC_QC; M # Mc SIDDHAM VOWEL SIGN AA
11930 ; NFC_QC; M # Mc DIVES AKURU VOWEL SIGN AA
# Total code points: 110
# Total code points: 111
# ================================================
@ -1469,6 +1471,7 @@ A69C..A69D ; NFKD_QC; N # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFI
A770 ; NFKD_QC; N # Lm MODIFIER LETTER US
A7F8..A7F9 ; NFKD_QC; N # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
AB5C..AB5F ; NFKD_QC; N # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
AB69 ; NFKD_QC; N # Lm MODIFIER LETTER SMALL TURNED W
AC00..D7A3 ; NFKD_QC; N # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
F900..FA0D ; NFKD_QC; N # Lo [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D
FA10 ; NFKD_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA10
@ -1598,6 +1601,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI
114BB..114BC ; NFKD_QC; N # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O
114BE ; NFKD_QC; N # Mc TIRHUTA VOWEL SIGN AU
115BA..115BB ; NFKD_QC; N # Mc [2] SIDDHAM VOWEL SIGN O..SIDDHAM VOWEL SIGN AU
11938 ; NFKD_QC; N # Mc DIVES AKURU VOWEL SIGN O
1D15E..1D164 ; NFKD_QC; N # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
1D1BB..1D1C0 ; NFKD_QC; N # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK
1D400..1D454 ; NFKD_QC; N # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
@ -1683,9 +1687,10 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI
1F210..1F23B ; NFKD_QC; N # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
1F240..1F248 ; NFKD_QC; N # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
1F250..1F251 ; NFKD_QC; N # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
1FBF0..1FBF9 ; NFKD_QC; N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
2F800..2FA1D ; NFKD_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
# Total code points: 16896
# Total code points: 16908
# ================================================
@ -1880,6 +1885,7 @@ A69C..A69D ; NFKC_QC; N # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFI
A770 ; NFKC_QC; N # Lm MODIFIER LETTER US
A7F8..A7F9 ; NFKC_QC; N # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
AB5C..AB5F ; NFKC_QC; N # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
AB69 ; NFKC_QC; N # Lm MODIFIER LETTER SMALL TURNED W
F900..FA0D ; NFKC_QC; N # Lo [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D
FA10 ; NFKC_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA10
FA12 ; NFKC_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA12
@ -2085,9 +2091,10 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI
1F210..1F23B ; NFKC_QC; N # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
1F240..1F248 ; NFKC_QC; N # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
1F250..1F251 ; NFKC_QC; N # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
1FBF0..1FBF9 ; NFKC_QC; N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
2F800..2FA1D ; NFKC_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
# Total code points: 4796
# Total code points: 4807
# ================================================
@ -2135,8 +2142,9 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI
114BA ; NFKC_QC; M # Mn TIRHUTA VOWEL SIGN SHORT E
114BD ; NFKC_QC; M # Mc TIRHUTA VOWEL SIGN SHORT O
115AF ; NFKC_QC; M # Mc SIDDHAM VOWEL SIGN AA
11930 ; NFKC_QC; M # Mc DIVES AKURU VOWEL SIGN AA
# Total code points: 110
# Total code points: 111
# ================================================
@ -2366,10 +2374,11 @@ FB46..FB4E ; Expands_On_NFD # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBRE
114BB..114BC ; Expands_On_NFD # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O
114BE ; Expands_On_NFD # Mc TIRHUTA VOWEL SIGN AU
115BA..115BB ; Expands_On_NFD # Mc [2] SIDDHAM VOWEL SIGN O..SIDDHAM VOWEL SIGN AU
11938 ; Expands_On_NFD # Mc DIVES AKURU VOWEL SIGN O
1D15E..1D164 ; Expands_On_NFD # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
1D1BB..1D1C0 ; Expands_On_NFD # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK
# Total code points: 12215
# Total code points: 12216
# ================================================
@ -2719,6 +2728,7 @@ FFE3 ; Expands_On_NFKD # Sk FULLWIDTH MACRON
114BB..114BC ; Expands_On_NFKD # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O
114BE ; Expands_On_NFKD # Mc TIRHUTA VOWEL SIGN AU
115BA..115BB ; Expands_On_NFKD # Mc [2] SIDDHAM VOWEL SIGN O..SIDDHAM VOWEL SIGN AU
11938 ; Expands_On_NFKD # Mc DIVES AKURU VOWEL SIGN O
1D15E..1D164 ; Expands_On_NFKD # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
1D1BB..1D1C0 ; Expands_On_NFKD # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK
1F100..1F10A ; Expands_On_NFKD # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA
@ -2731,7 +2741,7 @@ FFE3 ; Expands_On_NFKD # Sk FULLWIDTH MACRON
1F213 ; Expands_On_NFKD # So SQUARED KATAKANA DE
1F240..1F248 ; Expands_On_NFKD # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
# Total code points: 13389
# Total code points: 13390
# ================================================
@ -5333,12 +5343,16 @@ A7C2 ; NFKC_CF; A7C3 # L& LATIN CAPITAL LETTER ANGLICAN
A7C4 ; NFKC_CF; A794 # L& LATIN CAPITAL LETTER C WITH PALATAL HOOK
A7C5 ; NFKC_CF; 0282 # L& LATIN CAPITAL LETTER S WITH HOOK
A7C6 ; NFKC_CF; 1D8E # L& LATIN CAPITAL LETTER Z WITH PALATAL HOOK
A7C7 ; NFKC_CF; A7C8 # L& LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
A7C9 ; NFKC_CF; A7CA # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
A7F5 ; NFKC_CF; A7F6 # L& LATIN CAPITAL LETTER REVERSED HALF H
A7F8 ; NFKC_CF; 0127 # Lm MODIFIER LETTER CAPITAL H WITH STROKE
A7F9 ; NFKC_CF; 0153 # Lm MODIFIER LETTER SMALL LIGATURE OE
AB5C ; NFKC_CF; A727 # Lm MODIFIER LETTER SMALL HENG
AB5D ; NFKC_CF; AB37 # Lm MODIFIER LETTER SMALL L WITH INVERTED LAZY S
AB5E ; NFKC_CF; 026B # Lm MODIFIER LETTER SMALL L WITH MIDDLE TILDE
AB5F ; NFKC_CF; AB52 # Lm MODIFIER LETTER SMALL U WITH LEFT HOOK
AB69 ; NFKC_CF; 028D # Lm MODIFIER LETTER SMALL TURNED W
AB70 ; NFKC_CF; 13A0 # L& CHEROKEE SMALL LETTER A
AB71 ; NFKC_CF; 13A1 # L& CHEROKEE SMALL LETTER E
AB72 ; NFKC_CF; 13A2 # L& CHEROKEE SMALL LETTER I
@ -8262,6 +8276,16 @@ FFF0..FFF8 ; NFKC_CF; # Cn [9] <reserved-FFF0>..<reserved-FF
1F248 ; NFKC_CF; 3014 6557 3015 # So TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
1F250 ; NFKC_CF; 5F97 # So CIRCLED IDEOGRAPH ADVANTAGE
1F251 ; NFKC_CF; 53EF # So CIRCLED IDEOGRAPH ACCEPT
1FBF0 ; NFKC_CF; 0030 # Nd SEGMENTED DIGIT ZERO
1FBF1 ; NFKC_CF; 0031 # Nd SEGMENTED DIGIT ONE
1FBF2 ; NFKC_CF; 0032 # Nd SEGMENTED DIGIT TWO
1FBF3 ; NFKC_CF; 0033 # Nd SEGMENTED DIGIT THREE
1FBF4 ; NFKC_CF; 0034 # Nd SEGMENTED DIGIT FOUR
1FBF5 ; NFKC_CF; 0035 # Nd SEGMENTED DIGIT FIVE
1FBF6 ; NFKC_CF; 0036 # Nd SEGMENTED DIGIT SIX
1FBF7 ; NFKC_CF; 0037 # Nd SEGMENTED DIGIT SEVEN
1FBF8 ; NFKC_CF; 0038 # Nd SEGMENTED DIGIT EIGHT
1FBF9 ; NFKC_CF; 0039 # Nd SEGMENTED DIGIT NINE
2F800 ; NFKC_CF; 4E3D # Lo CJK COMPATIBILITY IDEOGRAPH-2F800
2F801 ; NFKC_CF; 4E38 # Lo CJK COMPATIBILITY IDEOGRAPH-2F801
2F802 ; NFKC_CF; 4E41 # Lo CJK COMPATIBILITY IDEOGRAPH-2F802
@ -8802,7 +8826,7 @@ E0080..E00FF ; NFKC_CF; # Cn [128] <reserved-E0080>..<reserved-E
E0100..E01EF ; NFKC_CF; # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
E01F0..E0FFF ; NFKC_CF; # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
# Total code points: 10315
# Total code points: 10329
# ================================================
@ -9567,9 +9591,12 @@ A7BA ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER GLO
A7BC ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER GLOTTAL I
A7BE ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER GLOTTAL U
A7C2 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER ANGLICANA W
A7C4..A7C6 ; Changes_When_NFKC_Casefolded # L& [3] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER Z WITH PALATAL HOOK
A7C4..A7C7 ; Changes_When_NFKC_Casefolded # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
A7C9 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
A7F5 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER REVERSED HALF H
A7F8..A7F9 ; Changes_When_NFKC_Casefolded # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
AB5C..AB5F ; Changes_When_NFKC_Casefolded # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
AB69 ; Changes_When_NFKC_Casefolded # Lm MODIFIER LETTER SMALL TURNED W
AB70..ABBF ; Changes_When_NFKC_Casefolded # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA
F900..FA0D ; Changes_When_NFKC_Casefolded # Lo [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D
FA10 ; Changes_When_NFKC_Casefolded # Lo CJK COMPATIBILITY IDEOGRAPH-FA10
@ -9787,6 +9814,7 @@ FFF0..FFF8 ; Changes_When_NFKC_Casefolded # Cn [9] <reserved-FFF0>..<reserv
1F210..1F23B ; Changes_When_NFKC_Casefolded # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
1F240..1F248 ; Changes_When_NFKC_Casefolded # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
1F250..1F251 ; Changes_When_NFKC_Casefolded # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
1FBF0..1FBF9 ; Changes_When_NFKC_Casefolded # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
2F800..2FA1D ; Changes_When_NFKC_Casefolded # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
E0000 ; Changes_When_NFKC_Casefolded # Cn <reserved-E0000>
E0001 ; Changes_When_NFKC_Casefolded # Cf LANGUAGE TAG
@ -9796,6 +9824,6 @@ E0080..E00FF ; Changes_When_NFKC_Casefolded # Cn [128] <reserved-E0080>..<reser
E0100..E01EF ; Changes_When_NFKC_Casefolded # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
E01F0..E0FFF ; Changes_When_NFKC_Casefolded # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
# Total code points: 10315
# Total code points: 10329
# EOF

View File

@ -1,5 +1,5 @@
# GraphemeBreakProperty-12.1.0.txt
# Date: 2019-03-10, 10:53:12 GMT
# GraphemeBreakProperty-13.0.0.txt
# Date: 2019-10-21, 14:30:35 GMT
# © 2019 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
@ -26,11 +26,13 @@
110BD ; Prepend # Cf KAITHI NUMBER SIGN
110CD ; Prepend # Cf KAITHI NUMBER SIGN ABOVE
111C2..111C3 ; Prepend # Lo [2] SHARADA SIGN JIHVAMULIYA..SHARADA SIGN UPADHMANIYA
1193F ; Prepend # Lo DIVES AKURU PREFIXED NASAL SIGN
11941 ; Prepend # Lo DIVES AKURU INITIAL RA
11A3A ; Prepend # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA
11A84..11A89 ; Prepend # Lo [6] SOYOMBO SIGN JIHVAMULIYA..SOYOMBO CLUSTER-INITIAL LETTER SA
11D46 ; Prepend # Lo MASARAM GONDI REPHA
# Total code points: 22
# Total code points: 24
# ================================================
@ -139,7 +141,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
0B3F ; Extend # Mn ORIYA VOWEL SIGN I
0B41..0B44 ; Extend # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
0B4D ; Extend # Mn ORIYA SIGN VIRAMA
0B56 ; Extend # Mn ORIYA AI LENGTH MARK
0B55..0B56 ; Extend # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK
0B57 ; Extend # Mc ORIYA AU LENGTH MARK
0B62..0B63 ; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
0B82 ; Extend # Mn TAMIL SIGN ANUSVARA
@ -169,6 +171,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
0D4D ; Extend # Mn MALAYALAM SIGN VIRAMA
0D57 ; Extend # Mc MALAYALAM AU LENGTH MARK
0D62..0D63 ; Extend # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
0D81 ; Extend # Mn SINHALA SIGN CANDRABINDU
0DCA ; Extend # Mn SINHALA SIGN AL-LAKUNA
0DCF ; Extend # Mc SINHALA VOWEL SIGN AELA-PILLA
0DD2..0DD4 ; Extend # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
@ -229,6 +232,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY
1ABF..1AC0 ; Extend # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B34 ; Extend # Mn BALINESE SIGN REREKAN
1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG
@ -275,6 +279,7 @@ A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA
A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA
A80B ; Extend # Mn SYLOTI NAGRI SIGN ANUSVARA
A825..A826 ; Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
A82C ; Extend # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA
A8C4..A8C5 ; Extend # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU
A8E0..A8F1 ; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
A8FF ; Extend # Mn DEVANAGARI VOWEL SIGN AY
@ -315,6 +320,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
10A3F ; Extend # Mn KHAROSHTHI VIRAMA
10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
11001 ; Extend # Mn BRAHMI SIGN ANUSVARA
11038..11046 ; Extend # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
@ -328,6 +334,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
11180..11181 ; Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA
111B6..111BE ; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O
111C9..111CC ; Extend # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK
111CF ; Extend # Mn SHARADA SIGN INVERTED CANDRABINDU
1122F..11231 ; Extend # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI
11234 ; Extend # Mn KHOJKI SIGN ANUSVARA
11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
@ -368,6 +375,10 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
11727..1172B ; Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER
1182F..11837 ; Extend # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA
11839..1183A ; Extend # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA
11930 ; Extend # Mc DIVES AKURU VOWEL SIGN AA
1193B..1193C ; Extend # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU
1193E ; Extend # Mn DIVES AKURU VIRAMA
11943 ; Extend # Mn DIVES AKURU SIGN NUKTA
119D4..119D7 ; Extend # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR
119DA..119DB ; Extend # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI
119E0 ; Extend # Mn NANDINAGARI SIGN VIRAMA
@ -399,6 +410,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR
16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
16FE4 ; Extend # Mn KHITAN SMALL SCRIPT FILLER
1BC9D..1BC9E ; Extend # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
1D165 ; Extend # Mc MUSICAL SYMBOL COMBINING STEM
1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
@ -426,7 +438,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 1970
# Total code points: 1984
# ================================================
@ -539,6 +551,7 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
11182 ; SpacingMark # Mc SHARADA SIGN VISARGA
111B3..111B5 ; SpacingMark # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II
111BF..111C0 ; SpacingMark # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA
111CE ; SpacingMark # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E
1122C..1122E ; SpacingMark # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II
11232..11233 ; SpacingMark # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU
11235 ; SpacingMark # Mc KHOJKI SIGN VIRAMA
@ -570,6 +583,11 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
11726 ; SpacingMark # Mc AHOM VOWEL SIGN E
1182C..1182E ; SpacingMark # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II
11838 ; SpacingMark # Mc DOGRA SIGN VISARGA
11931..11935 ; SpacingMark # Mc [5] DIVES AKURU VOWEL SIGN I..DIVES AKURU VOWEL SIGN E
11937..11938 ; SpacingMark # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O
1193D ; SpacingMark # Mc DIVES AKURU SIGN HALANTA
11940 ; SpacingMark # Mc DIVES AKURU MEDIAL YA
11942 ; SpacingMark # Mc DIVES AKURU MEDIAL RA
119D1..119D3 ; SpacingMark # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II
119DC..119DF ; SpacingMark # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA
119E4 ; SpacingMark # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E
@ -586,10 +604,11 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
11D96 ; SpacingMark # Mc GUNJALA GONDI SIGN VISARGA
11EF5..11EF6 ; SpacingMark # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O
16F51..16F87 ; SpacingMark # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
16FF0..16FF1 ; SpacingMark # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT
# Total code points: 375
# Total code points: 388
# ================================================

View File

@ -1,6 +1,6 @@
# LineBreak-12.1.0.txt
# Date: 2019-03-31, 22:04:15 GMT [KW, LI]
# © 2019 Unicode®, Inc.
# LineBreak-13.0.0.txt
# Date: 2020-02-17, 07:43:02 GMT [KW, LI]
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@ -32,9 +32,10 @@
# outside of allocated blocks, default to "ID":
# Plane 2: U+20000..U+2FFFD
# Plane 3: U+30000..U+3FFFD
# - All unassigned code points in the following Plane 1 range, whether
# - All unassigned code points in the following Plane 1 ranges, whether
# inside or outside of allocated blocks, also default to "ID":
# Plane 1 range: U+1F000..U+1FFFD
# Plane 1 range: U+1F000..U+1FAFF
# Plane 1 range: U+1FC00..U+1FFFD
# - The unassigned code points in the following block default to "PR":
# Currency Symbols: U+20A0..U+20CF
#
@ -277,7 +278,7 @@
085E;AL # Po MANDAIC PUNCTUATION
0860..086A;AL # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
08A0..08B4;AL # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW
08B6..08BD;AL # Lo [8] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER AFRICAN NOON
08B6..08C7;AL # Lo [18] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE
08D3..08E1;CM # Mn [15] ARABIC SMALL LOW WAW..ARABIC SMALL HIGH SIGN SAFHA
08E2;AL # Cf ARABIC DISPUTED END OF AYAH
08E3..08FF;CM # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA
@ -396,7 +397,7 @@
0B47..0B48;CM # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
0B4B..0B4C;CM # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
0B4D;CM # Mn ORIYA SIGN VIRAMA
0B56;CM # Mn ORIYA AI LENGTH MARK
0B55..0B56;CM # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK
0B57;CM # Mc ORIYA AU LENGTH MARK
0B5C..0B5D;AL # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA
0B5F..0B61;AL # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL
@ -475,7 +476,7 @@
0CF1..0CF2;AL # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
0D00..0D01;CM # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
0D02..0D03;CM # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
0D05..0D0C;AL # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
0D04..0D0C;AL # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
0D0E..0D10;AL # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
0D12..0D3A;AL # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
0D3B..0D3C;CM # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
@ -496,6 +497,7 @@
0D70..0D78;AL # No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS
0D79;PO # So MALAYALAM DATE MARK
0D7A..0D7F;AL # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
0D81;CM # Mn SINHALA SIGN CANDRABINDU
0D82..0D83;CM # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
0D85..0D96;AL # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
0D9A..0DB1;AL # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA
@ -764,6 +766,7 @@
1AA8..1AAD;SA # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG
1AB0..1ABD;CM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
1ABE;CM # Me COMBINING PARENTHESES OVERLAY
1ABF..1AC0;CM # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
1B00..1B03;CM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B04;CM # Mc BALINESE SIGN BISAH
1B05..1B33;AL # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA
@ -1303,7 +1306,7 @@
2B55..2B59;AI # So [5] HEAVY LARGE CIRCLE..HEAVY CIRCLED SALTIRE
2B5A..2B73;AL # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR
2B76..2B95;AL # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW
2B98..2BFF;AL # So [104] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..HELLSCHREIBER PAUSE SYMBOL
2B97..2BFF;AL # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL
2C00..2C2E;AL # Lu [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
2C30..2C5E;AL # Ll [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
2C60..2C7B;AL # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E
@ -1385,6 +1388,8 @@
2E4C;BA # Po MEDIEVAL COMMA
2E4D;AL # Po PARAGRAPHUS MARK
2E4E..2E4F;BA # Po [2] PUNCTUS ELEVATUS MARK..CORNISH VERSE DIVIDER
2E50..2E51;AL # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR
2E52;AL # Po TIRONIAN SIGN CAPITAL ET
2E80..2E99;ID # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP
2E9B..2EF3;ID # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
2F00..2FD5;ID # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
@ -1488,7 +1493,7 @@
3190..3191;ID # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK
3192..3195;ID # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
3196..319F;ID # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
31A0..31BA;ID # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY
31A0..31BF;ID # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
31C0..31E3;ID # So [36] CJK STROKE T..CJK STROKE Q
31F0..31FF;CJ # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
3200..321E;ID # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
@ -1503,11 +1508,10 @@
32B1..32BF;ID # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY
32C0..32FF;ID # So [64] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..SQUARE ERA NAME REIWA
3300..33FF;ID # So [256] SQUARE APAATO..SQUARE GAL
3400..4DB5;ID # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
4DB6..4DBF;ID # Cn [10] <reserved-4DB6>..<reserved-4DBF>
3400..4DBF;ID # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
4DC0..4DFF;AL # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION
4E00..9FEF;ID # Lo [20976] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEF
9FF0..9FFF;ID # Cn [16] <reserved-9FF0>..<reserved-9FFF>
4E00..9FFC;ID # Lo [20989] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFC
9FFD..9FFF;ID # Cn [3] <reserved-9FFD>..<reserved-9FFF>
A000..A014;ID # Lo [21] YI SYLLABLE IT..YI SYLLABLE E
A015;NS # Lm YI SYLLABLE WU
A016..A48C;ID # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
@ -1550,7 +1554,8 @@ A789..A78A;AL # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUA
A78B..A78E;AL # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
A78F;AL # Lo LATIN LETTER SINOLOGICAL DOT
A790..A7BF;AL # L& [48] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER GLOTTAL U
A7C2..A7C6;AL # L& [5] LATIN CAPITAL LETTER ANGLICANA W..LATIN CAPITAL LETTER Z WITH PALATAL HOOK
A7C2..A7CA;AL # L& [9] LATIN CAPITAL LETTER ANGLICANA W..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
A7F5..A7F6;AL # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
A7F7;AL # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I
A7F8..A7F9;AL # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
A7FA;AL # Ll LATIN LETTER SMALL CAPITAL TURNED M
@ -1566,6 +1571,7 @@ A823..A824;CM # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIG
A825..A826;CM # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
A827;CM # Mc SYLOTI NAGRI VOWEL SIGN OO
A828..A82B;AL # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4
A82C;CM # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA
A830..A835;AL # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS
A836..A837;AL # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK
A838;PO # Sc NORTH INDIC RUPEE MARK
@ -1670,7 +1676,9 @@ AB28..AB2E;AL # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO
AB30..AB5A;AL # Ll [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
AB5B;AL # Sk MODIFIER BREVE WITH INVERTED BREVE
AB5C..AB5F;AL # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
AB60..AB67;AL # Ll [8] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TS DIGRAPH WITH RETROFLEX HOOK
AB60..AB68;AL # Ll [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE
AB69;AL # Lm MODIFIER LETTER SMALL TURNED W
AB6A..AB6B;AL # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK
AB70..ABBF;AL # Ll [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA
ABC0..ABE2;AL # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM
ABE3..ABE4;CM # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP
@ -2642,7 +2650,7 @@ FFFD;AI # So REPLACEMENT CHARACTER
10179..10189;AL # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN
1018A..1018B;AL # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN
1018C..1018E;AL # So [3] GREEK SINUSOID SIGN..NOMISMA SIGN
10190..1019B;AL # So [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN
10190..1019C;AL # So [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL
101A0;AL # So GREEK SYMBOL TAU RHO
101D0..101FC;AL # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
101FD;CM # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
@ -2746,6 +2754,10 @@ FFFD;AI # So REPLACEMENT CHARACTER
10D24..10D27;CM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10D30..10D39;NU # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE
10E60..10E7E;AL # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
10E80..10EA9;AL # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EAB..10EAC;CM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
10EAD;BA # Pd YEZIDI HYPHENATION MARK
10EB0..10EB1;AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
10F00..10F1C;AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
10F1D..10F26;AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
10F27;AL # Lo OLD SOGDIAN LIGATURE AYIN-DALETH
@ -2753,6 +2765,8 @@ FFFD;AI # So REPLACEMENT CHARACTER
10F46..10F50;CM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
10F51..10F54;AL # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED
10F55..10F59;AL # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT
10FB0..10FC4;AL # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW
10FC5..10FCB;AL # No [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED
10FE0..10FF6;AL # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH
11000;CM # Mc BRAHMI SIGN CANDRABINDU
11001;CM # Mn BRAHMI SIGN ANUSVARA
@ -2786,6 +2800,7 @@ FFFD;AI # So REPLACEMENT CHARACTER
11140..11143;BA # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK
11144;AL # Lo CHAKMA LETTER LHAA
11145..11146;CM # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI
11147;AL # Lo CHAKMA LETTER VAA
11150..11172;AL # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA
11173;CM # Mn MAHAJANI SIGN NUKTA
11174;AL # Po MAHAJANI ABBREVIATION SIGN
@ -2803,6 +2818,8 @@ FFFD;AI # So REPLACEMENT CHARACTER
111C8;BA # Po SHARADA SEPARATOR
111C9..111CC;CM # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK
111CD;AL # Po SHARADA SUTRA MARK
111CE;CM # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E
111CF;CM # Mn SHARADA SIGN INVERTED CANDRABINDU
111D0..111D9;NU # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE
111DA;AL # Lo SHARADA EKAM
111DB;BB # Po SHARADA SIGN SIDDHAM
@ -2865,10 +2882,10 @@ FFFD;AI # So REPLACEMENT CHARACTER
1144B..1144E;BA # Po [4] NEWA DANDA..NEWA GAP FILLER
1144F;AL # Po NEWA ABBREVIATION SIGN
11450..11459;NU # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE
1145B;BA # Po NEWA PLACEHOLDER MARK
1145A..1145B;BA # Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK
1145D;AL # Po NEWA INSERTION SIGN
1145E;CM # Mn NEWA SANDHI MARK
1145F;AL # Lo NEWA LETTER VEDIC ANUSVARA
1145F..11461;AL # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA
11480..114AF;AL # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA
114B0..114B2;CM # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II
114B3..114B8;CM # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL
@ -2938,6 +2955,23 @@ FFFD;AI # So REPLACEMENT CHARACTER
118E0..118E9;NU # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
118EA..118F2;AL # No [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY
118FF;AL # Lo WARANG CITI OM
11900..11906;AL # Lo [7] DIVES AKURU LETTER A..DIVES AKURU LETTER E
11909;AL # Lo DIVES AKURU LETTER O
1190C..11913;AL # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA
11915..11916;AL # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA
11918..1192F;AL # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA
11930..11935;CM # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E
11937..11938;CM # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O
1193B..1193C;CM # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU
1193D;CM # Mc DIVES AKURU SIGN HALANTA
1193E;CM # Mn DIVES AKURU VIRAMA
1193F;AL # Lo DIVES AKURU PREFIXED NASAL SIGN
11940;CM # Mc DIVES AKURU MEDIAL YA
11941;AL # Lo DIVES AKURU INITIAL RA
11942;CM # Mc DIVES AKURU MEDIAL RA
11943;CM # Mn DIVES AKURU SIGN NUKTA
11944..11946;BA # Po [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK
11950..11959;NU # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE
119A0..119A7;AL # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR
119AA..119D0;AL # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA
119D1..119D3;CM # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II
@ -3021,6 +3055,7 @@ FFFD;AI # So REPLACEMENT CHARACTER
11EF3..11EF4;CM # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U
11EF5..11EF6;CM # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O
11EF7..11EF8;AL # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION
11FB0;AL # Lo LISU LETTER YHA
11FC0..11FD4;AL # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH
11FD5..11FDC;AL # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI
11FDD..11FE0;PO # Sc [4] TAMIL SIGN KAACU..TAMIL SIGN VARAAKAN
@ -3083,8 +3118,12 @@ FFFD;AI # So REPLACEMENT CHARACTER
16FE0..16FE1;NS # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK
16FE2;NS # Po OLD CHINESE HOOK MARK
16FE3;NS # Lm OLD CHINESE ITERATION MARK
16FE4;GL # Mn KHITAN SMALL SCRIPT FILLER
16FF0..16FF1;CM # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
17000..187F7;ID # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
18800..18AF2;ID # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755
18800..18AFF;ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768
18B00..18CD5;AL # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5
18D00..18D08;ID # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
1B000..1B0FF;ID # Lo [256] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER RE-2
1B100..1B11E;ID # Lo [31] HENTAIGANA LETTER RE-3..HENTAIGANA LETTER N-MU-MO-2
1B150..1B152;CJ # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
@ -3250,14 +3289,15 @@ FFFD;AI # So REPLACEMENT CHARACTER
1F0D1..1F0F5;ID # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21
1F0F6..1F0FF;ID # Cn [10] <reserved-1F0F6>..<reserved-1F0FF>
1F100..1F10C;AI # No [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
1F10D..1F10F;ID # Cn [3] <reserved-1F10D>..<reserved-1F10F>
1F10D..1F10F;ID # So [3] CIRCLED ZERO WITH SLASH..CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH
1F110..1F12D;AI # So [30] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED CD
1F12E..1F12F;AL # So [2] CIRCLED WZ..COPYLEFT SYMBOL
1F130..1F169;AI # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
1F16A..1F16C;AL # So [3] RAISED MC SIGN..RAISED MR SIGN
1F16D..1F16F;ID # Cn [3] <reserved-1F16D>..<reserved-1F16F>
1F16D..1F16F;ID # So [3] CIRCLED CC..CIRCLED HUMAN FIGURE
1F170..1F1AC;AI # So [61] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VOD
1F1AD..1F1E5;ID # Cn [57] <reserved-1F1AD>..<reserved-1F1E5>
1F1AD;ID # So MASK WORK SYMBOL
1F1AE..1F1E5;ID # Cn [56] <reserved-1F1AE>..<reserved-1F1E5>
1F1E6..1F1FF;RI # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
1F200..1F202;ID # So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA
1F203..1F20F;ID # Cn [13] <reserved-1F203>..<reserved-1F20F>
@ -3348,12 +3388,12 @@ FFFD;AI # So REPLACEMENT CHARACTER
1F6C0;EB # So BATH
1F6C1..1F6CB;ID # So [11] BATHTUB..COUCH AND LAMP
1F6CC;EB # So SLEEPING ACCOMMODATION
1F6CD..1F6D5;ID # So [9] SHOPPING BAGS..HINDU TEMPLE
1F6D6..1F6DF;ID # Cn [10] <reserved-1F6D6>..<reserved-1F6DF>
1F6CD..1F6D7;ID # So [11] SHOPPING BAGS..ELEVATOR
1F6D8..1F6DF;ID # Cn [8] <reserved-1F6D8>..<reserved-1F6DF>
1F6E0..1F6EC;ID # So [13] HAMMER AND WRENCH..AIRPLANE ARRIVING
1F6ED..1F6EF;ID # Cn [3] <reserved-1F6ED>..<reserved-1F6EF>
1F6F0..1F6FA;ID # So [11] SATELLITE..AUTO RICKSHAW
1F6FB..1F6FF;ID # Cn [5] <reserved-1F6FB>..<reserved-1F6FF>
1F6F0..1F6FC;ID # So [13] SATELLITE..ROLLER SKATE
1F6FD..1F6FF;ID # Cn [3] <reserved-1F6FD>..<reserved-1F6FF>
1F700..1F773;AL # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
1F774..1F77F;ID # Cn [12] <reserved-1F774>..<reserved-1F77F>
1F780..1F7D4;AL # So [85] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..HEAVY TWELVE POINTED PINWHEEL STAR
@ -3370,9 +3410,11 @@ FFFD;AI # So REPLACEMENT CHARACTER
1F860..1F887;AL # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
1F888..1F88F;ID # Cn [8] <reserved-1F888>..<reserved-1F88F>
1F890..1F8AD;AL # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
1F8AE..1F8FF;ID # Cn [82] <reserved-1F8AE>..<reserved-1F8FF>
1F8AE..1F8AF;ID # Cn [2] <reserved-1F8AE>..<reserved-1F8AF>
1F8B0..1F8B1;ID # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
1F8B2..1F8FF;ID # Cn [78] <reserved-1F8B2>..<reserved-1F8FF>
1F900..1F90B;AL # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT
1F90C;ID # Cn <reserved-1F90C>
1F90C;EB # So PINCHED FINGERS
1F90D..1F90E;ID # So [2] WHITE HEART..BROWN HEART
1F90F;EB # So PINCHING HAND
1F910..1F917;ID # So [8] ZIPPER-MOUTH FACE..HUGGING FACE
@ -3383,22 +3425,18 @@ FFFD;AI # So REPLACEMENT CHARACTER
1F930..1F939;EB # So [10] PREGNANT WOMAN..JUGGLING
1F93A..1F93B;ID # So [2] FENCER..MODERN PENTATHLON
1F93C..1F93E;EB # So [3] WRESTLERS..HANDBALL
1F93F..1F971;ID # So [51] DIVING MASK..YAWNING FACE
1F972;ID # Cn <reserved-1F972>
1F973..1F976;ID # So [4] FACE WITH PARTY HORN AND PARTY HAT..FREEZING FACE
1F977..1F979;ID # Cn [3] <reserved-1F977>..<reserved-1F979>
1F97A..1F9A2;ID # So [41] FACE WITH PLEADING EYES..SWAN
1F9A3..1F9A4;ID # Cn [2] <reserved-1F9A3>..<reserved-1F9A4>
1F9A5..1F9AA;ID # So [6] SLOTH..OYSTER
1F9AB..1F9AD;ID # Cn [3] <reserved-1F9AB>..<reserved-1F9AD>
1F9AE..1F9B4;ID # So [7] GUIDE DOG..BONE
1F93F..1F976;ID # So [56] DIVING MASK..FREEZING FACE
1F977;EB # So NINJA
1F978;ID # So DISGUISED FACE
1F979;ID # Cn <reserved-1F979>
1F97A..1F9B4;ID # So [59] FACE WITH PLEADING EYES..BONE
1F9B5..1F9B6;EB # So [2] LEG..FOOT
1F9B7;ID # So TOOTH
1F9B8..1F9B9;EB # So [2] SUPERHERO..SUPERVILLAIN
1F9BA;ID # So SAFETY VEST
1F9BB;EB # So EAR WITH HEARING AID
1F9BC..1F9CA;ID # So [15] MOTORIZED WHEELCHAIR..ICE CUBE
1F9CB..1F9CC;ID # Cn [2] <reserved-1F9CB>..<reserved-1F9CC>
1F9BC..1F9CB;ID # So [16] MOTORIZED WHEELCHAIR..BUBBLE TEA
1F9CC;ID # Cn <reserved-1F9CC>
1F9CD..1F9CF;EB # So [3] STANDING PERSON..DEAF PERSON
1F9D0;ID # So FACE WITH MONOCLE
1F9D1..1F9DD;EB # So [13] ADULT..ELF
@ -3407,17 +3445,26 @@ FFFD;AI # So REPLACEMENT CHARACTER
1FA54..1FA5F;ID # Cn [12] <reserved-1FA54>..<reserved-1FA5F>
1FA60..1FA6D;ID # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
1FA6E..1FA6F;ID # Cn [2] <reserved-1FA6E>..<reserved-1FA6F>
1FA70..1FA73;ID # So [4] BALLET SHOES..SHORTS
1FA74..1FA77;ID # Cn [4] <reserved-1FA74>..<reserved-1FA77>
1FA70..1FA74;ID # So [5] BALLET SHOES..THONG SANDAL
1FA75..1FA77;ID # Cn [3] <reserved-1FA75>..<reserved-1FA77>
1FA78..1FA7A;ID # So [3] DROP OF BLOOD..STETHOSCOPE
1FA7B..1FA7F;ID # Cn [5] <reserved-1FA7B>..<reserved-1FA7F>
1FA80..1FA82;ID # So [3] YO-YO..PARACHUTE
1FA83..1FA8F;ID # Cn [13] <reserved-1FA83>..<reserved-1FA8F>
1FA90..1FA95;ID # So [6] RINGED PLANET..BANJO
1FA96..1FAFF;ID # Cn [106] <reserved-1FA96>..<reserved-1FAFF>
1FB00..1FFFD;ID # Cn [1278] <reserved-1FB00>..<reserved-1FFFD>
20000..2A6D6;ID # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
2A6D7..2A6FF;ID # Cn [41] <reserved-2A6D7>..<reserved-2A6FF>
1FA80..1FA86;ID # So [7] YO-YO..NESTING DOLLS
1FA87..1FA8F;ID # Cn [9] <reserved-1FA87>..<reserved-1FA8F>
1FA90..1FAA8;ID # So [25] RINGED PLANET..ROCK
1FAA9..1FAAF;ID # Cn [7] <reserved-1FAA9>..<reserved-1FAAF>
1FAB0..1FAB6;ID # So [7] FLY..FEATHER
1FAB7..1FABF;ID # Cn [9] <reserved-1FAB7>..<reserved-1FABF>
1FAC0..1FAC2;ID # So [3] ANATOMICAL HEART..PEOPLE HUGGING
1FAC3..1FACF;ID # Cn [13] <reserved-1FAC3>..<reserved-1FACF>
1FAD0..1FAD6;ID # So [7] BLUEBERRIES..TEAPOT
1FAD7..1FAFF;ID # Cn [41] <reserved-1FAD7>..<reserved-1FAFF>
1FB00..1FB92;AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
1FB94..1FBCA;AL # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
1FBF0..1FBF9;NU # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
1FC00..1FFFD;ID # Cn [1022] <reserved-1FC00>..<reserved-1FFFD>
20000..2A6DD;ID # Lo [42718] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DD
2A6DE..2A6FF;ID # Cn [34] <reserved-2A6DE>..<reserved-2A6FF>
2A700..2B734;ID # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
2B735..2B73F;ID # Cn [11] <reserved-2B735>..<reserved-2B73F>
2B740..2B81D;ID # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
@ -3429,7 +3476,8 @@ FFFD;AI # So REPLACEMENT CHARACTER
2F800..2FA1D;ID # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
2FA1E..2FA1F;ID # Cn [2] <reserved-2FA1E>..<reserved-2FA1F>
2FA20..2FFFD;ID # Cn [1502] <reserved-2FA20>..<reserved-2FFFD>
30000..3FFFD;ID # Cn [65534] <reserved-30000>..<reserved-3FFFD>
30000..3134A;ID # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
3134B..3FFFD;ID # Cn [60595] <reserved-3134B>..<reserved-3FFFD>
E0001;CM # Cf LANGUAGE TAG
E0020..E007F;CM # Cf [96] TAG SPACE..CANCEL TAG
E0100..E01EF;CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256

View File

@ -1,5 +1,5 @@
# NormalizationCorrections-12.1.0.txt
# Date: 2019-03-08, 23:59:00 GMT [KW, LI]
# NormalizationCorrections-13.0.0.txt
# Date: 2019-09-09, 19:50:00 GMT [KW, LI]
# © 2019 Unicode®, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#

View File

@ -1,6 +1,6 @@
# Scripts-12.1.0.txt
# Date: 2019-04-01, 09:10:42 GMT
# © 2019 Unicode®, Inc.
# Scripts-13.0.0.txt
# Date: 2020-01-22, 00:07:43 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@ -89,7 +89,6 @@
037E ; Common # Po GREEK QUESTION MARK
0385 ; Common # Sk GREEK DIALYTIKA TONOS
0387 ; Common # Po GREEK ANO TELEIA
0589 ; Common # Po ARMENIAN FULL STOP
0605 ; Common # Cf ARABIC NUMBER MARK ABOVE
060C ; Common # Po ARABIC COMMA
061B ; Common # Po ARABIC SEMICOLON
@ -308,7 +307,7 @@
2B47..2B4C ; Common # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
2B4D..2B73 ; Common # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR
2B76..2B95 ; Common # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW
2B98..2BFF ; Common # So [104] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..HELLSCHREIBER PAUSE SYMBOL
2B97..2BFF ; Common # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL
2E00..2E01 ; Common # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
2E02 ; Common # Pi LEFT SUBSTITUTION BRACKET
2E03 ; Common # Pf RIGHT SUBSTITUTION BRACKET
@ -347,6 +346,8 @@
2E41 ; Common # Po REVERSED COMMA
2E42 ; Common # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
2E43..2E4F ; Common # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER
2E50..2E51 ; Common # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR
2E52 ; Common # Po TIRONIAN SIGN CAPITAL ET
2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
3000 ; Common # Zs IDEOGRAPHIC SPACE
3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
@ -414,6 +415,7 @@ A839 ; Common # So NORTH INDIC QUANTITY MARK
A92E ; Common # Po KAYAH LI SIGN CWI
A9CF ; Common # Lm JAVANESE PANGRANGKEP
AB5B ; Common # Sk MODIFIER BREVE WITH INVERTED BREVE
AB6A..AB6B ; Common # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK
FD3E ; Common # Pe ORNATE LEFT PARENTHESIS
FD3F ; Common # Ps ORNATE RIGHT PARENTHESIS
FE10..FE16 ; Common # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK
@ -506,7 +508,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
10100..10102 ; Common # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK
10107..10133 ; Common # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND
10137..1013F ; Common # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT
10190..1019B ; Common # So [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN
10190..1019C ; Common # So [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL
101D0..101FC ; Common # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
102E1..102FB ; Common # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED
16FE2 ; Common # Po OLD CHINESE HOOK MARK
@ -581,8 +583,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
1F0C1..1F0CF ; Common # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER
1F0D1..1F0F5 ; Common # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21
1F100..1F10C ; Common # No [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
1F110..1F16C ; Common # So [93] PARENTHESIZED LATIN CAPITAL LETTER A..RAISED MR SIGN
1F170..1F1AC ; Common # So [61] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VOD
1F10D..1F1AD ; Common # So [161] CIRCLED ZERO WITH SLASH..MASK WORK SYMBOL
1F1E6..1F1FF ; Common # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
1F201..1F202 ; Common # So [2] SQUARED KATAKANA KOKO..SQUARED KATAKANA SA
1F210..1F23B ; Common # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
@ -591,9 +592,9 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
1F260..1F265 ; Common # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI
1F300..1F3FA ; Common # So [251] CYCLONE..AMPHORA
1F3FB..1F3FF ; Common # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
1F400..1F6D5 ; Common # So [726] RAT..HINDU TEMPLE
1F400..1F6D7 ; Common # So [728] RAT..ELEVATOR
1F6E0..1F6EC ; Common # So [13] HAMMER AND WRENCH..AIRPLANE ARRIVING
1F6F0..1F6FA ; Common # So [11] SATELLITE..AUTO RICKSHAW
1F6F0..1F6FC ; Common # So [13] SATELLITE..ROLLER SKATE
1F700..1F773 ; Common # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
1F780..1F7D8 ; Common # So [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE
1F7E0..1F7EB ; Common # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE
@ -602,22 +603,25 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
1F850..1F859 ; Common # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
1F860..1F887 ; Common # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
1F890..1F8AD ; Common # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
1F900..1F90B ; Common # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT
1F90D..1F971 ; Common # So [101] WHITE HEART..YAWNING FACE
1F973..1F976 ; Common # So [4] FACE WITH PARTY HORN AND PARTY HAT..FREEZING FACE
1F97A..1F9A2 ; Common # So [41] FACE WITH PLEADING EYES..SWAN
1F9A5..1F9AA ; Common # So [6] SLOTH..OYSTER
1F9AE..1F9CA ; Common # So [29] GUIDE DOG..ICE CUBE
1F8B0..1F8B1 ; Common # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
1F900..1F978 ; Common # So [121] CIRCLED CROSS FORMEE WITH FOUR DOTS..DISGUISED FACE
1F97A..1F9CB ; Common # So [82] FACE WITH PLEADING EYES..BUBBLE TEA
1F9CD..1FA53 ; Common # So [135] STANDING PERSON..BLACK CHESS KNIGHT-BISHOP
1FA60..1FA6D ; Common # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
1FA70..1FA73 ; Common # So [4] BALLET SHOES..SHORTS
1FA70..1FA74 ; Common # So [5] BALLET SHOES..THONG SANDAL
1FA78..1FA7A ; Common # So [3] DROP OF BLOOD..STETHOSCOPE
1FA80..1FA82 ; Common # So [3] YO-YO..PARACHUTE
1FA90..1FA95 ; Common # So [6] RINGED PLANET..BANJO
1FA80..1FA86 ; Common # So [7] YO-YO..NESTING DOLLS
1FA90..1FAA8 ; Common # So [25] RINGED PLANET..ROCK
1FAB0..1FAB6 ; Common # So [7] FLY..FEATHER
1FAC0..1FAC2 ; Common # So [3] ANATOMICAL HEART..PEOPLE HUGGING
1FAD0..1FAD6 ; Common # So [7] BLUEBERRIES..TEAPOT
1FB00..1FB92 ; Common # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
1FB94..1FBCA ; Common # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
1FBF0..1FBF9 ; Common # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
E0001 ; Common # Cf LANGUAGE TAG
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
# Total code points: 7805
# Total code points: 8087
# ================================================
@ -661,7 +665,8 @@ A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSU
A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
A78F ; Latin # Lo LATIN LETTER SINOLOGICAL DOT
A790..A7BF ; Latin # L& [48] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER GLOTTAL U
A7C2..A7C6 ; Latin # L& [5] LATIN CAPITAL LETTER ANGLICANA W..LATIN CAPITAL LETTER Z WITH PALATAL HOOK
A7C2..A7CA ; Latin # L& [9] LATIN CAPITAL LETTER ANGLICANA W..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
A7F5..A7F6 ; Latin # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
A7F7 ; Latin # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I
A7F8..A7F9 ; Latin # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
A7FA ; Latin # L& LATIN LETTER SMALL CAPITAL TURNED M
@ -669,12 +674,13 @@ A7FB..A7FF ; Latin # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGR
AB30..AB5A ; Latin # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
AB5C..AB5F ; Latin # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
AB60..AB64 ; Latin # L& [5] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER INVERTED ALPHA
AB66..AB67 ; Latin # L& [2] LATIN SMALL LETTER DZ DIGRAPH WITH RETROFLEX HOOK..LATIN SMALL LETTER TS DIGRAPH WITH RETROFLEX HOOK
AB66..AB68 ; Latin # L& [3] LATIN SMALL LETTER DZ DIGRAPH WITH RETROFLEX HOOK..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE
AB69 ; Latin # Lm MODIFIER LETTER SMALL TURNED W
FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
# Total code points: 1366
# Total code points: 1374
# ================================================
@ -769,12 +775,13 @@ FE2E..FE2F ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBININ
0559 ; Armenian # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
055A..055F ; Armenian # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
0560..0588 ; Armenian # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE
0589 ; Armenian # Po ARMENIAN FULL STOP
058A ; Armenian # Pd ARMENIAN HYPHEN
058D..058E ; Armenian # So [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN
058F ; Armenian # Sc ARMENIAN DRAM SIGN
FB13..FB17 ; Armenian # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
# Total code points: 95
# Total code points: 96
# ================================================
@ -837,7 +844,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V
0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
08A0..08B4 ; Arabic # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW
08B6..08BD ; Arabic # Lo [8] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER AFRICAN NOON
08B6..08C7 ; Arabic # Lo [18] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE
08D3..08E1 ; Arabic # Mn [15] ARABIC SMALL LOW WAW..ARABIC SMALL HIGH SIGN SAFHA
08E3..08FF ; Arabic # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA
FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
@ -886,7 +893,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL
# Total code points: 1281
# Total code points: 1291
# ================================================
@ -1051,7 +1058,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0B47..0B48 ; Oriya # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
0B4B..0B4C ; Oriya # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
0B4D ; Oriya # Mn ORIYA SIGN VIRAMA
0B56 ; Oriya # Mn ORIYA AI LENGTH MARK
0B55..0B56 ; Oriya # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK
0B57 ; Oriya # Mc ORIYA AU LENGTH MARK
0B5C..0B5D ; Oriya # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA
0B5F..0B61 ; Oriya # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL
@ -1061,7 +1068,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0B71 ; Oriya # Lo ORIYA LETTER WA
0B72..0B77 ; Oriya # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS
# Total code points: 90
# Total code points: 91
# ================================================
@ -1155,7 +1162,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0D00..0D01 ; Malayalam # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
0D02..0D03 ; Malayalam # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
0D05..0D0C ; Malayalam # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
0D04..0D0C ; Malayalam # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
0D0E..0D10 ; Malayalam # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
0D12..0D3A ; Malayalam # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
0D3B..0D3C ; Malayalam # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
@ -1177,10 +1184,11 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0D79 ; Malayalam # So MALAYALAM DATE MARK
0D7A..0D7F ; Malayalam # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
# Total code points: 117
# Total code points: 118
# ================================================
0D81 ; Sinhala # Mn SINHALA SIGN CANDRABINDU
0D82..0D83 ; Sinhala # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
0D85..0D96 ; Sinhala # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
0D9A..0DB1 ; Sinhala # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA
@ -1197,7 +1205,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0DF4 ; Sinhala # Po SINHALA PUNCTUATION KUNDDALIYA
111E1..111F4 ; Sinhala # No [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND
# Total code points: 110
# Total code points: 111
# ================================================
@ -1515,9 +1523,9 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
02EA..02EB ; Bopomofo # Sk [2] MODIFIER LETTER YIN DEPARTING TONE MARK..MODIFIER LETTER YANG DEPARTING TONE MARK
3105..312F ; Bopomofo # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN
31A0..31BA ; Bopomofo # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY
31A0..31BF ; Bopomofo # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
# Total code points: 72
# Total code points: 77
# ================================================
@ -1529,18 +1537,20 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
3021..3029 ; Han # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
3038..303A ; Han # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
303B ; Han # Lm VERTICAL IDEOGRAPHIC ITERATION MARK
3400..4DB5 ; Han # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
4E00..9FEF ; Han # Lo [20976] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEF
3400..4DBF ; Han # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
4E00..9FFC ; Han # Lo [20989] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFC
F900..FA6D ; Han # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
16FF0..16FF1 ; Han # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
20000..2A6DD ; Han # Lo [42718] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DD
2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Han # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Han # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
30000..3134A ; Han # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
# Total code points: 89233
# Total code points: 94204
# ================================================
@ -1583,6 +1593,7 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
0951..0954 ; Inherited # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
1AB0..1ABD ; Inherited # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
1ABE ; Inherited # Me COMBINING PARENTHESES OVERLAY
1ABF..1AC0 ; Inherited # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
1CD4..1CE0 ; Inherited # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
@ -1610,7 +1621,7 @@ FE20..FE2D ; Inherited # Mn [14] COMBINING LIGATURE LEFT HALF..COMBINING CON
1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 571
# Total code points: 573
# ================================================
@ -1783,8 +1794,9 @@ A823..A824 ; Syloti_Nagri # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI
A825..A826 ; Syloti_Nagri # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
A827 ; Syloti_Nagri # Mc SYLOTI NAGRI VOWEL SIGN OO
A828..A82B ; Syloti_Nagri # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4
A82C ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA
# Total code points: 44
# Total code points: 45
# ================================================
@ -2063,8 +2075,9 @@ AADE..AADF ; Tai_Viet # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI
A4D0..A4F7 ; Lisu # Lo [40] LISU LETTER BA..LISU LETTER OE
A4F8..A4FD ; Lisu # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU
A4FE..A4FF ; Lisu # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP
11FB0 ; Lisu # Lo LISU LETTER YHA
# Total code points: 48
# Total code points: 49
# ================================================
@ -2217,8 +2230,9 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
11140..11143 ; Chakma # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK
11144 ; Chakma # Lo CHAKMA LETTER LHAA
11145..11146 ; Chakma # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI
11147 ; Chakma # Lo CHAKMA LETTER VAA
# Total code points: 70
# Total code points: 71
# ================================================
@ -2259,13 +2273,15 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
111C5..111C8 ; Sharada # Po [4] SHARADA DANDA..SHARADA SEPARATOR
111C9..111CC ; Sharada # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK
111CD ; Sharada # Po SHARADA SUTRA MARK
111CE ; Sharada # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E
111CF ; Sharada # Mn SHARADA SIGN INVERTED CANDRABINDU
111D0..111D9 ; Sharada # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE
111DA ; Sharada # Lo SHARADA EKAM
111DB ; Sharada # Po SHARADA SIGN SIDDHAM
111DC ; Sharada # Lo SHARADA HEADSTROKE
111DD..111DF ; Sharada # Po [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2
# Total code points: 94
# Total code points: 96
# ================================================
@ -2650,12 +2666,12 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
11447..1144A ; Newa # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI
1144B..1144F ; Newa # Po [5] NEWA DANDA..NEWA ABBREVIATION SIGN
11450..11459 ; Newa # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE
1145B ; Newa # Po NEWA PLACEHOLDER MARK
1145A..1145B ; Newa # Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK
1145D ; Newa # Po NEWA INSERTION SIGN
1145E ; Newa # Mn NEWA SANDHI MARK
1145F ; Newa # Lo NEWA LETTER VEDIC ANUSVARA
1145F..11461 ; Newa # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA
# Total code points: 94
# Total code points: 97
# ================================================
@ -2668,9 +2684,10 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
16FE0 ; Tangut # Lm TANGUT ITERATION MARK
17000..187F7 ; Tangut # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
18800..18AF2 ; Tangut # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755
18800..18AFF ; Tangut # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768
18D00..18D08 ; Tangut # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
# Total code points: 6892
# Total code points: 6914
# ================================================
@ -2835,4 +2852,49 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
# Total code points: 59
# ================================================
10FB0..10FC4 ; Chorasmian # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW
10FC5..10FCB ; Chorasmian # No [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED
# Total code points: 28
# ================================================
11900..11906 ; Dives_Akuru # Lo [7] DIVES AKURU LETTER A..DIVES AKURU LETTER E
11909 ; Dives_Akuru # Lo DIVES AKURU LETTER O
1190C..11913 ; Dives_Akuru # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA
11915..11916 ; Dives_Akuru # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA
11918..1192F ; Dives_Akuru # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA
11930..11935 ; Dives_Akuru # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E
11937..11938 ; Dives_Akuru # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O
1193B..1193C ; Dives_Akuru # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU
1193D ; Dives_Akuru # Mc DIVES AKURU SIGN HALANTA
1193E ; Dives_Akuru # Mn DIVES AKURU VIRAMA
1193F ; Dives_Akuru # Lo DIVES AKURU PREFIXED NASAL SIGN
11940 ; Dives_Akuru # Mc DIVES AKURU MEDIAL YA
11941 ; Dives_Akuru # Lo DIVES AKURU INITIAL RA
11942 ; Dives_Akuru # Mc DIVES AKURU MEDIAL RA
11943 ; Dives_Akuru # Mn DIVES AKURU SIGN NUKTA
11944..11946 ; Dives_Akuru # Po [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK
11950..11959 ; Dives_Akuru # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE
# Total code points: 72
# ================================================
16FE4 ; Khitan_Small_Script # Mn KHITAN SMALL SCRIPT FILLER
18B00..18CD5 ; Khitan_Small_Script # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5
# Total code points: 471
# ================================================
10E80..10EA9 ; Yezidi # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EAB..10EAC ; Yezidi # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
10EAD ; Yezidi # Pd YEZIDI HYPHENATION MARK
10EB0..10EB1 ; Yezidi # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
# Total code points: 47
# EOF

View File

@ -1,5 +1,5 @@
# SentenceBreakProperty-12.1.0.txt
# Date: 2019-03-10, 10:53:28 GMT
# SentenceBreakProperty-13.0.0.txt
# Date: 2019-11-27, 03:13:39 GMT
# © 2019 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
@ -110,7 +110,7 @@
0B47..0B48 ; Extend # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
0B4B..0B4C ; Extend # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
0B4D ; Extend # Mn ORIYA SIGN VIRAMA
0B56 ; Extend # Mn ORIYA AI LENGTH MARK
0B55..0B56 ; Extend # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK
0B57 ; Extend # Mc ORIYA AU LENGTH MARK
0B62..0B63 ; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
0B82 ; Extend # Mn TAMIL SIGN ANUSVARA
@ -152,6 +152,7 @@
0D4D ; Extend # Mn MALAYALAM SIGN VIRAMA
0D57 ; Extend # Mc MALAYALAM AU LENGTH MARK
0D62..0D63 ; Extend # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
0D81 ; Extend # Mn SINHALA SIGN CANDRABINDU
0D82..0D83 ; Extend # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
0DCA ; Extend # Mn SINHALA SIGN AL-LAKUNA
0DCF..0DD1 ; Extend # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA
@ -240,6 +241,7 @@
1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY
1ABF..1AC0 ; Extend # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B04 ; Extend # Mc BALINESE SIGN BISAH
1B34 ; Extend # Mn BALINESE SIGN REREKAN
@ -304,6 +306,7 @@ A80B ; Extend # Mn SYLOTI NAGRI SIGN ANUSVARA
A823..A824 ; Extend # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
A825..A826 ; Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
A827 ; Extend # Mc SYLOTI NAGRI VOWEL SIGN OO
A82C ; Extend # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA
A880..A881 ; Extend # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
A8B4..A8C3 ; Extend # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
A8C4..A8C5 ; Extend # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU
@ -363,6 +366,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
10A3F ; Extend # Mn KHAROSHTHI VIRAMA
10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
11000 ; Extend # Mc BRAHMI SIGN CANDRABINDU
11001 ; Extend # Mn BRAHMI SIGN ANUSVARA
@ -386,6 +390,8 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
111B6..111BE ; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O
111BF..111C0 ; Extend # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA
111C9..111CC ; Extend # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK
111CE ; Extend # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E
111CF ; Extend # Mn SHARADA SIGN INVERTED CANDRABINDU
1122C..1122E ; Extend # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II
1122F..11231 ; Extend # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI
11232..11233 ; Extend # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU
@ -452,6 +458,14 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
1182F..11837 ; Extend # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA
11838 ; Extend # Mc DOGRA SIGN VISARGA
11839..1183A ; Extend # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA
11930..11935 ; Extend # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E
11937..11938 ; Extend # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O
1193B..1193C ; Extend # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU
1193D ; Extend # Mc DIVES AKURU SIGN HALANTA
1193E ; Extend # Mn DIVES AKURU VIRAMA
11940 ; Extend # Mc DIVES AKURU MEDIAL YA
11942 ; Extend # Mc DIVES AKURU MEDIAL RA
11943 ; Extend # Mn DIVES AKURU SIGN NUKTA
119D1..119D3 ; Extend # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II
119D4..119D7 ; Extend # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR
119DA..119DB ; Extend # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI
@ -499,6 +513,8 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR
16F51..16F87 ; Extend # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
16FE4 ; Extend # Mn KHITAN SMALL SCRIPT FILLER
16FF0..16FF1 ; Extend # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
1BC9D..1BC9E ; Extend # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
1D165..1D166 ; Extend # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
@ -525,7 +541,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 2368
# Total code points: 2395
# ================================================
@ -1192,11 +1208,14 @@ A7BB ; Lower # L& LATIN SMALL LETTER GLOTTAL A
A7BD ; Lower # L& LATIN SMALL LETTER GLOTTAL I
A7BF ; Lower # L& LATIN SMALL LETTER GLOTTAL U
A7C3 ; Lower # L& LATIN SMALL LETTER ANGLICANA W
A7C8 ; Lower # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY
A7CA ; Lower # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
A7F6 ; Lower # L& LATIN SMALL LETTER REVERSED HALF H
A7F8..A7F9 ; Lower # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
A7FA ; Lower # L& LATIN LETTER SMALL CAPITAL TURNED M
AB30..AB5A ; Lower # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
AB5C..AB5F ; Lower # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
AB60..AB67 ; Lower # L& [8] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TS DIGRAPH WITH RETROFLEX HOOK
AB60..AB68 ; Lower # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE
AB70..ABBF ; Lower # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA
FB00..FB06 ; Lower # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
FB13..FB17 ; Lower # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
@ -1236,7 +1255,7 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
1D7CB ; Lower # L& MATHEMATICAL BOLD SMALL DIGAMMA
1E922..1E943 ; Lower # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
# Total code points: 2293
# Total code points: 2297
# ================================================
@ -1840,7 +1859,9 @@ A7BA ; Upper # L& LATIN CAPITAL LETTER GLOTTAL A
A7BC ; Upper # L& LATIN CAPITAL LETTER GLOTTAL I
A7BE ; Upper # L& LATIN CAPITAL LETTER GLOTTAL U
A7C2 ; Upper # L& LATIN CAPITAL LETTER ANGLICANA W
A7C4..A7C6 ; Upper # L& [3] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER Z WITH PALATAL HOOK
A7C4..A7C7 ; Upper # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
A7C9 ; Upper # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
A7F5 ; Upper # L& LATIN CAPITAL LETTER REVERSED HALF H
FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
10400..10427 ; Upper # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW
104B0..104D3 ; Upper # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA
@ -1883,7 +1904,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT
1F150..1F169 ; Upper # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
1F170..1F189 ; Upper # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
# Total code points: 1893
# Total code points: 1896
# ================================================
@ -1923,7 +1944,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT
0840..0858 ; OLetter # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
0860..086A ; OLetter # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
08A0..08B4 ; OLetter # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW
08B6..08BD ; OLetter # Lo [8] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER AFRICAN NOON
08B6..08C7 ; OLetter # Lo [18] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE
0904..0939 ; OLetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
093D ; OLetter # Lo DEVANAGARI SIGN AVAGRAHA
0950 ; OLetter # Lo DEVANAGARI OM
@ -2000,7 +2021,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT
0CDE ; OLetter # Lo KANNADA LETTER FA
0CE0..0CE1 ; OLetter # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CF1..0CF2 ; OLetter # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
0D05..0D0C ; OLetter # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
0D04..0D0C ; OLetter # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
0D0E..0D10 ; OLetter # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
0D12..0D3A ; OLetter # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
0D3D ; OLetter # Lo MALAYALAM SIGN AVAGRAHA
@ -2137,10 +2158,10 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT
30FF ; OLetter # Lo KATAKANA DIGRAPH KOTO
3105..312F ; OLetter # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN
3131..318E ; OLetter # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
31A0..31BA ; OLetter # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY
31A0..31BF ; OLetter # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
31F0..31FF ; OLetter # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
3400..4DB5 ; OLetter # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
4E00..9FEF ; OLetter # Lo [20976] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEF
3400..4DBF ; OLetter # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
4E00..9FFC ; OLetter # Lo [20989] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFC
A000..A014 ; OLetter # Lo [21] YI SYLLABLE IT..YI SYLLABLE E
A015 ; OLetter # Lm YI SYLLABLE WU
A016..A48C ; OLetter # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
@ -2199,6 +2220,7 @@ AB09..AB0E ; OLetter # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDH
AB11..AB16 ; OLetter # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO
AB20..AB26 ; OLetter # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO
AB28..AB2E ; OLetter # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO
AB69 ; OLetter # Lm MODIFIER LETTER SMALL TURNED W
ABC0..ABE2 ; OLetter # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM
AC00..D7A3 ; OLetter # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
D7B0..D7C6 ; OLetter # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E
@ -2281,15 +2303,19 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
10B80..10B91 ; OLetter # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW
10C00..10C48 ; OLetter # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH
10D00..10D23 ; OLetter # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
10E80..10EA9 ; OLetter # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EB0..10EB1 ; OLetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
10F00..10F1C ; OLetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
10F27 ; OLetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH
10F30..10F45 ; OLetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
10FB0..10FC4 ; OLetter # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW
10FE0..10FF6 ; OLetter # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH
11003..11037 ; OLetter # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA
11083..110AF ; OLetter # Lo [45] KAITHI LETTER A..KAITHI LETTER HA
110D0..110E8 ; OLetter # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE
11103..11126 ; OLetter # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA
11144 ; OLetter # Lo CHAKMA LETTER LHAA
11147 ; OLetter # Lo CHAKMA LETTER VAA
11150..11172 ; OLetter # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA
11176 ; OLetter # Lo MAHAJANI LIGATURE SHRI
11183..111B2 ; OLetter # Lo [48] SHARADA LETTER A..SHARADA LETTER HA
@ -2315,7 +2341,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
1135D..11361 ; OLetter # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL
11400..11434 ; OLetter # Lo [53] NEWA LETTER A..NEWA LETTER HA
11447..1144A ; OLetter # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI
1145F ; OLetter # Lo NEWA LETTER VEDIC ANUSVARA
1145F..11461 ; OLetter # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA
11480..114AF ; OLetter # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA
114C4..114C5 ; OLetter # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG
114C7 ; OLetter # Lo TIRHUTA OM
@ -2327,7 +2353,13 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
116B8 ; OLetter # Lo TAKRI LETTER ARCHAIC KHA
11700..1171A ; OLetter # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA
11800..1182B ; OLetter # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA
118FF ; OLetter # Lo WARANG CITI OM
118FF..11906 ; OLetter # Lo [8] WARANG CITI OM..DIVES AKURU LETTER E
11909 ; OLetter # Lo DIVES AKURU LETTER O
1190C..11913 ; OLetter # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA
11915..11916 ; OLetter # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA
11918..1192F ; OLetter # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA
1193F ; OLetter # Lo DIVES AKURU PREFIXED NASAL SIGN
11941 ; OLetter # Lo DIVES AKURU INITIAL RA
119A0..119A7 ; OLetter # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR
119AA..119D0 ; OLetter # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA
119E1 ; OLetter # Lo NANDINAGARI SIGN AVAGRAHA
@ -2352,6 +2384,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
11D6A..11D89 ; OLetter # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA
11D98 ; OLetter # Lo GUNJALA GONDI OM
11EE0..11EF2 ; OLetter # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA
11FB0 ; OLetter # Lo LISU LETTER YHA
12000..12399 ; OLetter # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U
12400..1246E ; OLetter # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
12480..12543 ; OLetter # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
@ -2370,7 +2403,8 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
16FE0..16FE1 ; OLetter # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK
16FE3 ; OLetter # Lm OLD CHINESE ITERATION MARK
17000..187F7 ; OLetter # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
18800..18AF2 ; OLetter # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755
18800..18CD5 ; OLetter # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5
18D00..18D08 ; OLetter # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
1B000..1B11E ; OLetter # Lo [287] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER N-MU-MO-2
1B150..1B152 ; OLetter # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
1B164..1B167 ; OLetter # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
@ -2418,14 +2452,15 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
1EEA1..1EEA3 ; OLetter # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL
1EEA5..1EEA9 ; OLetter # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH
1EEAB..1EEBB ; OLetter # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
20000..2A6D6 ; OLetter # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
20000..2A6DD ; OLetter # Lo [42718] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DD
2A700..2B734 ; OLetter # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
2B740..2B81D ; OLetter # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; OLetter # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; OLetter # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2F800..2FA1D ; OLetter # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
# Total code points: 121822
# Total code points: 127413
# ================================================
@ -2480,6 +2515,7 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
116C0..116C9 ; Numeric # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
11730..11739 ; Numeric # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE
118E0..118E9 ; Numeric # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
11950..11959 ; Numeric # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE
11C50..11C59 ; Numeric # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
11D50..11D59 ; Numeric # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
11DA0..11DA9 ; Numeric # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE
@ -2489,8 +2525,9 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
1E140..1E149 ; Numeric # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE
1E2F0..1E2F9 ; Numeric # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
1FBF0..1FBF9 ; Numeric # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
# Total code points: 632
# Total code points: 652
# ================================================
@ -2563,6 +2600,8 @@ FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP
115C9..115D7 ; STerm # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES
11641..11642 ; STerm # Po [2] MODI DANDA..MODI DOUBLE DANDA
1173C..1173E ; STerm # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI
11944 ; STerm # Po DIVES AKURU DOUBLE DANDA
11946 ; STerm # Po DIVES AKURU END OF TEXT MARK
11A42..11A43 ; STerm # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD
11A9B..11A9C ; STerm # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD
11C41..11C42 ; STerm # Po [2] BHAIKSUKI DANDA..BHAIKSUKI DOUBLE DANDA
@ -2575,7 +2614,7 @@ FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP
1BC9F ; STerm # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
1DA88 ; STerm # Po SIGNWRITING FULL STOP
# Total code points: 138
# Total code points: 140
# ================================================

View File

@ -1,5 +1,5 @@
# SpecialCasing-12.1.0.txt
# Date: 2019-03-10, 10:53:28 GMT
# SpecialCasing-13.0.0.txt
# Date: 2019-09-08, 23:31:24 GMT
# © 2019 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
# WordBreakProperty-12.1.0.txt
# Date: 2019-03-10, 10:53:28 GMT
# © 2019 Unicode®, Inc.
# WordBreakProperty-13.0.0.txt
# Date: 2020-01-22, 00:07:44 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@ -146,7 +146,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW
0B47..0B48 ; Extend # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
0B4B..0B4C ; Extend # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
0B4D ; Extend # Mn ORIYA SIGN VIRAMA
0B56 ; Extend # Mn ORIYA AI LENGTH MARK
0B55..0B56 ; Extend # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK
0B57 ; Extend # Mc ORIYA AU LENGTH MARK
0B62..0B63 ; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
0B82 ; Extend # Mn TAMIL SIGN ANUSVARA
@ -188,6 +188,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW
0D4D ; Extend # Mn MALAYALAM SIGN VIRAMA
0D57 ; Extend # Mc MALAYALAM AU LENGTH MARK
0D62..0D63 ; Extend # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
0D81 ; Extend # Mn SINHALA SIGN CANDRABINDU
0D82..0D83 ; Extend # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
0DCA ; Extend # Mn SINHALA SIGN AL-LAKUNA
0DCF..0DD1 ; Extend # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA
@ -276,6 +277,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW
1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY
1ABF..1AC0 ; Extend # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B04 ; Extend # Mc BALINESE SIGN BISAH
1B34 ; Extend # Mn BALINESE SIGN REREKAN
@ -340,6 +342,7 @@ A80B ; Extend # Mn SYLOTI NAGRI SIGN ANUSVARA
A823..A824 ; Extend # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
A825..A826 ; Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
A827 ; Extend # Mc SYLOTI NAGRI VOWEL SIGN OO
A82C ; Extend # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA
A880..A881 ; Extend # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
A8B4..A8C3 ; Extend # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
A8C4..A8C5 ; Extend # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU
@ -399,6 +402,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
10A3F ; Extend # Mn KHAROSHTHI VIRAMA
10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
11000 ; Extend # Mc BRAHMI SIGN CANDRABINDU
11001 ; Extend # Mn BRAHMI SIGN ANUSVARA
@ -422,6 +426,8 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
111B6..111BE ; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O
111BF..111C0 ; Extend # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA
111C9..111CC ; Extend # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK
111CE ; Extend # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E
111CF ; Extend # Mn SHARADA SIGN INVERTED CANDRABINDU
1122C..1122E ; Extend # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II
1122F..11231 ; Extend # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI
11232..11233 ; Extend # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU
@ -488,6 +494,14 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
1182F..11837 ; Extend # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA
11838 ; Extend # Mc DOGRA SIGN VISARGA
11839..1183A ; Extend # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA
11930..11935 ; Extend # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E
11937..11938 ; Extend # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O
1193B..1193C ; Extend # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU
1193D ; Extend # Mc DIVES AKURU SIGN HALANTA
1193E ; Extend # Mn DIVES AKURU VIRAMA
11940 ; Extend # Mc DIVES AKURU MEDIAL YA
11942 ; Extend # Mc DIVES AKURU MEDIAL RA
11943 ; Extend # Mn DIVES AKURU SIGN NUKTA
119D1..119D3 ; Extend # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II
119D4..119D7 ; Extend # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR
119DA..119DB ; Extend # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI
@ -535,6 +549,8 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR
16F51..16F87 ; Extend # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
16FE4 ; Extend # Mn KHITAN SMALL SCRIPT FILLER
16FF0..16FF1 ; Extend # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
1BC9D..1BC9E ; Extend # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
1D165..1D166 ; Extend # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
@ -562,7 +578,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 2372
# Total code points: 2399
# ================================================
@ -635,6 +651,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
02D2..02D7 ; ALetter # Sk [6] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER MINUS SIGN
02DE..02DF ; ALetter # Sk [2] MODIFIER LETTER RHOTIC HOOK..MODIFIER LETTER CROSS ACCENT
02E0..02E4 ; ALetter # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
02E5..02EB ; ALetter # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK
02EC ; ALetter # Lm MODIFIER LETTER VOICING
02ED ; ALetter # Sk MODIFIER LETTER UNASPIRATED
02EE ; ALetter # Lm MODIFIER LETTER DOUBLE APOSTROPHE
@ -654,9 +671,10 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
048A..052F ; ALetter # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER
0531..0556 ; ALetter # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
0559 ; ALetter # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
055B..055C ; ALetter # Po [2] ARMENIAN EMPHASIS MARK..ARMENIAN EXCLAMATION MARK
055A..055C ; ALetter # Po [3] ARMENIAN APOSTROPHE..ARMENIAN EXCLAMATION MARK
055E ; ALetter # Po ARMENIAN QUESTION MARK
0560..0588 ; ALetter # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE
058A ; ALetter # Pd ARMENIAN HYPHEN
05F3 ; ALetter # Po HEBREW PUNCTUATION GERESH
0620..063F ; ALetter # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
0640 ; ALetter # Lm ARABIC TATWEEL
@ -682,7 +700,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
0840..0858 ; ALetter # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
0860..086A ; ALetter # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
08A0..08B4 ; ALetter # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW
08B6..08BD ; ALetter # Lo [8] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER AFRICAN NOON
08B6..08C7 ; ALetter # Lo [18] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE
0904..0939 ; ALetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
093D ; ALetter # Lo DEVANAGARI SIGN AVAGRAHA
0950 ; ALetter # Lo DEVANAGARI OM
@ -759,7 +777,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
0CDE ; ALetter # Lo KANNADA LETTER FA
0CE0..0CE1 ; ALetter # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CF1..0CF2 ; ALetter # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
0D05..0D0C ; ALetter # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
0D04..0D0C ; ALetter # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
0D0E..0D10 ; ALetter # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
0D12..0D3A ; ALetter # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
0D3D ; ALetter # Lo MALAYALAM SIGN AVAGRAHA
@ -912,7 +930,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
303C ; ALetter # Lo MASU MARK
3105..312F ; ALetter # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN
3131..318E ; ALetter # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
31A0..31BA ; ALetter # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY
31A0..31BF ; ALetter # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
A000..A014 ; ALetter # Lo [21] YI SYLLABLE IT..YI SYLLABLE E
A015 ; ALetter # Lm YI SYLLABLE WU
A016..A48C ; ALetter # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
@ -929,6 +947,7 @@ A680..A69B ; ALetter # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL L
A69C..A69D ; ALetter # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN
A6A0..A6E5 ; ALetter # Lo [70] BAMUM LETTER A..BAMUM LETTER KI
A6E6..A6EF ; ALetter # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM
A708..A716 ; ALetter # Sk [15] MODIFIER LETTER EXTRA-HIGH DOTTED TONE BAR..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR
A717..A71F ; ALetter # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
A720..A721 ; ALetter # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE
A722..A76F ; ALetter # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON
@ -939,7 +958,8 @@ A789..A78A ; ALetter # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT
A78B..A78E ; ALetter # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
A78F ; ALetter # Lo LATIN LETTER SINOLOGICAL DOT
A790..A7BF ; ALetter # L& [48] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER GLOTTAL U
A7C2..A7C6 ; ALetter # L& [5] LATIN CAPITAL LETTER ANGLICANA W..LATIN CAPITAL LETTER Z WITH PALATAL HOOK
A7C2..A7CA ; ALetter # L& [9] LATIN CAPITAL LETTER ANGLICANA W..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
A7F5..A7F6 ; ALetter # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
A7F7 ; ALetter # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I
A7F8..A7F9 ; ALetter # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
A7FA ; ALetter # L& LATIN LETTER SMALL CAPITAL TURNED M
@ -971,7 +991,8 @@ AB28..AB2E ; ALetter # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO
AB30..AB5A ; ALetter # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
AB5B ; ALetter # Sk MODIFIER BREVE WITH INVERTED BREVE
AB5C..AB5F ; ALetter # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
AB60..AB67 ; ALetter # L& [8] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TS DIGRAPH WITH RETROFLEX HOOK
AB60..AB68 ; ALetter # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE
AB69 ; ALetter # Lm MODIFIER LETTER SMALL TURNED W
AB70..ABBF ; ALetter # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA
ABC0..ABE2 ; ALetter # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM
AC00..D7A3 ; ALetter # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
@ -1052,15 +1073,19 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
10C80..10CB2 ; ALetter # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US
10CC0..10CF2 ; ALetter # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US
10D00..10D23 ; ALetter # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
10E80..10EA9 ; ALetter # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EB0..10EB1 ; ALetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
10F00..10F1C ; ALetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
10F27 ; ALetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH
10F30..10F45 ; ALetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
10FB0..10FC4 ; ALetter # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW
10FE0..10FF6 ; ALetter # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH
11003..11037 ; ALetter # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA
11083..110AF ; ALetter # Lo [45] KAITHI LETTER A..KAITHI LETTER HA
110D0..110E8 ; ALetter # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE
11103..11126 ; ALetter # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA
11144 ; ALetter # Lo CHAKMA LETTER LHAA
11147 ; ALetter # Lo CHAKMA LETTER VAA
11150..11172 ; ALetter # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA
11176 ; ALetter # Lo MAHAJANI LIGATURE SHRI
11183..111B2 ; ALetter # Lo [48] SHARADA LETTER A..SHARADA LETTER HA
@ -1086,7 +1111,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
1135D..11361 ; ALetter # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL
11400..11434 ; ALetter # Lo [53] NEWA LETTER A..NEWA LETTER HA
11447..1144A ; ALetter # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI
1145F ; ALetter # Lo NEWA LETTER VEDIC ANUSVARA
1145F..11461 ; ALetter # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA
11480..114AF ; ALetter # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA
114C4..114C5 ; ALetter # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG
114C7 ; ALetter # Lo TIRHUTA OM
@ -1098,7 +1123,13 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
116B8 ; ALetter # Lo TAKRI LETTER ARCHAIC KHA
11800..1182B ; ALetter # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA
118A0..118DF ; ALetter # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO
118FF ; ALetter # Lo WARANG CITI OM
118FF..11906 ; ALetter # Lo [8] WARANG CITI OM..DIVES AKURU LETTER E
11909 ; ALetter # Lo DIVES AKURU LETTER O
1190C..11913 ; ALetter # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA
11915..11916 ; ALetter # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA
11918..1192F ; ALetter # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA
1193F ; ALetter # Lo DIVES AKURU PREFIXED NASAL SIGN
11941 ; ALetter # Lo DIVES AKURU INITIAL RA
119A0..119A7 ; ALetter # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR
119AA..119D0 ; ALetter # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA
119E1 ; ALetter # Lo NANDINAGARI SIGN AVAGRAHA
@ -1123,6 +1154,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
11D6A..11D89 ; ALetter # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA
11D98 ; ALetter # Lo GUNJALA GONDI OM
11EE0..11EF2 ; ALetter # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA
11FB0 ; ALetter # Lo LISU LETTER YHA
12000..12399 ; ALetter # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U
12400..1246E ; ALetter # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
12480..12543 ; ALetter # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
@ -1219,20 +1251,21 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
# Total code points: 28693
# Total code points: 28854
# ================================================
003A ; MidLetter # Po COLON
00B7 ; MidLetter # Po MIDDLE DOT
0387 ; MidLetter # Po GREEK ANO TELEIA
055F ; MidLetter # Po ARMENIAN ABBREVIATION MARK
05F4 ; MidLetter # Po HEBREW PUNCTUATION GERSHAYIM
2027 ; MidLetter # Po HYPHENATION POINT
FE13 ; MidLetter # Po PRESENTATION FORM FOR VERTICAL COLON
FE55 ; MidLetter # Po SMALL COLON
FF1A ; MidLetter # Po FULLWIDTH COLON
# Total code points: 8
# Total code points: 9
# ================================================
@ -1318,6 +1351,7 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
116C0..116C9 ; Numeric # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
11730..11739 ; Numeric # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE
118E0..118E9 ; Numeric # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
11950..11959 ; Numeric # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE
11C50..11C59 ; Numeric # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
11D50..11D59 ; Numeric # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
11DA0..11DA9 ; Numeric # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE
@ -1327,8 +1361,9 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
1E140..1E149 ; Numeric # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE
1E2F0..1E2F9 ; Numeric # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
1FBF0..1FBF9 ; Numeric # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
# Total code points: 631
# Total code points: 651
# ================================================

View File

@ -38,8 +38,8 @@
#include <private/qunicodetables_p.h>
#endif
#define DATA_VERSION_S "12.1"
#define DATA_VERSION_STR "QChar::Unicode_12_1"
#define DATA_VERSION_S "13.0"
#define DATA_VERSION_STR "QChar::Unicode_13_0"
static QHash<QByteArray, QChar::UnicodeVersion> age_map;
@ -72,6 +72,7 @@ static void initAgeMap()
{ QChar::Unicode_11_0, "11.0" },
{ QChar::Unicode_12_0, "12.0" },
{ QChar::Unicode_12_1, "12.1" }, // UCD Revision 24
{ QChar::Unicode_13_0, "13.0" }, // UCD Revision 26
{ QChar::Unicode_Unassigned, 0 }
};
AgeMap *d = ageMap;
@ -798,6 +799,11 @@ static void initScriptMap()
{ QChar::Script_Nandinagari, "Nandinagari" },
{ QChar::Script_NyiakengPuachueHmong, "NyiakengPuachueHmong" },
{ QChar::Script_Wancho, "Wancho" },
// 13.0
{ QChar::Script_Chorasmian, "Chorasmian" },
{ QChar::Script_DivesAkuru, "DivesAkuru" },
{ QChar::Script_KhitanSmallScript, "KhitanSmallScript" },
{ QChar::Script_Yezidi, "Yezidi" },
// unhandled
{ QChar::Script_Unknown, 0 }
@ -3045,7 +3051,7 @@ int main(int, char **)
QByteArray header =
"/****************************************************************************\n"
"**\n"
"** Copyright (C) 2016 The Qt Company Ltd.\n"
"** Copyright (C) 2020 The Qt Company Ltd.\n"
"** Contact: https://www.qt.io/licensing/\n"
"**\n"
"** This file is part of the QtCore module of the Qt Toolkit.\n"