Update most Unicode data to version 10.0

The text segmentation data is not being updated in this change,
as it requires additional code changes. Updating those will
come in a follow-up commit.

Change-Id: I5d6b6bc96044e8dd0c25cf6f79756e7f68bf6e7c
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
Reviewed-by: Eskil Abrahamsen Blomfeldt <eskil.abrahamsen-blomfeldt@qt.io>
This commit is contained in:
Lars Knoll 2017-12-12 09:47:53 +01:00
parent 2b2b9c9962
commit 8bfabb34de
16 changed files with 8905 additions and 5375 deletions

View File

@ -296,6 +296,20 @@ public:
Script_OldHungarian,
Script_SignWriting,
// Unicode 9.0 additions
Script_Adlam,
Script_Bhaiksuki,
Script_Marchen,
Script_Newa,
Script_Osage,
Script_Tangut,
// Unicode 10.0 additions
Script_MasaramGondi,
Script_Nushu,
Script_Soyombo,
Script_ZanabazarSquare,
ScriptCount
};
@ -387,7 +401,9 @@ public:
Unicode_6_2,
Unicode_6_3,
Unicode_7_0,
Unicode_8_0
Unicode_8_0,
Unicode_9_0,
Unicode_10_0
};
// ****** WHEN ADDING FUNCTIONS, CONSIDER ADDING TO QCharRef TOO

File diff suppressed because it is too large Load Diff

View File

@ -37,7 +37,7 @@
**
****************************************************************************/
/* This file is autogenerated from the Unicode 8.0 database. Do not edit */
/* This file is autogenerated from the Unicode 10.0 database. Do not edit */
//
// W A R N I N G
@ -59,7 +59,7 @@
QT_BEGIN_NAMESPACE
#define UNICODE_DATA_VERSION QChar::Unicode_8_0
#define UNICODE_DATA_VERSION QChar::Unicode_10_0
namespace QUnicodeTables {

View File

@ -202,7 +202,21 @@ static const hb_script_t _qtscript_to_hbscript[] = {
HB_SCRIPT_HATRAN,
HB_SCRIPT_MULTANI,
HB_SCRIPT_OLD_HUNGARIAN,
HB_SCRIPT_SIGNWRITING
HB_SCRIPT_SIGNWRITING,
// Unicode 9.0 additions
HB_SCRIPT_ADLAM,
HB_SCRIPT_BHAIKSUKI,
HB_SCRIPT_MARCHEN,
HB_SCRIPT_NEWA,
HB_SCRIPT_OSAGE,
HB_SCRIPT_TANGUT,
// Unicode 10.0 additions
HB_SCRIPT_MASARAM_GONDI,
HB_SCRIPT_NUSHU,
HB_SCRIPT_SOYOMBO,
HB_SCRIPT_ZANABAZAR_SQUARE
};
Q_STATIC_ASSERT(QChar::ScriptCount == sizeof(_qtscript_to_hbscript) / sizeof(_qtscript_to_hbscript[0]));

View File

@ -250,7 +250,17 @@ static const char specialLanguages[][6] = {
"", // Hatran
"", // Multani
"", // OldHungarian
"" // SignWriting
"", // SignWriting
"", // Adlam
"", // Bhaiksuki
"", // Marchen
"", // Newa
"", // Osage
"", // Tangut
"", // MasaramGondi
"", // Nushu
"", // Soyombo
"" // ZanabazarSquare
};
Q_STATIC_ASSERT(sizeof specialLanguages / sizeof *specialLanguages == QChar::ScriptCount);

View File

@ -1,23 +1,23 @@
# ArabicShaping-8.0.0.txt
# Date: 2015-02-17, 23:33:00 GMT [RP]
# ArabicShaping-10.0.0.txt
# Date: 2017-02-16, 00:00:00 GMT [RP, KW]
# © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# This file is a normative contributory data file in the
# Unicode Character Database.
#
# Copyright (c) 1991-2014 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# This file defines the Joining_Type and Joining_Group property
# values for Arabic, Syriac, N'Ko, Mandaic, and Manichaean positional
# shaping, repeating in machine readable form the information
# exemplified in Tables 9-3, 9-8, 9-9, 9-10, 9-14, 9-15, 9-16, 9-19,
# 9-20, 10-4, 10-5, 10-6, 10-7, and 19-5 of The Unicode Standard core
# specification. This file also defines Joining_Type values for
# Mongolian, Phags-pa, and Psalter Pahlavi positional shaping, which
# are not listed in tables in the standard.
# Mongolian, Phags-pa, Psalter Pahlavi, and Adlam positional shaping,
# which are not listed in tables in the standard.
#
# See Sections 9.2, 9.3, 9.5, 10.5, 10.6, 13.4, 14.3, 19.4 of
# The Unicode Standard core specification for more information.
# See Sections 9.2, 9.3, 9.5, 10.5, 10.6, 13.4, 14.3, 19.4, and 19.9
# of The Unicode Standard core specification for more information.
#
# Each line contains four fields, separated by a semicolon.
#
@ -50,8 +50,8 @@
# Field 3: defines the joining group (property name: Joining_Group)
#
# The values of the joining group are based schematically on character
# names. Where a schematic character name consists of two or more parts separated
# by spaces, the formal Joining_Group property value, as specified in
# names. Where a schematic character name consists of two or more parts
# separated by spaces, the formal Joining_Group property value, as specified in
# PropertyValueAliases.txt, consists of the same name parts joined by
# underscores. Hence, the entry:
#
@ -86,11 +86,11 @@
# Note: Code points that are not explicitly listed in this file are
# either of joining type T or U:
#
# - Those that not explicitly listed that are of General Category Mn, Me, or Cf
# - Those that are not explicitly listed and that are of General Category Mn, Me, or Cf
# have joining type T.
# - All others not explicitly listed have joining type U.
#
# For an explicit listing of characters of joining type T, see
# For an explicit listing of all characters of joining type T, see
# the derived property file DerivedJoiningType.txt.
#
# #############################################################
@ -413,6 +413,20 @@
0857; MANDAIC KAD; U; No_Joining_Group
0858; MANDAIC AIN; U; No_Joining_Group
# Syriac Supplement Characters
0860; MALAYALAM NGA; D; MALAYALAM NGA
0861; MALAYALAM JA; U; MALAYALAM JA
0862; MALAYALAM NYA; D; MALAYALAM NYA
0863; MALAYALAM TTA; D; MALAYALAM TTA
0864; MALAYALAM NNA; D; MALAYALAM NNA
0865; MALAYALAM NNNA; D; MALAYALAM NNNA
0866; MALAYALAM BHA; U; MALAYALAM BHA
0867; MALAYALAM RA; R; MALAYALAM RA
0868; MALAYALAM LLA; D; MALAYALAM LLA
0869; MALAYALAM LLLA; R; MALAYALAM LLLA
086A; MALAYALAM SSA; R; MALAYALAM SSA
# Arabic Extended-A Characters
08A0; DOTLESS BEH WITH V BELOW; D; BEH
@ -436,6 +450,15 @@
08B2; REH WITH DOT AND INVERTED V ABOVE; R; REH
08B3; AIN WITH 3 DOTS BELOW; D; AIN
08B4; KAF WITH DOT BELOW; D; KAF
08B6; BEH WITH MEEM ABOVE; D; BEH
08B7; DOTLESS BEH WITH 3 DOTS BELOW AND MEEM ABOVE; D; BEH
08B8; DOTLESS BEH WITH TEH ABOVE; D; BEH
08B9; REH WITH NOON ABOVE; R; REH
08BA; YEH WITH NOON ABOVE; D; YEH
08BB; AFRICAN FEH; D; AFRICAN FEH
08BC; AFRICAN QAF; D; AFRICAN QAF
08BD; AFRICAN NOON; D; AFRICAN NOON
08E2; ARABIC DISPUTED END OF AYAH; U; No_Joining_Group
# Mongolian Characters
@ -536,8 +559,8 @@
1882; MONGOLIAN ALI GALI DAMARU; U; No_Joining_Group
1883; MONGOLIAN ALI GALI UBADAMA; U; No_Joining_Group
1884; MONGOLIAN ALI GALI INVERTED UBADAMA; U; No_Joining_Group
1885; MONGOLIAN ALI GALI BALUDA; U; No_Joining_Group
1886; MONGOLIAN ALI GALI THREE BALUDA; U; No_Joining_Group
1885; MONGOLIAN ALI GALI BALUDA; T; No_Joining_Group
1886; MONGOLIAN ALI GALI THREE BALUDA; T; No_Joining_Group
1887; MONGOLIAN ALI GALI A; D; No_Joining_Group
1888; MONGOLIAN ALI GALI I; D; No_Joining_Group
1889; MONGOLIAN ALI GALI KA; D; No_Joining_Group
@ -578,6 +601,7 @@
200C; ZERO WIDTH NON-JOINER; U; No_Joining_Group
200D; ZERO WIDTH JOINER; C; No_Joining_Group
202F; NARROW NO-BREAK SPACE; U; No_Joining_Group
2066; LEFT-TO-RIGHT ISOLATE; U; No_Joining_Group
2067; RIGHT-TO-LEFT ISOLATE; U; No_Joining_Group
2068; FIRST STRONG ISOLATE; U; No_Joining_Group
@ -711,4 +735,75 @@ A873; PHAGS-PA CANDRABINDU; U; No_Joining_Group
10BAE; PSALTER PAHLAVI TWENTY; D; No_Joining_Group
10BAF; PSALTER PAHLAVI HUNDRED; U; No_Joining_Group
# Adlam Characters
1E900;ADLAM CAPITAL ALIF; D; No_Joining_Group
1E901;ADLAM CAPITAL DAALI; D; No_Joining_Group
1E902;ADLAM CAPITAL LAAM; D; No_Joining_Group
1E903;ADLAM CAPITAL MIIM; D; No_Joining_Group
1E904;ADLAM CAPITAL BA; D; No_Joining_Group
1E905;ADLAM CAPITAL SINNYIIYHE; D; No_Joining_Group
1E906;ADLAM CAPITAL PE; D; No_Joining_Group
1E907;ADLAM CAPITAL BHE; D; No_Joining_Group
1E908;ADLAM CAPITAL RA; D; No_Joining_Group
1E909;ADLAM CAPITAL E; D; No_Joining_Group
1E90A;ADLAM CAPITAL FA; D; No_Joining_Group
1E90B;ADLAM CAPITAL I; D; No_Joining_Group
1E90C;ADLAM CAPITAL O; D; No_Joining_Group
1E90D;ADLAM CAPITAL DHA; D; No_Joining_Group
1E90E;ADLAM CAPITAL YHE; D; No_Joining_Group
1E90F;ADLAM CAPITAL WAW; D; No_Joining_Group
1E910;ADLAM CAPITAL NUN; D; No_Joining_Group
1E911;ADLAM CAPITAL KAF; D; No_Joining_Group
1E912;ADLAM CAPITAL YA; D; No_Joining_Group
1E913;ADLAM CAPITAL U; D; No_Joining_Group
1E914;ADLAM CAPITAL JIIM; D; No_Joining_Group
1E915;ADLAM CAPITAL CHI; D; No_Joining_Group
1E916;ADLAM CAPITAL HA; D; No_Joining_Group
1E917;ADLAM CAPITAL QAAF; D; No_Joining_Group
1E918;ADLAM CAPITAL GA; D; No_Joining_Group
1E919;ADLAM CAPITAL NYA; D; No_Joining_Group
1E91A;ADLAM CAPITAL TU; D; No_Joining_Group
1E91B;ADLAM CAPITAL NHA; D; No_Joining_Group
1E91C;ADLAM CAPITAL VA; D; No_Joining_Group
1E91D;ADLAM CAPITAL KHA; D; No_Joining_Group
1E91E;ADLAM CAPITAL GBE; D; No_Joining_Group
1E91F;ADLAM CAPITAL ZAL; D; No_Joining_Group
1E920;ADLAM CAPITAL KPO; D; No_Joining_Group
1E921;ADLAM CAPITAL SHA; D; No_Joining_Group
1E922;ADLAM SMALL ALIF; D; No_Joining_Group
1E923;ADLAM SMALL DAALI; D; No_Joining_Group
1E924;ADLAM SMALL LAAM; D; No_Joining_Group
1E925;ADLAM SMALL MIIM; D; No_Joining_Group
1E926;ADLAM SMALL BA; D; No_Joining_Group
1E927;ADLAM SMALL SINNYIIYHE; D; No_Joining_Group
1E928;ADLAM SMALL PE; D; No_Joining_Group
1E929;ADLAM SMALL BHE; D; No_Joining_Group
1E92A;ADLAM SMALL RA; D; No_Joining_Group
1E92B;ADLAM SMALL E; D; No_Joining_Group
1E92C;ADLAM SMALL FA; D; No_Joining_Group
1E92D;ADLAM SMALL I; D; No_Joining_Group
1E92E;ADLAM SMALL O; D; No_Joining_Group
1E92F;ADLAM SMALL DHA; D; No_Joining_Group
1E930;ADLAM SMALL YHE; D; No_Joining_Group
1E931;ADLAM SMALL WAW; D; No_Joining_Group
1E932;ADLAM SMALL NUN; D; No_Joining_Group
1E933;ADLAM SMALL KAF; D; No_Joining_Group
1E934;ADLAM SMALL YA; D; No_Joining_Group
1E935;ADLAM SMALL U; D; No_Joining_Group
1E936;ADLAM SMALL JIIM; D; No_Joining_Group
1E937;ADLAM SMALL CHI; D; No_Joining_Group
1E938;ADLAM SMALL HA; D; No_Joining_Group
1E939;ADLAM SMALL QAAF; D; No_Joining_Group
1E93A;ADLAM SMALL GA; D; No_Joining_Group
1E93B;ADLAM SMALL NYA; D; No_Joining_Group
1E93C;ADLAM SMALL TU; D; No_Joining_Group
1E93D;ADLAM SMALL NHA; D; No_Joining_Group
1E93E;ADLAM SMALL VA; D; No_Joining_Group
1E93F;ADLAM SMALL KHA; D; No_Joining_Group
1E940;ADLAM SMALL GBE; D; No_Joining_Group
1E941;ADLAM SMALL ZAL; D; No_Joining_Group
1E942;ADLAM SMALL KPO; D; No_Joining_Group
1E943;ADLAM SMALL SHA; D; No_Joining_Group
# EOF

View File

@ -1,32 +1,34 @@
# BidiMirroring-8.0.0.txt
# Date: 2015-01-20, 18:30:00 GMT [KW, LI]
# BidiMirroring-10.0.0.txt
# Date: 2017-04-12, 17:30:00 GMT [KW, LI]
# © 2017 Unicode®, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Bidi_Mirroring_Glyph Property
#
#
# This file is an informative contributory data file in the
# Unicode Character Database.
#
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# This data file lists characters that have the Bidi_Mirrored=Yes property
# value, for which there is another Unicode character that typically has a glyph
# that is the mirror image of the original character's glyph.
#
# The repertoire covered by the file is Unicode 8.0.0.
#
# The repertoire covered by the file is Unicode 10.0.0.
#
# The file contains a list of lines with mappings from one code point
# to another one for character-based mirroring.
# Note that for "real" mirroring, a rendering engine needs to select
# appropriate alternative glyphs, and that many Unicode characters do not
# have a mirror-image Unicode character.
#
#
# Each mapping line contains two fields, separated by a semicolon (';').
# Each of the two fields contains a code point represented as a
# variable-length hexadecimal value with 4 to 6 digits.
# A comment indicates where the characters are "BEST FIT" mirroring.
#
# Code points for which Bidi_Mirrored=Yes, but for which no appropriate
#
# Code points for which Bidi_Mirrored=Yes, but for which no appropriate
# characters exist with mirrored glyphs, are
# listed as comments at the end of the file.
#
@ -36,14 +38,14 @@
# point has the default value for the Bidi_Mirroring_Glyph property,
# that means that no other character exists whose glyph is suitable
# for character-based mirroring.
#
#
# For information on bidi mirroring, see UAX #9: Unicode Bidirectional Algorithm,
# at http://www.unicode.org/unicode/reports/tr9/
#
#
# This file was originally created by Markus Scherer.
# Extended for Unicode 3.2, 4.0, 4.1, 5.0, 5.1, 5.2, and 6.0 by Ken Whistler,
# and for subsequent versions by Ken Whistler and Laurentiu Iancu.
#
#
# ############################################################
#
# Property: Bidi_Mirroring_Glyph

View File

@ -1,9 +1,9 @@
# Blocks-8.0.0.txt
# Date: 2014-11-10, 23:04:00 GMT [KW]
# Blocks-10.0.0.txt
# Date: 2017-04-12, 17:30:00 GMT [KW]
# © 2017 Unicode®, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# Copyright (c) 1991-2014 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Format:
@ -14,12 +14,12 @@
# Note: When comparing block names, casing, whitespace, hyphens,
# and underbars are ignored.
# For example, "Latin Extended-A" and "latin extended a" are equivalent.
# For more information on the comparison of property values,
# For more information on the comparison of property values,
# see UAX #44: http://www.unicode.org/reports/tr44/
#
# All block ranges start with a value where (cp MOD 16) = 0,
# and end with a value where (cp MOD 16) = 15. In other words,
# the last hexadecimal digit of the start of range is ...0
# the last hexadecimal digit of the start of range is ...0
# and the last hexadecimal digit of the end of range is ...F.
# This constraint on block ranges guarantees that allocations
# are done in terms of whole columns, and that code chart display
@ -51,6 +51,7 @@
07C0..07FF; NKo
0800..083F; Samaritan
0840..085F; Mandaic
0860..086F; Syriac Supplement
08A0..08FF; Arabic Extended-A
0900..097F; Devanagari
0980..09FF; Bengali
@ -93,6 +94,7 @@
1BC0..1BFF; Batak
1C00..1C4F; Lepcha
1C50..1C7F; Ol Chiki
1C80..1C8F; Cyrillic Extended-C
1CC0..1CCF; Sundanese Supplement
1CD0..1CFF; Vedic Extensions
1D00..1D7F; Phonetic Extensions
@ -209,6 +211,7 @@ FFF0..FFFF; Specials
10400..1044F; Deseret
10450..1047F; Shavian
10480..104AF; Osmanya
104B0..104FF; Osage
10500..1052F; Elbasan
10530..1056F; Caucasian Albanian
10600..1077F; Linear A
@ -243,13 +246,20 @@ FFF0..FFFF; Specials
11280..112AF; Multani
112B0..112FF; Khudawadi
11300..1137F; Grantha
11400..1147F; Newa
11480..114DF; Tirhuta
11580..115FF; Siddham
11600..1165F; Modi
11660..1167F; Mongolian Supplement
11680..116CF; Takri
11700..1173F; Ahom
118A0..118FF; Warang Citi
11A00..11A4F; Zanabazar Square
11A50..11AAF; Soyombo
11AC0..11AFF; Pau Cin Hau
11C00..11C6F; Bhaiksuki
11C70..11CBF; Marchen
11D00..11D5F; Masaram Gondi
12000..123FF; Cuneiform
12400..1247F; Cuneiform Numbers and Punctuation
12480..1254F; Early Dynastic Cuneiform
@ -260,7 +270,12 @@ FFF0..FFFF; Specials
16AD0..16AFF; Bassa Vah
16B00..16B8F; Pahawh Hmong
16F00..16F9F; Miao
16FE0..16FFF; Ideographic Symbols and Punctuation
17000..187FF; Tangut
18800..18AFF; Tangut Components
1B000..1B0FF; Kana Supplement
1B100..1B12F; Kana Extended-A
1B170..1B2FF; Nushu
1BC00..1BC9F; Duployan
1BCA0..1BCAF; Shorthand Format Controls
1D000..1D0FF; Byzantine Musical Symbols
@ -270,7 +285,9 @@ FFF0..FFFF; Specials
1D360..1D37F; Counting Rod Numerals
1D400..1D7FF; Mathematical Alphanumeric Symbols
1D800..1DAAF; Sutton SignWriting
1E000..1E02F; Glagolitic Supplement
1E800..1E8DF; Mende Kikakui
1E900..1E95F; Adlam
1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
1F000..1F02F; Mahjong Tiles
1F030..1F09F; Domino Tiles
@ -289,6 +306,7 @@ FFF0..FFFF; Specials
2A700..2B73F; CJK Unified Ideographs Extension C
2B740..2B81F; CJK Unified Ideographs Extension D
2B820..2CEAF; CJK Unified Ideographs Extension E
2CEB0..2EBEF; CJK Unified Ideographs Extension F
2F800..2FA1F; CJK Compatibility Ideographs Supplement
E0000..E007F; Tags
E0100..E01EF; Variation Selectors Supplement

View File

@ -1,10 +1,11 @@
# CaseFolding-8.0.0.txt
# Date: 2015-01-13, 18:16:36 GMT [MD]
# CaseFolding-10.0.0.txt
# Date: 2017-04-14, 05:40:18 GMT
# © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Case Folding Properties
#
@ -23,7 +24,7 @@
#
# NOTE: case folding does not preserve normalization formats!
#
# For information on case folding, including how to have case folding
# For information on case folding, including how to have case folding
# preserve normalization formats, see Section 3.13 Default Case Algorithms in
# The Unicode Standard.
#
@ -593,6 +594,15 @@
13FB; C; 13F3; # CHEROKEE SMALL LETTER YU
13FC; C; 13F4; # CHEROKEE SMALL LETTER YV
13FD; C; 13F5; # CHEROKEE SMALL LETTER MV
1C80; C; 0432; # CYRILLIC SMALL LETTER ROUNDED VE
1C81; C; 0434; # CYRILLIC SMALL LETTER LONG-LEGGED DE
1C82; C; 043E; # CYRILLIC SMALL LETTER NARROW O
1C83; C; 0441; # CYRILLIC SMALL LETTER WIDE ES
1C84; C; 0442; # CYRILLIC SMALL LETTER TALL TE
1C85; C; 0442; # CYRILLIC SMALL LETTER THREE-LEGGED TE
1C86; C; 044A; # CYRILLIC SMALL LETTER TALL HARD SIGN
1C87; C; 0463; # CYRILLIC SMALL LETTER TALL YAT
1C88; C; A64B; # CYRILLIC SMALL LETTER UNBLENDED UK
1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW
1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE
1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW
@ -1163,6 +1173,7 @@ A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK
A7AB; C; 025C; # LATIN CAPITAL LETTER REVERSED OPEN E
A7AC; C; 0261; # LATIN CAPITAL LETTER SCRIPT G
A7AD; C; 026C; # LATIN CAPITAL LETTER L WITH BELT
A7AE; C; 026A; # LATIN CAPITAL LETTER SMALL CAPITAL I
A7B0; C; 029E; # LATIN CAPITAL LETTER TURNED K
A7B1; C; 0287; # LATIN CAPITAL LETTER TURNED T
A7B2; C; 029D; # LATIN CAPITAL LETTER J WITH CROSSED-TAIL
@ -1327,6 +1338,42 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
10425; C; 1044D; # DESERET CAPITAL LETTER ENG
10426; C; 1044E; # DESERET CAPITAL LETTER OI
10427; C; 1044F; # DESERET CAPITAL LETTER EW
104B0; C; 104D8; # OSAGE CAPITAL LETTER A
104B1; C; 104D9; # OSAGE CAPITAL LETTER AI
104B2; C; 104DA; # OSAGE CAPITAL LETTER AIN
104B3; C; 104DB; # OSAGE CAPITAL LETTER AH
104B4; C; 104DC; # OSAGE CAPITAL LETTER BRA
104B5; C; 104DD; # OSAGE CAPITAL LETTER CHA
104B6; C; 104DE; # OSAGE CAPITAL LETTER EHCHA
104B7; C; 104DF; # OSAGE CAPITAL LETTER E
104B8; C; 104E0; # OSAGE CAPITAL LETTER EIN
104B9; C; 104E1; # OSAGE CAPITAL LETTER HA
104BA; C; 104E2; # OSAGE CAPITAL LETTER HYA
104BB; C; 104E3; # OSAGE CAPITAL LETTER I
104BC; C; 104E4; # OSAGE CAPITAL LETTER KA
104BD; C; 104E5; # OSAGE CAPITAL LETTER EHKA
104BE; C; 104E6; # OSAGE CAPITAL LETTER KYA
104BF; C; 104E7; # OSAGE CAPITAL LETTER LA
104C0; C; 104E8; # OSAGE CAPITAL LETTER MA
104C1; C; 104E9; # OSAGE CAPITAL LETTER NA
104C2; C; 104EA; # OSAGE CAPITAL LETTER O
104C3; C; 104EB; # OSAGE CAPITAL LETTER OIN
104C4; C; 104EC; # OSAGE CAPITAL LETTER PA
104C5; C; 104ED; # OSAGE CAPITAL LETTER EHPA
104C6; C; 104EE; # OSAGE CAPITAL LETTER SA
104C7; C; 104EF; # OSAGE CAPITAL LETTER SHA
104C8; C; 104F0; # OSAGE CAPITAL LETTER TA
104C9; C; 104F1; # OSAGE CAPITAL LETTER EHTA
104CA; C; 104F2; # OSAGE CAPITAL LETTER TSA
104CB; C; 104F3; # OSAGE CAPITAL LETTER EHTSA
104CC; C; 104F4; # OSAGE CAPITAL LETTER TSHA
104CD; C; 104F5; # OSAGE CAPITAL LETTER DHA
104CE; C; 104F6; # OSAGE CAPITAL LETTER U
104CF; C; 104F7; # OSAGE CAPITAL LETTER WA
104D0; C; 104F8; # OSAGE CAPITAL LETTER KHA
104D1; C; 104F9; # OSAGE CAPITAL LETTER GHA
104D2; C; 104FA; # OSAGE CAPITAL LETTER ZA
104D3; C; 104FB; # OSAGE CAPITAL LETTER ZHA
10C80; C; 10CC0; # OLD HUNGARIAN CAPITAL LETTER A
10C81; C; 10CC1; # OLD HUNGARIAN CAPITAL LETTER AA
10C82; C; 10CC2; # OLD HUNGARIAN CAPITAL LETTER EB
@ -1410,5 +1457,39 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU
118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII
118BF; C; 118DF; # WARANG CITI CAPITAL LETTER VIYO
1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF
1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI
1E902; C; 1E924; # ADLAM CAPITAL LETTER LAAM
1E903; C; 1E925; # ADLAM CAPITAL LETTER MIIM
1E904; C; 1E926; # ADLAM CAPITAL LETTER BA
1E905; C; 1E927; # ADLAM CAPITAL LETTER SINNYIIYHE
1E906; C; 1E928; # ADLAM CAPITAL LETTER PE
1E907; C; 1E929; # ADLAM CAPITAL LETTER BHE
1E908; C; 1E92A; # ADLAM CAPITAL LETTER RA
1E909; C; 1E92B; # ADLAM CAPITAL LETTER E
1E90A; C; 1E92C; # ADLAM CAPITAL LETTER FA
1E90B; C; 1E92D; # ADLAM CAPITAL LETTER I
1E90C; C; 1E92E; # ADLAM CAPITAL LETTER O
1E90D; C; 1E92F; # ADLAM CAPITAL LETTER DHA
1E90E; C; 1E930; # ADLAM CAPITAL LETTER YHE
1E90F; C; 1E931; # ADLAM CAPITAL LETTER WAW
1E910; C; 1E932; # ADLAM CAPITAL LETTER NUN
1E911; C; 1E933; # ADLAM CAPITAL LETTER KAF
1E912; C; 1E934; # ADLAM CAPITAL LETTER YA
1E913; C; 1E935; # ADLAM CAPITAL LETTER U
1E914; C; 1E936; # ADLAM CAPITAL LETTER JIIM
1E915; C; 1E937; # ADLAM CAPITAL LETTER CHI
1E916; C; 1E938; # ADLAM CAPITAL LETTER HA
1E917; C; 1E939; # ADLAM CAPITAL LETTER QAAF
1E918; C; 1E93A; # ADLAM CAPITAL LETTER GA
1E919; C; 1E93B; # ADLAM CAPITAL LETTER NYA
1E91A; C; 1E93C; # ADLAM CAPITAL LETTER TU
1E91B; C; 1E93D; # ADLAM CAPITAL LETTER NHA
1E91C; C; 1E93E; # ADLAM CAPITAL LETTER VA
1E91D; C; 1E93F; # ADLAM CAPITAL LETTER KHA
1E91E; C; 1E940; # ADLAM CAPITAL LETTER GBE
1E91F; C; 1E941; # ADLAM CAPITAL LETTER ZAL
1E920; C; 1E942; # ADLAM CAPITAL LETTER KPO
1E921; C; 1E943; # ADLAM CAPITAL LETTER SHA
#
# EOF

View File

@ -1,10 +1,11 @@
# DerivedAge-8.0.0.txt
# Date: 2015-02-13, 13:30:18 GMT [MD]
# DerivedAge-10.0.0.txt
# Date: 2017-04-14, 05:40:18 GMT
# © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Unicode Character Database: Derived Property Data
# This file shows when various code points were first assigned in Unicode.
@ -12,7 +13,7 @@
# Notes:
#
# - The term 'assigned' means that a previously reserved code point was assigned
# to be a character (graphic, format, control, or private-use);
# to be a character (graphic, format, control, or private-use);
# a noncharacter code point; or a surrogate code point.
# For more information, see The Unicode Standard Section 2.4
#
@ -1535,4 +1536,115 @@ FE2E..FE2F ; 8.0 # [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYRILL
# Total code points: 7716
# ================================================
# Age=V9_0
# Newly assigned in Unicode 9.0.0 (June, 2016)
08B6..08BD ; 9.0 # [8] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER AFRICAN NOON
08D4..08E1 ; 9.0 # [14] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH SIGN SAFHA
08E2 ; 9.0 # ARABIC DISPUTED END OF AYAH
0C80 ; 9.0 # KANNADA SIGN SPACING CANDRABINDU
0D4F ; 9.0 # MALAYALAM SIGN PARA
0D54..0D56 ; 9.0 # [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL
0D58..0D5E ; 9.0 # [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH
0D76..0D78 ; 9.0 # [3] MALAYALAM FRACTION ONE SIXTEENTH..MALAYALAM FRACTION THREE SIXTEENTHS
1C80..1C88 ; 9.0 # [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
1DFB ; 9.0 # COMBINING DELETION MARK
23FB..23FE ; 9.0 # [4] POWER SYMBOL..POWER SLEEP SYMBOL
2E43..2E44 ; 9.0 # [2] DASH WITH LEFT UPTURN..DOUBLE SUSPENSION MARK
A7AE ; 9.0 # LATIN CAPITAL LETTER SMALL CAPITAL I
A8C5 ; 9.0 # SAURASHTRA SIGN CANDRABINDU
1018D..1018E ; 9.0 # [2] GREEK INDICTION SIGN..NOMISMA SIGN
104B0..104D3 ; 9.0 # [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA
104D8..104FB ; 9.0 # [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA
1123E ; 9.0 # KHOJKI SIGN SUKUN
11400..11459 ; 9.0 # [90] NEWA LETTER A..NEWA DIGIT NINE
1145B ; 9.0 # NEWA PLACEHOLDER MARK
1145D ; 9.0 # NEWA INSERTION SIGN
11660..1166C ; 9.0 # [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT
11C00..11C08 ; 9.0 # [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
11C0A..11C36 ; 9.0 # [45] BHAIKSUKI LETTER E..BHAIKSUKI VOWEL SIGN VOCALIC L
11C38..11C45 ; 9.0 # [14] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI GAP FILLER-2
11C50..11C6C ; 9.0 # [29] BHAIKSUKI DIGIT ZERO..BHAIKSUKI HUNDREDS UNIT MARK
11C70..11C8F ; 9.0 # [32] MARCHEN HEAD MARK..MARCHEN LETTER A
11C92..11CA7 ; 9.0 # [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA
11CA9..11CB6 ; 9.0 # [14] MARCHEN SUBJOINED LETTER YA..MARCHEN SIGN CANDRABINDU
16FE0 ; 9.0 # TANGUT ITERATION MARK
17000..187EC ; 9.0 # [6125] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187EC
18800..18AF2 ; 9.0 # [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755
1E000..1E006 ; 9.0 # [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
1E008..1E018 ; 9.0 # [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
1E01B..1E021 ; 9.0 # [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
1E023..1E024 ; 9.0 # [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
1E026..1E02A ; 9.0 # [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
1E900..1E94A ; 9.0 # [75] ADLAM CAPITAL LETTER ALIF..ADLAM NUKTA
1E950..1E959 ; 9.0 # [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
1E95E..1E95F ; 9.0 # [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK
1F19B..1F1AC ; 9.0 # [18] SQUARED THREE D..SQUARED VOD
1F23B ; 9.0 # SQUARED CJK UNIFIED IDEOGRAPH-914D
1F57A ; 9.0 # MAN DANCING
1F5A4 ; 9.0 # BLACK HEART
1F6D1..1F6D2 ; 9.0 # [2] OCTAGONAL SIGN..SHOPPING TROLLEY
1F6F4..1F6F6 ; 9.0 # [3] SCOOTER..CANOE
1F919..1F91E ; 9.0 # [6] CALL ME HAND..HAND WITH INDEX AND MIDDLE FINGERS CROSSED
1F920..1F927 ; 9.0 # [8] FACE WITH COWBOY HAT..SNEEZING FACE
1F930 ; 9.0 # PREGNANT WOMAN
1F933..1F93E ; 9.0 # [12] SELFIE..HANDBALL
1F940..1F94B ; 9.0 # [12] WILTED FLOWER..MARTIAL ARTS UNIFORM
1F950..1F95E ; 9.0 # [15] CROISSANT..PANCAKES
1F985..1F991 ; 9.0 # [13] EAGLE..SQUID
# Total code points: 7500
# ================================================
# Age=V10_0
# Newly assigned in Unicode 10.0.0 (June, 2017)
0860..086A ; 10.0 # [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
09FC..09FD ; 10.0 # [2] BENGALI LETTER VEDIC ANUSVARA..BENGALI ABBREVIATION SIGN
0AFA..0AFF ; 10.0 # [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
0D00 ; 10.0 # MALAYALAM SIGN COMBINING ANUSVARA ABOVE
0D3B..0D3C ; 10.0 # [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
1CF7 ; 10.0 # VEDIC SIGN ATIKRAMA
1DF6..1DF9 ; 10.0 # [4] COMBINING KAVYKA ABOVE RIGHT..COMBINING WIDE INVERTED BRIDGE BELOW
20BF ; 10.0 # BITCOIN SIGN
23FF ; 10.0 # OBSERVER EYE SYMBOL
2BD2 ; 10.0 # GROUP MARK
2E45..2E49 ; 10.0 # [5] INVERTED LOW KAVYKA..DOUBLE STACKED COMMA
312E ; 10.0 # BOPOMOFO LETTER O WITH DOT ABOVE
9FD6..9FEA ; 10.0 # [21] CJK UNIFIED IDEOGRAPH-9FD6..CJK UNIFIED IDEOGRAPH-9FEA
1032D..1032F ; 10.0 # [3] OLD ITALIC LETTER YE..OLD ITALIC LETTER SOUTHERN TSE
11A00..11A47 ; 10.0 # [72] ZANABAZAR SQUARE LETTER A..ZANABAZAR SQUARE SUBJOINER
11A50..11A83 ; 10.0 # [52] SOYOMBO LETTER A..SOYOMBO LETTER KSSA
11A86..11A9C ; 10.0 # [23] SOYOMBO CLUSTER-INITIAL LETTER RA..SOYOMBO MARK DOUBLE SHAD
11A9E..11AA2 ; 10.0 # [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2
11D00..11D06 ; 10.0 # [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E
11D08..11D09 ; 10.0 # [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O
11D0B..11D36 ; 10.0 # [44] MASARAM GONDI LETTER AU..MASARAM GONDI VOWEL SIGN VOCALIC R
11D3A ; 10.0 # MASARAM GONDI VOWEL SIGN E
11D3C..11D3D ; 10.0 # [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O
11D3F..11D47 ; 10.0 # [9] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI RA-KARA
11D50..11D59 ; 10.0 # [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
16FE1 ; 10.0 # NUSHU ITERATION MARK
1B002..1B11E ; 10.0 # [285] HENTAIGANA LETTER A-1..HENTAIGANA LETTER N-MU-MO-2
1B170..1B2FB ; 10.0 # [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
1F260..1F265 ; 10.0 # [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI
1F6D3..1F6D4 ; 10.0 # [2] STUPA..PAGODA
1F6F7..1F6F8 ; 10.0 # [2] SLED..FLYING SAUCER
1F900..1F90B ; 10.0 # [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT
1F91F ; 10.0 # I LOVE YOU HAND SIGN
1F928..1F92F ; 10.0 # [8] FACE WITH ONE EYEBROW RAISED..SHOCKED FACE WITH EXPLODING HEAD
1F931..1F932 ; 10.0 # [2] BREAST-FEEDING..PALMS UP TOGETHER
1F94C ; 10.0 # CURLING STONE
1F95F..1F96B ; 10.0 # [13] DUMPLING..CANNED FOOD
1F992..1F997 ; 10.0 # [6] GIRAFFE FACE..CRICKET
1F9D0..1F9E6 ; 10.0 # [23] FACE WITH MONOCLE..SOCKS
2CEB0..2EBE0 ; 10.0 # [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
# Total code points: 8518
# EOF

View File

@ -1,10 +1,11 @@
# DerivedNormalizationProps-8.0.0.txt
# Date: 2015-02-13, 13:30:23 GMT [MD]
# DerivedNormalizationProps-10.0.0.txt
# Date: 2017-02-14, 04:26:07 GMT
# © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
@ -1679,12 +1680,12 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI
1F16A..1F16B ; NFKD_QC; N # So [2] RAISED MC SIGN..RAISED MD SIGN
1F190 ; NFKD_QC; N # So SQUARE DJ
1F200..1F202 ; NFKD_QC; N # So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA
1F210..1F23A ; NFKD_QC; N # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6
1F210..1F23B ; NFKD_QC; N # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
1F240..1F248 ; NFKD_QC; N # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
1F250..1F251 ; NFKD_QC; N # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
2F800..2FA1D ; NFKD_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
# Total code points: 16893
# Total code points: 16894
# ================================================
@ -2082,12 +2083,12 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI
1F16A..1F16B ; NFKC_QC; N # So [2] RAISED MC SIGN..RAISED MD SIGN
1F190 ; NFKC_QC; N # So SQUARE DJ
1F200..1F202 ; NFKC_QC; N # So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA
1F210..1F23A ; NFKC_QC; N # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6
1F210..1F23B ; NFKC_QC; N # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
1F240..1F248 ; NFKC_QC; N # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
1F250..1F251 ; NFKC_QC; N # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
2F800..2FA1D ; NFKC_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
# Total code points: 4793
# Total code points: 4794
# ================================================
@ -3513,6 +3514,14 @@ FFE3 ; Expands_On_NFKC # Sk FULLWIDTH MACRON
17B4..17B5 ; NFKC_CF; # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
180B..180D ; NFKC_CF; # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
180E ; NFKC_CF; # Cf MONGOLIAN VOWEL SEPARATOR
1C80 ; NFKC_CF; 0432 # L& CYRILLIC SMALL LETTER ROUNDED VE
1C81 ; NFKC_CF; 0434 # L& CYRILLIC SMALL LETTER LONG-LEGGED DE
1C82 ; NFKC_CF; 043E # L& CYRILLIC SMALL LETTER NARROW O
1C83 ; NFKC_CF; 0441 # L& CYRILLIC SMALL LETTER WIDE ES
1C84..1C85 ; NFKC_CF; 0442 # L& [2] CYRILLIC SMALL LETTER TALL TE..CYRILLIC SMALL LETTER THREE-LEGGED TE
1C86 ; NFKC_CF; 044A # L& CYRILLIC SMALL LETTER TALL HARD SIGN
1C87 ; NFKC_CF; 0463 # L& CYRILLIC SMALL LETTER TALL YAT
1C88 ; NFKC_CF; A64B # L& CYRILLIC SMALL LETTER UNBLENDED UK
1D2C ; NFKC_CF; 0061 # Lm MODIFIER LETTER CAPITAL A
1D2D ; NFKC_CF; 00E6 # Lm MODIFIER LETTER CAPITAL AE
1D2E ; NFKC_CF; 0062 # Lm MODIFIER LETTER CAPITAL B
@ -5263,6 +5272,7 @@ A7AA ; NFKC_CF; 0266 # L& LATIN CAPITAL LETTER H WITH H
A7AB ; NFKC_CF; 025C # L& LATIN CAPITAL LETTER REVERSED OPEN E
A7AC ; NFKC_CF; 0261 # L& LATIN CAPITAL LETTER SCRIPT G
A7AD ; NFKC_CF; 026C # L& LATIN CAPITAL LETTER L WITH BELT
A7AE ; NFKC_CF; 026A # L& LATIN CAPITAL LETTER SMALL CAPITAL I
A7B0 ; NFKC_CF; 029E # L& LATIN CAPITAL LETTER TURNED K
A7B1 ; NFKC_CF; 0287 # L& LATIN CAPITAL LETTER TURNED T
A7B2 ; NFKC_CF; 029D # L& LATIN CAPITAL LETTER J WITH CROSSED-TAIL
@ -6731,6 +6741,42 @@ FFF0..FFF8 ; NFKC_CF; # Cn [9] <reserved-FFF0>..<reserved-FF
10425 ; NFKC_CF; 1044D # L& DESERET CAPITAL LETTER ENG
10426 ; NFKC_CF; 1044E # L& DESERET CAPITAL LETTER OI
10427 ; NFKC_CF; 1044F # L& DESERET CAPITAL LETTER EW
104B0 ; NFKC_CF; 104D8 # L& OSAGE CAPITAL LETTER A
104B1 ; NFKC_CF; 104D9 # L& OSAGE CAPITAL LETTER AI
104B2 ; NFKC_CF; 104DA # L& OSAGE CAPITAL LETTER AIN
104B3 ; NFKC_CF; 104DB # L& OSAGE CAPITAL LETTER AH
104B4 ; NFKC_CF; 104DC # L& OSAGE CAPITAL LETTER BRA
104B5 ; NFKC_CF; 104DD # L& OSAGE CAPITAL LETTER CHA
104B6 ; NFKC_CF; 104DE # L& OSAGE CAPITAL LETTER EHCHA
104B7 ; NFKC_CF; 104DF # L& OSAGE CAPITAL LETTER E
104B8 ; NFKC_CF; 104E0 # L& OSAGE CAPITAL LETTER EIN
104B9 ; NFKC_CF; 104E1 # L& OSAGE CAPITAL LETTER HA
104BA ; NFKC_CF; 104E2 # L& OSAGE CAPITAL LETTER HYA
104BB ; NFKC_CF; 104E3 # L& OSAGE CAPITAL LETTER I
104BC ; NFKC_CF; 104E4 # L& OSAGE CAPITAL LETTER KA
104BD ; NFKC_CF; 104E5 # L& OSAGE CAPITAL LETTER EHKA
104BE ; NFKC_CF; 104E6 # L& OSAGE CAPITAL LETTER KYA
104BF ; NFKC_CF; 104E7 # L& OSAGE CAPITAL LETTER LA
104C0 ; NFKC_CF; 104E8 # L& OSAGE CAPITAL LETTER MA
104C1 ; NFKC_CF; 104E9 # L& OSAGE CAPITAL LETTER NA
104C2 ; NFKC_CF; 104EA # L& OSAGE CAPITAL LETTER O
104C3 ; NFKC_CF; 104EB # L& OSAGE CAPITAL LETTER OIN
104C4 ; NFKC_CF; 104EC # L& OSAGE CAPITAL LETTER PA
104C5 ; NFKC_CF; 104ED # L& OSAGE CAPITAL LETTER EHPA
104C6 ; NFKC_CF; 104EE # L& OSAGE CAPITAL LETTER SA
104C7 ; NFKC_CF; 104EF # L& OSAGE CAPITAL LETTER SHA
104C8 ; NFKC_CF; 104F0 # L& OSAGE CAPITAL LETTER TA
104C9 ; NFKC_CF; 104F1 # L& OSAGE CAPITAL LETTER EHTA
104CA ; NFKC_CF; 104F2 # L& OSAGE CAPITAL LETTER TSA
104CB ; NFKC_CF; 104F3 # L& OSAGE CAPITAL LETTER EHTSA
104CC ; NFKC_CF; 104F4 # L& OSAGE CAPITAL LETTER TSHA
104CD ; NFKC_CF; 104F5 # L& OSAGE CAPITAL LETTER DHA
104CE ; NFKC_CF; 104F6 # L& OSAGE CAPITAL LETTER U
104CF ; NFKC_CF; 104F7 # L& OSAGE CAPITAL LETTER WA
104D0 ; NFKC_CF; 104F8 # L& OSAGE CAPITAL LETTER KHA
104D1 ; NFKC_CF; 104F9 # L& OSAGE CAPITAL LETTER GHA
104D2 ; NFKC_CF; 104FA # L& OSAGE CAPITAL LETTER ZA
104D3 ; NFKC_CF; 104FB # L& OSAGE CAPITAL LETTER ZHA
10C80 ; NFKC_CF; 10CC0 # L& OLD HUNGARIAN CAPITAL LETTER A
10C81 ; NFKC_CF; 10CC1 # L& OLD HUNGARIAN CAPITAL LETTER AA
10C82 ; NFKC_CF; 10CC2 # L& OLD HUNGARIAN CAPITAL LETTER EB
@ -7819,6 +7865,40 @@ FFF0..FFF8 ; NFKC_CF; # Cn [9] <reserved-FFF0>..<reserved-FF
1D7FD ; NFKC_CF; 0037 # Nd MATHEMATICAL MONOSPACE DIGIT SEVEN
1D7FE ; NFKC_CF; 0038 # Nd MATHEMATICAL MONOSPACE DIGIT EIGHT
1D7FF ; NFKC_CF; 0039 # Nd MATHEMATICAL MONOSPACE DIGIT NINE
1E900 ; NFKC_CF; 1E922 # L& ADLAM CAPITAL LETTER ALIF
1E901 ; NFKC_CF; 1E923 # L& ADLAM CAPITAL LETTER DAALI
1E902 ; NFKC_CF; 1E924 # L& ADLAM CAPITAL LETTER LAAM
1E903 ; NFKC_CF; 1E925 # L& ADLAM CAPITAL LETTER MIIM
1E904 ; NFKC_CF; 1E926 # L& ADLAM CAPITAL LETTER BA
1E905 ; NFKC_CF; 1E927 # L& ADLAM CAPITAL LETTER SINNYIIYHE
1E906 ; NFKC_CF; 1E928 # L& ADLAM CAPITAL LETTER PE
1E907 ; NFKC_CF; 1E929 # L& ADLAM CAPITAL LETTER BHE
1E908 ; NFKC_CF; 1E92A # L& ADLAM CAPITAL LETTER RA
1E909 ; NFKC_CF; 1E92B # L& ADLAM CAPITAL LETTER E
1E90A ; NFKC_CF; 1E92C # L& ADLAM CAPITAL LETTER FA
1E90B ; NFKC_CF; 1E92D # L& ADLAM CAPITAL LETTER I
1E90C ; NFKC_CF; 1E92E # L& ADLAM CAPITAL LETTER O
1E90D ; NFKC_CF; 1E92F # L& ADLAM CAPITAL LETTER DHA
1E90E ; NFKC_CF; 1E930 # L& ADLAM CAPITAL LETTER YHE
1E90F ; NFKC_CF; 1E931 # L& ADLAM CAPITAL LETTER WAW
1E910 ; NFKC_CF; 1E932 # L& ADLAM CAPITAL LETTER NUN
1E911 ; NFKC_CF; 1E933 # L& ADLAM CAPITAL LETTER KAF
1E912 ; NFKC_CF; 1E934 # L& ADLAM CAPITAL LETTER YA
1E913 ; NFKC_CF; 1E935 # L& ADLAM CAPITAL LETTER U
1E914 ; NFKC_CF; 1E936 # L& ADLAM CAPITAL LETTER JIIM
1E915 ; NFKC_CF; 1E937 # L& ADLAM CAPITAL LETTER CHI
1E916 ; NFKC_CF; 1E938 # L& ADLAM CAPITAL LETTER HA
1E917 ; NFKC_CF; 1E939 # L& ADLAM CAPITAL LETTER QAAF
1E918 ; NFKC_CF; 1E93A # L& ADLAM CAPITAL LETTER GA
1E919 ; NFKC_CF; 1E93B # L& ADLAM CAPITAL LETTER NYA
1E91A ; NFKC_CF; 1E93C # L& ADLAM CAPITAL LETTER TU
1E91B ; NFKC_CF; 1E93D # L& ADLAM CAPITAL LETTER NHA
1E91C ; NFKC_CF; 1E93E # L& ADLAM CAPITAL LETTER VA
1E91D ; NFKC_CF; 1E93F # L& ADLAM CAPITAL LETTER KHA
1E91E ; NFKC_CF; 1E940 # L& ADLAM CAPITAL LETTER GBE
1E91F ; NFKC_CF; 1E941 # L& ADLAM CAPITAL LETTER ZAL
1E920 ; NFKC_CF; 1E942 # L& ADLAM CAPITAL LETTER KPO
1E921 ; NFKC_CF; 1E943 # L& ADLAM CAPITAL LETTER SHA
1EE00 ; NFKC_CF; 0627 # Lo ARABIC MATHEMATICAL ALEF
1EE01 ; NFKC_CF; 0628 # Lo ARABIC MATHEMATICAL BEH
1EE02 ; NFKC_CF; 062C # Lo ARABIC MATHEMATICAL JEEM
@ -8083,6 +8163,7 @@ FFF0..FFF8 ; NFKC_CF; # Cn [9] <reserved-FFF0>..<reserved-FF
1F238 ; NFKC_CF; 7533 # So SQUARED CJK UNIFIED IDEOGRAPH-7533
1F239 ; NFKC_CF; 5272 # So SQUARED CJK UNIFIED IDEOGRAPH-5272
1F23A ; NFKC_CF; 55B6 # So SQUARED CJK UNIFIED IDEOGRAPH-55B6
1F23B ; NFKC_CF; 914D # So SQUARED CJK UNIFIED IDEOGRAPH-914D
1F240 ; NFKC_CF; 3014 672C 3015 # So TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C
1F241 ; NFKC_CF; 3014 4E09 3015 # So TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-4E09
1F242 ; NFKC_CF; 3014 4E8C 3015 # So TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-4E8C
@ -8634,7 +8715,7 @@ E0080..E00FF ; NFKC_CF; # Cn [128] <reserved-E0080>..<reserved-E
E0100..E01EF ; NFKC_CF; # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
E01F0..E0FFF ; NFKC_CF; # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
# Total code points: 10146
# Total code points: 10227
# ================================================
@ -8972,6 +9053,7 @@ E01F0..E0FFF ; NFKC_CF; # Cn [3600] <reserved-E01F0>..<reserved-
17B4..17B5 ; Changes_When_NFKC_Casefolded # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
180B..180D ; Changes_When_NFKC_Casefolded # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
180E ; Changes_When_NFKC_Casefolded # Cf MONGOLIAN VOWEL SEPARATOR
1C80..1C88 ; Changes_When_NFKC_Casefolded # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
1D2C..1D2E ; Changes_When_NFKC_Casefolded # Lm [3] MODIFIER LETTER CAPITAL A..MODIFIER LETTER CAPITAL B
1D30..1D3A ; Changes_When_NFKC_Casefolded # Lm [11] MODIFIER LETTER CAPITAL D..MODIFIER LETTER CAPITAL N
1D3C..1D4D ; Changes_When_NFKC_Casefolded # Lm [18] MODIFIER LETTER CAPITAL O..MODIFIER LETTER SMALL G
@ -9389,7 +9471,7 @@ A7A2 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER K W
A7A4 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE
A7A6 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE
A7A8 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE
A7AA..A7AD ; Changes_When_NFKC_Casefolded # L& [4] LATIN CAPITAL LETTER H WITH HOOK..LATIN CAPITAL LETTER L WITH BELT
A7AA..A7AE ; Changes_When_NFKC_Casefolded # L& [5] LATIN CAPITAL LETTER H WITH HOOK..LATIN CAPITAL LETTER SMALL CAPITAL I
A7B0..A7B4 ; Changes_When_NFKC_Casefolded # L& [5] LATIN CAPITAL LETTER TURNED K..LATIN CAPITAL LETTER BETA
A7B6 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER OMEGA
A7F8..A7F9 ; Changes_When_NFKC_Casefolded # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
@ -9519,6 +9601,7 @@ FFE9..FFEC ; Changes_When_NFKC_Casefolded # Sm [4] HALFWIDTH LEFTWARDS ARRO
FFED..FFEE ; Changes_When_NFKC_Casefolded # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE
FFF0..FFF8 ; Changes_When_NFKC_Casefolded # Cn [9] <reserved-FFF0>..<reserved-FFF8>
10400..10427 ; Changes_When_NFKC_Casefolded # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW
104B0..104D3 ; Changes_When_NFKC_Casefolded # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA
10C80..10CB2 ; Changes_When_NFKC_Casefolded # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US
118A0..118BF ; Changes_When_NFKC_Casefolded # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO
1BCA0..1BCA3 ; Changes_When_NFKC_Casefolded # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
@ -9566,6 +9649,7 @@ FFF0..FFF8 ; Changes_When_NFKC_Casefolded # Cn [9] <reserved-FFF0>..<reserv
1D7C3 ; Changes_When_NFKC_Casefolded # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
1D7C4..1D7CB ; Changes_When_NFKC_Casefolded # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
1D7CE..1D7FF ; Changes_When_NFKC_Casefolded # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
1E900..1E921 ; Changes_When_NFKC_Casefolded # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA
1EE00..1EE03 ; Changes_When_NFKC_Casefolded # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL
1EE05..1EE1F ; Changes_When_NFKC_Casefolded # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF
1EE21..1EE22 ; Changes_When_NFKC_Casefolded # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM
@ -9605,7 +9689,7 @@ FFF0..FFF8 ; Changes_When_NFKC_Casefolded # Cn [9] <reserved-FFF0>..<reserv
1F16A..1F16B ; Changes_When_NFKC_Casefolded # So [2] RAISED MC SIGN..RAISED MD SIGN
1F190 ; Changes_When_NFKC_Casefolded # So SQUARE DJ
1F200..1F202 ; Changes_When_NFKC_Casefolded # So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA
1F210..1F23A ; Changes_When_NFKC_Casefolded # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6
1F210..1F23B ; Changes_When_NFKC_Casefolded # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
1F240..1F248 ; Changes_When_NFKC_Casefolded # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
1F250..1F251 ; Changes_When_NFKC_Casefolded # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
2F800..2FA1D ; Changes_When_NFKC_Casefolded # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
@ -9617,6 +9701,6 @@ E0080..E00FF ; Changes_When_NFKC_Casefolded # Cn [128] <reserved-E0080>..<reser
E0100..E01EF ; Changes_When_NFKC_Casefolded # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
E01F0..E0FFF ; Changes_When_NFKC_Casefolded # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
# Total code points: 10146
# Total code points: 10227
# EOF

View File

@ -1,12 +1,14 @@
# NormalizationCorrections-8.0.0.txt
# Date: 2015-03-07, 01:30:00 GMT [KW, LI]
# NormalizationCorrections-10.0.0.txt
# Date: 2017-04-13, 01:00:00 GMT [KW, LI]
# © 2017 Unicode®, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see http://www.unicode.org/reports/tr44/
#
# This file is a normative contributory data file in the
# Unicode Character Database.
#
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# The normalization stability policy of the Unicode Consortium
# ordinarily precludes any change to the decomposition
# for any character, once established in a relevant version
@ -25,7 +27,7 @@
#
# Currently this list has exactly six entries in it, one for the
# typo found and corrected in Corrigendum #3, and five for
# the typos and misidentifications found and corrected in
# the typos and misidentifications found and corrected in
# Corrigendum #4. All efforts
# will be made to keep the entries limited to just those fixes.
#
@ -35,7 +37,7 @@
# Field 2: Corrected decomposition
# Field 3: Version of Unicode for which the correction was
# entered into UnicodeData.txt, in n.n.n format.
# Comment: Indicates the Unicode Corrigendum which documents
# Comment: Indicates the Unicode Corrigendum which documents
# the correction
#
# For more information, see UAX #15, Unicode Normalization Forms.

View File

@ -1,10 +1,11 @@
# Scripts-8.0.0.txt
# Date: 2015-03-11, 22:29:42 GMT [MD]
# Scripts-10.0.0.txt
# Date: 2017-03-11, 06:40:37 GMT
# © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
# For documentation, see http://www.unicode.org/reports/tr44/
# For more information, see:
# UAX #24, Unicode Script Property: http://www.unicode.org/reports/tr24/
# Especially the sections:
@ -92,10 +93,10 @@
0605 ; Common # Cf ARABIC NUMBER MARK ABOVE
060C ; Common # Po ARABIC COMMA
061B ; Common # Po ARABIC SEMICOLON
061C ; Common # Cf ARABIC LETTER MARK
061F ; Common # Po ARABIC QUESTION MARK
0640 ; Common # Lm ARABIC TATWEEL
06DD ; Common # Cf ARABIC END OF AYAH
08E2 ; Common # Cf ARABIC DISPUTED END OF AYAH
0964..0965 ; Common # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
0E3F ; Common # Sc THAI CURRENCY SYMBOL BAHT
0FD5..0FD8 ; Common # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS
@ -110,6 +111,7 @@
1CEE..1CF1 ; Common # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA
1CF2..1CF3 ; Common # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA
1CF5..1CF6 ; Common # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA
1CF7 ; Common # Mc VEDIC SIGN ATIKRAMA
2000..200A ; Common # Zs [11] EN QUAD..HAIR SPACE
200B ; Common # Cf ZERO WIDTH SPACE
200E..200F ; Common # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
@ -153,7 +155,7 @@
208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS
208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS
20A0..20BE ; Common # Sc [31] EURO-CURRENCY SIGN..LARI SIGN
20A0..20BF ; Common # Sc [32] EURO-CURRENCY SIGN..BITCOIN SIGN
2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
2102 ; Common # L& DOUBLE-STRUCK CAPITAL C
2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA
@ -223,8 +225,7 @@
239B..23B3 ; Common # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
23B4..23DB ; Common # So [40] TOP SQUARE BRACKET..FUSE
23DC..23E1 ; Common # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
23E2..23FA ; Common # So [25] WHITE TRAPEZIUM..BLACK CIRCLE FOR RECORD
2400..2426 ; Common # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
23E2..2426 ; Common # So [69] WHITE TRAPEZIUM..SYMBOL FOR SUBSTITUTE FORM TWO
2440..244A ; Common # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
2460..249B ; Common # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
249C..24E9 ; Common # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
@ -309,7 +310,7 @@
2B76..2B95 ; Common # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW
2B98..2BB9 ; Common # So [34] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..UP ARROWHEAD IN A RECTANGLE BOX
2BBD..2BC8 ; Common # So [12] BALLOT BOX WITH LIGHT X..BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED
2BCA..2BD1 ; Common # So [8] TOP HALF BLACK CIRCLE..UNCERTAINTY SIGN
2BCA..2BD2 ; Common # So [9] TOP HALF BLACK CIRCLE..GROUP MARK
2BEC..2BEF ; Common # So [4] LEFTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS..DOWNWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS
2E00..2E01 ; Common # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
2E02 ; Common # Pi LEFT SUBSTITUTION BRACKET
@ -348,6 +349,7 @@
2E40 ; Common # Pd DOUBLE HYPHEN
2E41 ; Common # Po REVERSED COMMA
2E42 ; Common # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
2E43..2E49 ; Common # Po [7] DASH WITH LEFT UPTURN..DOUBLE STACKED COMMA
2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
3000 ; Common # Zs IDEOGRAPHIC SPACE
3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
@ -572,19 +574,18 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
1F100..1F10C ; Common # No [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
1F110..1F12E ; Common # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
1F130..1F16B ; Common # So [60] SQUARED LATIN CAPITAL LETTER A..RAISED MD SIGN
1F170..1F19A ; Common # So [43] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS
1F170..1F1AC ; Common # So [61] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VOD
1F1E6..1F1FF ; Common # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
1F201..1F202 ; Common # So [2] SQUARED KATAKANA KOKO..SQUARED KATAKANA SA
1F210..1F23A ; Common # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6
1F210..1F23B ; Common # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
1F240..1F248 ; Common # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
1F250..1F251 ; Common # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
1F260..1F265 ; Common # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI
1F300..1F3FA ; Common # So [251] CYCLONE..AMPHORA
1F3FB..1F3FF ; Common # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
1F400..1F579 ; Common # So [378] RAT..JOYSTICK
1F57B..1F5A3 ; Common # So [41] LEFT HAND TELEPHONE RECEIVER..BLACK DOWN POINTING BACKHAND INDEX
1F5A5..1F6D0 ; Common # So [300] DESKTOP COMPUTER..PLACE OF WORSHIP
1F400..1F6D4 ; Common # So [725] RAT..PAGODA
1F6E0..1F6EC ; Common # So [13] HAMMER AND WRENCH..AIRPLANE ARRIVING
1F6F0..1F6F3 ; Common # So [4] SATELLITE..PASSENGER SHIP
1F6F0..1F6F8 ; Common # So [9] SATELLITE..FLYING SAUCER
1F700..1F773 ; Common # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
1F780..1F7D4 ; Common # So [85] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..HEAVY TWELVE POINTED PINWHEEL STAR
1F800..1F80B ; Common # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
@ -592,13 +593,17 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
1F850..1F859 ; Common # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
1F860..1F887 ; Common # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
1F890..1F8AD ; Common # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
1F910..1F918 ; Common # So [9] ZIPPER-MOUTH FACE..SIGN OF THE HORNS
1F980..1F984 ; Common # So [5] CRAB..UNICORN FACE
1F900..1F90B ; Common # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT
1F910..1F93E ; Common # So [47] ZIPPER-MOUTH FACE..HANDBALL
1F940..1F94C ; Common # So [13] WILTED FLOWER..CURLING STONE
1F950..1F96B ; Common # So [28] CROISSANT..CANNED FOOD
1F980..1F997 ; Common # So [24] CRAB..CRICKET
1F9C0 ; Common # So CHEESE WEDGE
1F9D0..1F9E6 ; Common # So [23] FACE WITH MONOCLE..SOCKS
E0001 ; Common # Cf LANGUAGE TAG
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
# Total code points: 7179
# Total code points: 7363
# ================================================
@ -641,7 +646,7 @@ A770 ; Latin # Lm MODIFIER LETTER US
A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
A78F ; Latin # Lo LATIN LETTER SINOLOGICAL DOT
A790..A7AD ; Latin # L& [30] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER L WITH BELT
A790..A7AE ; Latin # L& [31] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER SMALL CAPITAL I
A7B0..A7B7 ; Latin # L& [8] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER OMEGA
A7F7 ; Latin # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I
A7F8..A7F9 ; Latin # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
@ -654,7 +659,7 @@ FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE S
FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
# Total code points: 1349
# Total code points: 1350
# ================================================
@ -708,13 +713,13 @@ AB65 ; Greek # L& GREEK LETTER SMALL CAPITAL OMEGA
10175..10178 ; Greek # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN
10179..10189 ; Greek # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN
1018A..1018B ; Greek # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN
1018C ; Greek # So GREEK SINUSOID SIGN
1018C..1018E ; Greek # So [3] GREEK SINUSOID SIGN..NOMISMA SIGN
101A0 ; Greek # So GREEK SYMBOL TAU RHO
1D200..1D241 ; Greek # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54
1D242..1D244 ; Greek # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
1D245 ; Greek # So GREEK MUSICAL LEIMMA
# Total code points: 516
# Total code points: 518
# ================================================
@ -724,6 +729,7 @@ AB65 ; Greek # L& GREEK LETTER SMALL CAPITAL OMEGA
0487 ; Cyrillic # Mn COMBINING CYRILLIC POKRYTIE
0488..0489 ; Cyrillic # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
048A..052F ; Cyrillic # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER
1C80..1C88 ; Cyrillic # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
1D2B ; Cyrillic # L& CYRILLIC LETTER SMALL CAPITAL EL
1D78 ; Cyrillic # Lm MODIFIER LETTER CYRILLIC EN
2DE0..2DFF ; Cyrillic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
@ -740,7 +746,7 @@ A69C..A69D ; Cyrillic # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER
A69E..A69F ; Cyrillic # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E
FE2E..FE2F ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF
# Total code points: 434
# Total code points: 443
# ================================================
@ -791,6 +797,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
060D ; Arabic # Po ARABIC DATE SEPARATOR
060E..060F ; Arabic # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
0610..061A ; Arabic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
061C ; Arabic # Cf ARABIC LETTER MARK
061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK
0620..063F ; Arabic # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
@ -815,6 +822,8 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V
0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
08A0..08B4 ; Arabic # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW
08B6..08BD ; Arabic # Lo [8] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER AFRICAN NOON
08D4..08E1 ; Arabic # Mn [14] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH SIGN SAFHA
08E3..08FF ; Arabic # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA
FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
FBB2..FBC1 ; Arabic # Sk [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW
@ -862,7 +871,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL
# Total code points: 1257
# Total code points: 1280
# ================================================
@ -873,8 +882,9 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
0712..072F ; Syriac # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH
0730..074A ; Syriac # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
074D..074F ; Syriac # Lo [3] SYRIAC LETTER SOGDIAN ZHAIN..SYRIAC LETTER SOGDIAN FE
0860..086A ; Syriac # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
# Total code points: 77
# Total code points: 88
# ================================================
@ -944,8 +954,10 @@ A8FD ; Devanagari # Lo DEVANAGARI JAIN OM
09F4..09F9 ; Bengali # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN
09FA ; Bengali # So BENGALI ISSHAR
09FB ; Bengali # Sc BENGALI GANDA MARK
09FC ; Bengali # Lo BENGALI LETTER VEDIC ANUSVARA
09FD ; Bengali # Po BENGALI ABBREVIATION SIGN
# Total code points: 93
# Total code points: 95
# ================================================
@ -998,8 +1010,9 @@ A8FD ; Devanagari # Lo DEVANAGARI JAIN OM
0AF0 ; Gujarati # Po GUJARATI ABBREVIATION SIGN
0AF1 ; Gujarati # Sc GUJARATI RUPEE SIGN
0AF9 ; Gujarati # Lo GUJARATI LETTER ZHA
0AFA..0AFF ; Gujarati # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
# Total code points: 85
# Total code points: 91
# ================================================
@ -1086,6 +1099,7 @@ A8FD ; Devanagari # Lo DEVANAGARI JAIN OM
# ================================================
0C80 ; Kannada # Lo KANNADA SIGN SPACING CANDRABINDU
0C81 ; Kannada # Mn KANNADA SIGN CANDRABINDU
0C82..0C83 ; Kannada # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
0C85..0C8C ; Kannada # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
@ -1109,15 +1123,16 @@ A8FD ; Devanagari # Lo DEVANAGARI JAIN OM
0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
0CF1..0CF2 ; Kannada # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
# Total code points: 87
# Total code points: 88
# ================================================
0D01 ; Malayalam # Mn MALAYALAM SIGN CANDRABINDU
0D00..0D01 ; Malayalam # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
0D02..0D03 ; Malayalam # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
0D05..0D0C ; Malayalam # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
0D0E..0D10 ; Malayalam # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
0D12..0D3A ; Malayalam # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
0D3B..0D3C ; Malayalam # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
0D3D ; Malayalam # Lo MALAYALAM SIGN AVAGRAHA
0D3E..0D40 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
0D41..0D44 ; Malayalam # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
@ -1125,15 +1140,18 @@ A8FD ; Devanagari # Lo DEVANAGARI JAIN OM
0D4A..0D4C ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
0D4D ; Malayalam # Mn MALAYALAM SIGN VIRAMA
0D4E ; Malayalam # Lo MALAYALAM LETTER DOT REPH
0D4F ; Malayalam # So MALAYALAM SIGN PARA
0D54..0D56 ; Malayalam # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL
0D57 ; Malayalam # Mc MALAYALAM AU LENGTH MARK
0D58..0D5E ; Malayalam # No [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH
0D5F..0D61 ; Malayalam # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL
0D62..0D63 ; Malayalam # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
0D66..0D6F ; Malayalam # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
0D70..0D75 ; Malayalam # No [6] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE QUARTERS
0D70..0D78 ; Malayalam # No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS
0D79 ; Malayalam # So MALAYALAM DATE MARK
0D7A..0D7F ; Malayalam # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
# Total code points: 100
# Total code points: 117
# ================================================
@ -1436,21 +1454,24 @@ AB70..ABBF ; Cherokee # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETT
1820..1842 ; Mongolian # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
1843 ; Mongolian # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
1844..1877 ; Mongolian # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA
1880..18A8 ; Mongolian # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA
1880..1884 ; Mongolian # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA
1885..1886 ; Mongolian # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
1887..18A8 ; Mongolian # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA
18A9 ; Mongolian # Mn MONGOLIAN LETTER ALI GALI DAGALGA
18AA ; Mongolian # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA
11660..1166C ; Mongolian # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT
# Total code points: 153
# Total code points: 166
# ================================================
3041..3096 ; Hiragana # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
309D..309E ; Hiragana # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
309F ; Hiragana # Lo HIRAGANA DIGRAPH YORI
1B001 ; Hiragana # Lo HIRAGANA LETTER ARCHAIC YE
1B001..1B11E ; Hiragana # Lo [286] HIRAGANA LETTER ARCHAIC YE..HENTAIGANA LETTER N-MU-MO-2
1F200 ; Hiragana # So SQUARE HIRAGANA HOKA
# Total code points: 91
# Total code points: 376
# ================================================
@ -1469,10 +1490,10 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
# ================================================
02EA..02EB ; Bopomofo # Sk [2] MODIFIER LETTER YIN DEPARTING TONE MARK..MODIFIER LETTER YANG DEPARTING TONE MARK
3105..312D ; Bopomofo # Lo [41] BOPOMOFO LETTER B..BOPOMOFO LETTER IH
3105..312E ; Bopomofo # Lo [42] BOPOMOFO LETTER B..BOPOMOFO LETTER O WITH DOT ABOVE
31A0..31BA ; Bopomofo # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY
# Total code points: 70
# Total code points: 71
# ================================================
@ -1485,16 +1506,17 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
3038..303A ; Han # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
303B ; Han # Lm VERTICAL IDEOGRAPHIC ITERATION MARK
3400..4DB5 ; Han # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
4E00..9FD5 ; Han # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5
4E00..9FEA ; Han # Lo [20971] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEA
F900..FA6D ; Han # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Han # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Han # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
# Total code points: 81734
# Total code points: 89228
# ================================================
@ -1509,8 +1531,9 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
10300..1031F ; Old_Italic # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS
10320..10323 ; Old_Italic # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY
1032D..1032F ; Old_Italic # Lo [3] OLD ITALIC LETTER YE..OLD ITALIC LETTER SOUTHERN TSE
# Total code points: 36
# Total code points: 39
# ================================================
@ -1542,8 +1565,8 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
1CED ; Inherited # Mn VEDIC SIGN TIRYAK
1CF4 ; Inherited # Mn VEDIC TONE CANDRA ABOVE
1CF8..1CF9 ; Inherited # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
1DC0..1DF5 ; Inherited # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE
1DFC..1DFF ; Inherited # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
1DC0..1DF9 ; Inherited # Mn [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW
1DFB..1DFF ; Inherited # Mn [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200C..200D ; Inherited # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
20D0..20DC ; Inherited # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20DD..20E0 ; Inherited # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
@ -1562,7 +1585,7 @@ FE20..FE2D ; Inherited # Mn [14] COMBINING LIGATURE LEFT HALF..COMBINING CON
1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 563
# Total code points: 568
# ================================================
@ -1705,8 +1728,13 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2
2C00..2C2E ; Glagolitic # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
2C30..2C5E ; Glagolitic # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
1E000..1E006 ; Glagolitic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
1E008..1E018 ; Glagolitic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
1E01B..1E021 ; Glagolitic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
1E023..1E024 ; Glagolitic # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
1E026..1E02A ; Glagolitic # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
# Total code points: 94
# Total code points: 132
# ================================================
@ -1872,11 +1900,11 @@ A62A..A62B ; Vai # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO
A880..A881 ; Saurashtra # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
A882..A8B3 ; Saurashtra # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA
A8B4..A8C3 ; Saurashtra # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
A8C4 ; Saurashtra # Mn SAURASHTRA SIGN VIRAMA
A8C4..A8C5 ; Saurashtra # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU
A8CE..A8CF ; Saurashtra # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
A8D0..A8D9 ; Saurashtra # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
# Total code points: 81
# Total code points: 82
# ================================================
@ -2314,8 +2342,9 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
11235 ; Khojki # Mc KHOJKI SIGN VIRAMA
11236..11237 ; Khojki # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
11238..1123D ; Khojki # Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN
1123E ; Khojki # Mn KHOJKI SIGN SUKUN
# Total code points: 61
# Total code points: 62
# ================================================
@ -2536,4 +2565,129 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
# Total code points: 672
# ================================================
1E900..1E943 ; Adlam # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA
1E944..1E94A ; Adlam # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
1E950..1E959 ; Adlam # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
1E95E..1E95F ; Adlam # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK
# Total code points: 87
# ================================================
11C00..11C08 ; Bhaiksuki # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
11C0A..11C2E ; Bhaiksuki # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
11C2F ; Bhaiksuki # Mc BHAIKSUKI VOWEL SIGN AA
11C30..11C36 ; Bhaiksuki # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L
11C38..11C3D ; Bhaiksuki # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA
11C3E ; Bhaiksuki # Mc BHAIKSUKI SIGN VISARGA
11C3F ; Bhaiksuki # Mn BHAIKSUKI SIGN VIRAMA
11C40 ; Bhaiksuki # Lo BHAIKSUKI SIGN AVAGRAHA
11C41..11C45 ; Bhaiksuki # Po [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2
11C50..11C59 ; Bhaiksuki # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
11C5A..11C6C ; Bhaiksuki # No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK
# Total code points: 97
# ================================================
11C70..11C71 ; Marchen # Po [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD
11C72..11C8F ; Marchen # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A
11C92..11CA7 ; Marchen # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA
11CA9 ; Marchen # Mc MARCHEN SUBJOINED LETTER YA
11CAA..11CB0 ; Marchen # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA
11CB1 ; Marchen # Mc MARCHEN VOWEL SIGN I
11CB2..11CB3 ; Marchen # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E
11CB4 ; Marchen # Mc MARCHEN VOWEL SIGN O
11CB5..11CB6 ; Marchen # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU
# Total code points: 68
# ================================================
11400..11434 ; Newa # Lo [53] NEWA LETTER A..NEWA LETTER HA
11435..11437 ; Newa # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II
11438..1143F ; Newa # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
11440..11441 ; Newa # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU
11442..11444 ; Newa # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA
11445 ; Newa # Mc NEWA SIGN VISARGA
11446 ; Newa # Mn NEWA SIGN NUKTA
11447..1144A ; Newa # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI
1144B..1144F ; Newa # Po [5] NEWA DANDA..NEWA ABBREVIATION SIGN
11450..11459 ; Newa # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE
1145B ; Newa # Po NEWA PLACEHOLDER MARK
1145D ; Newa # Po NEWA INSERTION SIGN
# Total code points: 92
# ================================================
104B0..104D3 ; Osage # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA
104D8..104FB ; Osage # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA
# Total code points: 72
# ================================================
16FE0 ; Tangut # Lm TANGUT ITERATION MARK
17000..187EC ; Tangut # Lo [6125] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187EC
18800..18AF2 ; Tangut # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755
# Total code points: 6881
# ================================================
11D00..11D06 ; Masaram_Gondi # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E
11D08..11D09 ; Masaram_Gondi # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O
11D0B..11D30 ; Masaram_Gondi # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA
11D31..11D36 ; Masaram_Gondi # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R
11D3A ; Masaram_Gondi # Mn MASARAM GONDI VOWEL SIGN E
11D3C..11D3D ; Masaram_Gondi # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O
11D3F..11D45 ; Masaram_Gondi # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA
11D46 ; Masaram_Gondi # Lo MASARAM GONDI REPHA
11D47 ; Masaram_Gondi # Mn MASARAM GONDI RA-KARA
11D50..11D59 ; Masaram_Gondi # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
# Total code points: 75
# ================================================
16FE1 ; Nushu # Lm NUSHU ITERATION MARK
1B170..1B2FB ; Nushu # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
# Total code points: 397
# ================================================
11A50 ; Soyombo # Lo SOYOMBO LETTER A
11A51..11A56 ; Soyombo # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE
11A57..11A58 ; Soyombo # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU
11A59..11A5B ; Soyombo # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK
11A5C..11A83 ; Soyombo # Lo [40] SOYOMBO LETTER KA..SOYOMBO LETTER KSSA
11A86..11A89 ; Soyombo # Lo [4] SOYOMBO CLUSTER-INITIAL LETTER RA..SOYOMBO CLUSTER-INITIAL LETTER SA
11A8A..11A96 ; Soyombo # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA
11A97 ; Soyombo # Mc SOYOMBO SIGN VISARGA
11A98..11A99 ; Soyombo # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER
11A9A..11A9C ; Soyombo # Po [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD
11A9E..11AA2 ; Soyombo # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2
# Total code points: 80
# ================================================
11A00 ; Zanabazar_Square # Lo ZANABAZAR SQUARE LETTER A
11A01..11A06 ; Zanabazar_Square # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O
11A07..11A08 ; Zanabazar_Square # Mc [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU
11A09..11A0A ; Zanabazar_Square # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK
11A0B..11A32 ; Zanabazar_Square # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA
11A33..11A38 ; Zanabazar_Square # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA
11A39 ; Zanabazar_Square # Mc ZANABAZAR SQUARE SIGN VISARGA
11A3A ; Zanabazar_Square # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA
11A3B..11A3E ; Zanabazar_Square # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA
11A3F..11A46 ; Zanabazar_Square # Po [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK
11A47 ; Zanabazar_Square # Mn ZANABAZAR SQUARE SUBJOINER
# Total code points: 72
# EOF

View File

@ -1,10 +1,11 @@
# SpecialCasing-8.0.0.txt
# Date: 2014-12-16, 23:08:04 GMT [MD]
# SpecialCasing-10.0.0.txt
# Date: 2017-04-14, 05:40:43 GMT
# © 2017 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# Copyright (c) 1991-2014 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Special Casing
#
@ -196,7 +197,7 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
# ================================================================================
# Conditional Mappings
# The remainder of this file provides conditional casing data used to produce
# The remainder of this file provides conditional casing data used to produce
# full case mappings.
# ================================================================================
# Language-Insensitive Mappings

File diff suppressed because it is too large Load Diff

View File

@ -38,8 +38,8 @@
#include <private/qunicodetables_p.h>
#endif
#define DATA_VERSION_S "8.0"
#define DATA_VERSION_STR "QChar::Unicode_8_0"
#define DATA_VERSION_S "10.0"
#define DATA_VERSION_STR "QChar::Unicode_10_0"
static QHash<QByteArray, QChar::UnicodeVersion> age_map;
@ -67,6 +67,8 @@ static void initAgeMap()
{ QChar::Unicode_6_3, "6.3" },
{ QChar::Unicode_7_0, "7.0" },
{ QChar::Unicode_8_0, "8.0" },
{ QChar::Unicode_9_0, "9.0" },
{ QChar::Unicode_10_0, "10.0" },
{ QChar::Unicode_Unassigned, 0 }
};
AgeMap *d = ageMap;
@ -722,6 +724,20 @@ static void initScriptMap()
{ QChar::Script_Multani, "Multani" },
{ QChar::Script_OldHungarian, "OldHungarian" },
{ QChar::Script_SignWriting, "SignWriting" },
// 9.0
{ QChar::Script_Adlam, "Adlam" },
{ QChar::Script_Bhaiksuki, "Bhaiksuki" },
{ QChar::Script_Marchen, "Marchen" },
{ QChar::Script_Newa, "Newa" },
{ QChar::Script_Osage, "Osage" },
{ QChar::Script_Tangut, "Tangut" },
// 10.0
{ QChar::Script_MasaramGondi, "MasaramGondi" },
{ QChar::Script_Nushu, "Nushu" },
{ QChar::Script_Soyombo, "Soyombo" },
{ QChar::Script_ZanabazarSquare, "ZanabazarSquare" },
// unhandled
{ QChar::Script_Unknown, 0 }
};