ICU-4334 refresh Unicode 4.1 data

X-SVN-Rev: 17331
This commit is contained in:
Markus Scherer 2005-03-11 23:31:44 +00:00
parent 6e1070b4b2
commit 1fa2571e90
25 changed files with 1596 additions and 1480 deletions

View File

@ -43,6 +43,8 @@
#include "uassert.h"
#include "hash.h"
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
// initial storage. Must be >= 0
// *** same as in uniset.cpp ! ***
#define START_EXTRA 16
@ -101,16 +103,15 @@ static const struct C99_Map {
// MUST be in SORTED order
{ "alnum", u_isalnum, UPROPS_SRC_CHAR },
{ "blank", u_isblank, UPROPS_SRC_PROPSVEC },
{ "cntrl", u_iscntrl, UPROPS_SRC_CHAR },
{ "digit", u_isdigit, UPROPS_SRC_CHAR },
// new alias in Unicode 4.1 { "cntrl", u_iscntrl, UPROPS_SRC_CHAR },
// new alias in Unicode 4.1 { "digit", u_isdigit, UPROPS_SRC_CHAR },
{ "graph", u_isgraph, UPROPS_SRC_CHAR },
{ "print", u_isprint, UPROPS_SRC_CHAR },
{ "punct", u_ispunct, UPROPS_SRC_CHAR },
{ "space", u_isspace, UPROPS_SRC_CHAR },
// new alias in Unicode 4.1 { "punct", u_ispunct, UPROPS_SRC_CHAR },
// new alias in Unicode 4.1 { "space", u_isspace, UPROPS_SRC_CHAR },
{ "title", u_istitle, UPROPS_SRC_CHAR },
{ "xdigit", u_isxdigit, UPROPS_SRC_CHAR }
};
#define C99_COUNT (10)
// TEMPORARY: Remove when deprecated category code constructor is removed.
static const UChar CATEGORY_NAMES[] = {
@ -1085,7 +1086,7 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
// TODO: Remove the following special-case code when
// these four C99-compatibility properties are implemented
// as enums/names.
for (int32_t i=0; i<C99_COUNT; ++i) {
for (int32_t i=0; i<LENGTHOF(C99_DISPATCH); ++i) {
int32_t c = uprv_comparePropertyNames(pname, C99_DISPATCH[i].name);
if (c == 0) {
applyFilter(c99Filter, (void*) &C99_DISPATCH[i], C99_DISPATCH[i].src, ec);

View File

@ -1,5 +1,5 @@
# Blocks-4.1.0.txt
# Date: 2004-12-03, 15:20 [KW]
# Date: 2005-1-31, 16:50 [KW]
#
# Unicode Character Database
# Copyright (c) 1991-2005 Unicode, Inc.
@ -104,7 +104,7 @@
2A00..2AFF; Supplemental Mathematical Operators
2B00..2BFF; Miscellaneous Symbols and Arrows
2C00..2C5F; Glagolitic
2C80..2C8F; Coptic
2C80..2CFF; Coptic
2D00..2D2F; Georgian Supplement
2D30..2D7F; Tifinagh
2D80..2DDF; Ethiopic Extended
@ -119,7 +119,7 @@
3130..318F; Hangul Compatibility Jamo
3190..319F; Kanbun
31A0..31BF; Bopomofo Extended
31C0..31EF; CJK Basic Strokes
31C0..31EF; CJK Strokes
31F0..31FF; Katakana Phonetic Extensions
3200..32FF; Enclosed CJK Letters and Months
3300..33FF; CJK Compatibility

View File

@ -1,5 +1,5 @@
# DerivedAge-4.1.0.txt
# Date: 2004-12-11, 05:35:45 GMT [MD]
# Date: 2005-02-26, 02:31:13 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2005 Unicode, Inc.
@ -736,8 +736,7 @@ E0100..E01EF ; 4.0 # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
03FC..03FF ; 4.1 # [4] GREEK RHO WITH STROKE SYMBOL..GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
04F6..04F7 ; 4.1 # [2] CYRILLIC CAPITAL LETTER GHE WITH DESCENDER..CYRILLIC SMALL LETTER GHE WITH DESCENDER
05A2 ; 4.1 # HEBREW ACCENT ATNAH HAFUKH
05BA ; 4.1 # HEBREW POINT QAMATS QATAN
05C5..05C6 ; 4.1 # [2] HEBREW MARK LOWER DOT..HEBREW PUNCTUATION NUN HAFUKHA
05C5..05C7 ; 4.1 # [3] HEBREW MARK LOWER DOT..HEBREW POINT QAMATS QATAN
060B ; 4.1 # AFGHANI SIGN
061E ; 4.1 # ARABIC TRIPLE DOT PUNCTUATION MARK
0659..065E ; 4.1 # [6] ARABIC ZWARAKAY..ARABIC FATHA WITH TWO DOTS
@ -763,7 +762,7 @@ E0100..E01EF ; 4.0 # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
1980..19A9 ; 4.1 # [42] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW XVA
19B0..19C9 ; 4.1 # [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2
19D0..19D9 ; 4.1 # [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
19DE..19DF ; 4.1 # [2] NEW TAI LUE SIGN LE..NEW TAI LUE SIGN LEW
19DE..19DF ; 4.1 # [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV
1A00..1A1B ; 4.1 # [28] BUGINESE LETTER KA..BUGINESE VOWEL SIGN AE
1A1E..1A1F ; 4.1 # [2] BUGINESE PALLAWA..BUGINESE END OF SECTION
1D6C..1DC3 ; 4.1 # [88] LATIN SMALL LETTER B WITH MIDDLE TILDE..COMBINING SUSPENSION MARK
@ -786,7 +785,7 @@ E0100..E01EF ; 4.0 # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
2C80..2CEA ; 4.1 # [107] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL SHIMA SIMA
2CF9..2D25 ; 4.1 # [45] COPTIC OLD NUBIAN FULL STOP..GEORGIAN SMALL LETTER HOE
2D30..2D65 ; 4.1 # [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
2D6F ; 4.1 # TIFINAGH MODIFIER LETTER LABIALIZATION
2D6F ; 4.1 # TIFINAGH MODIFIER LETTER LABIALIZATION MARK
2D80..2D96 ; 4.1 # [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE
2DA0..2DA6 ; 4.1 # [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO
2DA8..2DAE ; 4.1 # [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO
@ -798,7 +797,7 @@ E0100..E01EF ; 4.0 # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
2DD8..2DDE ; 4.1 # [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
2E00..2E17 ; 4.1 # [24] RIGHT ANGLE SUBSTITUTION MARKER..DOUBLE OBLIQUE HYPHEN
2E1C..2E1D ; 4.1 # [2] LEFT LOW PARAPHRASE BRACKET..RIGHT LOW PARAPHRASE BRACKET
31C0..31CF ; 4.1 # [16] CJK BASIC STROKE T..CJK BASIC STROKE N
31C0..31CF ; 4.1 # [16] CJK STROKE T..CJK STROKE N
327E ; 4.1 # CIRCLED HANGUL IEUNG U
9FA6..9FBB ; 4.1 # [22] CJK UNIFIED IDEOGRAPH-9FA6..CJK UNIFIED IDEOGRAPH-9FBB
A700..A716 ; 4.1 # [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR

View File

@ -1,5 +1,5 @@
# DerivedBidiClass-4.1.0.txt
# Date: 2004-12-11, 05:36:16 GMT [MD]
# Date: 2005-02-26, 02:31:45 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2005 Unicode, Inc.
@ -42,8 +42,7 @@
038E..03A1 ; L # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO
03A3..03CE ; L # L& [44] GREEK CAPITAL LETTER SIGMA..GREEK SMALL LETTER OMEGA WITH TONOS
03D0..03F5 ; L # L& [38] GREEK BETA SYMBOL..GREEK LUNATE EPSILON SYMBOL
03F7..03FB ; L # L& [5] GREEK CAPITAL LETTER SHO..GREEK SMALL LETTER SAN
03FD..0481 ; L # L& [133] GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL..CYRILLIC SMALL LETTER KOPPA
03F7..0481 ; L # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA
0482 ; L # So CYRILLIC THOUSANDS SIGN
048A..04CE ; L # L& [69] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EM WITH TAIL
04D0..04F9 ; L # L& [42] CYRILLIC CAPITAL LETTER A WITH BREVE..CYRILLIC SMALL LETTER YERU WITH DIAERESIS
@ -270,7 +269,8 @@
12D8..1310 ; L # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA
1312..1315 ; L # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE
1318..135A ; L # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA
1360..1368 ; L # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR
1360 ; L # So ETHIOPIC SECTION MARK
1361..1368 ; L # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR
1369..137C ; L # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND
1380..138F ; L # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE
13A0..13F4 ; L # Lo [85] CHEROKEE LETTER A..CHEROKEE LETTER YV
@ -375,7 +375,7 @@
2C80..2CE4 ; L # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI
2D00..2D25 ; L # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE
2D30..2D65 ; L # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
2D6F ; L # Lm TIFINAGH MODIFIER LETTER LABIALIZATION
2D6F ; L # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
2D80..2D96 ; L # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE
2DA0..2DA6 ; L # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO
2DA8..2DAE ; L # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO
@ -525,18 +525,19 @@ F0000..FFFFD ; L # Co [65534] <private-use-F0000>..<private-use-FFFFD>
100000..10FFFD; L # Co [65534] <private-use-100000>..<private-use-10FFFD>
# The above property value applies to 872685 code points not listed here.
# Total code points: 1102297
# Total code points: 1102298
# ================================================
# Bidi_Class=Right_To_Left
0590 ; R # Cn <reserved-0590>
05BA ; R # Cn <reserved-05BA>
05BE ; R # Po HEBREW PUNCTUATION MAQAF
05C0 ; R # Po HEBREW PUNCTUATION PASEQ
05C3 ; R # Po HEBREW PUNCTUATION SOF PASUQ
05C6 ; R # Po HEBREW PUNCTUATION NUN HAFUKHA
05C7..05CF ; R # Cn [9] <reserved-05C7>..<reserved-05CF>
05C8..05CF ; R # Cn [8] <reserved-05C8>..<reserved-05CF>
05D0..05EA ; R # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV
05EB..05EF ; R # Cn [5] <reserved-05EB>..<reserved-05EF>
05F0..05F2 ; R # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
@ -579,8 +580,7 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL
10A19..10A33 ; R # Lo [27] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA
10A34..10A37 ; R # Cn [4] <reserved-10A34>..<reserved-10A37>
10A3B..10A3E ; R # Cn [4] <reserved-10A3B>..<reserved-10A3E>
10A40..10A43 ; R # Nd [4] KHAROSHTHI DIGIT ONE..KHAROSHTHI DIGIT FOUR
10A44..10A47 ; R # No [4] KHAROSHTHI NUMBER TEN..KHAROSHTHI NUMBER ONE THOUSAND
10A40..10A47 ; R # No [8] KHAROSHTHI DIGIT ONE..KHAROSHTHI NUMBER ONE THOUSAND
10A48..10A4F ; R # Cn [8] <reserved-10A48>..<reserved-10A4F>
10A50..10A58 ; R # Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES
10A59..10FFF ; R # Cn [1447] <reserved-10A59>..<reserved-10FFF>
@ -610,8 +610,16 @@ FF10..FF19 ; EN # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
002B ; ES # Sm PLUS SIGN
002D ; ES # Pd HYPHEN-MINUS
207A..207B ; ES # Sm [2] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT MINUS
208A..208B ; ES # Sm [2] SUBSCRIPT PLUS SIGN..SUBSCRIPT MINUS
2212 ; ES # Sm MINUS SIGN
FB29 ; ES # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN
FE62 ; ES # Sm SMALL PLUS SIGN
FE63 ; ES # Pd SMALL HYPHEN-MINUS
FF0B ; ES # Sm FULLWIDTH PLUS SIGN
FF0D ; ES # Pd FULLWIDTH HYPHEN-MINUS
# Total code points: 2
# Total code points: 12
# ================================================
@ -630,26 +638,19 @@ FF10..FF19 ; EN # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
0E3F ; ET # Sc THAI CURRENCY SYMBOL BAHT
17DB ; ET # Sc KHMER CURRENCY SYMBOL RIEL
2030..2034 ; ET # Po [5] PER MILLE SIGN..TRIPLE PRIME
207A..207B ; ET # Sm [2] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT MINUS
208A..208B ; ET # Sm [2] SUBSCRIPT PLUS SIGN..SUBSCRIPT MINUS
20A0..20B5 ; ET # Sc [22] EURO-CURRENCY SIGN..CEDI SIGN
212E ; ET # So ESTIMATED SYMBOL
2212..2213 ; ET # Sm [2] MINUS SIGN..MINUS-OR-PLUS SIGN
FB29 ; ET # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN
2213 ; ET # Sm MINUS-OR-PLUS SIGN
FE5F ; ET # Po SMALL NUMBER SIGN
FE62 ; ET # Sm SMALL PLUS SIGN
FE63 ; ET # Pd SMALL HYPHEN-MINUS
FE69 ; ET # Sc SMALL DOLLAR SIGN
FE6A ; ET # Po SMALL PERCENT SIGN
FF03 ; ET # Po FULLWIDTH NUMBER SIGN
FF04 ; ET # Sc FULLWIDTH DOLLAR SIGN
FF05 ; ET # Po FULLWIDTH PERCENT SIGN
FF0B ; ET # Sm FULLWIDTH PLUS SIGN
FF0D ; ET # Pd FULLWIDTH HYPHEN-MINUS
FFE0..FFE1 ; ET # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN
FFE5..FFE6 ; ET # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN
# Total code points: 65
# Total code points: 55
# ================================================
@ -767,7 +768,6 @@ FF1A ; CS # Po FULLWIDTH COLON
0384..0385 ; ON # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS
0387 ; ON # Po GREEK ANO TELEIA
03F6 ; ON # Sm GREEK REVERSED LUNATE EPSILON SYMBOL
03FC ; ON # L& GREEK RHO WITH STROKE SYMBOL
058A ; ON # Pd ARMENIAN HYPHEN
060E..060F ; ON # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
06E9 ; ON # So ARABIC PLACE OF SAJDAH
@ -786,7 +786,8 @@ FF1A ; CS # Po FULLWIDTH COLON
1807..180A ; ON # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU
1940 ; ON # So LIMBU SIGN LOO
1944..1945 ; ON # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
19DE..19FF ; ON # So [34] NEW TAI LUE SIGN LE..KHMER SYMBOL DAP-PRAM ROC
19DE..19DF ; ON # Po [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV
19E0..19FF ; ON # So [32] KHMER SYMBOL PATHAMASAT..KHMER SYMBOL DAP-PRAM ROC
1FBD ; ON # Sk GREEK KORONIS
1FBF..1FC1 ; ON # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
1FCD..1FCF ; ON # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI
@ -966,20 +967,20 @@ FF1A ; CS # Po FULLWIDTH COLON
2CFD ; ON # No COPTIC FRACTION ONE HALF
2CFE..2CFF ; ON # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER
2E00..2E01 ; ON # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
2E02 ; ON # Ps LEFT SUBSTITUTION BRACKET
2E03 ; ON # Pe RIGHT SUBSTITUTION BRACKET
2E04 ; ON # Ps LEFT DOTTED SUBSTITUTION BRACKET
2E05 ; ON # Pe RIGHT DOTTED SUBSTITUTION BRACKET
2E02 ; ON # Pi LEFT SUBSTITUTION BRACKET
2E03 ; ON # Pf RIGHT SUBSTITUTION BRACKET
2E04 ; ON # Pi LEFT DOTTED SUBSTITUTION BRACKET
2E05 ; ON # Pf RIGHT DOTTED SUBSTITUTION BRACKET
2E06..2E08 ; ON # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER
2E09 ; ON # Ps LEFT TRANSPOSITION BRACKET
2E0A ; ON # Pe RIGHT TRANSPOSITION BRACKET
2E09 ; ON # Pi LEFT TRANSPOSITION BRACKET
2E0A ; ON # Pf RIGHT TRANSPOSITION BRACKET
2E0B ; ON # Po RAISED SQUARE
2E0C ; ON # Pi LEFT RAISED OMISSION BRACKET
2E0D ; ON # Pf RIGHT RAISED OMISSION BRACKET
2E0E..2E16 ; ON # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE
2E17 ; ON # Pd DOUBLE OBLIQUE HYPHEN
2E1C ; ON # Ps LEFT LOW PARAPHRASE BRACKET
2E1D ; ON # Pe RIGHT LOW PARAPHRASE BRACKET
2E1C ; ON # Pi LEFT LOW PARAPHRASE BRACKET
2E1D ; ON # Pf RIGHT LOW PARAPHRASE BRACKET
2E80..2E99 ; ON # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP
2E9B..2EF3 ; ON # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
2F00..2FD5 ; ON # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
@ -1016,7 +1017,7 @@ FF1A ; CS # Po FULLWIDTH COLON
309B..309C ; ON # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
30A0 ; ON # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN
30FB ; ON # Po KATAKANA MIDDLE DOT
31C0..31CF ; ON # So [16] CJK BASIC STROKE T..CJK BASIC STROKE N
31C0..31CF ; ON # So [16] CJK STROKE T..CJK STROKE N
321D..321E ; ON # So [2] PARENTHESIZED KOREAN CHARACTER OJEON..PARENTHESIZED KOREAN CHARACTER O HU
3250 ; ON # So PARTNERSHIP SIGN
3251..325F ; ON # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE
@ -1117,7 +1118,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE
1D245 ; ON # So GREEK MUSICAL LEIMMA
1D300..1D356 ; ON # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
# Total code points: 3109
# Total code points: 3108
# ================================================
@ -1169,10 +1170,12 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
0300..036F ; NSM # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
0483..0486 ; NSM # Mn [4] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PSILI PNEUMATA
0488..0489 ; NSM # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
0591..05BD ; NSM # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
0591..05B9 ; NSM # Mn [41] HEBREW ACCENT ETNAHTA..HEBREW POINT HOLAM
05BB..05BD ; NSM # Mn [3] HEBREW POINT QUBUTS..HEBREW POINT METEG
05BF ; NSM # Mn HEBREW POINT RAFE
05C1..05C2 ; NSM # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
05C4..05C5 ; NSM # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
05C7 ; NSM # Mn HEBREW POINT QAMATS QATAN
0610..0615 ; NSM # Mn [6] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL HIGH TAH
064B..065E ; NSM # Mn [20] ARABIC FATHATAN..ARABIC FATHA WITH TWO DOTS
0670 ; NSM # Mn ARABIC LETTER SUPERSCRIPT ALEF

View File

@ -1,5 +1,5 @@
# DerivedCoreProperties-4.1.0.txt
# Date: 2004-12-14, 01:09:24 GMT [MD]
# Date: 2005-03-10, 02:04:29 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2005 Unicode, Inc.
@ -213,10 +213,12 @@ FFE9..FFEC ; Math
0531..0556 ; Alphabetic
0559 ; Alphabetic
0561..0587 ; Alphabetic
05B0..05BD ; Alphabetic
05B0..05B9 ; Alphabetic
05BB..05BD ; Alphabetic
05BF ; Alphabetic
05C1..05C2 ; Alphabetic
05C4..05C5 ; Alphabetic
05C7 ; Alphabetic
05D0..05EA ; Alphabetic
05F0..05F2 ; Alphabetic
0610..0615 ; Alphabetic
@ -1695,6 +1697,7 @@ FF21..FF3A ; Uppercase
# Derived Property: ID_Start
# Characters that can start an identifier.
# Generated from Lu+Ll+Lt+Lm+Lo+Nl+Other_ID_Start
# NOTE: See UAX #31 for more information
0041..005A ; ID_Start
0061..007A ; ID_Start
@ -2089,7 +2092,7 @@ FFDA..FFDC ; ID_Start
# Derived Property: ID_Continue
# Characters that can continue an identifier.
# Generated from: ID_Start + Mn+Mc+Nd+Pc + Other_ID_Continue
# NOTE: Cf characters should be filtered out.
# NOTE: See UAX #31 for more information
0030..0039 ; ID_Continue
0041..005A ; ID_Continue
@ -2126,10 +2129,12 @@ FFDA..FFDC ; ID_Start
0531..0556 ; ID_Continue
0559 ; ID_Continue
0561..0587 ; ID_Continue
0591..05BD ; ID_Continue
0591..05B9 ; ID_Continue
05BB..05BD ; ID_Continue
05BF ; ID_Continue
05C1..05C2 ; ID_Continue
05C4..05C5 ; ID_Continue
05C7 ; ID_Continue
05D0..05EA ; ID_Continue
05F0..05F2 ; ID_Continue
0610..0615 ; ID_Continue
@ -2650,7 +2655,6 @@ FFDA..FFDC ; ID_Continue
10A19..10A33 ; ID_Continue
10A38..10A3A ; ID_Continue
10A3F ; ID_Continue
10A40..10A43 ; ID_Continue
1D165..1D166 ; ID_Continue
1D167..1D169 ; ID_Continue
1D16D..1D172 ; ID_Continue
@ -2693,7 +2697,7 @@ FFDA..FFDC ; ID_Continue
2F800..2FA1D ; ID_Continue
E0100..E01EF ; ID_Continue
# Total code points: 92802
# Total code points: 92798
# ================================================
@ -2702,6 +2706,7 @@ E0100..E01EF ; ID_Continue
# Modified as described in UAX #15
# NOTE: Does NOT remove the non-NFKx characters.
# Merely ensures that if isIdentifer(string) then isIdentifier(NFKx(string))
# NOTE: See UAX #31 for more information
0041..005A ; XID_Start
0061..007A ; XID_Start
@ -3102,6 +3107,7 @@ FFDA..FFDC ; XID_Start
# NOTE: Cf characters should be filtered out.
# NOTE: Does NOT remove the non-NFKx characters.
# Merely ensures that if isIdentifer(string) then isIdentifier(NFKx(string))
# NOTE: See UAX #31 for more information
0030..0039 ; XID_Continue
0041..005A ; XID_Continue
@ -3138,10 +3144,12 @@ FFDA..FFDC ; XID_Start
0531..0556 ; XID_Continue
0559 ; XID_Continue
0561..0587 ; XID_Continue
0591..05BD ; XID_Continue
0591..05B9 ; XID_Continue
05BB..05BD ; XID_Continue
05BF ; XID_Continue
05C1..05C2 ; XID_Continue
05C4..05C5 ; XID_Continue
05C7 ; XID_Continue
05D0..05EA ; XID_Continue
05F0..05F2 ; XID_Continue
0610..0615 ; XID_Continue
@ -3667,7 +3675,6 @@ FFDA..FFDC ; XID_Continue
10A19..10A33 ; XID_Continue
10A38..10A3A ; XID_Continue
10A3F ; XID_Continue
10A40..10A43 ; XID_Continue
1D165..1D166 ; XID_Continue
1D167..1D169 ; XID_Continue
1D16D..1D172 ; XID_Continue
@ -3710,7 +3717,7 @@ FFDA..FFDC ; XID_Continue
2F800..2FA1D ; XID_Continue
E0100..E01EF ; XID_Continue
# Total code points: 92784
# Total code points: 92780
# ================================================
@ -3779,10 +3786,12 @@ FFFFE..FFFFF ; Default_Ignorable_Code_Point
0300..036F ; Grapheme_Extend
0483..0486 ; Grapheme_Extend
0488..0489 ; Grapheme_Extend
0591..05BD ; Grapheme_Extend
0591..05B9 ; Grapheme_Extend
05BB..05BD ; Grapheme_Extend
05BF ; Grapheme_Extend
05C1..05C2 ; Grapheme_Extend
05C4..05C5 ; Grapheme_Extend
05C7 ; Grapheme_Extend
0610..0615 ; Grapheme_Extend
064B..065E ; Grapheme_Extend
0670 ; Grapheme_Extend
@ -4273,7 +4282,8 @@ E0100..E01EF ; Grapheme_Extend
12D8..1310 ; Grapheme_Base
1312..1315 ; Grapheme_Base
1318..135A ; Grapheme_Base
1360..1368 ; Grapheme_Base
1360 ; Grapheme_Base
1361..1368 ; Grapheme_Base
1369..137C ; Grapheme_Base
1380..138F ; Grapheme_Base
1390..1399 ; Grapheme_Base
@ -4330,7 +4340,8 @@ E0100..E01EF ; Grapheme_Extend
19C1..19C7 ; Grapheme_Base
19C8..19C9 ; Grapheme_Base
19D0..19D9 ; Grapheme_Base
19DE..19FF ; Grapheme_Base
19DE..19DF ; Grapheme_Base
19E0..19FF ; Grapheme_Base
1A00..1A16 ; Grapheme_Base
1A19..1A1B ; Grapheme_Base
1A1E..1A1F ; Grapheme_Base
@ -4838,8 +4849,7 @@ FFFC..FFFD ; Grapheme_Base
10A10..10A13 ; Grapheme_Base
10A15..10A17 ; Grapheme_Base
10A19..10A33 ; Grapheme_Base
10A40..10A43 ; Grapheme_Base
10A44..10A47 ; Grapheme_Base
10A40..10A47 ; Grapheme_Base
10A50..10A58 ; Grapheme_Base
1D000..1D0F5 ; Grapheme_Base
1D100..1D126 ; Grapheme_Base

View File

@ -1,5 +1,5 @@
# DerivedJoiningType-4.1.0.txt
# Date: 2004-12-15, 02:15:30 GMT [MD]
# Date: 2005-02-26, 02:36:56 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2005 Unicode, Inc.
@ -93,10 +93,12 @@
0300..036F ; T # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
0483..0486 ; T # Mn [4] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PSILI PNEUMATA
0488..0489 ; T # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
0591..05BD ; T # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
0591..05B9 ; T # Mn [41] HEBREW ACCENT ETNAHTA..HEBREW POINT HOLAM
05BB..05BD ; T # Mn [3] HEBREW POINT QUBUTS..HEBREW POINT METEG
05BF ; T # Mn HEBREW POINT RAFE
05C1..05C2 ; T # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
05C4..05C5 ; T # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
05C7 ; T # Mn HEBREW POINT QAMATS QATAN
0610..0615 ; T # Mn [6] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL HIGH TAH
064B..065E ; T # Mn [20] ARABIC FATHATAN..ARABIC FATHA WITH TWO DOTS
0670 ; T # Mn ARABIC LETTER SUPERSCRIPT ALEF

View File

@ -1,5 +1,5 @@
# DerivedNormalizationProps-4.1.0.txt
# Date: 2004-12-11, 05:42:12 GMT [MD]
# Date: 2005-02-26, 02:37:43 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2005 Unicode, Inc.
@ -41,7 +41,6 @@
1D40 ; FC_NFKC; 0074
1D41 ; FC_NFKC; 0075
1D42 ; FC_NFKC; 0077
1D78 ; FC_NFKC; 043D
20A8 ; FC_NFKC; 0072 0073
2102 ; FC_NFKC; 0063
2103 ; FC_NFKC; 00B0 0063
@ -580,7 +579,7 @@
1D7A8 ; FC_NFKC; 03C9
1D7BB ; FC_NFKC; 03C3
# Total code points: 565
# Total code points: 564
# ================================================

View File

@ -1,5 +1,5 @@
# DerivedNumericValues-4.1.0.txt
# Date: 2004-12-11, 05:44:37 GMT [MD]
# Date: 2005-03-10, 03:07:57 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2005 Unicode, Inc.
@ -201,22 +201,24 @@ FF10 ; 0.0 # Nd FULLWIDTH DIGIT ZERO
4E00 ; 1.0 # Lo CJK UNIFIED IDEOGRAPH-4E00
58F1 ; 1.0 # Lo CJK UNIFIED IDEOGRAPH-58F1
58F9 ; 1.0 # Lo CJK UNIFIED IDEOGRAPH-58F9
5E7A ; 1.0 # Lo CJK UNIFIED IDEOGRAPH-5E7A
5F0C ; 1.0 # Lo CJK UNIFIED IDEOGRAPH-5F0C
FF11 ; 1.0 # Nd FULLWIDTH DIGIT ONE
10107 ; 1.0 # No AEGEAN NUMBER ONE
10142 ; 1.0 # Nl GREEK ACROPHONIC ATTIC ONE DRACHMA
10158..1015A ; 1.0 # Nl [3] GREEK ACROPHONIC HERAEUM ONE PLETHRON..GREEK ACROPHONIC HERMIONE ONE
10158..1015A ; 1.0 # Nl [3] GREEK ACROPHONIC HERAEUM ONE PLETHRON..GREEK ACROPHONIC HERMIONIAN ONE
10320 ; 1.0 # No OLD ITALIC NUMERAL ONE
103D1 ; 1.0 # Nl OLD PERSIAN NUMBER ONE
104A1 ; 1.0 # Nd OSMANYA DIGIT ONE
10A40 ; 1.0 # Nd KHAROSHTHI DIGIT ONE
10A40 ; 1.0 # No KHAROSHTHI DIGIT ONE
1D7CF ; 1.0 # Nd MATHEMATICAL BOLD DIGIT ONE
1D7D9 ; 1.0 # Nd MATHEMATICAL DOUBLE-STRUCK DIGIT ONE
1D7E3 ; 1.0 # Nd MATHEMATICAL SANS-SERIF DIGIT ONE
1D7ED ; 1.0 # Nd MATHEMATICAL SANS-SERIF BOLD DIGIT ONE
1D7F7 ; 1.0 # Nd MATHEMATICAL MONOSPACE DIGIT ONE
2092A ; 1.0 # Lo CJK UNIFIED IDEOGRAPH-2092A
# Total code points: 58
# Total code points: 60
# ================================================
@ -264,25 +266,28 @@ FF11 ; 1.0 # Nd FULLWIDTH DIGIT ONE
3193 ; 2.0 # No IDEOGRAPHIC ANNOTATION TWO MARK
3221 ; 2.0 # No PARENTHESIZED IDEOGRAPH TWO
3281 ; 2.0 # No CIRCLED IDEOGRAPH TWO
3483 ; 2.0 # Lo CJK UNIFIED IDEOGRAPH-3483
4E8C ; 2.0 # Lo CJK UNIFIED IDEOGRAPH-4E8C
5169 ; 2.0 # Lo CJK UNIFIED IDEOGRAPH-5169
5F0D ; 2.0 # Lo CJK UNIFIED IDEOGRAPH-5F0D
5F10 ; 2.0 # Lo CJK UNIFIED IDEOGRAPH-5F10
8CAE ; 2.0 # Lo CJK UNIFIED IDEOGRAPH-8CAE
8CB3 ; 2.0 # Lo CJK UNIFIED IDEOGRAPH-8CB3
8D30 ; 2.0 # Lo CJK UNIFIED IDEOGRAPH-8D30
FF12 ; 2.0 # Nd FULLWIDTH DIGIT TWO
10108 ; 2.0 # No AEGEAN NUMBER TWO
1015B..1015E ; 2.0 # Nl [4] GREEK ACROPHONIC EPIDAUREAN TWO..GREEK ACROPHONIC EPIDAUREAN TWO DRACHMAS
103D2 ; 2.0 # Nl OLD PERSIAN NUMBER TWO
104A2 ; 2.0 # Nd OSMANYA DIGIT TWO
10A41 ; 2.0 # Nd KHAROSHTHI DIGIT TWO
10A41 ; 2.0 # No KHAROSHTHI DIGIT TWO
1D7D0 ; 2.0 # Nd MATHEMATICAL BOLD DIGIT TWO
1D7DA ; 2.0 # Nd MATHEMATICAL DOUBLE-STRUCK DIGIT TWO
1D7E4 ; 2.0 # Nd MATHEMATICAL SANS-SERIF DIGIT TWO
1D7EE ; 2.0 # Nd MATHEMATICAL SANS-SERIF BOLD DIGIT TWO
1D7F8 ; 2.0 # Nd MATHEMATICAL MONOSPACE DIGIT TWO
22390 ; 2.0 # Lo CJK UNIFIED IDEOGRAPH-22390
# Total code points: 58
# Total code points: 61
# ================================================
@ -331,19 +336,24 @@ FF12 ; 2.0 # Nd FULLWIDTH DIGIT TWO
3222 ; 3.0 # No PARENTHESIZED IDEOGRAPH THREE
3282 ; 3.0 # No CIRCLED IDEOGRAPH THREE
4E09 ; 3.0 # Lo CJK UNIFIED IDEOGRAPH-4E09
53C1..53C3 ; 3.0 # Lo [3] CJK UNIFIED IDEOGRAPH-53C1..CJK UNIFIED IDEOGRAPH-53C3
4EE8 ; 3.0 # Lo CJK UNIFIED IDEOGRAPH-4EE8
53C1..53C4 ; 3.0 # Lo [4] CJK UNIFIED IDEOGRAPH-53C1..CJK UNIFIED IDEOGRAPH-53C4
5F0E ; 3.0 # Lo CJK UNIFIED IDEOGRAPH-5F0E
FF13 ; 3.0 # Nd FULLWIDTH DIGIT THREE
10109 ; 3.0 # No AEGEAN NUMBER THREE
104A3 ; 3.0 # Nd OSMANYA DIGIT THREE
10A42 ; 3.0 # Nd KHAROSHTHI DIGIT THREE
10A42 ; 3.0 # No KHAROSHTHI DIGIT THREE
1D7D1 ; 3.0 # Nd MATHEMATICAL BOLD DIGIT THREE
1D7DB ; 3.0 # Nd MATHEMATICAL DOUBLE-STRUCK DIGIT THREE
1D7E5 ; 3.0 # Nd MATHEMATICAL SANS-SERIF DIGIT THREE
1D7EF ; 3.0 # Nd MATHEMATICAL SANS-SERIF BOLD DIGIT THREE
1D7F9 ; 3.0 # Nd MATHEMATICAL MONOSPACE DIGIT THREE
20AFD ; 3.0 # Lo CJK UNIFIED IDEOGRAPH-20AFD
20B19 ; 3.0 # Lo CJK UNIFIED IDEOGRAPH-20B19
22998 ; 3.0 # Lo CJK UNIFIED IDEOGRAPH-22998
23B1B ; 3.0 # Lo CJK UNIFIED IDEOGRAPH-23B1B
# Total code points: 52
# Total code points: 58
# ================================================
@ -391,19 +401,23 @@ FF13 ; 3.0 # Nd FULLWIDTH DIGIT THREE
3195 ; 4.0 # No IDEOGRAPHIC ANNOTATION FOUR MARK
3223 ; 4.0 # No PARENTHESIZED IDEOGRAPH FOUR
3283 ; 4.0 # No CIRCLED IDEOGRAPH FOUR
4E96 ; 4.0 # Lo CJK UNIFIED IDEOGRAPH-4E96
56DB ; 4.0 # Lo CJK UNIFIED IDEOGRAPH-56DB
8086 ; 4.0 # Lo CJK UNIFIED IDEOGRAPH-8086
FF14 ; 4.0 # Nd FULLWIDTH DIGIT FOUR
1010A ; 4.0 # No AEGEAN NUMBER FOUR
104A4 ; 4.0 # Nd OSMANYA DIGIT FOUR
10A43 ; 4.0 # Nd KHAROSHTHI DIGIT FOUR
10A43 ; 4.0 # No KHAROSHTHI DIGIT FOUR
1D7D2 ; 4.0 # Nd MATHEMATICAL BOLD DIGIT FOUR
1D7DC ; 4.0 # Nd MATHEMATICAL DOUBLE-STRUCK DIGIT FOUR
1D7E6 ; 4.0 # Nd MATHEMATICAL SANS-SERIF DIGIT FOUR
1D7F0 ; 4.0 # Nd MATHEMATICAL SANS-SERIF BOLD DIGIT FOUR
1D7FA ; 4.0 # Nd MATHEMATICAL MONOSPACE DIGIT FOUR
20064 ; 4.0 # Lo CJK UNIFIED IDEOGRAPH-20064
200E2 ; 4.0 # Lo CJK UNIFIED IDEOGRAPH-200E2
2626D ; 4.0 # Lo CJK UNIFIED IDEOGRAPH-2626D
# Total code points: 49
# Total code points: 53
# ================================================
@ -449,6 +463,8 @@ FF14 ; 4.0 # Nd FULLWIDTH DIGIT FOUR
3025 ; 5.0 # Nl HANGZHOU NUMERAL FIVE
3224 ; 5.0 # No PARENTHESIZED IDEOGRAPH FIVE
3284 ; 5.0 # No CIRCLED IDEOGRAPH FIVE
3405 ; 5.0 # Lo CJK UNIFIED IDEOGRAPH-3405
382A ; 5.0 # Lo CJK UNIFIED IDEOGRAPH-382A
4E94 ; 5.0 # Lo CJK UNIFIED IDEOGRAPH-4E94
4F0D ; 5.0 # Lo CJK UNIFIED IDEOGRAPH-4F0D
FF15 ; 5.0 # Nd FULLWIDTH DIGIT FIVE
@ -465,8 +481,9 @@ FF15 ; 5.0 # Nd FULLWIDTH DIGIT FIVE
1D7E7 ; 5.0 # Nd MATHEMATICAL SANS-SERIF DIGIT FIVE
1D7F1 ; 5.0 # Nd MATHEMATICAL SANS-SERIF BOLD DIGIT FIVE
1D7FB ; 5.0 # Nd MATHEMATICAL MONOSPACE DIGIT FIVE
20121 ; 5.0 # Lo CJK UNIFIED IDEOGRAPH-20121
# Total code points: 52
# Total code points: 55
# ================================================
@ -523,8 +540,9 @@ FF16 ; 6.0 # Nd FULLWIDTH DIGIT SIX
1D7E8 ; 6.0 # Nd MATHEMATICAL SANS-SERIF DIGIT SIX
1D7F2 ; 6.0 # Nd MATHEMATICAL SANS-SERIF BOLD DIGIT SIX
1D7FC ; 6.0 # Nd MATHEMATICAL MONOSPACE DIGIT SIX
20AEA ; 6.0 # Lo CJK UNIFIED IDEOGRAPH-20AEA
# Total code points: 47
# Total code points: 48
# ================================================
@ -570,6 +588,7 @@ FF16 ; 6.0 # Nd FULLWIDTH DIGIT SIX
3027 ; 7.0 # Nl HANGZHOU NUMERAL SEVEN
3226 ; 7.0 # No PARENTHESIZED IDEOGRAPH SEVEN
3286 ; 7.0 # No CIRCLED IDEOGRAPH SEVEN
3B4D ; 7.0 # Lo CJK UNIFIED IDEOGRAPH-3B4D
4E03 ; 7.0 # Lo CJK UNIFIED IDEOGRAPH-4E03
67D2 ; 7.0 # Lo CJK UNIFIED IDEOGRAPH-67D2
FF17 ; 7.0 # Nd FULLWIDTH DIGIT SEVEN
@ -580,8 +599,9 @@ FF17 ; 7.0 # Nd FULLWIDTH DIGIT SEVEN
1D7E9 ; 7.0 # Nd MATHEMATICAL SANS-SERIF DIGIT SEVEN
1D7F3 ; 7.0 # Nd MATHEMATICAL SANS-SERIF BOLD DIGIT SEVEN
1D7FD ; 7.0 # Nd MATHEMATICAL MONOSPACE DIGIT SEVEN
20001 ; 7.0 # Lo CJK UNIFIED IDEOGRAPH-20001
# Total code points: 46
# Total code points: 48
# ================================================
@ -685,6 +705,7 @@ FF18 ; 8.0 # Nd FULLWIDTH DIGIT EIGHT
3228 ; 9.0 # No PARENTHESIZED IDEOGRAPH NINE
3288 ; 9.0 # No CIRCLED IDEOGRAPH NINE
4E5D ; 9.0 # Lo CJK UNIFIED IDEOGRAPH-4E5D
5EFE ; 9.0 # Lo CJK UNIFIED IDEOGRAPH-5EFE
7396 ; 9.0 # Lo CJK UNIFIED IDEOGRAPH-7396
FF19 ; 9.0 # Nd FULLWIDTH DIGIT NINE
1010F ; 9.0 # No AEGEAN NUMBER NINE
@ -695,7 +716,7 @@ FF19 ; 9.0 # Nd FULLWIDTH DIGIT NINE
1D7F5 ; 9.0 # Nd MATHEMATICAL SANS-SERIF BOLD DIGIT NINE
1D7FF ; 9.0 # Nd MATHEMATICAL MONOSPACE DIGIT NINE
# Total code points: 46
# Total code points: 47
# ================================================
@ -713,6 +734,7 @@ FF19 ; 9.0 # Nd FULLWIDTH DIGIT NINE
3038 ; 10.0 # Nl HANGZHOU NUMERAL TEN
3229 ; 10.0 # No PARENTHESIZED IDEOGRAPH TEN
3289 ; 10.0 # No CIRCLED IDEOGRAPH TEN
4EC0 ; 10.0 # Lo CJK UNIFIED IDEOGRAPH-4EC0
5341 ; 10.0 # Lo CJK UNIFIED IDEOGRAPH-5341
62FE ; 10.0 # Lo CJK UNIFIED IDEOGRAPH-62FE
10110 ; 10.0 # No AEGEAN NUMBER TEN
@ -724,7 +746,7 @@ FF19 ; 9.0 # Nd FULLWIDTH DIGIT NINE
103D3 ; 10.0 # Nl OLD PERSIAN NUMBER TEN
10A44 ; 10.0 # No KHAROSHTHI NUMBER TEN
# Total code points: 28
# Total code points: 29
# ================================================
@ -823,12 +845,13 @@ FF19 ; 9.0 # Nd FULLWIDTH DIGIT NINE
249B ; 20.0 # No NUMBER TWENTY FULL STOP
24F4 ; 20.0 # No NEGATIVE CIRCLED NUMBER TWENTY
3039 ; 20.0 # Nl HANGZHOU NUMERAL TWENTY
5344 ; 20.0 # Lo CJK UNIFIED IDEOGRAPH-5344
5EFF ; 20.0 # Lo CJK UNIFIED IDEOGRAPH-5EFF
10111 ; 20.0 # No AEGEAN NUMBER TWENTY
103D4 ; 20.0 # Nl OLD PERSIAN NUMBER TWENTY
10A45 ; 20.0 # No KHAROSHTHI NUMBER TWENTY
# Total code points: 10
# Total code points: 11
# ================================================
@ -892,8 +915,9 @@ FF19 ; 9.0 # Nd FULLWIDTH DIGIT NINE
5345 ; 30.0 # Lo CJK UNIFIED IDEOGRAPH-5345
10112 ; 30.0 # No AEGEAN NUMBER THIRTY
10165 ; 30.0 # Nl GREEK ACROPHONIC THESPIAN THIRTY
20983 ; 30.0 # Lo CJK UNIFIED IDEOGRAPH-20983
# Total code points: 6
# Total code points: 7
# ================================================
@ -953,9 +977,12 @@ FF19 ; 9.0 # Nd FULLWIDTH DIGIT NINE
1375 ; 40.0 # No ETHIOPIC NUMBER FORTY
32B5 ; 40.0 # No CIRCLED NUMBER FORTY
534C ; 40.0 # Lo CJK UNIFIED IDEOGRAPH-534C
10113 ; 40.0 # No AEGEAN NUMBER FORTY
2098C ; 40.0 # Lo CJK UNIFIED IDEOGRAPH-2098C
2099C ; 40.0 # Lo CJK UNIFIED IDEOGRAPH-2099C
# Total code points: 3
# Total code points: 6
# ================================================
@ -1125,8 +1152,9 @@ FF19 ; 9.0 # Nd FULLWIDTH DIGIT NINE
# ================================================
10121 ; 900.0 # No AEGEAN NUMBER NINE HUNDRED
1034A ; 900.0 # Nl GOTHIC LETTER NINE HUNDRED
# Total code points: 1
# Total code points: 2
# ================================================

View File

@ -156,7 +156,8 @@
0559..055F;N
0561..0587;N
0589..058A;N
0591..05C6;N
0591..05B9;N
05BB..05C7;N
05D0..05EA;N
05F0..05F4;N
0600..0603;N

View File

@ -1,5 +1,5 @@
# GraphemeBreakProperty-4.1.0.txt
# Date: 2004-12-11, 05:44:40 GMT [MD]
# Date: 2005-02-26, 02:40:18 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2005 Unicode, Inc.
@ -56,10 +56,12 @@ E0020..E007F ; Control
0300..036F ; Extend
0483..0486 ; Extend
0488..0489 ; Extend
0591..05BD ; Extend
0591..05B9 ; Extend
05BB..05BD ; Extend
05BF ; Extend
05C1..05C2 ; Extend
05C4..05C5 ; Extend
05C7 ; Extend
0610..0615 ; Extend
064B..065E ; Extend
0670 ; Extend

View File

@ -123,34 +123,37 @@
0561..0587;AL
0589;IS
058A;BA
0591..05BD;CM
0591..05B9;CM
05BB..05BD;CM
05BE;AL
05BF;CM
05C0;AL
05C1..05C2;CM
05C3;AL
05C4..05C5;CM
05C6;AL
05C6;EX
05C7;CM
05D0..05EA;AL
05F0..05F4;AL
0600..0603;AL
060B;PO
060C;AL
060C;EX
060D;IS
060E..060F;AL
0610..0615;CM
061B;AL
061E;AL
061F;AL
061B;EX
061E..061F;EX
0621..063A;AL
0640..064A;AL
064B..065E;CM
0660..0669;NU
066A;AL
066A;EX
066B..066C;NU
066D..066F;AL
0670;CM
0671..06D5;AL
0671..06D3;AL
06D4;EX
06D5;AL
06D6..06DC;CM
06DD;AL
06DE..06E4;CM
@ -332,7 +335,7 @@
0E47..0E4E;CM
0E4F;AL
0E50..0E59;NU
0E5A..0E5B;NS
0E5A..0E5B;BA
0E81..0E82;SA
0E84;SA
0E87..0E88;SA
@ -355,15 +358,24 @@
0EC8..0ECD;CM
0ED0..0ED9;NU
0EDC..0EDD;SA
0F00..0F0A;AL
0F00;AL
0F01..0F04;BB
0F05;AL
0F06..0F07;BB
0F08;GL
0F09..0F0A;BB
0F0B;BA
0F0C;GL
0F0D..0F0E;BA
0F0F..0F17;AL
0F0D..0F11;EX
0F12;GL
0F13;AL
0F14;EX
0F15..0F17;AL
0F18..0F19;CM
0F1A..0F1F;AL
0F20..0F29;NU
0F2A..0F34;AL
0F2A..0F33;AL
0F34;BA
0F35;CM
0F36;AL
0F37;CM
@ -376,16 +388,20 @@
0F3E..0F3F;CM
0F40..0F47;AL
0F49..0F6A;AL
0F71..0F84;CM
0F85;AL
0F71..0F7E;CM
0F7F;BA
0F80..0F84;CM
0F85;BA
0F86..0F87;CM
0F88..0F8B;AL
0F90..0F97;CM
0F99..0FBC;CM
0FBE..0FC5;AL
0FBE..0FBF;BA
0FC0..0FC5;AL
0FC6;CM
0FC7..0FCC;AL
0FCF..0FD1;AL
0FCF;AL
0FD0..0FD1;BB
1000..1021;SA
1023..1027;SA
1029..102A;SA
@ -429,7 +445,9 @@
1681..169A;AL
169B;OP
169C;CL
16A0..16F0;AL
16A0..16EA;AL
16EB..16ED;BA
16EE..16F0;AL
1700..170C;AL
170E..1711;AL
1712..1714;CM
@ -443,17 +461,22 @@
1772..1773;CM
1780..17B5;SA
17B6..17D3;CM
17D4;NS
17D5;BA
17D6..17DA;NS
17D4..17D5;BA
17D6..17D7;NS
17D8;BA
17D9;NS
17DA;BA
17DB;PR
17DC;AL
17DD;CM
17E0..17E9;NU
17F0..17F9;AL
1800..1805;AL
1800..1801;AL
1802..1805;BA
1806;BB
1807..180A;AL
1807;AL
1808..1809;BA
180A;AL
180B..180D;CM
180E;GL
1810..1819;NU
@ -473,10 +496,10 @@
19C1..19C7;AL
19C8..19C9;CM
19D0..19D9;NU
19DE..19DF;AL
19E0..1A16;AL
19DE..1A16;AL
1A17..1A1B;CM
1A1E..1A1F;AL
1A1E;BA
1A1F;AL
1D00..1DBF;AL
1DC0..1DC3;CM
1E00..1E9B;AL
@ -788,7 +811,9 @@
2C00..2C2E;AL
2C30..2C5E;AL
2C80..2CEA;AL
2CF9..2CFF;AL
2CF9..2CFC;BA
2CFD;AL
2CFE..2CFF;BA
2D00..2D25;AL
2D30..2D65;AL
2D6F;AL
@ -803,7 +828,7 @@
2DD8..2DDE;AL
2E00..2E0D;QU
2E0E..2E15;BA
2E16;BA
2E16;AL
2E17;BA
2E1C..2E1D;QU
2E80..2E99;ID
@ -1870,9 +1895,8 @@ FFFD;AI
10A38..10A3A;CM
10A3F;CM
10A40..10A47;AL
10A50..10A55;BA
10A56..10A57;BA
10A58;BA
10A50..10A57;BA
10A58;AL
1D000..1D0F5;AL
1D100..1D126;AL
1D12A..1D164;AL

View File

@ -1,4 +1,5 @@
# NormalizationCorrections-4.0.0.txt
# NormalizationCorrections-4.1.0.txt
# Date: 2005-01-10, 17:20:00 PST [KW]
#
# This file is a normative contributory data file in the
# Unicode Character Database.

View File

@ -1,5 +1,5 @@
# NormalizationTest-4.1.0.txt
# Date: 2004-12-14, 02:28:42 GMT [MD]
# Date: 2005-02-26, 02:40:24 GMT [MD]
#
# Normalization Test Suite
# Format:
@ -13965,7 +13965,7 @@ FABB;8ACB;8ACB;8ACB;8ACB;
FABC;8B01;8B01;8B01;8B01;
FABD;8AFE;8AFE;8AFE;8AFE;
FABE;8AED;8AED;8AED;8AED;
FABF;8BE9;8BE9;8BE9;8BE9;
FABF;8B39;8B39;8B39;8B39;
FAC0;8B8A;8B8A;8B8A;8B8A;
FAC1;8D08;8D08;8D08;8D08;
FAC2;8F38;8F38;8F38;8F38;
@ -16935,8 +16935,6 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 05B8 05B9 05B8 05B7 0062;0061 05B7 05B8 05B8 05B9 0062;0061 05B7 05B8 05B8 05B9 0062;0061 05B7 05B8 05B8 05B9 0062;0061 05B7 05B8 05B8 05B9 0062;
0061 05BB 05B9 05B8 05B9 0062;0061 05B8 05B9 05B9 05BB 0062;0061 05B8 05B9 05B9 05BB 0062;0061 05B8 05B9 05B9 05BB 0062;0061 05B8 05B9 05B9 05BB 0062;
0061 05B9 05BB 05B9 05B8 0062;0061 05B8 05B9 05B9 05BB 0062;0061 05B8 05B9 05B9 05BB 0062;0061 05B8 05B9 05B9 05BB 0062;0061 05B8 05B9 05B9 05BB 0062;
0061 05B9 05B8 05B7 05BA 0062;0061 05B7 05B8 05BA 05B9 0062;0061 05B7 05B8 05BA 05B9 0062;0061 05B7 05B8 05BA 05B9 0062;0061 05B7 05B8 05BA 05B9 0062;
0061 05BA 05B9 05B8 05B7 0062;0061 05B7 05BA 05B8 05B9 0062;0061 05B7 05BA 05B8 05B9 0062;0061 05B7 05BA 05B8 05B9 0062;0061 05B7 05BA 05B8 05B9 0062;
0061 05BC 05BB 05B9 05BB 0062;0061 05B9 05BB 05BB 05BC 0062;0061 05B9 05BB 05BB 05BC 0062;0061 05B9 05BB 05BB 05BC 0062;0061 05B9 05BB 05BB 05BC 0062;
0061 05BB 05BC 05BB 05B9 0062;0061 05B9 05BB 05BB 05BC 0062;0061 05B9 05BB 05BB 05BC 0062;0061 05B9 05BB 05BB 05BC 0062;0061 05B9 05BB 05BB 05BC 0062;
0061 05BD 05BC 05BB 05BC 0062;0061 05BB 05BC 05BC 05BD 0062;0061 05BB 05BC 05BC 05BD 0062;0061 05BB 05BC 05BC 05BD 0062;0061 05BB 05BC 05BC 05BD 0062;
@ -16953,6 +16951,8 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 05C4 0315 0300 05AE 0062;0061 05AE 05C4 0300 0315 0062;0061 05AE 05C4 0300 0315 0062;0061 05AE 05C4 0300 0315 0062;0061 05AE 05C4 0300 0315 0062;
0061 059A 0316 302A 05C5 0062;0061 302A 0316 05C5 059A 0062;0061 302A 0316 05C5 059A 0062;0061 302A 0316 05C5 059A 0062;0061 302A 0316 05C5 059A 0062;
0061 05C5 059A 0316 302A 0062;0061 302A 05C5 0316 059A 0062;0061 302A 05C5 0316 059A 0062;0061 302A 05C5 0316 059A 0062;0061 302A 05C5 0316 059A 0062;
0061 05B9 05B8 05B7 05C7 0062;0061 05B7 05B8 05C7 05B9 0062;0061 05B7 05B8 05C7 05B9 0062;0061 05B7 05B8 05C7 05B9 0062;0061 05B7 05B8 05C7 05B9 0062;
0061 05C7 05B9 05B8 05B7 0062;0061 05B7 05C7 05B8 05B9 0062;0061 05B7 05C7 05B8 05B9 0062;0061 05B7 05C7 05B8 05B9 0062;0061 05B7 05C7 05B8 05B9 0062;
0061 0315 0300 05AE 0610 0062;00E0 05AE 0610 0315 0062;0061 05AE 0300 0610 0315 0062;00E0 05AE 0610 0315 0062;0061 05AE 0300 0610 0315 0062;
0061 0610 0315 0300 05AE 0062;0061 05AE 0610 0300 0315 0062;0061 05AE 0610 0300 0315 0062;0061 05AE 0610 0300 0315 0062;0061 05AE 0610 0300 0315 0062;
0061 0315 0300 05AE 0611 0062;00E0 05AE 0611 0315 0062;0061 05AE 0300 0611 0315 0062;00E0 05AE 0611 0315 0062;0061 05AE 0300 0611 0315 0062;
@ -17001,10 +17001,10 @@ FFEE;FFEE;FFEE;25CB;25CB;
0061 065B 0315 0300 05AE 0062;0061 05AE 065B 0300 0315 0062;0061 05AE 065B 0300 0315 0062;0061 05AE 065B 0300 0315 0062;0061 05AE 065B 0300 0315 0062;
0061 059A 0316 302A 065C 0062;0061 302A 0316 065C 059A 0062;0061 302A 0316 065C 059A 0062;0061 302A 0316 065C 059A 0062;0061 302A 0316 065C 059A 0062;
0061 065C 059A 0316 302A 0062;0061 302A 065C 0316 059A 0062;0061 302A 065C 0316 059A 0062;0061 302A 065C 0316 059A 0062;0061 302A 065C 0316 059A 0062;
0061 0650 064F 064E 065D 0062;0061 064E 064F 065D 0650 0062;0061 064E 064F 065D 0650 0062;0061 064E 064F 065D 0650 0062;0061 064E 064F 065D 0650 0062;
0061 065D 0650 064F 064E 0062;0061 064E 065D 064F 0650 0062;0061 064E 065D 064F 0650 0062;0061 064E 065D 064F 0650 0062;0061 064E 065D 064F 0650 0062;
0061 064F 064E 064D 065E 0062;0061 064D 064E 065E 064F 0062;0061 064D 064E 065E 064F 0062;0061 064D 064E 065E 064F 0062;0061 064D 064E 065E 064F 0062;
0061 065E 064F 064E 064D 0062;0061 064D 065E 064E 064F 0062;0061 064D 065E 064E 064F 0062;0061 064D 065E 064E 064F 0062;0061 064D 065E 064E 064F 0062;
0061 0315 0300 05AE 065D 0062;00E0 05AE 065D 0315 0062;0061 05AE 0300 065D 0315 0062;00E0 05AE 065D 0315 0062;0061 05AE 0300 065D 0315 0062;
0061 065D 0315 0300 05AE 0062;0061 05AE 065D 0300 0315 0062;0061 05AE 065D 0300 0315 0062;0061 05AE 065D 0300 0315 0062;0061 05AE 065D 0300 0315 0062;
0061 0315 0300 05AE 065E 0062;00E0 05AE 065E 0315 0062;0061 05AE 0300 065E 0315 0062;00E0 05AE 065E 0315 0062;0061 05AE 0300 065E 0315 0062;
0061 065E 0315 0300 05AE 0062;0061 05AE 065E 0300 0315 0062;0061 05AE 065E 0300 0315 0062;0061 05AE 065E 0300 0315 0062;0061 05AE 065E 0300 0315 0062;
0061 0711 0670 0652 0670 0062;0061 0652 0670 0670 0711 0062;0061 0652 0670 0670 0711 0062;0061 0652 0670 0670 0711 0062;0061 0652 0670 0670 0711 0062;
0061 0670 0711 0670 0652 0062;0061 0652 0670 0670 0711 0062;0061 0652 0670 0670 0711 0062;0061 0652 0670 0670 0711 0062;0061 0652 0670 0670 0711 0062;
0061 0315 0300 05AE 06D6 0062;00E0 05AE 06D6 0315 0062;0061 05AE 0300 06D6 0315 0062;00E0 05AE 06D6 0315 0062;0061 05AE 0300 06D6 0315 0062;

View File

@ -1,5 +1,5 @@
# PropList-4.1.0.txt
# Date: 2004-12-14, 01:07:52 GMT [MD]
# Date: 2005-03-10, 01:42:15 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2005 Unicode, Inc.
@ -279,10 +279,12 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
# ================================================
0345 ; Other_Alphabetic # Mn COMBINING GREEK YPOGEGRAMMENI
05B0..05BD ; Other_Alphabetic # Mn [14] HEBREW POINT SHEVA..HEBREW POINT METEG
05B0..05B9 ; Other_Alphabetic # Mn [10] HEBREW POINT SHEVA..HEBREW POINT HOLAM
05BB..05BD ; Other_Alphabetic # Mn [3] HEBREW POINT QUBUTS..HEBREW POINT METEG
05BF ; Other_Alphabetic # Mn HEBREW POINT RAFE
05C1..05C2 ; Other_Alphabetic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
05C4..05C5 ; Other_Alphabetic # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
05C7 ; Other_Alphabetic # Mn HEBREW POINT QAMATS QATAN
0610..0615 ; Other_Alphabetic # Mn [6] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL HIGH TAH
064B..0657 ; Other_Alphabetic # Mn [13] ARABIC FATHATAN..ARABIC INVERTED DAMMA
0659..065E ; Other_Alphabetic # Mn [6] ARABIC ZWARAKAY..ARABIC FATHA WITH TWO DOTS
@ -821,7 +823,6 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
005C ; Pattern_Syntax # Po REVERSE SOLIDUS
005D ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET
005E ; Pattern_Syntax # Sk CIRCUMFLEX ACCENT
005F ; Pattern_Syntax # Pc LOW LINE
0060 ; Pattern_Syntax # Sk GRAVE ACCENT
007B ; Pattern_Syntax # Ps LEFT CURLY BRACKET
007C ; Pattern_Syntax # Sm VERTICAL LINE
@ -856,7 +857,6 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
2039 ; Pattern_Syntax # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK
203A ; Pattern_Syntax # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
203B..203E ; Pattern_Syntax # Po [4] REFERENCE MARK..OVERLINE
203F..2040 ; Pattern_Syntax # Pc [2] UNDERTIE..CHARACTER TIE
2041..2043 ; Pattern_Syntax # Po [3] CARET INSERTION POINT..HYPHEN BULLET
2044 ; Pattern_Syntax # Sm FRACTION SLASH
2045 ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH QUILL
@ -864,7 +864,6 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
2047..2051 ; Pattern_Syntax # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY
2052 ; Pattern_Syntax # Sm COMMERCIAL MINUS SIGN
2053 ; Pattern_Syntax # Po SWUNG DASH
2054 ; Pattern_Syntax # Pc INVERTED UNDERTIE
2055..205E ; Pattern_Syntax # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS
2190..2194 ; Pattern_Syntax # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW
2195..2199 ; Pattern_Syntax # So [5] UP DOWN ARROW..SOUTH WEST ARROW
@ -905,9 +904,6 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
2427..243F ; Pattern_Syntax # Cn [25] <reserved-2427>..<reserved-243F>
2440..244A ; Pattern_Syntax # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
244B..245F ; Pattern_Syntax # Cn [21] <reserved-244B>..<reserved-245F>
2460..249B ; Pattern_Syntax # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
249C..24E9 ; Pattern_Syntax # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
24EA..24FF ; Pattern_Syntax # No [22] CIRCLED DIGIT ZERO..NEGATIVE CIRCLED DIGIT ZERO
2500..25B6 ; Pattern_Syntax # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE
25B7 ; Pattern_Syntax # Sm WHITE RIGHT-POINTING TRIANGLE
25B8..25C0 ; Pattern_Syntax # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE
@ -951,7 +947,6 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
2773 ; Pattern_Syntax # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT
2774 ; Pattern_Syntax # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT
2775 ; Pattern_Syntax # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT
2776..2793 ; Pattern_Syntax # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
2794 ; Pattern_Syntax # So HEAVY WIDE-HEADED RIGHTWARDS ARROW
2795..2797 ; Pattern_Syntax # Cn [3] <reserved-2795>..<reserved-2797>
2798..27AF ; Pattern_Syntax # So [24] HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW
@ -1007,21 +1002,21 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
2B00..2B13 ; Pattern_Syntax # So [20] NORTH EAST WHITE ARROW..SQUARE WITH BOTTOM HALF BLACK
2B14..2BFF ; Pattern_Syntax # Cn [236] <reserved-2B14>..<reserved-2BFF>
2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
2E02 ; Pattern_Syntax # Ps LEFT SUBSTITUTION BRACKET
2E03 ; Pattern_Syntax # Pe RIGHT SUBSTITUTION BRACKET
2E04 ; Pattern_Syntax # Ps LEFT DOTTED SUBSTITUTION BRACKET
2E05 ; Pattern_Syntax # Pe RIGHT DOTTED SUBSTITUTION BRACKET
2E02 ; Pattern_Syntax # Pi LEFT SUBSTITUTION BRACKET
2E03 ; Pattern_Syntax # Pf RIGHT SUBSTITUTION BRACKET
2E04 ; Pattern_Syntax # Pi LEFT DOTTED SUBSTITUTION BRACKET
2E05 ; Pattern_Syntax # Pf RIGHT DOTTED SUBSTITUTION BRACKET
2E06..2E08 ; Pattern_Syntax # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER
2E09 ; Pattern_Syntax # Ps LEFT TRANSPOSITION BRACKET
2E0A ; Pattern_Syntax # Pe RIGHT TRANSPOSITION BRACKET
2E09 ; Pattern_Syntax # Pi LEFT TRANSPOSITION BRACKET
2E0A ; Pattern_Syntax # Pf RIGHT TRANSPOSITION BRACKET
2E0B ; Pattern_Syntax # Po RAISED SQUARE
2E0C ; Pattern_Syntax # Pi LEFT RAISED OMISSION BRACKET
2E0D ; Pattern_Syntax # Pf RIGHT RAISED OMISSION BRACKET
2E0E..2E16 ; Pattern_Syntax # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE
2E17 ; Pattern_Syntax # Pd DOUBLE OBLIQUE HYPHEN
2E18..2E1B ; Pattern_Syntax # Cn [4] <reserved-2E18>..<reserved-2E1B>
2E1C ; Pattern_Syntax # Ps LEFT LOW PARAPHRASE BRACKET
2E1D ; Pattern_Syntax # Pe RIGHT LOW PARAPHRASE BRACKET
2E1C ; Pattern_Syntax # Pi LEFT LOW PARAPHRASE BRACKET
2E1D ; Pattern_Syntax # Pf RIGHT LOW PARAPHRASE BRACKET
2E1E..2E7F ; Pattern_Syntax # Cn [98] <reserved-2E1E>..<reserved-2E7F>
3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET
@ -1052,4 +1047,4 @@ FD3E ; Pattern_Syntax # Ps ORNATE LEFT PARENTHESIS
FD3F ; Pattern_Syntax # Pe ORNATE RIGHT PARENTHESIS
FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT
# Total code points: 2955
# Total code points: 2761

View File

@ -1,5 +1,5 @@
# PropertyAliases-4.1.0.txt
# Date: 2004-12-11, 05:46:23 GMT [MD]
# Date: 2005-02-26, 11:13:41 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2005 Unicode, Inc.
@ -150,7 +150,7 @@ Term ; Terminal_Punctuation
UIdeo ; Unified_Ideograph
Upper ; Uppercase
VS ; Variation_Selector
WSpace ; White_Space
WSpace ; White_Space ; space
XIDC ; XID_Continue
XIDS ; XID_Start
XO_NFC ; Expands_On_NFC

View File

@ -1,5 +1,5 @@
# PropertyValueAliases-4.1.0.txt
# Date: 2004-12-11, 05:46:23 GMT [MD]
# Date: 2005-02-26, 11:13:54 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2005 Unicode, Inc.
@ -111,12 +111,12 @@ blk; n/a ; Buginese
blk; n/a ; Buhid
blk; n/a ; Byzantine_Musical_Symbols
blk; n/a ; Cherokee
blk; n/a ; CJK_Basic_Strokes
blk; n/a ; CJK_Compatibility
blk; n/a ; CJK_Compatibility_Forms
blk; n/a ; CJK_Compatibility_Ideographs
blk; n/a ; CJK_Compatibility_Ideographs_Supplement
blk; n/a ; CJK_Radicals_Supplement
blk; n/a ; CJK_Strokes
blk; n/a ; CJK_Symbols_and_Punctuation
blk; n/a ; CJK_Unified_Ideographs
blk; n/a ; CJK_Unified_Ideographs_Extension_A
@ -292,7 +292,7 @@ ea ; W ; Wide
# General_Category (gc)
gc ; C ; Other # Cc | Cf | Cn | Co | Cs
gc ; Cc ; Control
gc ; Cc ; Control ; cntrl
gc ; Cf ; Format
gc ; Cn ; Unassigned
gc ; Co ; Private_Use
@ -309,10 +309,10 @@ gc ; Mc ; Spacing_Mark
gc ; Me ; Enclosing_Mark
gc ; Mn ; Nonspacing_Mark
gc ; N ; Number # Nd | Nl | No
gc ; Nd ; Decimal_Number
gc ; Nd ; Decimal_Number ; digit
gc ; Nl ; Letter_Number
gc ; No ; Other_Number
gc ; P ; Punctuation # Pc | Pd | Pe | Pf | Pi | Po | Ps
gc ; P ; Punctuation ; punct # Pc | Pd | Pe | Pf | Pi | Po | Ps
gc ; Pc ; Connector_Punctuation
gc ; Pd ; Dash_Punctuation
gc ; Pe ; Close_Punctuation
@ -497,7 +497,7 @@ sc ; Bugi ; Buginese
sc ; Buhd ; Buhid
sc ; Cans ; Canadian_Aboriginal
sc ; Cher ; Cherokee
sc ; Copt ; Coptic
sc ; Copt ; Coptic ; Qaac
sc ; Cprt ; Cypriot
sc ; Cyrl ; Cyrillic
sc ; Deva ; Devanagari

View File

@ -1,5 +1,5 @@
# Scripts-4.1.0.txt
# Date: 2004-12-11, 05:46:26 GMT [MD]
# Date: 2005-03-10, 01:56:19 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2005 Unicode, Inc.
@ -287,20 +287,20 @@
29FE..2AFF ; Common # Sm [258] TINY..N-ARY WHITE VERTICAL BAR
2B00..2B13 ; Common # So [20] NORTH EAST WHITE ARROW..SQUARE WITH BOTTOM HALF BLACK
2E00..2E01 ; Common # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
2E02 ; Common # Ps LEFT SUBSTITUTION BRACKET
2E03 ; Common # Pe RIGHT SUBSTITUTION BRACKET
2E04 ; Common # Ps LEFT DOTTED SUBSTITUTION BRACKET
2E05 ; Common # Pe RIGHT DOTTED SUBSTITUTION BRACKET
2E02 ; Common # Pi LEFT SUBSTITUTION BRACKET
2E03 ; Common # Pf RIGHT SUBSTITUTION BRACKET
2E04 ; Common # Pi LEFT DOTTED SUBSTITUTION BRACKET
2E05 ; Common # Pf RIGHT DOTTED SUBSTITUTION BRACKET
2E06..2E08 ; Common # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER
2E09 ; Common # Ps LEFT TRANSPOSITION BRACKET
2E0A ; Common # Pe RIGHT TRANSPOSITION BRACKET
2E09 ; Common # Pi LEFT TRANSPOSITION BRACKET
2E0A ; Common # Pf RIGHT TRANSPOSITION BRACKET
2E0B ; Common # Po RAISED SQUARE
2E0C ; Common # Pi LEFT RAISED OMISSION BRACKET
2E0D ; Common # Pf RIGHT RAISED OMISSION BRACKET
2E0E..2E16 ; Common # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE
2E17 ; Common # Pd DOUBLE OBLIQUE HYPHEN
2E1C ; Common # Ps LEFT LOW PARAPHRASE BRACKET
2E1D ; Common # Pe RIGHT LOW PARAPHRASE BRACKET
2E1C ; Common # Pi LEFT LOW PARAPHRASE BRACKET
2E1D ; Common # Pf RIGHT LOW PARAPHRASE BRACKET
2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
3000 ; Common # Zs IDEOGRAPHIC SPACE
3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
@ -342,7 +342,7 @@
3190..3191 ; Common # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK
3192..3195 ; Common # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
3196..319F ; Common # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
31C0..31CF ; Common # So [16] CJK BASIC STROKE T..CJK BASIC STROKE N
31C0..31CF ; Common # So [16] CJK STROKE T..CJK STROKE N
3220..3229 ; Common # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
322A..3243 ; Common # So [26] PARENTHESIZED IDEOGRAPH MOON..PARENTHESIZED IDEOGRAPH REACH
3250 ; Common # So PARTNERSHIP SIGN
@ -624,7 +624,8 @@ FB13..FB17 ; Armenian # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SM
# ================================================
0591..05BD ; Hebrew # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
0591..05B9 ; Hebrew # Mn [41] HEBREW ACCENT ETNAHTA..HEBREW POINT HOLAM
05BB..05BD ; Hebrew # Mn [3] HEBREW POINT QUBUTS..HEBREW POINT METEG
05BE ; Hebrew # Po HEBREW PUNCTUATION MAQAF
05BF ; Hebrew # Mn HEBREW POINT RAFE
05C0 ; Hebrew # Po HEBREW PUNCTUATION PASEQ
@ -632,6 +633,7 @@ FB13..FB17 ; Armenian # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SM
05C3 ; Hebrew # Po HEBREW PUNCTUATION SOF PASUQ
05C4..05C5 ; Hebrew # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
05C6 ; Hebrew # Po HEBREW PUNCTUATION NUN HAFUKHA
05C7 ; Hebrew # Mn HEBREW POINT QAMATS QATAN
05D0..05EA ; Hebrew # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV
05F0..05F2 ; Hebrew # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
05F3..05F4 ; Hebrew # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM
@ -1088,7 +1090,8 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L
1312..1315 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE
1318..135A ; Ethiopic # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA
135F ; Ethiopic # Mn ETHIOPIC COMBINING GEMINATION MARK
1360..1368 ; Ethiopic # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR
1360 ; Ethiopic # So ETHIOPIC SECTION MARK
1361..1368 ; Ethiopic # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR
1369..137C ; Ethiopic # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND
1380..138F ; Ethiopic # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE
1390..1399 ; Ethiopic # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT
@ -1401,7 +1404,7 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2
19C1..19C7 ; New_Tai_Lue # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B
19C8..19C9 ; New_Tai_Lue # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
19D0..19D9 ; New_Tai_Lue # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
19DE..19DF ; New_Tai_Lue # So [2] NEW TAI LUE SIGN LE..NEW TAI LUE SIGN LEW
19DE..19DF ; New_Tai_Lue # Po [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV
# Total code points: 80
@ -1415,7 +1418,7 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2
# ================================================
2D30..2D65 ; Tifinagh # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
2D6F ; Tifinagh # Lm TIFINAGH MODIFIER LETTER LABIALIZATION
2D6F ; Tifinagh # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
# Total code points: 55
@ -1455,8 +1458,7 @@ A828..A82B ; Syloti_Nagri # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI
10A19..10A33 ; Kharoshthi # Lo [27] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA
10A38..10A3A ; Kharoshthi # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
10A3F ; Kharoshthi # Mn KHAROSHTHI VIRAMA
10A40..10A43 ; Kharoshthi # Nd [4] KHAROSHTHI DIGIT ONE..KHAROSHTHI DIGIT FOUR
10A44..10A47 ; Kharoshthi # No [4] KHAROSHTHI NUMBER TEN..KHAROSHTHI NUMBER ONE THOUSAND
10A40..10A47 ; Kharoshthi # No [8] KHAROSHTHI DIGIT ONE..KHAROSHTHI NUMBER ONE THOUSAND
10A50..10A58 ; Kharoshthi # Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES
# Total code points: 65

View File

@ -1,5 +1,5 @@
# SentenceBreakProperty-4.1.0.txt
# Date: 2004-12-11, 05:46:48 GMT [MD]
# Date: 2005-02-26, 02:42:31 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2005 Unicode, Inc.

View File

@ -1,5 +1,5 @@
# SpecialCasing-4.1.0.txt
# Date: 2004-12-16, 03:10:45 GMT [MD]
# Date: 2005-02-26, 02:42:51 GMT [MD]
#
# Special Casing Properties
#
@ -34,7 +34,8 @@
# RFC 3066 (or its successor), and replacing '-' by '_'.
#
# A context for a character C is defined by Section 3.13 Default Case Operations,
# on p. 89-90 of The Unicode Standard, Version 4.0, as amended by Unicode 4.0.1.
# on p. 89-90 of The Unicode Standard, Version 4.0, as amended by Unicode 4.1.0,
# as specified in http://www.unicode.org/versions/Unicode4.1.0/
#
# Parsers of this file must be prepared to deal with future additions to this format:
# * Additional contexts

View File

@ -984,7 +984,7 @@
03F9;GREEK CAPITAL LUNATE SIGMA SYMBOL;Lu;0;L;<compat> 03A3;;;;N;;;;03F2;
03FA;GREEK CAPITAL LETTER SAN;Lu;0;L;;;;;N;;;;03FB;
03FB;GREEK SMALL LETTER SAN;Ll;0;L;;;;;N;;;03FA;;03FA
03FC;GREEK RHO WITH STROKE SYMBOL;Ll;0;ON;;;;;N;;;;;
03FC;GREEK RHO WITH STROKE SYMBOL;Ll;0;L;;;;;N;;;;;
03FD;GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL;Lu;0;L;;;;;N;;;;;
03FE;GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL;Lu;0;L;;;;;N;;;;;
03FF;GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL;Lu;0;L;;;;;N;;;;;
@ -1379,7 +1379,6 @@
05B7;HEBREW POINT PATAH;Mn;17;NSM;;;;;N;;;;;
05B8;HEBREW POINT QAMATS;Mn;18;NSM;;;;;N;;;;;
05B9;HEBREW POINT HOLAM;Mn;19;NSM;;;;;N;;;;;
05BA;HEBREW POINT QAMATS QATAN;Mn;18;NSM;;;;;N;;;;;
05BB;HEBREW POINT QUBUTS;Mn;20;NSM;;;;;N;;;;;
05BC;HEBREW POINT DAGESH OR MAPIQ;Mn;21;NSM;;;;;N;HEBREW POINT DAGESH;or shuruq;;;
05BD;HEBREW POINT METEG;Mn;22;NSM;;;;;N;;*;;;
@ -1392,6 +1391,7 @@
05C4;HEBREW MARK UPPER DOT;Mn;230;NSM;;;;;N;;;;;
05C5;HEBREW MARK LOWER DOT;Mn;220;NSM;;;;;N;;;;;
05C6;HEBREW PUNCTUATION NUN HAFUKHA;Po;0;R;;;;;N;;;;;
05C7;HEBREW POINT QAMATS QATAN;Mn;18;NSM;;;;;N;;;;;
05D0;HEBREW LETTER ALEF;Lo;0;R;;;;;N;;;;;
05D1;HEBREW LETTER BET;Lo;0;R;;;;;N;;;;;
05D2;HEBREW LETTER GIMEL;Lo;0;R;;;;;N;;;;;
@ -1497,8 +1497,8 @@
065A;ARABIC VOWEL SIGN SMALL V ABOVE;Mn;230;NSM;;;;;N;;;;;
065B;ARABIC VOWEL SIGN INVERTED SMALL V ABOVE;Mn;230;NSM;;;;;N;;;;;
065C;ARABIC VOWEL SIGN DOT BELOW;Mn;220;NSM;;;;;N;;;;;
065D;ARABIC REVERSED DAMMA;Mn;31;NSM;;;;;N;;;;;
065E;ARABIC FATHA WITH TWO DOTS;Mn;30;NSM;;;;;N;;;;;
065D;ARABIC REVERSED DAMMA;Mn;230;NSM;;;;;N;;;;;
065E;ARABIC FATHA WITH TWO DOTS;Mn;230;NSM;;;;;N;;;;;
0660;ARABIC-INDIC DIGIT ZERO;Nd;0;AN;;0;0;0;N;;;;;
0661;ARABIC-INDIC DIGIT ONE;Nd;0;AN;;1;1;1;N;;;;;
0662;ARABIC-INDIC DIGIT TWO;Nd;0;AN;;2;2;2;N;;;;;
@ -3720,7 +3720,7 @@
1359;ETHIOPIC SYLLABLE MYA;Lo;0;L;;;;;N;;;;;
135A;ETHIOPIC SYLLABLE FYA;Lo;0;L;;;;;N;;;;;
135F;ETHIOPIC COMBINING GEMINATION MARK;Mn;230;NSM;;;;;N;;;;;
1360;ETHIOPIC SECTION MARK;Po;0;L;;;;;N;;;;;
1360;ETHIOPIC SECTION MARK;So;0;L;;;;;N;;;;;
1361;ETHIOPIC WORDSPACE;Po;0;L;;;;;N;;;;;
1362;ETHIOPIC FULL STOP;Po;0;L;;;;;N;;;;;
1363;ETHIOPIC COMMA;Po;0;L;;;;;N;;;;;
@ -5129,8 +5129,8 @@
19D7;NEW TAI LUE DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;;
19D8;NEW TAI LUE DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;;
19D9;NEW TAI LUE DIGIT NINE;Nd;0;L;;9;9;9;N;;;;;
19DE;NEW TAI LUE SIGN LE;So;0;ON;;;;;N;;;;;
19DF;NEW TAI LUE SIGN LEW;So;0;ON;;;;;N;;;;;
19DE;NEW TAI LUE SIGN LAE;Po;0;ON;;;;;N;;;;;
19DF;NEW TAI LUE SIGN LAEV;Po;0;ON;;;;;N;;;;;
19E0;KHMER SYMBOL PATHAMASAT;So;0;ON;;;;;N;;;;;
19E1;KHMER SYMBOL MUOY KOET;So;0;ON;;;;;N;;;;;
19E2;KHMER SYMBOL PII KOET;So;0;ON;;;;;N;;;;;
@ -5982,8 +5982,8 @@
2077;SUPERSCRIPT SEVEN;No;0;EN;<super> 0037;;7;7;N;SUPERSCRIPT DIGIT SEVEN;;;;
2078;SUPERSCRIPT EIGHT;No;0;EN;<super> 0038;;8;8;N;SUPERSCRIPT DIGIT EIGHT;;;;
2079;SUPERSCRIPT NINE;No;0;EN;<super> 0039;;9;9;N;SUPERSCRIPT DIGIT NINE;;;;
207A;SUPERSCRIPT PLUS SIGN;Sm;0;ET;<super> 002B;;;;N;;;;;
207B;SUPERSCRIPT MINUS;Sm;0;ET;<super> 2212;;;;N;SUPERSCRIPT HYPHEN-MINUS;;;;
207A;SUPERSCRIPT PLUS SIGN;Sm;0;ES;<super> 002B;;;;N;;;;;
207B;SUPERSCRIPT MINUS;Sm;0;ES;<super> 2212;;;;N;SUPERSCRIPT HYPHEN-MINUS;;;;
207C;SUPERSCRIPT EQUALS SIGN;Sm;0;ON;<super> 003D;;;;N;;;;;
207D;SUPERSCRIPT LEFT PARENTHESIS;Ps;0;ON;<super> 0028;;;;Y;SUPERSCRIPT OPENING PARENTHESIS;;;;
207E;SUPERSCRIPT RIGHT PARENTHESIS;Pe;0;ON;<super> 0029;;;;Y;SUPERSCRIPT CLOSING PARENTHESIS;;;;
@ -5998,8 +5998,8 @@
2087;SUBSCRIPT SEVEN;No;0;EN;<sub> 0037;;7;7;N;SUBSCRIPT DIGIT SEVEN;;;;
2088;SUBSCRIPT EIGHT;No;0;EN;<sub> 0038;;8;8;N;SUBSCRIPT DIGIT EIGHT;;;;
2089;SUBSCRIPT NINE;No;0;EN;<sub> 0039;;9;9;N;SUBSCRIPT DIGIT NINE;;;;
208A;SUBSCRIPT PLUS SIGN;Sm;0;ET;<sub> 002B;;;;N;;;;;
208B;SUBSCRIPT MINUS;Sm;0;ET;<sub> 2212;;;;N;SUBSCRIPT HYPHEN-MINUS;;;;
208A;SUBSCRIPT PLUS SIGN;Sm;0;ES;<sub> 002B;;;;N;;;;;
208B;SUBSCRIPT MINUS;Sm;0;ES;<sub> 2212;;;;N;SUBSCRIPT HYPHEN-MINUS;;;;
208C;SUBSCRIPT EQUALS SIGN;Sm;0;ON;<sub> 003D;;;;N;;;;;
208D;SUBSCRIPT LEFT PARENTHESIS;Ps;0;ON;<sub> 0028;;;;Y;SUBSCRIPT OPENING PARENTHESIS;;;;
208E;SUBSCRIPT RIGHT PARENTHESIS;Pe;0;ON;<sub> 0029;;;;Y;SUBSCRIPT CLOSING PARENTHESIS;;;;
@ -6314,7 +6314,7 @@
220F;N-ARY PRODUCT;Sm;0;ON;;;;;N;;;;;
2210;N-ARY COPRODUCT;Sm;0;ON;;;;;N;;;;;
2211;N-ARY SUMMATION;Sm;0;ON;;;;;Y;;;;;
2212;MINUS SIGN;Sm;0;ET;;;;;N;;;;;
2212;MINUS SIGN;Sm;0;ES;;;;;N;;;;;
2213;MINUS-OR-PLUS SIGN;Sm;0;ET;;;;;N;;;;;
2214;DOT PLUS;Sm;0;ON;;;;;N;;;;;
2215;DIVISION SLASH;Sm;0;ON;;;;;Y;;;;;
@ -8676,8 +8676,8 @@
2D31;TIFINAGH LETTER YAB;Lo;0;L;;;;;N;;;;;
2D32;TIFINAGH LETTER YABH;Lo;0;L;;;;;N;;;;;
2D33;TIFINAGH LETTER YAG;Lo;0;L;;;;;N;;;;;
2D34;TIFINAGH LETTER YAGGH;Lo;0;L;;;;;N;;;;;
2D35;TIFINAGH LETTER KABYLE YAJ;Lo;0;L;;;;;N;;;;;
2D34;TIFINAGH LETTER YAGHH;Lo;0;L;;;;;N;;;;;
2D35;TIFINAGH LETTER BERBER ACADEMY YAJ;Lo;0;L;;;;;N;;;;;
2D36;TIFINAGH LETTER YAJ;Lo;0;L;;;;;N;;;;;
2D37;TIFINAGH LETTER YAD;Lo;0;L;;;;;N;;;;;
2D38;TIFINAGH LETTER YADH;Lo;0;L;;;;;N;;;;;
@ -8687,14 +8687,14 @@
2D3C;TIFINAGH LETTER YAF;Lo;0;L;;;;;N;;;;;
2D3D;TIFINAGH LETTER YAK;Lo;0;L;;;;;N;;;;;
2D3E;TIFINAGH LETTER TUAREG YAK;Lo;0;L;;;;;N;;;;;
2D3F;TIFINAGH LETTER YAKKH;Lo;0;L;;;;;N;;;;;
2D3F;TIFINAGH LETTER YAKHH;Lo;0;L;;;;;N;;;;;
2D40;TIFINAGH LETTER YAH;Lo;0;L;;;;;N;;Tuareg yab;;;
2D41;TIFINAGH LETTER KABYLE YAH;Lo;0;L;;;;;N;;;;;
2D41;TIFINAGH LETTER BERBER ACADEMY YAH;Lo;0;L;;;;;N;;;;;
2D42;TIFINAGH LETTER TUAREG YAH;Lo;0;L;;;;;N;;;;;
2D43;TIFINAGH LETTER YAHH;Lo;0;L;;;;;N;;;;;
2D44;TIFINAGH LETTER YAA;Lo;0;L;;;;;N;;;;;
2D45;TIFINAGH LETTER YAKH;Lo;0;L;;;;;N;;;;;
2D46;TIFINAGH LETTER TUAREG YAKH;Lo;0;L;;;;;N;;Tuareg four-points;;;
2D46;TIFINAGH LETTER TUAREG YAKH;Lo;0;L;;;;;N;;;;;
2D47;TIFINAGH LETTER YAQ;Lo;0;L;;;;;N;;;;;
2D48;TIFINAGH LETTER TUAREG YAQ;Lo;0;L;;;;;N;;;;;
2D49;TIFINAGH LETTER YI;Lo;0;L;;;;;N;;;;;
@ -8707,12 +8707,12 @@
2D50;TIFINAGH LETTER TUAREG YAGN;Lo;0;L;;;;;N;;;;;
2D51;TIFINAGH LETTER TUAREG YANG;Lo;0;L;;;;;N;;;;;
2D52;TIFINAGH LETTER YAP;Lo;0;L;;;;;N;;;;;
2D53;TIFINAGH LETTER YU;Lo;0;L;;;;;N;;;;;
2D53;TIFINAGH LETTER YU;Lo;0;L;;;;;N;;Tuareg yaw;;;
2D54;TIFINAGH LETTER YAR;Lo;0;L;;;;;N;;;;;
2D55;TIFINAGH LETTER YARR;Lo;0;L;;;;;N;;;;;
2D56;TIFINAGH LETTER YAGH;Lo;0;L;;;;;N;;;;;
2D57;TIFINAGH LETTER TUAREG YAGH;Lo;0;L;;;;;N;;;;;
2D58;TIFINAGH LETTER AYER YAGH;Lo;0;L;;;;;N;;Adrar yaj, Tuareg staggered five-points;;;
2D58;TIFINAGH LETTER AYER YAGH;Lo;0;L;;;;;N;;Adrar yaj;;;
2D59;TIFINAGH LETTER YAS;Lo;0;L;;;;;N;;;;;
2D5A;TIFINAGH LETTER YASS;Lo;0;L;;;;;N;;;;;
2D5B;TIFINAGH LETTER YASH;Lo;0;L;;;;;N;;;;;
@ -8726,7 +8726,7 @@
2D63;TIFINAGH LETTER YAZ;Lo;0;L;;;;;N;;;;;
2D64;TIFINAGH LETTER TAWELLEMET YAZ;Lo;0;L;;;;;N;;harpoon yaz;;;
2D65;TIFINAGH LETTER YAZZ;Lo;0;L;;;;;N;;;;;
2D6F;TIFINAGH MODIFIER LETTER LABIALIZATION;Lm;0;L;<super> 2D61;;;;N;;;;;
2D6F;TIFINAGH MODIFIER LETTER LABIALIZATION MARK;Lm;0;L;<super> 2D61;;;;N;;tamatart;;;
2D80;ETHIOPIC SYLLABLE LOA;Lo;0;L;;;;;N;;;;;
2D81;ETHIOPIC SYLLABLE MOA;Lo;0;L;;;;;N;;;;;
2D82;ETHIOPIC SYLLABLE ROA;Lo;0;L;;;;;N;;;;;
@ -8808,15 +8808,15 @@
2DDE;ETHIOPIC SYLLABLE GYO;Lo;0;L;;;;;N;;;;;
2E00;RIGHT ANGLE SUBSTITUTION MARKER;Po;0;ON;;;;;N;;;;;
2E01;RIGHT ANGLE DOTTED SUBSTITUTION MARKER;Po;0;ON;;;;;N;;;;;
2E02;LEFT SUBSTITUTION BRACKET;Ps;0;ON;;;;;Y;;;;;
2E03;RIGHT SUBSTITUTION BRACKET;Pe;0;ON;;;;;Y;;;;;
2E04;LEFT DOTTED SUBSTITUTION BRACKET;Ps;0;ON;;;;;Y;;;;;
2E05;RIGHT DOTTED SUBSTITUTION BRACKET;Pe;0;ON;;;;;Y;;;;;
2E02;LEFT SUBSTITUTION BRACKET;Pi;0;ON;;;;;Y;;;;;
2E03;RIGHT SUBSTITUTION BRACKET;Pf;0;ON;;;;;Y;;;;;
2E04;LEFT DOTTED SUBSTITUTION BRACKET;Pi;0;ON;;;;;Y;;;;;
2E05;RIGHT DOTTED SUBSTITUTION BRACKET;Pf;0;ON;;;;;Y;;;;;
2E06;RAISED INTERPOLATION MARKER;Po;0;ON;;;;;N;;;;;
2E07;RAISED DOTTED INTERPOLATION MARKER;Po;0;ON;;;;;N;;;;;
2E08;DOTTED TRANSPOSITION MARKER;Po;0;ON;;;;;N;;;;;
2E09;LEFT TRANSPOSITION BRACKET;Ps;0;ON;;;;;Y;;;;;
2E0A;RIGHT TRANSPOSITION BRACKET;Pe;0;ON;;;;;Y;;;;;
2E09;LEFT TRANSPOSITION BRACKET;Pi;0;ON;;;;;Y;;;;;
2E0A;RIGHT TRANSPOSITION BRACKET;Pf;0;ON;;;;;Y;;;;;
2E0B;RAISED SQUARE;Po;0;ON;;;;;N;;;;;
2E0C;LEFT RAISED OMISSION BRACKET;Pi;0;ON;;;;;Y;;;;;
2E0D;RIGHT RAISED OMISSION BRACKET;Pf;0;ON;;;;;Y;;;;;
@ -8830,8 +8830,8 @@
2E15;UPWARDS ANCORA;Po;0;ON;;;;;N;;;;;
2E16;DOTTED RIGHT-POINTING ANGLE;Po;0;ON;;;;;N;;;;;
2E17;DOUBLE OBLIQUE HYPHEN;Pd;0;ON;;;;;N;;;;;
2E1C;LEFT LOW PARAPHRASE BRACKET;Ps;0;ON;;;;;Y;;;;;
2E1D;RIGHT LOW PARAPHRASE BRACKET;Pe;0;ON;;;;;Y;;;;;
2E1C;LEFT LOW PARAPHRASE BRACKET;Pi;0;ON;;;;;Y;;;;;
2E1D;RIGHT LOW PARAPHRASE BRACKET;Pf;0;ON;;;;;Y;;;;;
2E80;CJK RADICAL REPEAT;So;0;ON;;;;;N;;;;;
2E81;CJK RADICAL CLIFF;So;0;ON;;;;;N;;;;;
2E82;CJK RADICAL SECOND ONE;So;0;ON;;;;;N;;;;;
@ -9600,22 +9600,22 @@
31B5;BOPOMOFO FINAL LETTER T;Lo;0;L;;;;;N;;;;;
31B6;BOPOMOFO FINAL LETTER K;Lo;0;L;;;;;N;;;;;
31B7;BOPOMOFO FINAL LETTER H;Lo;0;L;;;;;N;;;;;
31C0;CJK BASIC STROKE T;So;0;ON;;;;;N;;;;;
31C1;CJK BASIC STROKE WG;So;0;ON;;;;;N;;;;;
31C2;CJK BASIC STROKE XG;So;0;ON;;;;;N;;;;;
31C3;CJK BASIC STROKE WOG;So;0;ON;;;;;N;;;;;
31C4;CJK BASIC STROKE SW;So;0;ON;;;;;N;;;;;
31C5;CJK BASIC STROKE HZZ;So;0;ON;;;;;N;;;;;
31C6;CJK BASIC STROKE HZG;So;0;ON;;;;;N;;;;;
31C7;CJK BASIC STROKE HP;So;0;ON;;;;;N;;;;;
31C8;CJK BASIC STROKE HZWG;So;0;ON;;;;;N;;;;;
31C9;CJK BASIC STROKE SZZG;So;0;ON;;;;;N;;;;;
31CA;CJK BASIC STROKE HZT;So;0;ON;;;;;N;;;;;
31CB;CJK BASIC STROKE HZZP;So;0;ON;;;;;N;;;;;
31CC;CJK BASIC STROKE HPWG;So;0;ON;;;;;N;;;;;
31CD;CJK BASIC STROKE HZW;So;0;ON;;;;;N;;;;;
31CE;CJK BASIC STROKE HZZZ;So;0;ON;;;;;N;;;;;
31CF;CJK BASIC STROKE N;So;0;ON;;;;;N;;;;;
31C0;CJK STROKE T;So;0;ON;;;;;N;;;;;
31C1;CJK STROKE WG;So;0;ON;;;;;N;;;;;
31C2;CJK STROKE XG;So;0;ON;;;;;N;;;;;
31C3;CJK STROKE BXG;So;0;ON;;;;;N;;;;;
31C4;CJK STROKE SW;So;0;ON;;;;;N;;;;;
31C5;CJK STROKE HZZ;So;0;ON;;;;;N;;;;;
31C6;CJK STROKE HZG;So;0;ON;;;;;N;;;;;
31C7;CJK STROKE HP;So;0;ON;;;;;N;;;;;
31C8;CJK STROKE HZWG;So;0;ON;;;;;N;;;;;
31C9;CJK STROKE SZWG;So;0;ON;;;;;N;;;;;
31CA;CJK STROKE HZT;So;0;ON;;;;;N;;;;;
31CB;CJK STROKE HZZP;So;0;ON;;;;;N;;;;;
31CC;CJK STROKE HPWG;So;0;ON;;;;;N;;;;;
31CD;CJK STROKE HZW;So;0;ON;;;;;N;;;;;
31CE;CJK STROKE HZZZ;So;0;ON;;;;;N;;;;;
31CF;CJK STROKE N;So;0;ON;;;;;N;;;;;
31F0;KATAKANA LETTER SMALL KU;Lo;0;L;;;;;N;;;;;
31F1;KATAKANA LETTER SMALL SI;Lo;0;L;;;;;N;;;;;
31F2;KATAKANA LETTER SMALL SU;Lo;0;L;;;;;N;;;;;
@ -11935,7 +11935,7 @@ FABB;CJK COMPATIBILITY IDEOGRAPH-FABB;Lo;0;L;8ACB;;;;N;;;;;
FABC;CJK COMPATIBILITY IDEOGRAPH-FABC;Lo;0;L;8B01;;;;N;;;;;
FABD;CJK COMPATIBILITY IDEOGRAPH-FABD;Lo;0;L;8AFE;;;;N;;;;;
FABE;CJK COMPATIBILITY IDEOGRAPH-FABE;Lo;0;L;8AED;;;;N;;;;;
FABF;CJK COMPATIBILITY IDEOGRAPH-FABF;Lo;0;L;8BE9;;;;N;;;;;
FABF;CJK COMPATIBILITY IDEOGRAPH-FABF;Lo;0;L;8B39;;;;N;;;;;
FAC0;CJK COMPATIBILITY IDEOGRAPH-FAC0;Lo;0;L;8B8A;;;;N;;;;;
FAC1;CJK COMPATIBILITY IDEOGRAPH-FAC1;Lo;0;L;8D08;;;;N;;;;;
FAC2;CJK COMPATIBILITY IDEOGRAPH-FAC2;Lo;0;L;8F38;;;;N;;;;;
@ -11986,7 +11986,7 @@ FB25;HEBREW LETTER WIDE LAMED;Lo;0;R;<font> 05DC;;;;N;;;;;
FB26;HEBREW LETTER WIDE FINAL MEM;Lo;0;R;<font> 05DD;;;;N;;;;;
FB27;HEBREW LETTER WIDE RESH;Lo;0;R;<font> 05E8;;;;N;;;;;
FB28;HEBREW LETTER WIDE TAV;Lo;0;R;<font> 05EA;;;;N;;;;;
FB29;HEBREW LETTER ALTERNATIVE PLUS SIGN;Sm;0;ET;<font> 002B;;;;N;;;;;
FB29;HEBREW LETTER ALTERNATIVE PLUS SIGN;Sm;0;ES;<font> 002B;;;;N;;;;;
FB2A;HEBREW LETTER SHIN WITH SHIN DOT;Lo;0;R;05E9 05C1;;;;N;;;;;
FB2B;HEBREW LETTER SHIN WITH SIN DOT;Lo;0;R;05E9 05C2;;;;N;;;;;
FB2C;HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT;Lo;0;R;FB49 05C1;;;;N;;;;;
@ -12694,8 +12694,8 @@ FE5E;SMALL RIGHT TORTOISE SHELL BRACKET;Pe;0;ON;<small> 3015;;;;N;SMALL CLOSING
FE5F;SMALL NUMBER SIGN;Po;0;ET;<small> 0023;;;;N;;;;;
FE60;SMALL AMPERSAND;Po;0;ON;<small> 0026;;;;N;;;;;
FE61;SMALL ASTERISK;Po;0;ON;<small> 002A;;;;N;;;;;
FE62;SMALL PLUS SIGN;Sm;0;ET;<small> 002B;;;;N;;;;;
FE63;SMALL HYPHEN-MINUS;Pd;0;ET;<small> 002D;;;;N;;;;;
FE62;SMALL PLUS SIGN;Sm;0;ES;<small> 002B;;;;N;;;;;
FE63;SMALL HYPHEN-MINUS;Pd;0;ES;<small> 002D;;;;N;;;;;
FE64;SMALL LESS-THAN SIGN;Sm;0;ON;<small> 003C;;;;N;;;;;
FE65;SMALL GREATER-THAN SIGN;Sm;0;ON;<small> 003E;;;;N;;;;;
FE66;SMALL EQUALS SIGN;Sm;0;ON;<small> 003D;;;;N;;;;;
@ -12854,9 +12854,9 @@ FF07;FULLWIDTH APOSTROPHE;Po;0;ON;<wide> 0027;;;;N;;;;;
FF08;FULLWIDTH LEFT PARENTHESIS;Ps;0;ON;<wide> 0028;;;;Y;FULLWIDTH OPENING PARENTHESIS;;;;
FF09;FULLWIDTH RIGHT PARENTHESIS;Pe;0;ON;<wide> 0029;;;;Y;FULLWIDTH CLOSING PARENTHESIS;;;;
FF0A;FULLWIDTH ASTERISK;Po;0;ON;<wide> 002A;;;;N;;;;;
FF0B;FULLWIDTH PLUS SIGN;Sm;0;ET;<wide> 002B;;;;N;;;;;
FF0B;FULLWIDTH PLUS SIGN;Sm;0;ES;<wide> 002B;;;;N;;;;;
FF0C;FULLWIDTH COMMA;Po;0;CS;<wide> 002C;;;;N;;;;;
FF0D;FULLWIDTH HYPHEN-MINUS;Pd;0;ET;<wide> 002D;;;;N;;;;;
FF0D;FULLWIDTH HYPHEN-MINUS;Pd;0;ES;<wide> 002D;;;;N;;;;;
FF0E;FULLWIDTH FULL STOP;Po;0;CS;<wide> 002E;;;;N;FULLWIDTH PERIOD;;;;
FF0F;FULLWIDTH SOLIDUS;Po;0;CS;<wide> 002F;;;;N;FULLWIDTH SLASH;;;;
FF10;FULLWIDTH DIGIT ZERO;Nd;0;EN;<wide> 0030;0;0;0;N;;;;;
@ -13368,7 +13368,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
10157;GREEK ACROPHONIC ATTIC TEN MNAS;Nl;0;ON;;;;10;N;;;;;
10158;GREEK ACROPHONIC HERAEUM ONE PLETHRON;Nl;0;ON;;;;1;N;;;;;
10159;GREEK ACROPHONIC THESPIAN ONE;Nl;0;ON;;;;1;N;;;;;
1015A;GREEK ACROPHONIC HERMIONE ONE;Nl;0;ON;;;;1;N;;;;;
1015A;GREEK ACROPHONIC HERMIONIAN ONE;Nl;0;ON;;;;1;N;;;;;
1015B;GREEK ACROPHONIC EPIDAUREAN TWO;Nl;0;ON;;;;2;N;;;;;
1015C;GREEK ACROPHONIC THESPIAN TWO;Nl;0;ON;;;;2;N;;;;;
1015D;GREEK ACROPHONIC CYRENAIC TWO DRACHMAS;Nl;0;ON;;;;2;N;;;;;
@ -13478,7 +13478,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
10347;GOTHIC LETTER IGGWS;Lo;0;L;;;;;N;;;;;
10348;GOTHIC LETTER HWAIR;Lo;0;L;;;;;N;;;;;
10349;GOTHIC LETTER OTHAL;Lo;0;L;;;;;N;;;;;
1034A;GOTHIC LETTER NINE HUNDRED;Nl;0;L;;;;;N;;;;;
1034A;GOTHIC LETTER NINE HUNDRED;Nl;0;L;;;;900;N;;;;;
10380;UGARITIC LETTER ALPA;Lo;0;L;;;;;N;;;;;
10381;UGARITIC LETTER BETA;Lo;0;L;;;;;N;;;;;
10382;UGARITIC LETTER GAMLA;Lo;0;L;;;;;N;;;;;
@ -13831,10 +13831,10 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
10A39;KHAROSHTHI SIGN CAUDA;Mn;1;NSM;;;;;N;;;;;
10A3A;KHAROSHTHI SIGN DOT BELOW;Mn;220;NSM;;;;;N;;;;;
10A3F;KHAROSHTHI VIRAMA;Mn;9;NSM;;;;;N;;;;;
10A40;KHAROSHTHI DIGIT ONE;Nd;0;R;;1;1;1;N;;;;;
10A41;KHAROSHTHI DIGIT TWO;Nd;0;R;;2;2;2;N;;;;;
10A42;KHAROSHTHI DIGIT THREE;Nd;0;R;;3;3;3;N;;;;;
10A43;KHAROSHTHI DIGIT FOUR;Nd;0;R;;4;4;4;N;;;;;
10A40;KHAROSHTHI DIGIT ONE;No;0;R;;;1;1;N;;;;;
10A41;KHAROSHTHI DIGIT TWO;No;0;R;;;2;2;N;;;;;
10A42;KHAROSHTHI DIGIT THREE;No;0;R;;;3;3;N;;;;;
10A43;KHAROSHTHI DIGIT FOUR;No;0;R;;;4;4;N;;;;;
10A44;KHAROSHTHI NUMBER TEN;No;0;R;;;;10;N;;;;;
10A45;KHAROSHTHI NUMBER TWENTY;No;0;R;;;;20;N;;;;;
10A46;KHAROSHTHI NUMBER ONE HUNDRED;No;0;R;;;;100;N;;;;;

View File

@ -1,5 +1,5 @@
# WordBreakProperty-4.1.0.txt
# Date: 2004-12-11, 05:47:24 GMT [MD]
# Date: 2005-02-26, 02:43:08 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2005 Unicode, Inc.

View File

@ -2205,7 +2205,7 @@ TestAdditionalProperties() {
/* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
/* test default Bidi classes for unassigned code points */
{ 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
{ 0x05c7, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
{ 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
{ 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
{ 0x07f2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
{ 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },

View File

@ -1306,8 +1306,8 @@ initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT]) {
"\\u040C-\\u040E\\u0419\\u0439\\u0450\\u0451\\u0453\\u0457\\u045C"
"-\\u045E\\u0476\\u0477\\u0483-\\u0486\\u04C1\\u04C2\\u04D0-"
"\\u04D3\\u04D6\\u04D7\\u04DA-\\u04DF\\u04E2-\\u04E7\\u04EA-"
"\\u04F5\\u04F8\\u04F9\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4"
"\\u05C5\\u0610-\\u0615\\u0622-\\u0626\\u064B-\\u065E\\u0670"
"\\u04F5\\u04F8\\u04F9\\u0591-\\u05B9\\u05BB-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4"
"\\u05C5\\u05C7\\u0610-\\u0615\\u0622-\\u0626\\u064B-\\u065E\\u0670"
"\\u06C0\\u06C2\\u06D3\\u06D6-\\u06DC\\u06DF-\\u06E4\\u06E7"
"\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u0929\\u0931"
"\\u0934\\u093C\\u094D\\u0951-\\u0954\\u0958-\\u095F\\u09BC"
@ -1365,7 +1365,7 @@ initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT]) {
"\\u0410\\u0413\\u0415-\\u0418\\u041A\\u041E\\u0423\\u0427\\u042B"
"\\u042D\\u0430\\u0433\\u0435-\\u0438\\u043A\\u043E\\u0443\\u0447"
"\\u044B\\u044D\\u0456\\u0474\\u0475\\u0483-\\u0486\\u04D8\\u04D9"
"\\u04E8\\u04E9\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5"
"\\u04E8\\u04E9\\u0591-\\u05B9\\u05BB-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7"
"\\u0610-\\u0615\\u0622\\u0623\\u0627\\u0648\\u064A-\\u065E"
"\\u0670\\u06C1\\u06D2\\u06D5-\\u06DC\\u06DF-\\u06E4\\u06E7"
"\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u0928\\u0930"
@ -1474,7 +1474,7 @@ initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT]) {
"\\u0450\\u0451\\u0453\\u0457\\u045C-\\u045E\\u0476\\u0477\\u0483"
"-\\u0486\\u04C1\\u04C2\\u04D0-\\u04D3\\u04D6\\u04D7\\u04DA-"
"\\u04DF\\u04E2-\\u04E7\\u04EA-\\u04F5\\u04F8\\u04F9\\u0587"
"\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u0610-"
"\\u0591-\\u05B9\\u05BB-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7\\u0610-"
"\\u0615\\u0622-\\u0626\\u064B-\\u065E\\u0670\\u0675-\\u0678"
"\\u06C0\\u06C2\\u06D3\\u06D6-\\u06DC\\u06DF-\\u06E4\\u06E7"
"\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u0929\\u0931"
@ -1559,8 +1559,8 @@ initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT]) {
"\\u03F9\\u0406\\u0410\\u0413\\u0415-\\u0418\\u041A\\u041E\\u0423"
"\\u0427\\u042B\\u042D\\u0430\\u0433\\u0435-\\u0438\\u043A\\u043E"
"\\u0443\\u0447\\u044B\\u044D\\u0456\\u0474\\u0475\\u0483-\\u0486"
"\\u04D8\\u04D9\\u04E8\\u04E9\\u0587\\u0591-\\u05BD\\u05BF\\u05C1"
"\\u05C2\\u05C4\\u05C5\\u0610-\\u0615\\u0622\\u0623\\u0627\\u0648"
"\\u04D8\\u04D9\\u04E8\\u04E9\\u0587\\u0591-\\u05B9\\u05BB-\\u05BD\\u05BF\\u05C1"
"\\u05C2\\u05C4\\u05C5\\u05C7\\u0610-\\u0615\\u0622\\u0623\\u0627\\u0648"
"\\u064A-\\u065E\\u0670\\u0675-\\u0678\\u06C1\\u06D2\\u06D5-"
"\\u06DC\\u06DF-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711"
"\\u0730-\\u074A\\u0928\\u0930\\u0933\\u093C\\u094D\\u0951-"

File diff suppressed because it is too large Load Diff

View File

@ -102,6 +102,16 @@ my %UNSUPPORTED = (Composition_Exclusion => 1,
# missing.
my %MISSING_FROM_UCHAR;
# Additional property aliases beyond short and long names,
# like space in addition to WSpace and White_Space in Unicode 4.1.
# Hashtable, maps long name to alias.
# For example, maps White_Space->space.
#
# If multiple additional aliases are defined,
# then they are separated in the value string with '|'.
# For example, White_Space->space|outer_space
my %additional_property_aliases;
#----------------------------------------------------------------------
# Emitted class names
@ -574,14 +584,20 @@ sub merge_PropertyAliases {
for my $subh (map { $h->{$_} } @TOP) {
for my $enum (keys %$subh) {
my $name = $subh->{$enum};
die "Error: Property $name not found (or used more than once)"
unless (exists $pa->{$name});
my $long_name = $subh->{$enum};
if (!exists $pa->{$long_name}) {
die "Error: Property $long_name not found (or used more than once)";
}
$subh->{$enum} = $pa->{$name} . "|" . $name;
delete $pa->{$name};
my $value = $pa->{$long_name} . "|" . $long_name;
if (exists $additional_property_aliases{$long_name}) {
$value .= "|" . $additional_property_aliases{$long_name};
}
$subh->{$enum} = $value;
delete $pa->{$long_name};
}
}
my @err;
for my $name (keys %$pa) {
$MISSING_FROM_UCHAR{$pa->{$name}} = 1;
@ -750,6 +766,8 @@ sub read_PropertyAliases {
my $in = new FileHandle($filename, 'r');
die "Error: Cannot open $filename" if (!defined $in);
my $sym = 0; # Used to make "n/a" strings unique
while (<$in>) {
# Read version (embedded in a comment)
@ -768,14 +786,39 @@ sub read_PropertyAliases {
s/\#.*//;
next unless (/\S/);
if (/^\s*(.+?)\s*;\s*(.+?)\s*$/i) {
die "Error: Duplicate property $1 in $filename"
if (exists $hash->{$2});
$hash->{$2} = $1;
$fam->{$2} = $family;
}
if (/^\s*(.+?)\s*;/) {
my $short = $1;
my @fields = /;\s*([^\s;]+)/g;
if (@fields < 1 || @fields > 2) {
my $number = @fields;
die "Error: Wrong number of fields ($number) in $filename at $_";
}
else {
# Make "n/a" strings unique
my $long = $fields[0];
if ($long eq 'n/a') {
$long .= sprintf("%03d", $sym++);
}
# Add long name->short name to the hash=pa hash table
if (exists $hash->{$long}) {
die "Error: Duplicate property $long in $filename"
}
$hash->{$long} = $short;
$fam->{$long} = $family;
# Add the list of further aliases to the additional_property_aliases hash table,
# using the long property name as the key.
# For example:
# White_Space->space|outer_space
if (@fields > 1) {
my $value = pop @fields;
while (@fields > 1) {
$value .= "|" . pop @fields;
}
$additional_property_aliases{$long} = $value;
}
} else {
die "Error: Can't parse $_ in $filename";
}
}
@ -822,7 +865,7 @@ sub read_PropertyValueAliases {
if (/^\s*(.+?)\s*;/i) {
my $prop = $1;
my @fields = /;\s*([^\s;]+)/g;
die "Error: Wrong number of fields"
die "Error: Wrong number of fields in $filename"
if (@fields < 2 || @fields > 3);
# Make "n/a" strings unique
$fields[0] .= sprintf("%03d", $sym++) if ($fields[0] eq 'n/a');