ICU-9437 support UCD 6.2
X-SVN-Rev: 32062
This commit is contained in:
parent
ff29cbdfe4
commit
db9611caa9
@ -5,7 +5,7 @@
|
||||
* machine-generated by: icu/tools/unicode/py/preparseucd.py
|
||||
*/
|
||||
|
||||
#define UNICODE_VERSION { 6, 1, 0, 0 }
|
||||
#define UNICODE_VERSION { 6, 2, 0, 0 }
|
||||
|
||||
static const Value VALUES_binprop[2] = {
|
||||
Value(0, "N No F False"),
|
||||
@ -450,7 +450,7 @@ static const Value VALUES_jt[6] = {
|
||||
Value(U_JT_TRANSPARENT, "T Transparent"),
|
||||
};
|
||||
|
||||
static const Value VALUES_lb[39] = {
|
||||
static const Value VALUES_lb[40] = {
|
||||
Value(U_LB_UNKNOWN, "XX Unknown"),
|
||||
Value(U_LB_AMBIGUOUS, "AI Ambiguous"),
|
||||
Value(U_LB_ALPHABETIC, "AL Alphabetic"),
|
||||
@ -490,6 +490,7 @@ static const Value VALUES_lb[39] = {
|
||||
Value(U_LB_CLOSE_PARENTHESIS, "CP Close_Parenthesis"),
|
||||
Value(U_LB_CONDITIONAL_JAPANESE_STARTER, "CJ Conditional_Japanese_Starter"),
|
||||
Value(U_LB_HEBREW_LETTER, "HL Hebrew_Letter"),
|
||||
Value(U_LB_ZERO_WIDTH_JOINER, "ZJ Zero_Width_Joiner"),
|
||||
};
|
||||
|
||||
static const Value VALUES_nt[4] = {
|
||||
@ -810,7 +811,7 @@ static const Value VALUES_tccc[56] = {
|
||||
Value(240, "IS Iota_Subscript"),
|
||||
};
|
||||
|
||||
static const Value VALUES_GCB[12] = {
|
||||
static const Value VALUES_GCB[14] = {
|
||||
Value(U_GCB_OTHER, "XX Other"),
|
||||
Value(U_GCB_CONTROL, "CN Control"),
|
||||
Value(U_GCB_CR, "CR CR"),
|
||||
@ -823,6 +824,8 @@ static const Value VALUES_GCB[12] = {
|
||||
Value(U_GCB_V, "V V"),
|
||||
Value(U_GCB_SPACING_MARK, "SM SpacingMark"),
|
||||
Value(U_GCB_PREPEND, "PP Prepend"),
|
||||
Value(U_GCB_AFTER_JOINER, "AJ After_Joiner"),
|
||||
Value(U_GCB_JOINER, "J Joiner"),
|
||||
};
|
||||
|
||||
static const Value VALUES_SB[15] = {
|
||||
@ -843,7 +846,7 @@ static const Value VALUES_SB[15] = {
|
||||
Value(U_SB_SCONTINUE, "SC SContinue"),
|
||||
};
|
||||
|
||||
static const Value VALUES_WB[13] = {
|
||||
static const Value VALUES_WB[15] = {
|
||||
Value(U_WB_OTHER, "XX Other"),
|
||||
Value(U_WB_ALETTER, "LE ALetter"),
|
||||
Value(U_WB_FORMAT, "FO Format"),
|
||||
@ -857,6 +860,8 @@ static const Value VALUES_WB[13] = {
|
||||
Value(U_WB_LF, "LF LF"),
|
||||
Value(U_WB_MIDNUMLET, "MB MidNumLet"),
|
||||
Value(U_WB_NEWLINE, "NL Newline"),
|
||||
Value(U_WB_AFTER_JOINER, "AJ After_Joiner"),
|
||||
Value(U_WB_JOINER, "J Joiner"),
|
||||
};
|
||||
|
||||
static const Value VALUES_gcm[38] = {
|
||||
@ -966,7 +971,7 @@ static const Property PROPERTIES[94] = {
|
||||
Property(UCHAR_GENERAL_CATEGORY, "gc General_Category", VALUES_gc, 30),
|
||||
Property(UCHAR_JOINING_GROUP, "jg Joining_Group", VALUES_jg, 58),
|
||||
Property(UCHAR_JOINING_TYPE, "jt Joining_Type", VALUES_jt, 6),
|
||||
Property(UCHAR_LINE_BREAK, "lb Line_Break", VALUES_lb, 39),
|
||||
Property(UCHAR_LINE_BREAK, "lb Line_Break", VALUES_lb, 40),
|
||||
Property(UCHAR_NUMERIC_TYPE, "nt Numeric_Type", VALUES_nt, 4),
|
||||
Property(UCHAR_SCRIPT, "sc Script", VALUES_sc, 159),
|
||||
Property(UCHAR_HANGUL_SYLLABLE_TYPE, "hst Hangul_Syllable_Type", VALUES_hst, 6),
|
||||
@ -976,9 +981,9 @@ static const Property PROPERTIES[94] = {
|
||||
Property(UCHAR_NFKC_QUICK_CHECK, "NFKC_QC NFKC_Quick_Check", VALUES_NFKC_QC, 3),
|
||||
Property(UCHAR_LEAD_CANONICAL_COMBINING_CLASS, "lccc Lead_Canonical_Combining_Class", VALUES_lccc, 56),
|
||||
Property(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS, "tccc Trail_Canonical_Combining_Class", VALUES_tccc, 56),
|
||||
Property(UCHAR_GRAPHEME_CLUSTER_BREAK, "GCB Grapheme_Cluster_Break", VALUES_GCB, 12),
|
||||
Property(UCHAR_GRAPHEME_CLUSTER_BREAK, "GCB Grapheme_Cluster_Break", VALUES_GCB, 14),
|
||||
Property(UCHAR_SENTENCE_BREAK, "SB Sentence_Break", VALUES_SB, 15),
|
||||
Property(UCHAR_WORD_BREAK, "WB Word_Break", VALUES_WB, 13),
|
||||
Property(UCHAR_WORD_BREAK, "WB Word_Break", VALUES_WB, 15),
|
||||
Property(UCHAR_GENERAL_CATEGORY_MASK, "gcm General_Category_Mask", VALUES_gcm, 38),
|
||||
Property(UCHAR_NUMERIC_VALUE, "nv Numeric_Value"),
|
||||
Property(UCHAR_AGE, "age Age"),
|
||||
|
@ -2029,10 +2029,13 @@ def main():
|
||||
for (basename, path, parser) in files:
|
||||
print "Parsing %s" % basename
|
||||
value = _files[basename]
|
||||
# Unicode data files are in UTF-8.
|
||||
charset = "UTF-8"
|
||||
if basename == "NamesList.txt":
|
||||
in_file = codecs.open(path, "r", "ISO-8859-1")
|
||||
else:
|
||||
in_file = open(path, "r")
|
||||
# The NamesList used to be in Latin-1 before Unicode 6.2.
|
||||
numeric_ucd_version = [int(field) for field in _ucd_version.split('.')]
|
||||
if numeric_ucd_version < [6, 2]: charset = "ISO-8859-1"
|
||||
in_file = codecs.open(path, "r", charset)
|
||||
with in_file:
|
||||
parser(in_file)
|
||||
_null_or_defaults = _null_values.copy()
|
||||
|
Loading…
Reference in New Issue
Block a user