ICU-9437 support UCD 6.2

X-SVN-Rev: 32062
This commit is contained in:
Markus Scherer 2012-07-24 21:11:29 +00:00
parent ff29cbdfe4
commit db9611caa9
2 changed files with 18 additions and 10 deletions

View File

@@ -5,7 +5,7 @@
* machine-generated by: icu/tools/unicode/py/preparseucd.py
*/
#define UNICODE_VERSION { 6, 1, 0, 0 }
#define UNICODE_VERSION { 6, 2, 0, 0 }
static const Value VALUES_binprop[2] = {
Value(0, "N No F False"),
@@ -450,7 +450,7 @@ static const Value VALUES_jt[6] = {
Value(U_JT_TRANSPARENT, "T Transparent"),
};
static const Value VALUES_lb[39] = {
static const Value VALUES_lb[40] = {
Value(U_LB_UNKNOWN, "XX Unknown"),
Value(U_LB_AMBIGUOUS, "AI Ambiguous"),
Value(U_LB_ALPHABETIC, "AL Alphabetic"),
@@ -490,6 +490,7 @@ static const Value VALUES_lb[39] = {
Value(U_LB_CLOSE_PARENTHESIS, "CP Close_Parenthesis"),
Value(U_LB_CONDITIONAL_JAPANESE_STARTER, "CJ Conditional_Japanese_Starter"),
Value(U_LB_HEBREW_LETTER, "HL Hebrew_Letter"),
Value(U_LB_ZERO_WIDTH_JOINER, "ZJ Zero_Width_Joiner"),
};
static const Value VALUES_nt[4] = {
@@ -810,7 +811,7 @@ static const Value VALUES_tccc[56] = {
Value(240, "IS Iota_Subscript"),
};
static const Value VALUES_GCB[12] = {
static const Value VALUES_GCB[14] = {
Value(U_GCB_OTHER, "XX Other"),
Value(U_GCB_CONTROL, "CN Control"),
Value(U_GCB_CR, "CR CR"),
@@ -823,6 +824,8 @@ static const Value VALUES_GCB[12] = {
Value(U_GCB_V, "V V"),
Value(U_GCB_SPACING_MARK, "SM SpacingMark"),
Value(U_GCB_PREPEND, "PP Prepend"),
Value(U_GCB_AFTER_JOINER, "AJ After_Joiner"),
Value(U_GCB_JOINER, "J Joiner"),
};
static const Value VALUES_SB[15] = {
@@ -843,7 +846,7 @@ static const Value VALUES_SB[15] = {
Value(U_SB_SCONTINUE, "SC SContinue"),
};
static const Value VALUES_WB[13] = {
static const Value VALUES_WB[15] = {
Value(U_WB_OTHER, "XX Other"),
Value(U_WB_ALETTER, "LE ALetter"),
Value(U_WB_FORMAT, "FO Format"),
@@ -857,6 +860,8 @@ static const Value VALUES_WB[13] = {
Value(U_WB_LF, "LF LF"),
Value(U_WB_MIDNUMLET, "MB MidNumLet"),
Value(U_WB_NEWLINE, "NL Newline"),
Value(U_WB_AFTER_JOINER, "AJ After_Joiner"),
Value(U_WB_JOINER, "J Joiner"),
};
static const Value VALUES_gcm[38] = {
@@ -966,7 +971,7 @@ static const Property PROPERTIES[94] = {
Property(UCHAR_GENERAL_CATEGORY, "gc General_Category", VALUES_gc, 30),
Property(UCHAR_JOINING_GROUP, "jg Joining_Group", VALUES_jg, 58),
Property(UCHAR_JOINING_TYPE, "jt Joining_Type", VALUES_jt, 6),
Property(UCHAR_LINE_BREAK, "lb Line_Break", VALUES_lb, 39),
Property(UCHAR_LINE_BREAK, "lb Line_Break", VALUES_lb, 40),
Property(UCHAR_NUMERIC_TYPE, "nt Numeric_Type", VALUES_nt, 4),
Property(UCHAR_SCRIPT, "sc Script", VALUES_sc, 159),
Property(UCHAR_HANGUL_SYLLABLE_TYPE, "hst Hangul_Syllable_Type", VALUES_hst, 6),
@@ -976,9 +981,9 @@ static const Property PROPERTIES[94] = {
Property(UCHAR_NFKC_QUICK_CHECK, "NFKC_QC NFKC_Quick_Check", VALUES_NFKC_QC, 3),
Property(UCHAR_LEAD_CANONICAL_COMBINING_CLASS, "lccc Lead_Canonical_Combining_Class", VALUES_lccc, 56),
Property(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS, "tccc Trail_Canonical_Combining_Class", VALUES_tccc, 56),
Property(UCHAR_GRAPHEME_CLUSTER_BREAK, "GCB Grapheme_Cluster_Break", VALUES_GCB, 12),
Property(UCHAR_GRAPHEME_CLUSTER_BREAK, "GCB Grapheme_Cluster_Break", VALUES_GCB, 14),
Property(UCHAR_SENTENCE_BREAK, "SB Sentence_Break", VALUES_SB, 15),
Property(UCHAR_WORD_BREAK, "WB Word_Break", VALUES_WB, 13),
Property(UCHAR_WORD_BREAK, "WB Word_Break", VALUES_WB, 15),
Property(UCHAR_GENERAL_CATEGORY_MASK, "gcm General_Category_Mask", VALUES_gcm, 38),
Property(UCHAR_NUMERIC_VALUE, "nv Numeric_Value"),
Property(UCHAR_AGE, "age Age"),

View File

@@ -2029,10 +2029,13 @@ def main():
for (basename, path, parser) in files:
print "Parsing %s" % basename
value = _files[basename]
# Unicode data files are in UTF-8.
charset = "UTF-8"
if basename == "NamesList.txt":
in_file = codecs.open(path, "r", "ISO-8859-1")
else:
in_file = open(path, "r")
# The NamesList used to be in Latin-1 before Unicode 6.2.
numeric_ucd_version = [int(field) for field in _ucd_version.split('.')]
if numeric_ucd_version < [6, 2]: charset = "ISO-8859-1"
in_file = codecs.open(path, "r", charset)
with in_file:
parser(in_file)
_null_or_defaults = _null_values.copy()