ICU-10821 initial tools update for Unicode 7.0

X-SVN-Rev: 35596
This commit is contained in:
Markus Scherer 2014-04-03 22:43:00 +00:00
parent e3a6ed0372
commit f440aa17d9
2 changed files with 101 additions and 49 deletions

View File

@ -1,11 +1,11 @@
/**
* Copyright (C) 2002-2013, International Business Machines Corporation and
* Copyright (C) 2002-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*
* machine-generated by: icu/tools/unicode/py/preparseucd.py
*/
#define UNICODE_VERSION { 6, 3, 0, 0 }
#define UNICODE_VERSION { 7, 0, 0, 0 }
static const Value VALUES_binprop[2] = {
Value(0, "N No F False"),
@ -38,7 +38,7 @@ static const Value VALUES_bc[23] = {
Value(U_POP_DIRECTIONAL_ISOLATE, "PDI Pop_Directional_Isolate"),
};
static const Value VALUES_blk[221] = {
static const Value VALUES_blk[253] = {
Value(UBLOCK_NO_BLOCK, "NB No_Block"),
Value(UBLOCK_BASIC_LATIN, "ASCII Basic_Latin"),
Value(UBLOCK_LATIN_1_SUPPLEMENT, "Latin_1_Sup Latin_1_Supplement Latin_1"),
@ -260,6 +260,38 @@ static const Value VALUES_blk[221] = {
Value(UBLOCK_SORA_SOMPENG, "Sora_Sompeng Sora_Sompeng"),
Value(UBLOCK_SUNDANESE_SUPPLEMENT, "Sundanese_Sup Sundanese_Supplement"),
Value(UBLOCK_TAKRI, "Takri Takri"),
Value(UBLOCK_BASSA_VAH, "Bassa_Vah Bassa_Vah"),
Value(UBLOCK_CAUCASIAN_ALBANIAN, "Caucasian_Albanian Caucasian_Albanian"),
Value(UBLOCK_COPTIC_EPACT_NUMBERS, "Coptic_Epact_Numbers Coptic_Epact_Numbers"),
Value(UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED, "Diacriticals_Ext Combining_Diacritical_Marks_Extended"),
Value(UBLOCK_DUPLOYAN, "Duployan Duployan"),
Value(UBLOCK_ELBASAN, "Elbasan Elbasan"),
Value(UBLOCK_GEOMETRIC_SHAPES_EXTENDED, "Geometric_Shapes_Ext Geometric_Shapes_Extended"),
Value(UBLOCK_GRANTHA, "Grantha Grantha"),
Value(UBLOCK_KHOJKI, "Khojki Khojki"),
Value(UBLOCK_KHUDAWADI, "Khudawadi Khudawadi"),
Value(UBLOCK_LATIN_EXTENDED_E, "Latin_Ext_E Latin_Extended_E"),
Value(UBLOCK_LINEAR_A, "Linear_A Linear_A"),
Value(UBLOCK_MAHAJANI, "Mahajani Mahajani"),
Value(UBLOCK_MANICHAEAN, "Manichaean Manichaean"),
Value(UBLOCK_MENDE_KIKAKUI, "Mende_Kikakui Mende_Kikakui"),
Value(UBLOCK_MODI, "Modi Modi"),
Value(UBLOCK_MRO, "Mro Mro"),
Value(UBLOCK_MYANMAR_EXTENDED_B, "Myanmar_Ext_B Myanmar_Extended_B"),
Value(UBLOCK_NABATAEAN, "Nabataean Nabataean"),
Value(UBLOCK_OLD_NORTH_ARABIAN, "Old_North_Arabian Old_North_Arabian"),
Value(UBLOCK_OLD_PERMIC, "Old_Permic Old_Permic"),
Value(UBLOCK_ORNAMENTAL_DINGBATS, "Ornamental_Dingbats Ornamental_Dingbats"),
Value(UBLOCK_PAHAWH_HMONG, "Pahawh_Hmong Pahawh_Hmong"),
Value(UBLOCK_PALMYRENE, "Palmyrene Palmyrene"),
Value(UBLOCK_PAU_CIN_HAU, "Pau_Cin_Hau Pau_Cin_Hau"),
Value(UBLOCK_PSALTER_PAHLAVI, "Psalter_Pahlavi Psalter_Pahlavi"),
Value(UBLOCK_SHORTHAND_FORMAT_CONTROLS, "Shorthand_Format_Controls Shorthand_Format_Controls"),
Value(UBLOCK_SIDDHAM, "Siddham Siddham"),
Value(UBLOCK_SINHALA_ARCHAIC_NUMBERS, "Sinhala_Archaic_Numbers Sinhala_Archaic_Numbers"),
Value(UBLOCK_SUPPLEMENTAL_ARROWS_C, "Sup_Arrows_C Supplemental_Arrows_C"),
Value(UBLOCK_TIRHUTA, "Tirhuta Tirhuta"),
Value(UBLOCK_WARANG_CITI, "Warang_Citi Warang_Citi"),
};
static const Value VALUES_ccc[57] = {
@ -385,7 +417,7 @@ static const Value VALUES_gc[30] = {
Value(U_FINAL_PUNCTUATION, "Pf Final_Punctuation"),
};
static const Value VALUES_jg[58] = {
static const Value VALUES_jg[86] = {
Value(U_JG_NO_JOINING_GROUP, "No_Joining_Group No_Joining_Group"),
Value(U_JG_AIN, "Ain Ain"),
Value(U_JG_ALAPH, "Alaph Alaph"),
@ -444,6 +476,34 @@ static const Value VALUES_jg[58] = {
Value(U_JG_FARSI_YEH, "Farsi_Yeh Farsi_Yeh"),
Value(U_JG_NYA, "Nya Nya"),
Value(U_JG_ROHINGYA_YEH, "Rohingya_Yeh Rohingya_Yeh"),
Value(U_JG_MANICHAEAN_ALEPH, "Manichaean_Aleph Manichaean_Aleph"),
Value(U_JG_MANICHAEAN_AYIN, "Manichaean_Ayin Manichaean_Ayin"),
Value(U_JG_MANICHAEAN_BETH, "Manichaean_Beth Manichaean_Beth"),
Value(U_JG_MANICHAEAN_DALETH, "Manichaean_Daleth Manichaean_Daleth"),
Value(U_JG_MANICHAEAN_DHAMEDH, "Manichaean_Dhamedh Manichaean_Dhamedh"),
Value(U_JG_MANICHAEAN_FIVE, "Manichaean_Five Manichaean_Five"),
Value(U_JG_MANICHAEAN_GIMEL, "Manichaean_Gimel Manichaean_Gimel"),
Value(U_JG_MANICHAEAN_HETH, "Manichaean_Heth Manichaean_Heth"),
Value(U_JG_MANICHAEAN_HUNDRED, "Manichaean_Hundred Manichaean_Hundred"),
Value(U_JG_MANICHAEAN_KAPH, "Manichaean_Kaph Manichaean_Kaph"),
Value(U_JG_MANICHAEAN_LAMEDH, "Manichaean_Lamedh Manichaean_Lamedh"),
Value(U_JG_MANICHAEAN_MEM, "Manichaean_Mem Manichaean_Mem"),
Value(U_JG_MANICHAEAN_NUN, "Manichaean_Nun Manichaean_Nun"),
Value(U_JG_MANICHAEAN_ONE, "Manichaean_One Manichaean_One"),
Value(U_JG_MANICHAEAN_PE, "Manichaean_Pe Manichaean_Pe"),
Value(U_JG_MANICHAEAN_QOPH, "Manichaean_Qoph Manichaean_Qoph"),
Value(U_JG_MANICHAEAN_RESH, "Manichaean_Resh Manichaean_Resh"),
Value(U_JG_MANICHAEAN_SADHE, "Manichaean_Sadhe Manichaean_Sadhe"),
Value(U_JG_MANICHAEAN_SAMEKH, "Manichaean_Samekh Manichaean_Samekh"),
Value(U_JG_MANICHAEAN_TAW, "Manichaean_Taw Manichaean_Taw"),
Value(U_JG_MANICHAEAN_TEN, "Manichaean_Ten Manichaean_Ten"),
Value(U_JG_MANICHAEAN_TETH, "Manichaean_Teth Manichaean_Teth"),
Value(U_JG_MANICHAEAN_THAMEDH, "Manichaean_Thamedh Manichaean_Thamedh"),
Value(U_JG_MANICHAEAN_TWENTY, "Manichaean_Twenty Manichaean_Twenty"),
Value(U_JG_MANICHAEAN_WAW, "Manichaean_Waw Manichaean_Waw"),
Value(U_JG_MANICHAEAN_YODH, "Manichaean_Yodh Manichaean_Yodh"),
Value(U_JG_MANICHAEAN_ZAYIN, "Manichaean_Zayin Manichaean_Zayin"),
Value(U_JG_STRAIGHT_WAW, "Straight_Waw Straight_Waw"),
};
static const Value VALUES_jt[6] = {
@ -505,7 +565,7 @@ static const Value VALUES_nt[4] = {
Value(U_NT_NUMERIC, "Nu Numeric"),
};
static const Value VALUES_sc[161] = {
static const Value VALUES_sc[167] = {
Value(USCRIPT_COMMON, "Zyyy Common"),
Value(USCRIPT_INHERITED, "Zinh Inherited Qaai"),
Value(USCRIPT_ARABIC, "Arab Arabic"),
@ -581,7 +641,7 @@ static const Value VALUES_sc[161] = {
Value(USCRIPT_KHUTSURI, "Geok Geok"),
Value(USCRIPT_SIMPLIFIED_HAN, "Hans Hans"),
Value(USCRIPT_TRADITIONAL_HAN, "Hant Hant"),
Value(USCRIPT_PAHAWH_HMONG, "Hmng Hmng"),
Value(USCRIPT_PAHAWH_HMONG, "Hmng Pahawh_Hmong"),
Value(USCRIPT_OLD_HUNGARIAN, "Hung Hung"),
Value(USCRIPT_HARAPPAN_INDUS, "Inds Inds"),
Value(USCRIPT_JAVANESE, "Java Javanese"),
@ -589,13 +649,13 @@ static const Value VALUES_sc[161] = {
Value(USCRIPT_LATIN_FRAKTUR, "Latf Latf"),
Value(USCRIPT_LATIN_GAELIC, "Latg Latg"),
Value(USCRIPT_LEPCHA, "Lepc Lepcha"),
Value(USCRIPT_LINEAR_A, "Lina Lina"),
Value(USCRIPT_LINEAR_A, "Lina Linear_A"),
Value(USCRIPT_MANDAIC, "Mand Mandaic"),
Value(USCRIPT_MAYAN_HIEROGLYPHS, "Maya Maya"),
Value(USCRIPT_MEROITIC_HIEROGLYPHS, "Mero Meroitic_Hieroglyphs"),
Value(USCRIPT_NKO, "Nkoo Nko"),
Value(USCRIPT_ORKHON, "Orkh Old_Turkic"),
Value(USCRIPT_OLD_PERMIC, "Perm Perm"),
Value(USCRIPT_OLD_PERMIC, "Perm Old_Permic"),
Value(USCRIPT_PHAGS_PA, "Phag Phags_Pa"),
Value(USCRIPT_PHOENICIAN, "Phnx Phoenician"),
Value(USCRIPT_MIAO, "Plrd Miao"),
@ -627,9 +687,9 @@ static const Value VALUES_sc[161] = {
Value(USCRIPT_CHAKMA, "Cakm Chakma"),
Value(USCRIPT_KOREAN, "Kore Kore"),
Value(USCRIPT_KAITHI, "Kthi Kaithi"),
Value(USCRIPT_MANICHAEAN, "Mani Mani"),
Value(USCRIPT_MANICHAEAN, "Mani Manichaean"),
Value(USCRIPT_INSCRIPTIONAL_PAHLAVI, "Phli Inscriptional_Pahlavi"),
Value(USCRIPT_PSALTER_PAHLAVI, "Phlp Phlp"),
Value(USCRIPT_PSALTER_PAHLAVI, "Phlp Psalter_Pahlavi"),
Value(USCRIPT_BOOK_PAHLAVI, "Phlv Phlv"),
Value(USCRIPT_INSCRIPTIONAL_PARTHIAN, "Prti Inscriptional_Parthian"),
Value(USCRIPT_SAMARITAN, "Samr Samaritan"),
@ -640,22 +700,22 @@ static const Value VALUES_sc[161] = {
Value(USCRIPT_LISU, "Lisu Lisu"),
Value(USCRIPT_NAKHI_GEBA, "Nkgb Nkgb"),
Value(USCRIPT_OLD_SOUTH_ARABIAN, "Sarb Old_South_Arabian"),
Value(USCRIPT_BASSA_VAH, "Bass Bass"),
Value(USCRIPT_DUPLOYAN_SHORTAND, "Dupl Dupl"),
Value(USCRIPT_ELBASAN, "Elba Elba"),
Value(USCRIPT_GRANTHA, "Gran Gran"),
Value(USCRIPT_BASSA_VAH, "Bass Bassa_Vah"),
Value(USCRIPT_DUPLOYAN_SHORTAND, "Dupl Duployan"),
Value(USCRIPT_ELBASAN, "Elba Elbasan"),
Value(USCRIPT_GRANTHA, "Gran Grantha"),
Value(USCRIPT_KPELLE, "Kpel Kpel"),
Value(USCRIPT_LOMA, "Loma Loma"),
Value(USCRIPT_MENDE, "Mend Mend"),
Value(USCRIPT_MENDE, "Mend Mende_Kikakui"),
Value(USCRIPT_MEROITIC_CURSIVE, "Merc Meroitic_Cursive"),
Value(USCRIPT_OLD_NORTH_ARABIAN, "Narb Narb"),
Value(USCRIPT_NABATAEAN, "Nbat Nbat"),
Value(USCRIPT_PALMYRENE, "Palm Palm"),
Value(USCRIPT_SINDHI, "Sind Sind"),
Value(USCRIPT_WARANG_CITI, "Wara Wara"),
Value(USCRIPT_OLD_NORTH_ARABIAN, "Narb Old_North_Arabian"),
Value(USCRIPT_NABATAEAN, "Nbat Nabataean"),
Value(USCRIPT_PALMYRENE, "Palm Palmyrene"),
Value(USCRIPT_KHUDAWADI, "Sind Khudawadi"),
Value(USCRIPT_WARANG_CITI, "Wara Warang_Citi"),
Value(USCRIPT_AFAKA, "Afak Afak"),
Value(USCRIPT_JURCHEN, "Jurc Jurc"),
Value(USCRIPT_MRO, "Mroo Mroo"),
Value(USCRIPT_MRO, "Mroo Mro"),
Value(USCRIPT_NUSHU, "Nshu Nshu"),
Value(USCRIPT_SHARADA, "Shrd Sharada"),
Value(USCRIPT_SORA_SOMPENG, "Sora Sora_Sompeng"),
@ -663,10 +723,16 @@ static const Value VALUES_sc[161] = {
Value(USCRIPT_TANGUT, "Tang Tang"),
Value(USCRIPT_WOLEAI, "Wole Wole"),
Value(USCRIPT_ANATOLIAN_HIEROGLYPHS, "Hluw Hluw"),
Value(USCRIPT_KHOJKI, "Khoj Khoj"),
Value(USCRIPT_TIRHUTA, "Tirh Tirh"),
Value(USCRIPT_CAUCASIAN_ALBANIAN, "Aghb Aghb"),
Value(USCRIPT_MAHAJANI, "Mahj Mahj"),
Value(USCRIPT_KHOJKI, "Khoj Khojki"),
Value(USCRIPT_TIRHUTA, "Tirh Tirhuta"),
Value(USCRIPT_CAUCASIAN_ALBANIAN, "Aghb Caucasian_Albanian"),
Value(USCRIPT_MAHAJANI, "Mahj Mahajani"),
Value(USCRIPT_AHOM, "Ahom Ahom"),
Value(USCRIPT_HATRAN, "Hatr Hatr"),
Value(USCRIPT_MODI, "Modi Modi"),
Value(USCRIPT_MULTANI, "Mult Mult"),
Value(USCRIPT_PAU_CIN_HAU, "Pauc Pau_Cin_Hau"),
Value(USCRIPT_SIDDHAM, "Sidd Siddham"),
};
static const Value VALUES_hst[6] = {
@ -980,16 +1046,16 @@ static const Property PROPERTIES[96] = {
Property(UCHAR_CHANGES_WHEN_CASEMAPPED, "CWCM Changes_When_Casemapped"),
Property(UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED, "CWKCF Changes_When_NFKC_Casefolded"),
Property(UCHAR_BIDI_CLASS, "bc Bidi_Class", VALUES_bc, 23),
Property(UCHAR_BLOCK, "blk Block", VALUES_blk, 221),
Property(UCHAR_BLOCK, "blk Block", VALUES_blk, 253),
Property(UCHAR_CANONICAL_COMBINING_CLASS, "ccc Canonical_Combining_Class", VALUES_ccc, 57),
Property(UCHAR_DECOMPOSITION_TYPE, "dt Decomposition_Type", VALUES_dt, 18),
Property(UCHAR_EAST_ASIAN_WIDTH, "ea East_Asian_Width", VALUES_ea, 6),
Property(UCHAR_GENERAL_CATEGORY, "gc General_Category", VALUES_gc, 30),
Property(UCHAR_JOINING_GROUP, "jg Joining_Group", VALUES_jg, 58),
Property(UCHAR_JOINING_GROUP, "jg Joining_Group", VALUES_jg, 86),
Property(UCHAR_JOINING_TYPE, "jt Joining_Type", VALUES_jt, 6),
Property(UCHAR_LINE_BREAK, "lb Line_Break", VALUES_lb, 40),
Property(UCHAR_NUMERIC_TYPE, "nt Numeric_Type", VALUES_nt, 4),
Property(UCHAR_SCRIPT, "sc Script", VALUES_sc, 161),
Property(UCHAR_SCRIPT, "sc Script", VALUES_sc, 167),
Property(UCHAR_HANGUL_SYLLABLE_TYPE, "hst Hangul_Syllable_Type", VALUES_hst, 6),
Property(UCHAR_NFD_QUICK_CHECK, "NFD_QC NFD_Quick_Check", VALUES_NFD_QC, 2),
Property(UCHAR_NFKD_QUICK_CHECK, "NFKD_QC NFKD_Quick_Check", VALUES_NFKD_QC, 2),

View File

@ -1,6 +1,6 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2009-2013 International Business Machines
# Copyright (c) 2009-2014 International Business Machines
# Corporation and others. All Rights Reserved.
#
# file name: preparseucd.py
@ -46,27 +46,13 @@ _terms_of_use = ""
# Script codes from ISO 15924 http://www.unicode.org/iso15924/codechanges.html
# that are not yet in the UCD.
_scripts_only_in_iso15924 = (
"Blis", "Cirt", "Cyrs",
"Afak", "Ahom", "Blis", "Cirt", "Cyrs",
"Egyd", "Egyh", "Geok",
"Hans", "Hant", "Hmng", "Hung",
"Inds", "Jpan", "Latf", "Latg", "Lina",
"Maya", "Moon", "Perm", "Roro",
"Hans", "Hant", "Hatr", "Hluw", "Hung",
"Inds", "Jpan", "Jurc", "Kore", "Kpel", "Latf", "Latg", "Loma",
"Maya", "Moon", "Mult", "Nkgb", "Nshu", "Phlv", "Roro",
"Sara", "Sgnw", "Syre", "Syrj", "Syrn",
"Teng", "Visp", "Zxxx",
"Kore", "Mani", "Phlp", "Phlv", "Zmth", "Zsym",
"Nkgb",
"Bass", "Dupl", "Elba", "Gran",
"Kpel", "Loma", "Mend", "Narb", "Nbat",
"Palm", "Sind", "Wara",
"Afak", "Jurc", "Mroo", "Nshu", "Tang", "Wole",
"Hluw", "Khoj", "Tirh",
"Aghb", "Mahj"
"Tang", "Teng", "Visp", "Wole", "Zmth", "Zsym", "Zxxx"
)
# Properties --------------------------------------------------------------- ***
@ -2081,7 +2067,7 @@ def main():
CompactBlocks()
# Write the ppucd.txt output file.
out_path = os.path.join(unidata_path, "ppucd.txt")
with open(out_path, "w") as out_file:
with codecs.open(out_path, "w", "UTF-8") as out_file:
WritePreparsedUCD(out_file)
out_file.flush()