diff --git a/tools/unicode/c/genprops/corepropsbuilder.cpp b/tools/unicode/c/genprops/corepropsbuilder.cpp index 531595c3e7..26b254ad19 100644 --- a/tools/unicode/c/genprops/corepropsbuilder.cpp +++ b/tools/unicode/c/genprops/corepropsbuilder.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 1999-2015, International Business Machines +* Copyright (C) 1999-2016, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -45,7 +45,7 @@ the udata API for loading ICU data. Especially, a UDataInfo structure precedes the actual data. It contains platform properties values and the file format version. -The following is a description of format version 7.1 . +The following is a description of format version 7.2 . Data contents: @@ -244,6 +244,12 @@ Unicode 6.2 adds sexagesimal (base-60) numeric values: The encoding of numeric values was extended to handle such values. +--- Changes in format version 7.2 --- + +ICU 57 adds 4 Emoji properties to vector word 2. +http://bugs.icu-project.org/trac/ticket/11802 +http://www.unicode.org/reports/tr51/#Emoji_Properties + ----------------------------------------------------------------------------- */ U_NAMESPACE_USE @@ -259,8 +265,8 @@ static UDataInfo dataInfo={ 0, { 0x55, 0x50, 0x72, 0x6f }, /* dataFormat="UPro" */ - { 7, 1, 0, 0 }, /* formatVersion */ - { 6, 2, 0, 0 } /* dataVersion */ + { 7, 2, 0, 0 }, /* formatVersion */ + { 8, 0, 0, 0 } /* dataVersion */ }; class CorePropsBuilder : public PropsBuilder { @@ -528,6 +534,11 @@ propToBinaries[]={ { UCHAR_ID_START, 1, UPROPS_ID_START }, { UCHAR_ID_CONTINUE, 1, UPROPS_ID_CONTINUE }, { UCHAR_GRAPHEME_BASE, 1, UPROPS_GRAPHEME_BASE }, + + { UCHAR_EMOJI, 2, UPROPS_2_EMOJI }, + { UCHAR_EMOJI_PRESENTATION, 2, UPROPS_2_EMOJI_PRESENTATION }, + { UCHAR_EMOJI_MODIFIER, 2, UPROPS_2_EMOJI_MODIFIER }, + { UCHAR_EMOJI_MODIFIER_BASE, 2, UPROPS_2_EMOJI_MODIFIER_BASE }, }; struct PropToEnum { diff --git a/tools/unicode/c/genprops/pnames_data.h b/tools/unicode/c/genprops/pnames_data.h index e89e782b0e..fca71f9e38 100644 --- a/tools/unicode/c/genprops/pnames_data.h +++ b/tools/unicode/c/genprops/pnames_data.h @@ -1,5 +1,5 @@ /** - * Copyright (C) 2002-2015, International Business Machines Corporation and + * Copyright (C) 2002-2016, International Business Machines Corporation and * others. All Rights Reserved. * * machine-generated by: icu/tools/unicode/py/preparseucd.py @@ -997,7 +997,7 @@ static const Value VALUES_gcm[38] = { Value((int32_t)U_GC_ZS_MASK, "Zs Space_Separator"), }; -static const Property PROPERTIES[96] = { +static const Property PROPERTIES[100] = { Property(UCHAR_ALPHABETIC, "Alpha Alphabetic"), Property(UCHAR_ASCII_HEX_DIGIT, "AHex ASCII_Hex_Digit"), Property(UCHAR_BIDI_CONTROL, "Bidi_C Bidi_Control"), @@ -1055,6 +1055,10 @@ static const Property PROPERTIES[96] = { Property(UCHAR_CHANGES_WHEN_CASEFOLDED, "CWCF Changes_When_Casefolded"), Property(UCHAR_CHANGES_WHEN_CASEMAPPED, "CWCM Changes_When_Casemapped"), Property(UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED, "CWKCF Changes_When_NFKC_Casefolded"), + Property(UCHAR_EMOJI, "Emoji Emoji"), + Property(UCHAR_EMOJI_PRESENTATION, "Emoji_Presentation Emoji_Presentation"), + Property(UCHAR_EMOJI_MODIFIER, "Emoji_Modifier Emoji_Modifier"), + Property(UCHAR_EMOJI_MODIFIER_BASE, "Emoji_Modifier_Base Emoji_Modifier_Base"), Property(UCHAR_BIDI_CLASS, "bc Bidi_Class", VALUES_bc, 23), Property(UCHAR_BLOCK, "blk Block", VALUES_blk, 263), Property(UCHAR_CANONICAL_COMBINING_CLASS, "ccc Canonical_Combining_Class", VALUES_ccc, 57), diff --git a/tools/unicode/py/preparseucd.py b/tools/unicode/py/preparseucd.py index e68d5bd1f5..00c63ba923 100755 --- a/tools/unicode/py/preparseucd.py +++ b/tools/unicode/py/preparseucd.py @@ -1,6 +1,6 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -# Copyright (c) 2009-2015 International Business Machines +# Copyright (c) 2009-2016 International Business Machines # Corporation and others. All Rights Reserved. # # file name: preparseucd.py @@ -516,6 +516,11 @@ def ParsePropertyAliases(in_file): AddBinaryProperty("nfcinert", "NFC_Inert") AddBinaryProperty("nfkcinert", "NFKC_Inert") AddBinaryProperty("segstart", "Segment_Starter") + # http://www.unicode.org/reports/tr51/#Emoji_Properties + AddBinaryProperty("Emoji", "Emoji") + AddBinaryProperty("Emoji_Presentation", "Emoji_Presentation") + AddBinaryProperty("Emoji_Modifier", "Emoji_Modifier") + AddBinaryProperty("Emoji_Modifier_Base", "Emoji_Modifier_Base") # C/POSIX character classes that do not have Unicode property [value] aliases. # See uchar.h. AddPOSIXBinaryProperty("alnum") @@ -1535,6 +1540,7 @@ _files = { "DerivedNormalizationProps.txt": (CopyAndStrip, ParseNamedProperties), "DerivedNumericValues.txt": (DontCopy, ParseDerivedNumericValues), "EastAsianWidth.txt": (DontCopy, ParseEastAsianWidth), + "emoji-data.txt": (DontCopy, ParseNamedProperties), "GraphemeBreakProperty.txt": (DontCopy, ParseGraphemeBreakProperty), "GraphemeBreakTest.txt": (PrependBOM, "testdata"), "IndicPositionalCategory.txt": (DontCopy, ParseIndicPositionalCategory),