ICU-11802 add 4 Emoji properties from emoji-data.txt 2.0

X-SVN-Rev: 38182
This commit is contained in:
Markus Scherer 2016-01-21 04:34:33 +00:00
parent 0013251fcb
commit 0390f4c86c
3 changed files with 28 additions and 7 deletions

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2015, International Business Machines
* Copyright (C) 1999-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -45,7 +45,7 @@ the udata API for loading ICU data. Especially, a UDataInfo structure
precedes the actual data. It contains platform properties values and the
file format version.
The following is a description of format version 7.1 .
The following is a description of format version 7.2.
Data contents:
@ -244,6 +244,12 @@ Unicode 6.2 adds sexagesimal (base-60) numeric values:
The encoding of numeric values was extended to handle such values.
--- Changes in format version 7.2 ---
ICU 57 adds 4 binary Emoji properties (Emoji, Emoji_Presentation, Emoji_Modifier, Emoji_Modifier_Base) to vector word 2.
http://bugs.icu-project.org/trac/ticket/11802
http://www.unicode.org/reports/tr51/#Emoji_Properties
----------------------------------------------------------------------------- */
U_NAMESPACE_USE
@ -259,8 +265,8 @@ static UDataInfo dataInfo={
0,
{ 0x55, 0x50, 0x72, 0x6f }, /* dataFormat="UPro" */
{ 7, 1, 0, 0 }, /* formatVersion */
{ 6, 2, 0, 0 } /* dataVersion */
{ 7, 2, 0, 0 }, /* formatVersion */
{ 8, 0, 0, 0 } /* dataVersion */
};
class CorePropsBuilder : public PropsBuilder {
@ -528,6 +534,11 @@ propToBinaries[]={
{ UCHAR_ID_START, 1, UPROPS_ID_START },
{ UCHAR_ID_CONTINUE, 1, UPROPS_ID_CONTINUE },
{ UCHAR_GRAPHEME_BASE, 1, UPROPS_GRAPHEME_BASE },
{ UCHAR_EMOJI, 2, UPROPS_2_EMOJI },
{ UCHAR_EMOJI_PRESENTATION, 2, UPROPS_2_EMOJI_PRESENTATION },
{ UCHAR_EMOJI_MODIFIER, 2, UPROPS_2_EMOJI_MODIFIER },
{ UCHAR_EMOJI_MODIFIER_BASE, 2, UPROPS_2_EMOJI_MODIFIER_BASE },
};
struct PropToEnum {

View File

@ -1,5 +1,5 @@
/**
* Copyright (C) 2002-2015, International Business Machines Corporation and
* Copyright (C) 2002-2016, International Business Machines Corporation and
* others. All Rights Reserved.
*
* machine-generated by: icu/tools/unicode/py/preparseucd.py
@ -997,7 +997,7 @@ static const Value VALUES_gcm[38] = {
Value((int32_t)U_GC_ZS_MASK, "Zs Space_Separator"),
};
static const Property PROPERTIES[96] = {
static const Property PROPERTIES[100] = {
Property(UCHAR_ALPHABETIC, "Alpha Alphabetic"),
Property(UCHAR_ASCII_HEX_DIGIT, "AHex ASCII_Hex_Digit"),
Property(UCHAR_BIDI_CONTROL, "Bidi_C Bidi_Control"),
@ -1055,6 +1055,10 @@ static const Property PROPERTIES[96] = {
Property(UCHAR_CHANGES_WHEN_CASEFOLDED, "CWCF Changes_When_Casefolded"),
Property(UCHAR_CHANGES_WHEN_CASEMAPPED, "CWCM Changes_When_Casemapped"),
Property(UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED, "CWKCF Changes_When_NFKC_Casefolded"),
Property(UCHAR_EMOJI, "Emoji Emoji"),
Property(UCHAR_EMOJI_PRESENTATION, "Emoji_Presentation Emoji_Presentation"),
Property(UCHAR_EMOJI_MODIFIER, "Emoji_Modifier Emoji_Modifier"),
Property(UCHAR_EMOJI_MODIFIER_BASE, "Emoji_Modifier_Base Emoji_Modifier_Base"),
Property(UCHAR_BIDI_CLASS, "bc Bidi_Class", VALUES_bc, 23),
Property(UCHAR_BLOCK, "blk Block", VALUES_blk, 263),
Property(UCHAR_CANONICAL_COMBINING_CLASS, "ccc Canonical_Combining_Class", VALUES_ccc, 57),

View File

@ -1,6 +1,6 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2009-2015 International Business Machines
# Copyright (c) 2009-2016 International Business Machines
# Corporation and others. All Rights Reserved.
#
# file name: preparseucd.py
@ -516,6 +516,11 @@ def ParsePropertyAliases(in_file):
AddBinaryProperty("nfcinert", "NFC_Inert")
AddBinaryProperty("nfkcinert", "NFKC_Inert")
AddBinaryProperty("segstart", "Segment_Starter")
# http://www.unicode.org/reports/tr51/#Emoji_Properties
AddBinaryProperty("Emoji", "Emoji")
AddBinaryProperty("Emoji_Presentation", "Emoji_Presentation")
AddBinaryProperty("Emoji_Modifier", "Emoji_Modifier")
AddBinaryProperty("Emoji_Modifier_Base", "Emoji_Modifier_Base")
# C/POSIX character classes that do not have Unicode property [value] aliases.
# See uchar.h.
AddPOSIXBinaryProperty("alnum")
@ -1535,6 +1540,7 @@ _files = {
"DerivedNormalizationProps.txt": (CopyAndStrip, ParseNamedProperties),
"DerivedNumericValues.txt": (DontCopy, ParseDerivedNumericValues),
"EastAsianWidth.txt": (DontCopy, ParseEastAsianWidth),
"emoji-data.txt": (DontCopy, ParseNamedProperties),
"GraphemeBreakProperty.txt": (DontCopy, ParseGraphemeBreakProperty),
"GraphemeBreakTest.txt": (PrependBOM, "testdata"),
"IndicPositionalCategory.txt": (DontCopy, ParseIndicPositionalCategory),