ICU-11802 add 4 Emoji properties from emoji-data.txt 2.0

X-SVN-Rev: 38183
This commit is contained in:
Markus Scherer 2016-01-21 04:39:21 +00:00
parent 0390f4c86c
commit c10fdc6e29
16 changed files with 3224 additions and 3108 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 1999-2015, International Business Machines
* Copyright (C) 1999-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
* file name: ubidi_props_data.h

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 1999-2015, International Business Machines
* Copyright (C) 1999-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
* file name: ucase_props_data.h

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 1997-2015, International Business Machines
* Copyright (C) 1997-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
@ -397,8 +397,36 @@ typedef enum UProperty {
UCHAR_CHANGES_WHEN_CASEMAPPED=55,
/** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */
UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56,
/**
* Binary property Emoji.
* See http://www.unicode.org/reports/tr51/#Emoji_Properties
*
* @draft ICU 57
*/
UCHAR_EMOJI=57,
/**
* Binary property Emoji_Presentation.
* See http://www.unicode.org/reports/tr51/#Emoji_Properties
*
* @draft ICU 57
*/
UCHAR_EMOJI_PRESENTATION=58,
/**
* Binary property Emoji_Modifier.
* See http://www.unicode.org/reports/tr51/#Emoji_Properties
*
* @draft ICU 57
*/
UCHAR_EMOJI_MODIFIER=59,
/**
* Binary property Emoji_Modifier_Base.
* See http://www.unicode.org/reports/tr51/#Emoji_Properties
*
* @draft ICU 57
*/
UCHAR_EMOJI_MODIFIER_BASE=60,
/** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */
UCHAR_BINARY_LIMIT=57,
UCHAR_BINARY_LIMIT=61,
/** Enumerated property Bidi_Class.
Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2002-2014, International Business Machines
* Copyright (C) 2002-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -270,7 +270,11 @@ static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={
{ UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_TITLECASED
{ UPROPS_SRC_CASE_AND_NORM, 0, changesWhenCasefolded },
{ UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_CASEMAPPED
{ UPROPS_SRC_NFKC_CF, 0, changesWhenNFKC_Casefolded }
{ UPROPS_SRC_NFKC_CF, 0, changesWhenNFKC_Casefolded },
{ 2, U_MASK(UPROPS_2_EMOJI), defaultContains },
{ 2, U_MASK(UPROPS_2_EMOJI_PRESENTATION), defaultContains },
{ 2, U_MASK(UPROPS_2_EMOJI_MODIFIER), defaultContains },
{ 2, U_MASK(UPROPS_2_EMOJI_MODIFIER_BASE), defaultContains },
};
U_CAPI UBool U_EXPORT2

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2002-2012, International Business Machines
* Copyright (C) 2002-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -187,13 +187,21 @@ enum {
/*
* Properties in vector word 2
* Bits
* 31..26 reserved
* 31..28 Binary Emoji properties, see http://www.unicode.org/reports/tr51/#Emoji_Properties
* 27..26 reserved
* 25..20 Line Break
* 19..15 Sentence Break
* 14..10 Word Break
* 9.. 5 Grapheme Cluster Break
* 4.. 0 Decomposition Type
*/
/*
 * Bit numbers (not masks) of the binary Emoji properties stored in
 * bits 31..28 of properties vector word 2.
 * See http://www.unicode.org/reports/tr51/#Emoji_Properties
 *
 * The values are spelled out explicitly because each one is a fixed bit
 * position; inserting or reordering an enumerator must not silently shift
 * the others. No trailing comma after the last enumerator, so that the
 * header also compiles as C89/C++98.
 */
enum {
    UPROPS_2_EMOJI=28,
    UPROPS_2_EMOJI_PRESENTATION=29,
    UPROPS_2_EMOJI_MODIFIER=30,
    UPROPS_2_EMOJI_MODIFIER_BASE=31
};
#define UPROPS_LB_MASK 0x03f00000
#define UPROPS_LB_SHIFT 20

Binary file not shown.

Binary file not shown.

View File

@ -1,4 +1,4 @@
* Copyright (C) 2004-2015, International Business Machines
* Copyright (C) 2004-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
* file name: changes.txt
@ -45,6 +45,35 @@ Khitan scripts will be encoded later.
---------------------------------------------------------------------------- ***
Emoji properties added in ICU 57: http://bugs.icu-project.org/trac/ticket/11802
Edit preparseucd.py to add & parse new properties.
They share the UCD property namespace but are not listed in PropertyAliases.txt.
Add emoji-data.txt to the input files, from http://www.unicode.org/Public/emoji/
Initial data from emoji/2.0/
ICU_ROOT=~/svn.icu/trunk
ICU_SRC_DIR=$ICU_ROOT/src
ICUDT=icudt56b
export LD_LIBRARY_PATH=$ICU_ROOT/dbg/lib
SRC_DATA_IN=$ICU_SRC_DIR/source/data/in
UNIDATA=$ICU_SRC_DIR/source/data/unidata
Add binary-property constants to uchar.h enum UProperty & UProperty.java.
~/svn.icutools/trunk/src/unicode$ py/preparseucd.py ~/unidata/uni80/20151217 $ICU_SRC_DIR ~/svn.icutools/trunk/src
(Needs to be run after uchar.h additions, so that the new properties can be picked up by genprops.)
Data structure: uprops.h/.cpp, corepropsbuilder.cpp, UCharacterProperty.java
make install, then icutools cmake & make, then
~/svn.icutools/trunk/dbg/unicode/c$ make && genprops/genprops $ICU_SRC_DIR
Generate Java data as usual, but only update pnames.icu & uprops.icu.
---------------------------------------------------------------------------- ***
Unicode 8.0 update for ICU 56
* Command-line environment setup

View File

@ -1,4 +1,4 @@
# Copyright (C) 1999-2015, International Business Machines
# Copyright (C) 1999-2016, International Business Machines
# Corporation and others. All Rights Reserved.
#
# file name: nfc.txt

View File

@ -1,4 +1,4 @@
# Copyright (C) 1999-2015, International Business Machines
# Copyright (C) 1999-2016, International Business Machines
# Corporation and others. All Rights Reserved.
#
# file name: nfkc.txt

View File

@ -1,5 +1,5 @@
# Unicode Character Database
# Copyright (c) 1991-2015 Unicode, Inc.
# Copyright (c) 1991-2016 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,5 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2014, International Business Machines Corporation and
* Copyright (c) 1997-2016, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -60,6 +59,7 @@ void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name,
TESTCASE_AUTO(TestPatternProperties);
TESTCASE_AUTO(TestScriptMetadata);
TESTCASE_AUTO(TestBidiPairedBracketType);
TESTCASE_AUTO(TestEmojiProperties);
TESTCASE_AUTO_END;
}
@ -507,3 +507,18 @@ void UnicodeTest::TestBidiPairedBracketType() {
assertTrue("bpt=Open is a subset of Ps", ps.containsAll(bpt_open));
assertTrue("bpt=Close is a subset of Pe", pe.containsAll(bpt_close));
}
void UnicodeTest::TestEmojiProperties() {
assertFalse("space is not Emoji", u_hasBinaryProperty(0x20, UCHAR_EMOJI));
assertTrue("shooting star is Emoji", u_hasBinaryProperty(0x1F320, UCHAR_EMOJI));
IcuTestErrorCode errorCode(*this, "TestEmojiProperties()");
UnicodeSet emoji("[:Emoji:]", errorCode);
assertTrue("lots of Emoji", emoji.size() > 700);
assertTrue("shooting star is Emoji_Presentation",
u_hasBinaryProperty(0x1F320, UCHAR_EMOJI_PRESENTATION));
assertTrue("Fitzpatrick 6 is Emoji_Modifier",
u_hasBinaryProperty(0x1F3FF, UCHAR_EMOJI_MODIFIER));
assertTrue("happy person is Emoji_Modifier_Base",
u_hasBinaryProperty(0x1F64B, UCHAR_EMOJI_MODIFIER_BASE));
}

View File

@ -1,6 +1,5 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2013, International Business Machines Corporation and
* Copyright (c) 1997-2016, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -39,6 +38,7 @@ public:
void TestPatternProperties();
void TestScriptMetadata();
void TestBidiPairedBracketType();
void TestEmojiProperties();
private: