ICU-11802 add 4 Emoji properties from emoji-data.txt 2.0
X-SVN-Rev: 38183
This commit is contained in:
parent
0390f4c86c
commit
c10fdc6e29
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 1999-2015, International Business Machines
|
||||
* Copyright (C) 1999-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
* file name: ubidi_props_data.h
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 1999-2015, International Business Machines
|
||||
* Copyright (C) 1999-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
* file name: ucase_props_data.h
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1997-2015, International Business Machines
|
||||
* Copyright (C) 1997-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
@ -397,8 +397,36 @@ typedef enum UProperty {
|
||||
UCHAR_CHANGES_WHEN_CASEMAPPED=55,
|
||||
/** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */
|
||||
UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56,
|
||||
/**
|
||||
* Binary property Emoji.
|
||||
* See http://www.unicode.org/reports/tr51/#Emoji_Properties
|
||||
*
|
||||
* @draft ICU 57
|
||||
*/
|
||||
UCHAR_EMOJI=57,
|
||||
/**
|
||||
* Binary property Emoji_Presentation.
|
||||
* See http://www.unicode.org/reports/tr51/#Emoji_Properties
|
||||
*
|
||||
* @draft ICU 57
|
||||
*/
|
||||
UCHAR_EMOJI_PRESENTATION=58,
|
||||
/**
|
||||
* Binary property Emoji_Modifier.
|
||||
* See http://www.unicode.org/reports/tr51/#Emoji_Properties
|
||||
*
|
||||
* @draft ICU 57
|
||||
*/
|
||||
UCHAR_EMOJI_MODIFIER=59,
|
||||
/**
|
||||
* Binary property Emoji_Modifier_Base.
|
||||
* See http://www.unicode.org/reports/tr51/#Emoji_Properties
|
||||
*
|
||||
* @draft ICU 57
|
||||
*/
|
||||
UCHAR_EMOJI_MODIFIER_BASE=60,
|
||||
/** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */
|
||||
UCHAR_BINARY_LIMIT=57,
|
||||
UCHAR_BINARY_LIMIT=61,
|
||||
|
||||
/** Enumerated property Bidi_Class.
|
||||
Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2014, International Business Machines
|
||||
* Copyright (C) 2002-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -270,7 +270,11 @@ static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={
|
||||
{ UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_TITLECASED
|
||||
{ UPROPS_SRC_CASE_AND_NORM, 0, changesWhenCasefolded },
|
||||
{ UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_CASEMAPPED
|
||||
{ UPROPS_SRC_NFKC_CF, 0, changesWhenNFKC_Casefolded }
|
||||
{ UPROPS_SRC_NFKC_CF, 0, changesWhenNFKC_Casefolded },
|
||||
{ 2, U_MASK(UPROPS_2_EMOJI), defaultContains },
|
||||
{ 2, U_MASK(UPROPS_2_EMOJI_PRESENTATION), defaultContains },
|
||||
{ 2, U_MASK(UPROPS_2_EMOJI_MODIFIER), defaultContains },
|
||||
{ 2, U_MASK(UPROPS_2_EMOJI_MODIFIER_BASE), defaultContains },
|
||||
};
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2012, International Business Machines
|
||||
* Copyright (C) 2002-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -187,13 +187,21 @@ enum {
|
||||
/*
|
||||
* Properties in vector word 2
|
||||
* Bits
|
||||
* 31..26 reserved
|
||||
* 31..28 http://www.unicode.org/reports/tr51/#Emoji_Properties
|
||||
* 27..26 reserved
|
||||
* 25..20 Line Break
|
||||
* 19..15 Sentence Break
|
||||
* 14..10 Word Break
|
||||
* 9.. 5 Grapheme Cluster Break
|
||||
* 4.. 0 Decomposition Type
|
||||
*/
|
||||
/*
 * Bit numbers for the four emoji binary properties in properties vector word 2.
 * Only the first enumerator has an explicit value (28); the following three
 * take the next consecutive values 29, 30 and 31, which is the "31..28" bit
 * range listed for the emoji properties in the vector word 2 layout.
 * These are bit indexes, not masks.
 */
enum {
|
||||
UPROPS_2_EMOJI=28,
|
||||
UPROPS_2_EMOJI_PRESENTATION,
|
||||
UPROPS_2_EMOJI_MODIFIER,
|
||||
UPROPS_2_EMOJI_MODIFIER_BASE,
|
||||
};
|
||||
|
||||
#define UPROPS_LB_MASK 0x03f00000
|
||||
#define UPROPS_LB_SHIFT 20
|
||||
|
||||
|
Binary file not shown.
Binary file not shown.
@ -1,4 +1,4 @@
|
||||
* Copyright (C) 2004-2015, International Business Machines
|
||||
* Copyright (C) 2004-2016, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
* file name: changes.txt
|
||||
@ -45,6 +45,35 @@ Khitan scripts will be encoded later.
|
||||
|
||||
---------------------------------------------------------------------------- ***
|
||||
|
||||
Emoji properties added in ICU 57: http://bugs.icu-project.org/trac/ticket/11802
|
||||
|
||||
Edit preparseucd.py to add & parse new properties.
|
||||
They share the UCD property namespace but are not listed in PropertyAliases.txt.
|
||||
|
||||
Add emoji-data.txt to the input files, from http://www.unicode.org/Public/emoji/
|
||||
Initial data from emoji/2.0/
|
||||
|
||||
ICU_ROOT=~/svn.icu/trunk
|
||||
ICU_SRC_DIR=$ICU_ROOT/src
|
||||
ICUDT=icudt56b
|
||||
export LD_LIBRARY_PATH=$ICU_ROOT/dbg/lib
|
||||
SRC_DATA_IN=$ICU_SRC_DIR/source/data/in
|
||||
UNIDATA=$ICU_SRC_DIR/source/data/unidata
|
||||
|
||||
Add binary-property constants to uchar.h enum UProperty & UProperty.java.
|
||||
|
||||
~/svn.icutools/trunk/src/unicode$ py/preparseucd.py ~/unidata/uni80/20151217 $ICU_SRC_DIR ~/svn.icutools/trunk/src
|
||||
(Needs to be run after uchar.h additions, so that the new properties can be picked up by genprops.)
|
||||
|
||||
Data structure: uprops.h/.cpp, corepropsbuilder.cpp, UCharacterProperty.java
|
||||
|
||||
make install, then icutools cmake & make, then
|
||||
~/svn.icutools/trunk/dbg/unicode/c$ make && genprops/genprops $ICU_SRC_DIR
|
||||
|
||||
Generate Java data as usual, only update pnames.icu & uprops.icu.
|
||||
|
||||
---------------------------------------------------------------------------- ***
|
||||
|
||||
Unicode 8.0 update for ICU 56
|
||||
|
||||
* Command-line environment setup
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Copyright (C) 1999-2015, International Business Machines
|
||||
# Copyright (C) 1999-2016, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
# file name: nfc.txt
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Copyright (C) 1999-2015, International Business Machines
|
||||
# Copyright (C) 1999-2016, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
# file name: nfkc.txt
|
||||
|
@ -1,5 +1,5 @@
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2015 Unicode, Inc.
|
||||
# Copyright (c) 1991-2016 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,5 @@
|
||||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2014, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2016, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
@ -60,6 +59,7 @@ void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name,
|
||||
TESTCASE_AUTO(TestPatternProperties);
|
||||
TESTCASE_AUTO(TestScriptMetadata);
|
||||
TESTCASE_AUTO(TestBidiPairedBracketType);
|
||||
TESTCASE_AUTO(TestEmojiProperties);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
@ -507,3 +507,18 @@ void UnicodeTest::TestBidiPairedBracketType() {
|
||||
assertTrue("bpt=Open is a subset of Ps", ps.containsAll(bpt_open));
|
||||
assertTrue("bpt=Close is a subset of Pe", pe.containsAll(bpt_close));
|
||||
}
|
||||
|
||||
/**
 * Spot-checks the four emoji binary properties through u_hasBinaryProperty():
 *  - U+0020 must not have UCHAR_EMOJI; U+1F320 must have it.
 *  - The UnicodeSet built from the pattern "[:Emoji:]" must contain
 *    more than 700 code points.
 *  - U+1F320 must have UCHAR_EMOJI_PRESENTATION, U+1F3FF must have
 *    UCHAR_EMOJI_MODIFIER, and U+1F64B must have UCHAR_EMOJI_MODIFIER_BASE.
 */
void UnicodeTest::TestEmojiProperties() {
|
||||
assertFalse("space is not Emoji", u_hasBinaryProperty(0x20, UCHAR_EMOJI));
|
||||
assertTrue("shooting star is Emoji", u_hasBinaryProperty(0x1F320, UCHAR_EMOJI));
|
||||
// NOTE(review): errorCode is presumably checked/reported by IcuTestErrorCode
// itself if the UnicodeSet pattern fails to parse -- confirm in its definition.
IcuTestErrorCode errorCode(*this, "TestEmojiProperties()");
|
||||
UnicodeSet emoji("[:Emoji:]", errorCode);
|
||||
// Lower bound only; the exact count is not pinned here.
assertTrue("lots of Emoji", emoji.size() > 700);
|
||||
|
||||
assertTrue("shooting star is Emoji_Presentation",
|
||||
u_hasBinaryProperty(0x1F320, UCHAR_EMOJI_PRESENTATION));
|
||||
assertTrue("Fitzpatrick 6 is Emoji_Modifier",
|
||||
u_hasBinaryProperty(0x1F3FF, UCHAR_EMOJI_MODIFIER));
|
||||
assertTrue("happy person is Emoji_Modifier_Base",
|
||||
u_hasBinaryProperty(0x1F64B, UCHAR_EMOJI_MODIFIER_BASE));
|
||||
}
|
||||
|
@ -1,6 +1,5 @@
|
||||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2013, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2016, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
@ -39,6 +38,7 @@ public:
|
||||
void TestPatternProperties();
|
||||
void TestScriptMetadata();
|
||||
void TestBidiPairedBracketType();
|
||||
void TestEmojiProperties();
|
||||
|
||||
private:
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user