[emoji] Add emoji Extended_Pictographic table and function

Part of https://github.com/harfbuzz/harfbuzz/issues/1159

.
This commit is contained in:
Behdad Esfahbod 2018-10-03 17:46:48 +02:00
parent 1dc601b04a
commit 1e8f195b96
7 changed files with 399 additions and 9 deletions

View File

@ -289,13 +289,15 @@ harfbuzz-gobject.def: $(HB_GOBJECT_headers)
GENERATORS = \
gen-arabic-table.py \
gen-indic-table.py \
gen-use-table.py \
gen-def.py \
gen-emoji-table.py \
gen-indic-table.py \
gen-os2-unicode-ranges.py \
gen-use-table.py \
$(NULL)
EXTRA_DIST += $(GENERATORS)
unicode-tables: arabic-table indic-table use-table
unicode-tables: arabic-table indic-table use-table emoji-table
arabic-table: gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-arabic-table.hh \
@ -309,9 +311,13 @@ use-table: gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.tx
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-use-table.cc \
|| ($(RM) $(srcdir)/hb-ot-shape-complex-use-table.cc; false)
# Regenerate hb-unicode-emoji-table.hh in $(srcdir) by running
# gen-emoji-table.py on emoji-data.txt.  If the generator fails, the
# partially written header is removed and the rule fails.
emoji-table: gen-emoji-table.py emoji-data.txt
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-unicode-emoji-table.hh \
|| ($(RM) $(srcdir)/hb-unicode-emoji-table.hh; false)
built-sources: $(BUILT_SOURCES)
.PHONY: unicode-tables arabic-table indic-table use-table built-sources
.PHONY: unicode-tables arabic-table indic-table use-table emoji-table built-sources
RAGEL_GENERATED = \
$(patsubst %,$(srcdir)/%,$(HB_BASE_RAGEL_GENERATED_sources)) \

64
src/gen-emoji-table.py Executable file
View File

@ -0,0 +1,64 @@
#!/usr/bin/python
from __future__ import print_function, division, absolute_import
import sys
import os.path
from collections import OrderedDict
if len (sys.argv) != 2:
print("usage: ./gen-emoji-table.py emoji-data.txt", file=sys.stderr)
sys.exit (1)
f = open(sys.argv[1])
header = [f.readline () for _ in range(10)]
sets = OrderedDict()
for line in f.readlines():
line = line.strip()
if not line or line[0] == '#':
continue
rang, typ = [s.strip() for s in line.split('#')[0].split(';')[:2]]
rang = [int(s, 16) for s in rang.split('..')]
if len(rang) > 1:
start, end = rang
else:
start = end = rang[0]
if typ not in sets:
sets[typ] = set()
sets[typ].add((start, end))
print ("/* == Start of generated table == */")
print ("/*")
print (" * The following tables are generated by running:")
print (" *")
print (" * ./gen-emoji-table.py emoji-data.txt")
print (" *")
print (" * on file with this header:")
print (" *")
for l in header:
print (" * %s" % (l.strip()))
print (" */")
print ()
print ("#ifndef HB_UNICODE_EMOJI_TABLE_HH")
print ("#define HB_UNICODE_EMOJI_TABLE_HH")
print ()
print ('#include "hb-unicode.hh"')
print ()
for typ,s in sets.items():
if typ != "Extended_Pictographic": continue
print()
print("static const struct hb_unicode_range_t _hb_unicode_emoji_%s_table[] =" % typ)
print("{")
for pair in sorted(s):
print(" {0x%04X, 0x%04X}," % pair)
print("};")
print ()
print ("#endif /* HB_UNICODE_EMOJI_TABLE_HH */")
print ()
print ("/* == End of generated table == */")

View File

@ -81,7 +81,7 @@ struct os2
hb_codepoint_t cp = HB_SET_VALUE_INVALID;
while (codepoints->next (&cp)) {
unsigned int bit = hb_get_unicode_range_bit (cp);
unsigned int bit = _hb_ot_os2_get_unicode_range_bit (cp);
if (bit < 128)
{
unsigned int block = bit / 32;

View File

@ -44,7 +44,7 @@ struct OS2Range
else if (cp <= range->end)
return 0;
else
return 1;
return +1;
}
hb_codepoint_t start;
@ -227,11 +227,11 @@ static OS2Range _hb_os2_unicode_ranges[] =
};
/**
* hb_get_unicode_range_bit:
* _hb_ot_os2_get_unicode_range_bit:
* Returns the bit to be set in os/2 ulUnicodeOS2Range for a given codepoint.
**/
static unsigned int
hb_get_unicode_range_bit (hb_codepoint_t cp)
_hb_ot_os2_get_unicode_range_bit (hb_codepoint_t cp)
{
OS2Range *range = (OS2Range*) hb_bsearch_r (&cp, _hb_os2_unicode_ranges,
ARRAY_LENGTH (_hb_os2_unicode_ranges),

View File

@ -0,0 +1,269 @@
/* == Start of generated table == */
/*
* The following tables are generated by running:
*
* ./gen-emoji-table.py emoji-data.txt
*
* on file with this header:
*
* # emoji-data.txt
* # Date: 2018-02-07, 07:55:18 GMT
* # © 2018 Unicode®, Inc.
* # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
* # For terms of use, see http://www.unicode.org/terms_of_use.html
* #
* # Emoji Data for UTS #51
* # Version: 11.0
* #
* # For documentation and usage, see http://www.unicode.org/reports/tr51
*/
#ifndef HB_UNICODE_EMOJI_TABLE_HH
#define HB_UNICODE_EMOJI_TABLE_HH
#include "hb-unicode.hh"
/*
 * Codepoint ranges {start, end} for the Extended_Pictographic property,
 * listed in ascending order.  Adjacent ranges are kept as separate entries
 * rather than merged.  This table is generated output: regenerate it with
 * gen-emoji-table.py instead of editing it by hand.
 */
static const struct hb_unicode_range_t _hb_unicode_emoji_Extended_Pictographic_table[] =
{
{0x00A9, 0x00A9},
{0x00AE, 0x00AE},
{0x203C, 0x203C},
{0x2049, 0x2049},
{0x2122, 0x2122},
{0x2139, 0x2139},
{0x2194, 0x2199},
{0x21A9, 0x21AA},
{0x231A, 0x231B},
{0x2328, 0x2328},
{0x2388, 0x2388},
{0x23CF, 0x23CF},
{0x23E9, 0x23F3},
{0x23F8, 0x23FA},
{0x24C2, 0x24C2},
{0x25AA, 0x25AB},
{0x25B6, 0x25B6},
{0x25C0, 0x25C0},
{0x25FB, 0x25FE},
{0x2600, 0x2605},
{0x2607, 0x2612},
{0x2614, 0x2615},
{0x2616, 0x2617},
{0x2618, 0x2618},
{0x2619, 0x2619},
{0x261A, 0x266F},
{0x2670, 0x2671},
{0x2672, 0x267D},
{0x267E, 0x267F},
{0x2680, 0x2685},
{0x2690, 0x2691},
{0x2692, 0x269C},
{0x269D, 0x269D},
{0x269E, 0x269F},
{0x26A0, 0x26A1},
{0x26A2, 0x26B1},
{0x26B2, 0x26B2},
{0x26B3, 0x26BC},
{0x26BD, 0x26BF},
{0x26C0, 0x26C3},
{0x26C4, 0x26CD},
{0x26CE, 0x26CE},
{0x26CF, 0x26E1},
{0x26E2, 0x26E2},
{0x26E3, 0x26E3},
{0x26E4, 0x26E7},
{0x26E8, 0x26FF},
{0x2700, 0x2700},
{0x2701, 0x2704},
{0x2705, 0x2705},
{0x2708, 0x2709},
{0x270A, 0x270B},
{0x270C, 0x2712},
{0x2714, 0x2714},
{0x2716, 0x2716},
{0x271D, 0x271D},
{0x2721, 0x2721},
{0x2728, 0x2728},
{0x2733, 0x2734},
{0x2744, 0x2744},
{0x2747, 0x2747},
{0x274C, 0x274C},
{0x274E, 0x274E},
{0x2753, 0x2755},
{0x2757, 0x2757},
{0x2763, 0x2767},
{0x2795, 0x2797},
{0x27A1, 0x27A1},
{0x27B0, 0x27B0},
{0x27BF, 0x27BF},
{0x2934, 0x2935},
{0x2B05, 0x2B07},
{0x2B1B, 0x2B1C},
{0x2B50, 0x2B50},
{0x2B55, 0x2B55},
{0x3030, 0x3030},
{0x303D, 0x303D},
{0x3297, 0x3297},
{0x3299, 0x3299},
{0x1F000, 0x1F02B},
{0x1F02C, 0x1F02F},
{0x1F030, 0x1F093},
{0x1F094, 0x1F09F},
{0x1F0A0, 0x1F0AE},
{0x1F0AF, 0x1F0B0},
{0x1F0B1, 0x1F0BE},
{0x1F0BF, 0x1F0BF},
{0x1F0C0, 0x1F0C0},
{0x1F0C1, 0x1F0CF},
{0x1F0D0, 0x1F0D0},
{0x1F0D1, 0x1F0DF},
{0x1F0E0, 0x1F0F5},
{0x1F0F6, 0x1F0FF},
{0x1F10D, 0x1F10F},
{0x1F12F, 0x1F12F},
{0x1F16C, 0x1F16F},
{0x1F170, 0x1F171},
{0x1F17E, 0x1F17E},
{0x1F17F, 0x1F17F},
{0x1F18E, 0x1F18E},
{0x1F191, 0x1F19A},
{0x1F1AD, 0x1F1E5},
{0x1F201, 0x1F202},
{0x1F203, 0x1F20F},
{0x1F21A, 0x1F21A},
{0x1F22F, 0x1F22F},
{0x1F232, 0x1F23A},
{0x1F23C, 0x1F23F},
{0x1F249, 0x1F24F},
{0x1F250, 0x1F251},
{0x1F252, 0x1F25F},
{0x1F260, 0x1F265},
{0x1F266, 0x1F2FF},
{0x1F300, 0x1F320},
{0x1F321, 0x1F32C},
{0x1F32D, 0x1F32F},
{0x1F330, 0x1F335},
{0x1F336, 0x1F336},
{0x1F337, 0x1F37C},
{0x1F37D, 0x1F37D},
{0x1F37E, 0x1F37F},
{0x1F380, 0x1F393},
{0x1F394, 0x1F39F},
{0x1F3A0, 0x1F3C4},
{0x1F3C5, 0x1F3C5},
{0x1F3C6, 0x1F3CA},
{0x1F3CB, 0x1F3CE},
{0x1F3CF, 0x1F3D3},
{0x1F3D4, 0x1F3DF},
{0x1F3E0, 0x1F3F0},
{0x1F3F1, 0x1F3F7},
{0x1F3F8, 0x1F3FA},
{0x1F400, 0x1F43E},
{0x1F43F, 0x1F43F},
{0x1F440, 0x1F440},
{0x1F441, 0x1F441},
{0x1F442, 0x1F4F7},
{0x1F4F8, 0x1F4F8},
{0x1F4F9, 0x1F4FC},
{0x1F4FD, 0x1F4FE},
{0x1F4FF, 0x1F4FF},
{0x1F500, 0x1F53D},
{0x1F546, 0x1F54A},
{0x1F54B, 0x1F54F},
{0x1F550, 0x1F567},
{0x1F568, 0x1F579},
{0x1F57A, 0x1F57A},
{0x1F57B, 0x1F5A3},
{0x1F5A4, 0x1F5A4},
{0x1F5A5, 0x1F5FA},
{0x1F5FB, 0x1F5FF},
{0x1F600, 0x1F600},
{0x1F601, 0x1F610},
{0x1F611, 0x1F611},
{0x1F612, 0x1F614},
{0x1F615, 0x1F615},
{0x1F616, 0x1F616},
{0x1F617, 0x1F617},
{0x1F618, 0x1F618},
{0x1F619, 0x1F619},
{0x1F61A, 0x1F61A},
{0x1F61B, 0x1F61B},
{0x1F61C, 0x1F61E},
{0x1F61F, 0x1F61F},
{0x1F620, 0x1F625},
{0x1F626, 0x1F627},
{0x1F628, 0x1F62B},
{0x1F62C, 0x1F62C},
{0x1F62D, 0x1F62D},
{0x1F62E, 0x1F62F},
{0x1F630, 0x1F633},
{0x1F634, 0x1F634},
{0x1F635, 0x1F640},
{0x1F641, 0x1F642},
{0x1F643, 0x1F644},
{0x1F645, 0x1F64F},
{0x1F680, 0x1F6C5},
{0x1F6C6, 0x1F6CF},
{0x1F6D0, 0x1F6D0},
{0x1F6D1, 0x1F6D2},
{0x1F6D3, 0x1F6D4},
{0x1F6D5, 0x1F6DF},
{0x1F6E0, 0x1F6EC},
{0x1F6ED, 0x1F6EF},
{0x1F6F0, 0x1F6F3},
{0x1F6F4, 0x1F6F6},
{0x1F6F7, 0x1F6F8},
{0x1F6F9, 0x1F6F9},
{0x1F6FA, 0x1F6FF},
{0x1F774, 0x1F77F},
{0x1F7D5, 0x1F7D8},
{0x1F7D9, 0x1F7FF},
{0x1F80C, 0x1F80F},
{0x1F848, 0x1F84F},
{0x1F85A, 0x1F85F},
{0x1F888, 0x1F88F},
{0x1F8AE, 0x1F8FF},
{0x1F90C, 0x1F90F},
{0x1F910, 0x1F918},
{0x1F919, 0x1F91E},
{0x1F91F, 0x1F91F},
{0x1F920, 0x1F927},
{0x1F928, 0x1F92F},
{0x1F930, 0x1F930},
{0x1F931, 0x1F932},
{0x1F933, 0x1F93A},
{0x1F93C, 0x1F93E},
{0x1F93F, 0x1F93F},
{0x1F940, 0x1F945},
{0x1F947, 0x1F94B},
{0x1F94C, 0x1F94C},
{0x1F94D, 0x1F94F},
{0x1F950, 0x1F95E},
{0x1F95F, 0x1F96B},
{0x1F96C, 0x1F970},
{0x1F971, 0x1F972},
{0x1F973, 0x1F976},
{0x1F977, 0x1F979},
{0x1F97A, 0x1F97A},
{0x1F97B, 0x1F97B},
{0x1F97C, 0x1F97F},
{0x1F980, 0x1F984},
{0x1F985, 0x1F991},
{0x1F992, 0x1F997},
{0x1F998, 0x1F9A2},
{0x1F9A3, 0x1F9AF},
{0x1F9B0, 0x1F9B9},
{0x1F9BA, 0x1F9BF},
{0x1F9C0, 0x1F9C0},
{0x1F9C1, 0x1F9C2},
{0x1F9C3, 0x1F9CF},
{0x1F9D0, 0x1F9E6},
{0x1F9E7, 0x1F9FF},
{0x1FA00, 0x1FA5F},
{0x1FA60, 0x1FA6D},
{0x1FA6E, 0x1FFFD},
};
#endif /* HB_UNICODE_EMOJI_TABLE_HH */
/* == End of generated table == */

View File

@ -564,3 +564,19 @@ _hb_modified_combining_class[256] =
241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
255, /* HB_UNICODE_COMBINING_CLASS_INVALID */
};
/*
* Emoji
*/
#include "hb-unicode-emoji-table.hh"
/* Reports whether cp falls inside one of the ranges listed in
 * _hb_unicode_emoji_Extended_Pictographic_table.  Any non-null result
 * from hb_bsearch_r is treated as a hit. */
bool
_hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp)
{
  const unsigned int n_ranges =
    ARRAY_LENGTH (_hb_unicode_emoji_Extended_Pictographic_table);

  bool found = hb_bsearch_r (&cp,
			     _hb_unicode_emoji_Extended_Pictographic_table,
			     n_ranges,
			     sizeof (hb_unicode_range_t),
			     hb_unicode_range_t::cmp,
			     nullptr) != nullptr;
  return found;
}

View File

@ -286,7 +286,9 @@ HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
DECLARE_NULL_INSTANCE (hb_unicode_funcs_t);
/* Modified combining marks */
/*
* Modified combining marks
*/
/* Hebrew
*
@ -384,4 +386,37 @@ DECLARE_NULL_INSTANCE (hb_unicode_funcs_t);
(FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \
FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))
/*
* Ranges, used for bsearch tables.
*/
/* An inclusive range [start, end] of codepoints, used as the element
 * type of bsearch lookup tables. */
struct hb_unicode_range_t
{
  /* Three-way comparator for bsearch-style lookups.
   *
   * _key points to the hb_codepoint_t being searched for; _item points to
   * the hb_unicode_range_t being probed; _arg is not used.
   *
   * Returns -1 if the codepoint is below the range, 0 if it lies within
   * [start, end], and +1 if it is above the range. */
  static int
  cmp (const void *_key, const void *_item, void *_arg)
  {
    /* Keep the const qualifier when converting from const void *. */
    hb_codepoint_t cp = *((const hb_codepoint_t *) _key);
    const hb_unicode_range_t *range = (const hb_unicode_range_t *) _item;

    if (cp < range->start)
      return -1;
    else if (cp <= range->end)
      return 0;
    else
      return +1;
  }

  hb_codepoint_t start; /* First codepoint in the range. */
  hb_codepoint_t end;   /* Last codepoint in the range (inclusive). */
};
/*
* Emoji.
*/
/* Returns whether cp is found in the generated Extended_Pictographic
 * range table. */
HB_INTERNAL bool
_hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp);
#endif /* HB_UNICODE_HH */