[emoji] Add emoji Extended_Pictographic table and function
Part of https://github.com/harfbuzz/harfbuzz/issues/1159 .
This commit is contained in:
parent
1dc601b04a
commit
1e8f195b96
@ -289,13 +289,15 @@ harfbuzz-gobject.def: $(HB_GOBJECT_headers)
|
||||
|
||||
GENERATORS = \
|
||||
gen-arabic-table.py \
|
||||
gen-indic-table.py \
|
||||
gen-use-table.py \
|
||||
gen-def.py \
|
||||
gen-emoji-table.py \
|
||||
gen-indic-table.py \
|
||||
gen-os2-unicode-ranges.py \
|
||||
gen-use-table.py \
|
||||
$(NULL)
|
||||
EXTRA_DIST += $(GENERATORS)
|
||||
|
||||
unicode-tables: arabic-table indic-table use-table
|
||||
unicode-tables: arabic-table indic-table use-table emoji-table
|
||||
|
||||
arabic-table: gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt
|
||||
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-arabic-table.hh \
|
||||
@ -309,9 +311,13 @@ use-table: gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.tx
|
||||
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-use-table.cc \
|
||||
|| ($(RM) $(srcdir)/hb-ot-shape-complex-use-table.cc; false)
|
||||
|
||||
emoji-table: gen-emoji-table.py emoji-data.txt
|
||||
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-unicode-emoji-table.hh \
|
||||
|| ($(RM) $(srcdir)/hb-unicode-emoji-table.hh; false)
|
||||
|
||||
built-sources: $(BUILT_SOURCES)
|
||||
|
||||
.PHONY: unicode-tables arabic-table indic-table use-table built-sources
|
||||
.PHONY: unicode-tables arabic-table indic-table use-table emoji-table built-sources
|
||||
|
||||
RAGEL_GENERATED = \
|
||||
$(patsubst %,$(srcdir)/%,$(HB_BASE_RAGEL_GENERATED_sources)) \
|
||||
|
64
src/gen-emoji-table.py
Executable file
64
src/gen-emoji-table.py
Executable file
@ -0,0 +1,64 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
from __future__ import print_function, division, absolute_import
|
||||
import sys
|
||||
import os.path
|
||||
from collections import OrderedDict
|
||||
|
||||
def parse_emoji_data(f):
    """Parse an open emoji-data.txt stream.

    The first ten lines are consumed verbatim as the file header (they are
    echoed into the generated output).  Every remaining non-blank,
    non-comment line is expected to look like

        START[..END] ; Property_Name  # trailing comment

    Returns a ``(header, sets)`` pair: ``header`` is the list of the ten raw
    header lines and ``sets`` is an OrderedDict mapping each property name to
    a set of inclusive ``(start, end)`` code point tuples, keyed in order of
    first appearance.
    """
    header = [f.readline() for _ in range(10)]

    sets = OrderedDict()
    for line in f:
        line = line.strip()
        if not line or line[0] == '#':
            continue

        # Drop the trailing comment, then keep the first two ';' fields.
        rang, typ = [s.strip() for s in line.split('#')[0].split(';')[:2]]

        # A single code point "XXXX" is stored as the range (XXXX, XXXX).
        rang = [int(s, 16) for s in rang.split('..')]
        if len(rang) > 1:
            start, end = rang
        else:
            start = end = rang[0]

        sets.setdefault(typ, set()).add((start, end))

    return header, sets


def main(argv):
    """Generate hb-unicode-emoji-table.hh on stdout.

    ``argv`` has the shape of ``sys.argv``: program name followed by the
    path to emoji-data.txt.  Returns the process exit status (1 on a usage
    error, 0 otherwise).
    """
    if len(argv) != 2:
        print("usage: ./gen-emoji-table.py emoji-data.txt", file=sys.stderr)
        return 1

    # The context manager closes the input once it has been parsed.
    with open(argv[1]) as f:
        header, sets = parse_emoji_data(f)

    print("/* == Start of generated table == */")
    print("/*")
    print(" * The following tables are generated by running:")
    print(" *")
    print(" * ./gen-emoji-table.py emoji-data.txt")
    print(" *")
    print(" * on file with this header:")
    print(" *")
    for l in header:
        print(" * %s" % (l.strip()))
    print(" */")
    print()
    print("#ifndef HB_UNICODE_EMOJI_TABLE_HH")
    print("#define HB_UNICODE_EMOJI_TABLE_HH")
    print()
    print('#include "hb-unicode.hh"')
    print()

    for typ, s in sets.items():
        # Only the Extended_Pictographic property is emitted.
        if typ != "Extended_Pictographic":
            continue
        print()
        print("static const struct hb_unicode_range_t _hb_unicode_emoji_%s_table[] =" % typ)
        print("{")
        # Emit ranges in ascending order.
        for pair in sorted(s):
            print(" {0x%04X, 0x%04X}," % pair)
        print("};")

    print()
    print("#endif /* HB_UNICODE_EMOJI_TABLE_HH */")
    print()
    print("/* == End of generated table == */")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
@ -81,7 +81,7 @@ struct os2
|
||||
|
||||
hb_codepoint_t cp = HB_SET_VALUE_INVALID;
|
||||
while (codepoints->next (&cp)) {
|
||||
unsigned int bit = hb_get_unicode_range_bit (cp);
|
||||
unsigned int bit = _hb_ot_os2_get_unicode_range_bit (cp);
|
||||
if (bit < 128)
|
||||
{
|
||||
unsigned int block = bit / 32;
|
||||
|
@ -44,7 +44,7 @@ struct OS2Range
|
||||
else if (cp <= range->end)
|
||||
return 0;
|
||||
else
|
||||
return 1;
|
||||
return +1;
|
||||
}
|
||||
|
||||
hb_codepoint_t start;
|
||||
@ -227,11 +227,11 @@ static OS2Range _hb_os2_unicode_ranges[] =
|
||||
};
|
||||
|
||||
/**
|
||||
* hb_get_unicode_range_bit:
|
||||
* _hb_ot_os2_get_unicode_range_bit:
|
||||
* Returns the bit to be set in os/2 ulUnicodeOS2Range for a given codepoint.
|
||||
**/
|
||||
static unsigned int
|
||||
hb_get_unicode_range_bit (hb_codepoint_t cp)
|
||||
_hb_ot_os2_get_unicode_range_bit (hb_codepoint_t cp)
|
||||
{
|
||||
OS2Range *range = (OS2Range*) hb_bsearch_r (&cp, _hb_os2_unicode_ranges,
|
||||
ARRAY_LENGTH (_hb_os2_unicode_ranges),
|
||||
|
269
src/hb-unicode-emoji-table.hh
Normal file
269
src/hb-unicode-emoji-table.hh
Normal file
@ -0,0 +1,269 @@
|
||||
/* == Start of generated table == */
|
||||
/*
|
||||
* The following tables are generated by running:
|
||||
*
|
||||
* ./gen-emoji-table.py emoji-data.txt
|
||||
*
|
||||
* on file with this header:
|
||||
*
|
||||
* # emoji-data.txt
|
||||
* # Date: 2018-02-07, 07:55:18 GMT
|
||||
* # © 2018 Unicode®, Inc.
|
||||
* # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
* # For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
* #
|
||||
* # Emoji Data for UTS #51
|
||||
* # Version: 11.0
|
||||
* #
|
||||
* # For documentation and usage, see http://www.unicode.org/reports/tr51
|
||||
*/
|
||||
|
||||
#ifndef HB_UNICODE_EMOJI_TABLE_HH
|
||||
#define HB_UNICODE_EMOJI_TABLE_HH
|
||||
|
||||
#include "hb-unicode.hh"
|
||||
|
||||
|
||||
static const struct hb_unicode_range_t _hb_unicode_emoji_Extended_Pictographic_table[] =
|
||||
{
|
||||
{0x00A9, 0x00A9},
|
||||
{0x00AE, 0x00AE},
|
||||
{0x203C, 0x203C},
|
||||
{0x2049, 0x2049},
|
||||
{0x2122, 0x2122},
|
||||
{0x2139, 0x2139},
|
||||
{0x2194, 0x2199},
|
||||
{0x21A9, 0x21AA},
|
||||
{0x231A, 0x231B},
|
||||
{0x2328, 0x2328},
|
||||
{0x2388, 0x2388},
|
||||
{0x23CF, 0x23CF},
|
||||
{0x23E9, 0x23F3},
|
||||
{0x23F8, 0x23FA},
|
||||
{0x24C2, 0x24C2},
|
||||
{0x25AA, 0x25AB},
|
||||
{0x25B6, 0x25B6},
|
||||
{0x25C0, 0x25C0},
|
||||
{0x25FB, 0x25FE},
|
||||
{0x2600, 0x2605},
|
||||
{0x2607, 0x2612},
|
||||
{0x2614, 0x2615},
|
||||
{0x2616, 0x2617},
|
||||
{0x2618, 0x2618},
|
||||
{0x2619, 0x2619},
|
||||
{0x261A, 0x266F},
|
||||
{0x2670, 0x2671},
|
||||
{0x2672, 0x267D},
|
||||
{0x267E, 0x267F},
|
||||
{0x2680, 0x2685},
|
||||
{0x2690, 0x2691},
|
||||
{0x2692, 0x269C},
|
||||
{0x269D, 0x269D},
|
||||
{0x269E, 0x269F},
|
||||
{0x26A0, 0x26A1},
|
||||
{0x26A2, 0x26B1},
|
||||
{0x26B2, 0x26B2},
|
||||
{0x26B3, 0x26BC},
|
||||
{0x26BD, 0x26BF},
|
||||
{0x26C0, 0x26C3},
|
||||
{0x26C4, 0x26CD},
|
||||
{0x26CE, 0x26CE},
|
||||
{0x26CF, 0x26E1},
|
||||
{0x26E2, 0x26E2},
|
||||
{0x26E3, 0x26E3},
|
||||
{0x26E4, 0x26E7},
|
||||
{0x26E8, 0x26FF},
|
||||
{0x2700, 0x2700},
|
||||
{0x2701, 0x2704},
|
||||
{0x2705, 0x2705},
|
||||
{0x2708, 0x2709},
|
||||
{0x270A, 0x270B},
|
||||
{0x270C, 0x2712},
|
||||
{0x2714, 0x2714},
|
||||
{0x2716, 0x2716},
|
||||
{0x271D, 0x271D},
|
||||
{0x2721, 0x2721},
|
||||
{0x2728, 0x2728},
|
||||
{0x2733, 0x2734},
|
||||
{0x2744, 0x2744},
|
||||
{0x2747, 0x2747},
|
||||
{0x274C, 0x274C},
|
||||
{0x274E, 0x274E},
|
||||
{0x2753, 0x2755},
|
||||
{0x2757, 0x2757},
|
||||
{0x2763, 0x2767},
|
||||
{0x2795, 0x2797},
|
||||
{0x27A1, 0x27A1},
|
||||
{0x27B0, 0x27B0},
|
||||
{0x27BF, 0x27BF},
|
||||
{0x2934, 0x2935},
|
||||
{0x2B05, 0x2B07},
|
||||
{0x2B1B, 0x2B1C},
|
||||
{0x2B50, 0x2B50},
|
||||
{0x2B55, 0x2B55},
|
||||
{0x3030, 0x3030},
|
||||
{0x303D, 0x303D},
|
||||
{0x3297, 0x3297},
|
||||
{0x3299, 0x3299},
|
||||
{0x1F000, 0x1F02B},
|
||||
{0x1F02C, 0x1F02F},
|
||||
{0x1F030, 0x1F093},
|
||||
{0x1F094, 0x1F09F},
|
||||
{0x1F0A0, 0x1F0AE},
|
||||
{0x1F0AF, 0x1F0B0},
|
||||
{0x1F0B1, 0x1F0BE},
|
||||
{0x1F0BF, 0x1F0BF},
|
||||
{0x1F0C0, 0x1F0C0},
|
||||
{0x1F0C1, 0x1F0CF},
|
||||
{0x1F0D0, 0x1F0D0},
|
||||
{0x1F0D1, 0x1F0DF},
|
||||
{0x1F0E0, 0x1F0F5},
|
||||
{0x1F0F6, 0x1F0FF},
|
||||
{0x1F10D, 0x1F10F},
|
||||
{0x1F12F, 0x1F12F},
|
||||
{0x1F16C, 0x1F16F},
|
||||
{0x1F170, 0x1F171},
|
||||
{0x1F17E, 0x1F17E},
|
||||
{0x1F17F, 0x1F17F},
|
||||
{0x1F18E, 0x1F18E},
|
||||
{0x1F191, 0x1F19A},
|
||||
{0x1F1AD, 0x1F1E5},
|
||||
{0x1F201, 0x1F202},
|
||||
{0x1F203, 0x1F20F},
|
||||
{0x1F21A, 0x1F21A},
|
||||
{0x1F22F, 0x1F22F},
|
||||
{0x1F232, 0x1F23A},
|
||||
{0x1F23C, 0x1F23F},
|
||||
{0x1F249, 0x1F24F},
|
||||
{0x1F250, 0x1F251},
|
||||
{0x1F252, 0x1F25F},
|
||||
{0x1F260, 0x1F265},
|
||||
{0x1F266, 0x1F2FF},
|
||||
{0x1F300, 0x1F320},
|
||||
{0x1F321, 0x1F32C},
|
||||
{0x1F32D, 0x1F32F},
|
||||
{0x1F330, 0x1F335},
|
||||
{0x1F336, 0x1F336},
|
||||
{0x1F337, 0x1F37C},
|
||||
{0x1F37D, 0x1F37D},
|
||||
{0x1F37E, 0x1F37F},
|
||||
{0x1F380, 0x1F393},
|
||||
{0x1F394, 0x1F39F},
|
||||
{0x1F3A0, 0x1F3C4},
|
||||
{0x1F3C5, 0x1F3C5},
|
||||
{0x1F3C6, 0x1F3CA},
|
||||
{0x1F3CB, 0x1F3CE},
|
||||
{0x1F3CF, 0x1F3D3},
|
||||
{0x1F3D4, 0x1F3DF},
|
||||
{0x1F3E0, 0x1F3F0},
|
||||
{0x1F3F1, 0x1F3F7},
|
||||
{0x1F3F8, 0x1F3FA},
|
||||
{0x1F400, 0x1F43E},
|
||||
{0x1F43F, 0x1F43F},
|
||||
{0x1F440, 0x1F440},
|
||||
{0x1F441, 0x1F441},
|
||||
{0x1F442, 0x1F4F7},
|
||||
{0x1F4F8, 0x1F4F8},
|
||||
{0x1F4F9, 0x1F4FC},
|
||||
{0x1F4FD, 0x1F4FE},
|
||||
{0x1F4FF, 0x1F4FF},
|
||||
{0x1F500, 0x1F53D},
|
||||
{0x1F546, 0x1F54A},
|
||||
{0x1F54B, 0x1F54F},
|
||||
{0x1F550, 0x1F567},
|
||||
{0x1F568, 0x1F579},
|
||||
{0x1F57A, 0x1F57A},
|
||||
{0x1F57B, 0x1F5A3},
|
||||
{0x1F5A4, 0x1F5A4},
|
||||
{0x1F5A5, 0x1F5FA},
|
||||
{0x1F5FB, 0x1F5FF},
|
||||
{0x1F600, 0x1F600},
|
||||
{0x1F601, 0x1F610},
|
||||
{0x1F611, 0x1F611},
|
||||
{0x1F612, 0x1F614},
|
||||
{0x1F615, 0x1F615},
|
||||
{0x1F616, 0x1F616},
|
||||
{0x1F617, 0x1F617},
|
||||
{0x1F618, 0x1F618},
|
||||
{0x1F619, 0x1F619},
|
||||
{0x1F61A, 0x1F61A},
|
||||
{0x1F61B, 0x1F61B},
|
||||
{0x1F61C, 0x1F61E},
|
||||
{0x1F61F, 0x1F61F},
|
||||
{0x1F620, 0x1F625},
|
||||
{0x1F626, 0x1F627},
|
||||
{0x1F628, 0x1F62B},
|
||||
{0x1F62C, 0x1F62C},
|
||||
{0x1F62D, 0x1F62D},
|
||||
{0x1F62E, 0x1F62F},
|
||||
{0x1F630, 0x1F633},
|
||||
{0x1F634, 0x1F634},
|
||||
{0x1F635, 0x1F640},
|
||||
{0x1F641, 0x1F642},
|
||||
{0x1F643, 0x1F644},
|
||||
{0x1F645, 0x1F64F},
|
||||
{0x1F680, 0x1F6C5},
|
||||
{0x1F6C6, 0x1F6CF},
|
||||
{0x1F6D0, 0x1F6D0},
|
||||
{0x1F6D1, 0x1F6D2},
|
||||
{0x1F6D3, 0x1F6D4},
|
||||
{0x1F6D5, 0x1F6DF},
|
||||
{0x1F6E0, 0x1F6EC},
|
||||
{0x1F6ED, 0x1F6EF},
|
||||
{0x1F6F0, 0x1F6F3},
|
||||
{0x1F6F4, 0x1F6F6},
|
||||
{0x1F6F7, 0x1F6F8},
|
||||
{0x1F6F9, 0x1F6F9},
|
||||
{0x1F6FA, 0x1F6FF},
|
||||
{0x1F774, 0x1F77F},
|
||||
{0x1F7D5, 0x1F7D8},
|
||||
{0x1F7D9, 0x1F7FF},
|
||||
{0x1F80C, 0x1F80F},
|
||||
{0x1F848, 0x1F84F},
|
||||
{0x1F85A, 0x1F85F},
|
||||
{0x1F888, 0x1F88F},
|
||||
{0x1F8AE, 0x1F8FF},
|
||||
{0x1F90C, 0x1F90F},
|
||||
{0x1F910, 0x1F918},
|
||||
{0x1F919, 0x1F91E},
|
||||
{0x1F91F, 0x1F91F},
|
||||
{0x1F920, 0x1F927},
|
||||
{0x1F928, 0x1F92F},
|
||||
{0x1F930, 0x1F930},
|
||||
{0x1F931, 0x1F932},
|
||||
{0x1F933, 0x1F93A},
|
||||
{0x1F93C, 0x1F93E},
|
||||
{0x1F93F, 0x1F93F},
|
||||
{0x1F940, 0x1F945},
|
||||
{0x1F947, 0x1F94B},
|
||||
{0x1F94C, 0x1F94C},
|
||||
{0x1F94D, 0x1F94F},
|
||||
{0x1F950, 0x1F95E},
|
||||
{0x1F95F, 0x1F96B},
|
||||
{0x1F96C, 0x1F970},
|
||||
{0x1F971, 0x1F972},
|
||||
{0x1F973, 0x1F976},
|
||||
{0x1F977, 0x1F979},
|
||||
{0x1F97A, 0x1F97A},
|
||||
{0x1F97B, 0x1F97B},
|
||||
{0x1F97C, 0x1F97F},
|
||||
{0x1F980, 0x1F984},
|
||||
{0x1F985, 0x1F991},
|
||||
{0x1F992, 0x1F997},
|
||||
{0x1F998, 0x1F9A2},
|
||||
{0x1F9A3, 0x1F9AF},
|
||||
{0x1F9B0, 0x1F9B9},
|
||||
{0x1F9BA, 0x1F9BF},
|
||||
{0x1F9C0, 0x1F9C0},
|
||||
{0x1F9C1, 0x1F9C2},
|
||||
{0x1F9C3, 0x1F9CF},
|
||||
{0x1F9D0, 0x1F9E6},
|
||||
{0x1F9E7, 0x1F9FF},
|
||||
{0x1FA00, 0x1FA5F},
|
||||
{0x1FA60, 0x1FA6D},
|
||||
{0x1FA6E, 0x1FFFD},
|
||||
};
|
||||
|
||||
#endif /* HB_UNICODE_EMOJI_TABLE_HH */
|
||||
|
||||
/* == End of generated table == */
|
@ -564,3 +564,19 @@ _hb_modified_combining_class[256] =
|
||||
241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
|
||||
255, /* HB_UNICODE_COMBINING_CLASS_INVALID */
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Emoji
|
||||
*/
|
||||
|
||||
#include "hb-unicode-emoji-table.hh"
|
||||
|
||||
bool
|
||||
_hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp)
|
||||
{
|
||||
return hb_bsearch_r (&cp, _hb_unicode_emoji_Extended_Pictographic_table,
|
||||
ARRAY_LENGTH (_hb_unicode_emoji_Extended_Pictographic_table),
|
||||
sizeof (hb_unicode_range_t),
|
||||
hb_unicode_range_t::cmp, nullptr);
|
||||
}
|
||||
|
@ -286,7 +286,9 @@ HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
|
||||
DECLARE_NULL_INSTANCE (hb_unicode_funcs_t);
|
||||
|
||||
|
||||
/* Modified combining marks */
|
||||
/*
|
||||
* Modified combining marks
|
||||
*/
|
||||
|
||||
/* Hebrew
|
||||
*
|
||||
@ -384,4 +386,37 @@ DECLARE_NULL_INSTANCE (hb_unicode_funcs_t);
|
||||
(FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \
|
||||
FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))
|
||||
|
||||
|
||||
/*
|
||||
* Ranges, used for bsearch tables.
|
||||
*/
|
||||
|
||||
struct hb_unicode_range_t
|
||||
{
|
||||
static int
|
||||
cmp (const void *_key, const void *_item, void *_arg)
|
||||
{
|
||||
hb_codepoint_t cp = *((hb_codepoint_t *) _key);
|
||||
const hb_unicode_range_t *range = (hb_unicode_range_t *) _item;
|
||||
|
||||
if (cp < range->start)
|
||||
return -1;
|
||||
else if (cp <= range->end)
|
||||
return 0;
|
||||
else
|
||||
return +1;
|
||||
}
|
||||
|
||||
hb_codepoint_t start;
|
||||
hb_codepoint_t end;
|
||||
};
|
||||
|
||||
/*
|
||||
* Emoji.
|
||||
*/
|
||||
|
||||
HB_INTERNAL bool
|
||||
_hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp);
|
||||
|
||||
|
||||
#endif /* HB_UNICODE_HH */
|
||||
|
Loading…
Reference in New Issue
Block a user