Redo the handling of Emoji data

Use the data files from https://github.com/milesj/emojibase.git
as source for our Emoji data. Slightly change our data format by
adding a group to each item, in both the Emoji data and in the
setting for recent-emoji.

Install translated versions of the data as separate resource
bundles in $datadir/gtk-4.0/emoji (e.g. /usr/share/gtk-4.0/emoji), and load them when appropriate.

Currently, we have data for de, en, es, fr, zh, with data taken
from Unicode 13 and CLDR 13.

Fixes: #950 #1511
This commit is contained in:
Matthias Clasen 2020-10-03 22:26:56 -04:00
parent 7ef5921022
commit 452c909b64
17 changed files with 215 additions and 50067 deletions

23
gtk/emoji/README.md Normal file
View File

@ -0,0 +1,23 @@
Emoji data
==========
We use Emoji data from Unicode and the CLDR, stored in a GVariant.
The immediate source for our data is the json files from
https://github.com/milesj/emojibase.git
To convert the data from that repository to a GVariant that GTK
can use, the convert-emoji tool can be used:
convert-emoji $emojibase/packages/data/de/data.raw.json de.data
for example (for German).
To make these usable by GTK, we wrap them in a resource bundle
that has the GVariant as
/org/gtk/libgtk/emoji/de.data
and install the resulting resource bundle at this location:
/usr/share/gtk-4.0/emoji/de.gresource

View File

@ -18,26 +18,40 @@
/* Build with gcc -o convert-emoji convert-emoji.c `pkg-config --cflags --libs json-glib-1.0`
*/
/* The format of the generated data is: a(auss).
/* Reads data from the json files in emojibase, expecting
* language-specific data.raw.json as input
*/
/* The format of the generated data is: a(ausasu).
* Each member of the array has the following fields:
* au - sequence of unicode codepoints. If the
* sequence contains a 0, it marks the point
* where skin tone modifiers should be inserted
* s - name, e.g. "man worker"
* as - keywords, e.g. "man", "worker"
* u - the group that this item belongs to:
* 0: smileys-emotion
* 1: people-body
* 2: component
* 3: animals-nature
* 4: food-drink
* 5: travel-places
* 6: activities
* 7: objects
* 8: symbols
* 9: flags
*/
#include <json-glib/json-glib.h>
#include <string.h>
gboolean
parse_code (GVariantBuilder *b,
const char *code,
GString *name_key)
const char *code)
{
g_auto(GStrv) strv = NULL;
int j;
strv = g_strsplit (code, " ", -1);
strv = g_strsplit (code, "-", -1);
for (j = 0; strv[j]; j++)
{
guint32 u;
@ -52,12 +66,7 @@ parse_code (GVariantBuilder *b,
if (0x1f3fb <= u && u <= 0x1f3ff)
g_variant_builder_add (b, "u", 0);
else
{
g_variant_builder_add (b, "u", u);
if (j > 0)
g_string_append_c (name_key, '-');
g_string_append_printf (name_key, "%x", u);
}
g_variant_builder_add (b, "u", u);
}
return TRUE;
@ -68,10 +77,10 @@ main (int argc, char *argv[])
{
JsonParser *parser;
JsonNode *root;
JsonArray *array;
JsonObject *ro;
JsonArray *array;
JsonNode *node;
const char *name;
const char *unicode;
JsonObjectIter iter;
GError *error = NULL;
guint length, i;
@ -81,40 +90,14 @@ main (int argc, char *argv[])
GHashTable *names;
GString *name_key;
if (argc != 4)
if (argc != 3)
{
g_print ("Usage: emoji-convert INPUT INPUT1 OUTPUT\n");
g_print ("Usage: emoji-convert INPUT OUTPUT\n");
return 1;
}
parser = json_parser_new ();
if (!json_parser_load_from_file (parser, argv[2], &error))
{
g_error ("%s", error->message);
return 1;
}
root = json_parser_get_root (parser);
ro = json_node_get_object (root);
json_object_iter_init (&iter, ro);
names = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, (GDestroyNotify)json_object_unref);
name_key = g_string_new ("");
while (json_object_iter_next (&iter, &name, &node))
{
JsonObject *obj = json_node_get_object (node);
const char *unicode;
unicode = json_object_get_string_member (obj, "unicode");
g_hash_table_insert (names, g_strdup (unicode), json_object_ref (obj));
}
g_object_unref (parser);
parser = json_parser_new ();
if (!json_parser_load_from_file (parser, argv[1], &error))
{
g_error ("%s", error->message);
@ -125,101 +108,59 @@ main (int argc, char *argv[])
array = json_node_get_array (root);
length = json_array_get_length (array);
g_variant_builder_init (&builder, G_VARIANT_TYPE ("a(ausas)"));
i = 0;
while (i < length)
g_variant_builder_init (&builder, G_VARIANT_TYPE ("a(ausasu)"));
for (i = 0; i < length; i++)
{
JsonNode *node = json_array_get_element (array, i);
JsonObject *obj = json_node_get_object (node);
JsonObject *obj = json_array_get_object_element (array, i);
GVariantBuilder b1;
GVariantBuilder b2;
guint group;
const char *name;
char *code;
int j, k;
gboolean skip;
gboolean has_variations;
JsonObject *obj2;
JsonArray *kw;
char **name_tokens;
i++;
if (!json_object_has_member (obj, "group"))
continue;
group = json_object_get_int_member (obj, "group");
name = json_object_get_string_member (obj, "annotation");
if (json_object_has_member (obj, "skins"))
{
JsonArray *a2 = json_object_get_array_member (obj, "skins");
JsonNode *n2 = json_array_get_element (a2, 0);
JsonObject *o2 = json_node_get_object (n2);
code = g_strdup (json_object_get_string_member (o2, "hexcode"));
}
else
{
code = g_strdup (json_object_get_string_member (obj, "hexcode"));
}
g_variant_builder_init (&b1, G_VARIANT_TYPE ("au"));
name = json_object_get_string_member (obj, "name");
code = g_strdup (json_object_get_string_member (obj, "code"));
has_variations = FALSE;
while (i < length)
{
JsonNode *node2 = json_array_get_element (array, i);
JsonObject *obj2 = json_node_get_object (node2);
const char *name2;
const char *code2;
name2 = json_object_get_string_member (obj2, "name");
code2 = json_object_get_string_member (obj2, "code");
if (!strstr (name2, "skin tone") || !g_str_has_prefix (name2, name))
break;
if (!has_variations)
{
has_variations = TRUE;
g_free (code);
code = g_strdup (code2);
}
i++;
}
g_string_set_size (name_key, 0);
if (!parse_code (&b1, code, name_key))
if (!parse_code (&b1, code))
return 1;
g_variant_builder_init (&b2, G_VARIANT_TYPE ("as"));
name_tokens = g_str_tokenize_and_fold (name, "en", NULL);
for (j = 0; j < g_strv_length (name_tokens); j++)
g_variant_builder_add (&b2, "s", name_tokens[j]);
obj2 = g_hash_table_lookup (names, name_key->str);
if (obj2)
if (json_object_has_member (obj, "tags"))
{
kw = json_object_get_array_member (obj2, "keywords");
for (k = 0; k < json_array_get_length (kw); k++)
{
char **folded;
char **ascii;
folded = g_str_tokenize_and_fold (json_array_get_string_element (kw, k), "en", &ascii);
for (j = 0; j < g_strv_length (folded); j++)
{
if (!g_strv_contains ((const char * const *)name_tokens, folded[j]))
g_variant_builder_add (&b2, "s", folded[j]);
}
for (j = 0; j < g_strv_length (ascii); j++)
{
if (!g_strv_contains ((const char * const *)name_tokens, ascii[j]))
g_variant_builder_add (&b2, "s", ascii[j]);
}
g_strfreev (folded);
g_strfreev (ascii);
}
JsonArray *tags = json_object_get_array_member (obj, "tags");
for (int j = 0; j < json_array_get_length (tags); j++)
g_variant_builder_add (&b2, "s", json_array_get_string_element (tags, j));
}
g_strfreev (name_tokens);
g_variant_builder_add (&builder, "(ausas)", &b1, name, &b2);
g_variant_builder_add (&builder, "(ausasu)", &b1, name, &b2, group);
}
v = g_variant_builder_end (&builder);
if (g_str_has_suffix (argv[3], ".json"))
if (g_str_has_suffix (argv[2], ".json"))
{
JsonNode *node;
char *out;
node = json_gvariant_serialize (v);
out = json_to_string (node, TRUE);
if (!g_file_set_contents (argv[3], out, -1, &error))
if (!g_file_set_contents (argv[2], out, -1, &error))
{
g_error ("%s", error->message);
return 1;
@ -230,7 +171,7 @@ main (int argc, char *argv[])
GBytes *bytes;
bytes = g_variant_get_data_as_bytes (v);
if (!g_file_set_contents (argv[3], g_bytes_get_data (bytes, NULL), g_bytes_get_size (bytes), &error))
if (!g_file_set_contents (argv[2], g_bytes_get_data (bytes, NULL), g_bytes_get_size (bytes), &error))
{
g_error ("%s", error->message);
return 1;

BIN
gtk/emoji/de.data Normal file

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

BIN
gtk/emoji/en.data Normal file

Binary file not shown.

BIN
gtk/emoji/es.data Normal file

Binary file not shown.

BIN
gtk/emoji/fr.data Normal file

Binary file not shown.

View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Template for a per-language Emoji resource bundle.
  The @lang@ placeholder is substituted at configure time, so that each
  generated bundle exposes its data as /org/gtk/libgtk/emoji/<lang>.data
-->
<gresources>
<gresource prefix="/org/gtk/libgtk/emoji/">
<file>@lang@.data</file>
</gresource>
</gresources>

BIN
gtk/emoji/zh.data Normal file

Binary file not shown.

View File

@ -86,7 +86,7 @@ for f in get_files('inspector', '.ui'):
xml += '''
<file>inspector/logo.png</file>
<file>inspector/inspector.css</file>
<file>emoji/emoji.data</file>
<file>emoji/en.data</file>
</gresource>
</gresources>'''

View File

@ -38,6 +38,8 @@
#include "gtknative.h"
#include "gtkwidgetprivate.h"
#include "gdk/gdkprofilerprivate.h"
#include "gtkmain.h"
#include "gtkprivate.h"
/**
* SECTION:gtkemojichooser
@ -184,7 +186,7 @@ typedef struct {
GtkWidget *box;
GtkWidget *heading;
GtkWidget *button;
const char *first;
int group;
gunichar label;
gboolean empty;
} EmojiSection;
@ -337,8 +339,8 @@ add_recent_item (GtkEmojiChooser *chooser,
g_variant_ref (item);
g_variant_builder_init (&builder, G_VARIANT_TYPE ("a((ausas)u)"));
g_variant_builder_add (&builder, "(@(ausas)u)", item, modifier);
g_variant_builder_init (&builder, G_VARIANT_TYPE ("a((ausasu)u)"));
g_variant_builder_add (&builder, "(@(ausasu)u)", item, modifier);
children = NULL;
for (child = gtk_widget_get_last_child (chooser->recent.box);
@ -363,7 +365,7 @@ add_recent_item (GtkEmojiChooser *chooser,
continue;
}
g_variant_builder_add (&builder, "(@(ausas)u)", item2, modifier2);
g_variant_builder_add (&builder, "(@(ausasu)u)", item2, modifier2);
}
g_list_free (children);
@ -591,6 +593,79 @@ add_emoji (GtkWidget *box,
gtk_flow_box_insert (GTK_FLOW_BOX (box), child, prepend ? 0 : -1);
}
/*
 * get_emoji_data:
 *
 * Looks up the Emoji data for the default language.
 *
 * The lookup proceeds in three steps:
 *  1. Try the already registered resources at
 *     /org/gtk/libgtk/emoji/<lang>.data.
 *  2. If that resource is not found, try to map the resource bundle
 *     /usr/share/gtk-4.0/emoji/<lang>.gresource, register it, and
 *     repeat the lookup.
 *  3. Otherwise fall back to /org/gtk/libgtk/emoji/en.data.
 *
 * <lang> is the default language string, cut off at the first '-'
 * (and limited to 9 characters) if it contains one.
 *
 * Returns: the result of the resource lookup; callers release it
 *   with g_bytes_unref()
 */
GBytes *
get_emoji_data (void)
{
  GBytes *bytes;
  const char *lang;
  char q[10];
  char *path;
  GError *error = NULL;

  lang = pango_language_to_string (gtk_get_default_language ());
  if (strchr (lang, '-'))
    {
      int i;

      /* Only keep the part before the first '-', truncated to fit q */
      for (i = 0; lang[i] != '-' && i < 9; i++)
        q[i] = lang[i];
      q[i] = '\0';
      lang = q;
    }

  path = g_strconcat ("/org/gtk/libgtk/emoji/", lang, ".data", NULL);
  bytes = g_resources_lookup_data (path, 0, &error);
  if (bytes)
    {
      g_debug ("Found emoji data for %s in resource %s", lang, path);
      g_free (path);
      return bytes;
    }

  if (g_error_matches (error, G_RESOURCE_ERROR, G_RESOURCE_ERROR_NOT_FOUND))
    {
      char *filename;
      GMappedFile *file;

      g_clear_error (&error);

      /* NOTE(review): the install location is hard-coded here, while the
       * build installs into the configured data directory - confirm that
       * these agree for non-/usr prefixes.
       */
      filename = g_strconcat ("/usr/share/gtk-4.0/emoji/", lang, ".gresource", NULL);
      file = g_mapped_file_new (filename, FALSE, NULL);

      if (file)
        {
          GBytes *data;
          GResource *resource;

          data = g_mapped_file_get_bytes (file);
          g_mapped_file_unref (file);

          resource = g_resource_new_from_data (data, NULL);
          g_bytes_unref (data);

          /* The file may exist but not be a valid resource bundle; in that
           * case there is nothing to register and we use the fallback below.
           */
          if (resource)
            {
              g_debug ("Registering resource for Emoji data for %s from file %s", lang, filename);
              g_resources_register (resource);
              g_resource_unref (resource);

              bytes = g_resources_lookup_data (path, 0, NULL);
              if (bytes)
                {
                  g_debug ("Found emoji data for %s in resource %s", lang, path);
                  g_free (path);
                  g_free (filename);
                  return bytes;
                }
            }
          else
            {
              g_debug ("Failed to load Emoji resource bundle from file %s", filename);
            }
        }

      g_free (filename);
    }

  g_clear_error (&error);
  g_free (path);

  return g_resources_lookup_data ("/org/gtk/libgtk/emoji/en.data", 0, NULL);
}
static gboolean
populate_emoji_chooser (gpointer data)
{
@ -602,8 +677,11 @@ populate_emoji_chooser (gpointer data)
if (!chooser->data)
{
GBytes *bytes = g_resources_lookup_data ("/org/gtk/libgtk/emoji/emoji.data", 0, NULL);
chooser->data = g_variant_ref_sink (g_variant_new_from_bytes (G_VARIANT_TYPE ("a(ausas)"), bytes, TRUE));
GBytes *bytes;
bytes = get_emoji_data ();
chooser->data = g_variant_ref_sink (g_variant_new_from_bytes (G_VARIANT_TYPE ("a(ausasu)"), bytes, TRUE));
g_bytes_unref (bytes);
}
@ -615,25 +693,27 @@ populate_emoji_chooser (gpointer data)
while ((item = g_variant_iter_next_value (chooser->iter)))
{
const char *name;
guint group;
g_variant_get_child (item, 1, "&s", &name);
g_variant_get_child (item, 3, "u", &group);
if (strcmp (name, chooser->body.first) == 0)
if (group == chooser->people.group)
chooser->box = chooser->people.box;
else if (group == chooser->body.group)
chooser->box = chooser->body.box;
else if (strcmp (name, chooser->nature.first) == 0)
else if (group == chooser->nature.group)
chooser->box = chooser->nature.box;
else if (strcmp (name, chooser->food.first) == 0)
else if (group == chooser->food.group)
chooser->box = chooser->food.box;
else if (strcmp (name, chooser->travel.first) == 0)
else if (group == chooser->travel.group)
chooser->box = chooser->travel.box;
else if (strcmp (name, chooser->activities.first) == 0)
else if (group == chooser->activities.group)
chooser->box = chooser->activities.box;
else if (strcmp (name, chooser->objects.first) == 0)
else if (group == chooser->objects.group)
chooser->box = chooser->objects.box;
else if (strcmp (name, chooser->symbols.first) == 0)
else if (group == chooser->symbols.group)
chooser->box = chooser->symbols.box;
else if (strcmp (name, chooser->flags.first) == 0)
else if (group == chooser->flags.group)
chooser->box = chooser->flags.box;
add_emoji (chooser->box, FALSE, item, 0, chooser);
@ -848,11 +928,11 @@ stop_search (GtkEntry *entry,
static void
setup_section (GtkEmojiChooser *chooser,
EmojiSection *section,
const char *first,
const char *icon)
EmojiSection *section,
int group,
const char *icon)
{
section->first = first;
section->group = group;
gtk_button_set_icon_name (GTK_BUTTON (section->button), icon);
@ -898,16 +978,16 @@ gtk_emoji_chooser_init (GtkEmojiChooser *chooser)
adj = gtk_scrolled_window_get_vadjustment (GTK_SCROLLED_WINDOW (chooser->scrolled_window));
g_signal_connect (adj, "value-changed", G_CALLBACK (adj_value_changed), chooser);
setup_section (chooser, &chooser->recent, NULL, "emoji-recent-symbolic");
setup_section (chooser, &chooser->people, "grinning face", "emoji-people-symbolic");
setup_section (chooser, &chooser->body, "selfie", "emoji-body-symbolic");
setup_section (chooser, &chooser->nature, "monkey face", "emoji-nature-symbolic");
setup_section (chooser, &chooser->food, "grapes", "emoji-food-symbolic");
setup_section (chooser, &chooser->travel, "globe showing Europe-Africa", "emoji-travel-symbolic");
setup_section (chooser, &chooser->activities, "jack-o-lantern", "emoji-activities-symbolic");
setup_section (chooser, &chooser->objects, "muted speaker", "emoji-objects-symbolic");
setup_section (chooser, &chooser->symbols, "ATM sign", "emoji-symbols-symbolic");
setup_section (chooser, &chooser->flags, "chequered flag", "emoji-flags-symbolic");
setup_section (chooser, &chooser->recent, -1, "emoji-recent-symbolic");
setup_section (chooser, &chooser->people, 0, "emoji-people-symbolic");
setup_section (chooser, &chooser->body, 1, "emoji-body-symbolic");
setup_section (chooser, &chooser->nature, 3, "emoji-nature-symbolic");
setup_section (chooser, &chooser->food, 4, "emoji-food-symbolic");
setup_section (chooser, &chooser->travel, 5, "emoji-travel-symbolic");
setup_section (chooser, &chooser->activities, 6, "emoji-activities-symbolic");
setup_section (chooser, &chooser->objects, 7, "emoji-objects-symbolic");
setup_section (chooser, &chooser->symbols, 8, "emoji-symbols-symbolic");
setup_section (chooser, &chooser->flags, 9, "emoji-flags-symbolic");
populate_recent_section (chooser);

View File

@ -623,8 +623,8 @@ gtk_emoji_completion_init (GtkEmojiCompletion *completion)
gtk_widget_init_template (GTK_WIDGET (completion));
bytes = g_resources_lookup_data ("/org/gtk/libgtk/emoji/emoji.data", 0, NULL);
completion->data = g_variant_ref_sink (g_variant_new_from_bytes (G_VARIANT_TYPE ("a(ausas)"), bytes, TRUE));
bytes = get_emoji_data ();
completion->data = g_variant_ref_sink (g_variant_new_from_bytes (G_VARIANT_TYPE ("a(ausasu)"), bytes, TRUE));
g_bytes_unref (bytes);

View File

@ -126,6 +126,8 @@ void gtk_set_display_debug_flags (GdkDisplay *display,
guint gtk_get_display_debug_flags (GdkDisplay *display);
gboolean gtk_get_any_display_debug_flag_set (void);
GBytes *get_emoji_data (void);
#ifdef G_ENABLE_DEBUG
#define GTK_DISPLAY_DEBUG_CHECK(display,type) (gtk_get_any_display_debug_flag_set () && G_UNLIKELY (gtk_get_display_debug_flags (display) & GTK_DEBUG_##type))

View File

@ -861,6 +861,23 @@ gtkresources = gnome.compile_resources('gtkresources',
extra_args: '--manual-register',
)
# Build and install one standalone GResource bundle per translated
# Emoji data set. 'en' is not in this list; emoji/en.data is listed
# in the generated main GTK resource XML instead.
foreach lang : [ 'de', 'fr', 'es', 'zh' ]
conf = configuration_data()
conf.set('lang', lang)
# Substitute @lang@ in the template to get a per-language resource description
resxml = configure_file(input: 'emoji/gresource.xml.in',
output: lang + '.gresource.xml',
configuration: conf
)
# gresource_bundle: true produces a .gresource file rather than C sources
gnome.compile_resources(lang,
resxml,
source_dir: 'emoji',
gresource_bundle: true,
install: true,
install_dir: join_paths(gtk_datadir, 'gtk-4.0', 'emoji')
)
endforeach
gtk_x11_sources = files([
'gtkapplication-x11.c',
'gtkmountoperation-x11.c',

View File

@ -2,7 +2,7 @@
<schemalist>
<schema id='org.gtk.gtk4.Settings.EmojiChooser' path='/org/gtk/gtk4/settings/emoji-chooser/'>
<key name='recent-emoji' type='a((ausas)u)'>
<key name='recent-emoji' type='a((ausasu)u)'>
<default>[]</default>
<summary>Recently used Emoji</summary>
<description>