Do case-folding and tokenization when creating emoji.data

We can avoid doing the extra work of case-folding and
tokenization whenever we filter in the Emoji chooser.
This commit is contained in:
Matthias Clasen 2020-04-15 00:28:53 -04:00 committed by Marco Trevisan (Treviño)
parent 78cc9113c1
commit 32e972e95b
3 changed files with 61 additions and 6 deletions

View File

@ -143,6 +143,7 @@ main (int argc, char *argv[])
gboolean has_variations;
JsonObject *obj2;
JsonArray *kw;
char **name_tokens;
i++;
@ -179,17 +180,40 @@ main (int argc, char *argv[])
return 1;
g_variant_builder_init (&b2, G_VARIANT_TYPE ("as"));
name_tokens = g_str_tokenize_and_fold (name, "en", NULL);
for (j = 0; j < g_strv_length (name_tokens); j++)
g_variant_builder_add (&b2, "s", name_tokens[j]);
obj2 = g_hash_table_lookup (names, name_key->str);
if (obj2)
{
shortname = json_object_get_string_member (obj2, "shortname");
kw = json_object_get_array_member (obj2, "keywords");
for (k = 0; k < json_array_get_length (kw); k++)
g_variant_builder_add (&b2, "s", json_array_get_string_element (kw, k));
{
char **folded;
char **ascii;
folded = g_str_tokenize_and_fold (json_array_get_string_element (kw, k), "en", &ascii);
for (j = 0; j < g_strv_length (folded); j++)
{
if (!g_strv_contains ((const char * const *)name_tokens, folded[j]))
g_variant_builder_add (&b2, "s", folded[j]);
}
for (j = 0; j < g_strv_length (ascii); j++)
{
if (!g_strv_contains ((const char * const *)name_tokens, ascii[j]))
g_variant_builder_add (&b2, "s", ascii[j]);
}
g_strfreev (folded);
g_strfreev (ascii);
}
}
else
shortname = "";
g_strfreev (name_tokens);
g_variant_builder_add (&builder, "(aussas)", &b1, name, shortname, &b2);
}

Binary file not shown.

View File

@ -577,6 +577,31 @@ adj_value_changed (GtkAdjustment *adj,
}
}
static gboolean
match_tokens (const char **term_tokens,
const char **hit_tokens)
{
int i, j;
gboolean matched;
matched = TRUE;
for (i = 0; term_tokens[i]; i++)
{
for (j = 0; hit_tokens[j]; j++)
if (g_str_has_prefix (hit_tokens[j], term_tokens[i]))
goto one_matched;
matched = FALSE;
break;
one_matched:
continue;
}
return matched;
}
static gboolean
filter_func (GtkFlowBoxChild *child,
gpointer data)
@ -587,8 +612,9 @@ filter_func (GtkFlowBoxChild *child,
const char *text;
const char *name;
const char **keywords;
char **term_tokens;
char **name_tokens;
gboolean res;
int i;
res = TRUE;
@ -602,12 +628,17 @@ filter_func (GtkFlowBoxChild *child,
if (!emoji_data)
goto out;
term_tokens = g_str_tokenize_and_fold (text, "en", NULL);
g_variant_get_child (emoji_data, 1, "&s", &name);
res = g_str_match_string (text, name, TRUE);
name_tokens = g_str_tokenize_and_fold (name, "en", NULL);
g_variant_get_child (emoji_data, 3, "^a&s", &keywords);
for (i = 0; !res && keywords[i]; i++)
res = g_str_match_string (text, keywords[i], TRUE);
res = match_tokens ((const char **)term_tokens, (const char **)name_tokens) ||
match_tokens ((const char **)term_tokens, keywords);
g_strfreev (term_tokens);
g_strfreev (name_tokens);
out:
if (res)