diff --git a/gtk/compose/chars b/gtk/compose/chars index 9f9b15314f..f3897230ea 100644 Binary files a/gtk/compose/chars and b/gtk/compose/chars differ diff --git a/gtk/compose/gtkcomposedata.h b/gtk/compose/gtkcomposedata.h index 057347a36b..442696218c 100644 --- a/gtk/compose/gtkcomposedata.h +++ b/gtk/compose/gtkcomposedata.h @@ -3,7 +3,7 @@ #define MAX_SEQ_LEN 5 #define N_INDEX_SIZE 30 -#define DATA_SIZE 16521 -#define N_CHARS 1572 +#define DATA_SIZE 16447 +#define N_CHARS 1241 #endif diff --git a/gtk/compose/sequences b/gtk/compose/sequences index 1db7a6cddb..39984d4125 100644 Binary files a/gtk/compose/sequences and b/gtk/compose/sequences differ diff --git a/gtk/gtkcomposetable.c b/gtk/gtkcomposetable.c index ef8d01266a..464bf1876a 100644 --- a/gtk/gtkcomposetable.c +++ b/gtk/gtkcomposetable.c @@ -1462,77 +1462,6 @@ gtk_compose_table_foreach (const GtkComposeTable *table, #define IS_DEAD_KEY(k) \ ((k) >= GDK_KEY_dead_grave && (k) <= GDK_KEY_dead_greek) -/* This function receives a sequence of Unicode characters and tries to - * normalize it (NFC). We check for the case where the resulting string - * has length 1 (single character). - * NFC normalisation normally rearranges diacritic marks, unless these - * belong to the same Canonical Combining Class. - * If they belong to the same canonical combining class, we produce all - * permutations of the diacritic marks, then attempt to normalize. - */ -static gboolean -check_normalize_nfc (gunichar *combination_buffer, - int n_compose) -{ - gunichar *combination_buffer_temp; - char *combination_utf8_temp = NULL; - char *nfc_temp = NULL; - int n_combinations; - gunichar temp_swap; - int i; - - combination_buffer_temp = g_alloca (n_compose * sizeof (gunichar)); - - n_combinations = 1; - - for (i = 1; i < n_compose; i++) - n_combinations *= i; - - /* Xorg reuses dead_tilde for the perispomeni diacritic mark. - * We check if base character belongs to Greek Unicode block, - * and if so, we replace tilde with perispomeni. - */ - if (combination_buffer[0] >= 0x390 && combination_buffer[0] <= 0x3FF) - { - for (i = 1; i < n_compose; i++ ) - if (combination_buffer[i] == 0x303) - combination_buffer[i] = 0x342; - } - - memcpy (combination_buffer_temp, combination_buffer, n_compose * sizeof (gunichar) ); - - for (i = 0; i < n_combinations; i++) - { - g_unicode_canonical_ordering (combination_buffer_temp, n_compose); - combination_utf8_temp = g_ucs4_to_utf8 (combination_buffer_temp, n_compose, NULL, NULL, NULL); - nfc_temp = g_utf8_normalize (combination_utf8_temp, -1, G_NORMALIZE_NFC); - - if (g_utf8_strlen (nfc_temp, -1) == 1) - { - memcpy (combination_buffer, combination_buffer_temp, n_compose * sizeof (gunichar) ); - - g_free (combination_utf8_temp); - g_free (nfc_temp); - - return TRUE; - } - - g_free (combination_utf8_temp); - g_free (nfc_temp); - - if (n_compose > 2) - { - temp_swap = combination_buffer_temp[i % (n_compose - 1) + 1]; - combination_buffer_temp[i % (n_compose - 1) + 1] = combination_buffer_temp[(i+1) % (n_compose - 1) + 1]; - combination_buffer_temp[(i+1) % (n_compose - 1) + 1] = temp_swap; - } - else - break; - } - - return FALSE; -} - gboolean gtk_check_algorithmically (const guint16 *compose_buffer, int n_compose, @@ -1540,40 +1469,49 @@ gtk_check_algorithmically (const guint16 *compose_buffer, { int i; - gunichar *combination_buffer; - char *combination_utf8, *nfc; - - combination_buffer = alloca (sizeof (gunichar) * (n_compose + 1)); g_string_set_size (output, 0); for (i = 0; i < n_compose && IS_DEAD_KEY (compose_buffer[i]); i++) ; - /* Allow at most 2 dead keys */ - if (i > 2) - return FALSE; - - /* Can't combine if there's no base character */ + /* Can't combine if there's no base character: incomplete sequence */ if (i == n_compose) return TRUE; if (i > 0 && i == n_compose - 1) { - combination_buffer[0] = gdk_keyval_to_unicode (compose_buffer[i]); - combination_buffer[n_compose] = 0; + GString *input; + char *nfc; + gunichar ch; + + ch = gdk_keyval_to_unicode (compose_buffer[i]); + + /* We don't allow combining with non-letters */ + if (!g_unichar_isalpha (ch)) + return FALSE; + + input = g_string_sized_new (4 * n_compose); + + g_string_append_unichar (input, ch); + i--; while (i >= 0) { switch (compose_buffer[i]) { #define CASE(keysym, unicode) \ - case GDK_KEY_dead_##keysym: combination_buffer[i+1] = unicode; break + case GDK_KEY_dead_##keysym: g_string_append_unichar (input, unicode); break CASE (grave, 0x0300); CASE (acute, 0x0301); CASE (circumflex, 0x0302); - CASE (tilde, 0x0303); /* Also used with perispomeni, 0x342. */ + case GDK_KEY_dead_tilde: + if (g_unichar_get_script (ch) == G_UNICODE_SCRIPT_GREEK) + g_string_append_unichar (input, 0x342); /* combining perispomeni */ + else + g_string_append_unichar (input, 0x303); /* combining tilde */ + break; CASE (macron, 0x0304); CASE (breve, 0x0306); CASE (abovedot, 0x0307); @@ -1591,7 +1529,7 @@ gtk_check_algorithmically (const guint16 *compose_buffer, CASE (horn, 0x031B); /* Legacy use for psili, 0x313 (or 0x343). */ CASE (stroke, 0x335); CASE (abovecomma, 0x0313); /* Equivalent to psili */ - CASE (abovereversedcomma, 0x0314); /* Equivalent to dasia */ + CASE (abovereversedcomma, 0x0314); /* Equivalent to dasia */ CASE (doublegrave, 0x30F); CASE (belowring, 0x325); CASE (belowmacron, 0x331); @@ -1619,26 +1557,20 @@ gtk_check_algorithmically (const guint16 *compose_buffer, CASE (capital_schwa, 0x1DEA); #undef CASE default: - combination_buffer[i+1] = gdk_keyval_to_unicode (compose_buffer[i]); + g_string_append_unichar (input, gdk_keyval_to_unicode (compose_buffer[i])); } i--; } - /* If the buffer normalizes to a single character, then modify the order - * of combination_buffer accordingly, if necessary, and return TRUE. - */ - if (check_normalize_nfc (combination_buffer, n_compose)) - { - combination_utf8 = g_ucs4_to_utf8 (combination_buffer, -1, NULL, NULL, NULL); - nfc = g_utf8_normalize (combination_utf8, -1, G_NORMALIZE_NFC); + nfc = g_utf8_normalize (input->str, input->len, G_NORMALIZE_NFC); - g_string_assign (output, nfc); + g_string_assign (output, nfc); - g_free (combination_utf8); - g_free (nfc); + g_free (nfc); - return TRUE; - } + g_string_free (input, TRUE); + + return TRUE; } return FALSE; diff --git a/testsuite/gtk/compose/system.expected b/testsuite/gtk/compose/system.expected index aac5ac82ea..c807827bbf 100644 --- a/testsuite/gtk/compose/system.expected +++ b/testsuite/gtk/compose/system.expected @@ -1,14 +1,12 @@ -# n_sequences: 4909 +# n_sequences: 4874 # max_seq_len: 5 # n_index_size: 30 -# data_size: 16521 -# n_chars: 1572 +# data_size: 16447 +# n_chars: 1241 : "ΐ" # U390 : "ΰ" # U3b0 : "`" # U60 - : "M̀" : "Ǜ" # U1db - : "m̀" : "ǜ" # U1dc : "̀" # U300 : "Ɔ̀" @@ -23,16 +21,6 @@ : "ɩ̀" : "ʊ̀" : "ʋ̀" - : "Ŋ̀" - : "ŋ̀" - : "а̀" - : "о̀" - : "р̀" - : "у̀" - : "А̀" - : "О̀" - : "Р̀" - : "У̀" : "ἂ" # U1f02 : "ἃ" # U1f03 : "Ἂ" # U1f0a @@ -131,9 +119,7 @@ : "ḕ" # U1e15 : "ṑ" # U1e51 : "'" # U27 - : "J́" : "Ǘ" # U1d7 - : "j́" : "ǘ" # U1d8 : "́" # U301 : "Ɔ́" @@ -148,28 +134,7 @@ : "ɩ́" : "ʊ́" : "ʋ́" - : "Ŋ́" - : "ŋ́" - : "ю́" - : "а́" - : "е́" - : "и́" - : "о́" - : "я́" - : "р́" - : "у́" - : "ы́" - : "э́" : "Ю́́" - : "А́" - : "Е́" - : "И́" - : "О́" - : "Я́" - : "Р́" - : "У́" - : "Ы́" - : "Э́" : "ἄ" # U1f04 : "ἅ" # U1f05 : "Ἄ" # U1f0c @@ -210,6 +175,8 @@ : "ǝ̃́" : "ɔ̃́" : "ɛ̃́" + : "Ṥ" # U1e64 + : "ṥ" # U1e65 : "΅" # U385 : "Ǿ" # U1fe : "ǿ" # U1ff @@ -327,18 +294,6 @@ : "ɩ̂" : "ʊ̂" : "ʋ̂" - : "а̂" - : "е̂" - : "и̂" - : "о̂" - : "р̂" - : "у̂" - : "А̂" - : "Е̂" - : "И̂" - : "О̂" - : "Р̂" - : "У̂" : "Ậ" # U1eac : "ậ" # U1ead : "Ệ" # U1ec6 @@ -374,6 +329,30 @@ : "⁸" # U2078 : "⁹" # U2079 : "⁼" # U207c + : "Ầ" # U1ea6 + : "Ề" # U1ec0 + : "Ồ" # U1ed2 + : "ầ" # U1ea7 + : "ề" # U1ec1 + : "ồ" # U1ed3 + : "Ấ" # U1ea4 + : "Ế" # U1ebe + : "Ố" # U1ed0 + : "ấ" # U1ea5 + : "ế" # U1ebf + : "ố" # U1ed1 + : "Ẫ" # U1eaa + : "Ễ" # U1ec4 + : "Ỗ" # U1ed6 + : "ẫ" # U1eab + : "ễ" # U1ec5 + : "ỗ" # U1ed7 + : "Ẩ" # U1ea8 + : "Ể" # U1ec2 + : "Ổ" # U1ed4 + : "ẩ" # U1ea9 + : "ể" # U1ec3 + : "ổ" # U1ed5 : "Ậ" # U1eac : "Ệ" # U1ec6 : "Ộ" # U1ed8 @@ -463,6 +442,14 @@ : "Ὦ" # U1f6e : "Ὧ" # U1f6f : "~" # U7e + : "Ṍ" # U1e4c + : "Ṹ" # U1e78 + : "ṍ" # U1e4d + : "ṹ" # U1e79 + : "Ȭ" # U22c + : "ȭ" # U22d + : "Ṏ" # U1e4e + : "ṏ" # U1e4f : "ῗ" # U1fd7 : "ῧ" # U1fe7 : "Ἇ" # U1f0f @@ -528,21 +515,19 @@ : "ɩ̄" : "ʊ̄" : "ʋ̄" - : "а̄" - : "е̄" - : "о̄" - : "р̄" - : "А̄" - : "Е̄" - : "О̄" - : "Р̄" : "Ḹ" # U1e38 : "ḹ" # U1e39 : "Ṝ" # U1e5c : "ṝ" # U1e5d : "¯" # Uaf - : "Ǖ" # U1d5 - : "ǖ" # U1d6 + : "Ḕ" # U1e14 + : "Ṑ" # U1e50 + : "ḕ" # U1e15 + : "ṑ" # U1e51 + : "Ḗ" # U1e16 + : "Ṓ" # U1e52 + : "ḗ" # U1e17 + : "ṓ" # U1e53 : "Ᾱ" # U1fb9 : "Ῑ" # U1fd9 : "Ῡ" # U1fe9 @@ -580,6 +565,14 @@ : "Ặ" # U1eb6 : "ặ" # U1eb7 : "˘" # U2d8 + : "Ằ" # U1eb0 + : "ằ" # U1eb1 + : "Ắ" # U1eae + : "ắ" # U1eaf + : "Ẵ" # U1eb4 + : "ẵ" # U1eb5 + : "Ẳ" # U1eb2 + : "ẳ" # U1eb3 : "Ặ" # U1eb6 : "ặ" # U1eb7 : "Ḝ" # U1e1c @@ -600,6 +593,10 @@ : "Ṩ" # U1e68 : "ṩ" # U1e69 : "˙" # U2d9 + : "Ǡ" # U1e0 + : "Ȱ" # U230 + : "ǡ" # U1e1 + : "ȱ" # U231 : "ɟ" # U25f : "Ṩ" # U1e68 : "ṩ" # U1e69 @@ -631,9 +628,21 @@ : "Ӫ" # U4ea : "ӫ" # U4eb : "¨" # Ua8 + : "Ǜ" # U1db + : "ǜ" # U1dc : "΅" # U385 - : "Ṻ" # U1e7a - : "ṻ" # U1e7b + : "Ḯ" # U1e2e + : "Ǘ" # U1d7 + : "ḯ" # U1e2f + : "ǘ" # U1d8 + : "ΐ" # U390 + : "ΰ" # U3b0 + : "Ǟ" # U1de + : "Ȫ" # U22a + : "ǟ" # U1df + : "ȫ" # U22b + : "Ǚ" # U1d9 + : "ǚ" # U1da : "⩷" # U2a77 : "Ṻ" # U1e7a : "ṻ" # U1e7b @@ -646,6 +655,8 @@ : "Ǻ" # U1fa : "ǻ" # U1fb : "°" # Ub0 + : "Ǻ" # U1fa + : "ǻ" # U1fb : "˝" # U2dd : "̋" # U30b : "˝" # U2dd @@ -682,6 +693,8 @@ : "ʊ̌" : "ʋ̌" : "ˇ" # U2c7 + : "Ṧ" # U1e66 + : "ṧ" # U1e67 : "Ǚ" # U1d9 : "ǚ" # U1da : "¸" # Ub8 @@ -787,42 +800,6 @@ : "ᾧ" # U1fa7 : "ᾮ" # U1fae : "ᾯ" # U1faf - : "ᾊ" # U1f8a - : "ᾚ" # U1f9a - : "ᾪ" # U1faa - : "ᾂ" # U1f82 - : "ᾒ" # U1f92 - : "ᾢ" # U1fa2 - : "ᾋ" # U1f8b - : "ᾛ" # U1f9b - : "ᾫ" # U1fab - : "ᾃ" # U1f83 - : "ᾓ" # U1f93 - : "ᾣ" # U1fa3 - : "ᾌ" # U1f8c - : "ᾜ" # U1f9c - : "ᾬ" # U1fac - : "ᾄ" # U1f84 - : "ᾔ" # U1f94 - : "ᾤ" # U1fa4 - : "ᾍ" # U1f8d - : "ᾝ" # U1f9d - : "ᾭ" # U1fad - : "ᾅ" # U1f85 - : "ᾕ" # U1f95 - : "ᾥ" # U1fa5 - : "ᾎ" # U1f8e - : "ᾞ" # U1f9e - : "ᾮ" # U1fae - : "ᾆ" # U1f86 - : "ᾖ" # U1f96 - : "ᾦ" # U1fa6 - : "ᾏ" # U1f8f - : "ᾟ" # U1f9f - : "ᾯ" # U1faf - : "ᾇ" # U1f87 - : "ᾗ" # U1f97 - : "ᾧ" # U1fa7 : "ᾴ" # U1fb4 : "ῄ" # U1fc4 : "ῴ" # U1ff4 @@ -1175,18 +1152,6 @@ : "ϼ" # U3fc : "Ѷ" # U476 : "ѷ" # U477 - : "а̏" - : "е̏" - : "и̏" - : "о̏" - : "р̏" - : "у̏" - : "А̏" - : "Е̏" - : "И̏" - : "О̏" - : "Р̏" - : "У̏" : "⫰" # U2af0 : "⨦" # U2a26 : "⩷" # U2a77 diff --git a/testsuite/gtk/composetable.c b/testsuite/gtk/composetable.c index 5adb9caca4..dec41af467 100644 --- a/testsuite/gtk/composetable.c +++ b/testsuite/gtk/composetable.c @@ -341,7 +341,7 @@ match_algorithmic (void) ret = gtk_check_algorithmically (buffer, 3, output); g_assert_true (ret); - g_assert_cmpstr (output->str, ==, "ἇ"); + g_assert_cmpstr (output->str, ==, "ᾶ\xcc\x94"); buffer[0] = GDK_KEY_dead_perispomeni; buffer[1] = GDK_KEY_dead_dasia; @@ -379,7 +379,8 @@ match_algorithmic (void) buffer[2] = GDK_KEY_dead_grave; ret = gtk_check_algorithmically (buffer, 3, output); - g_assert_false (ret); + g_assert_true (ret); + g_assert_cmpstr (output->str, ==, ""); buffer[0] = GDK_KEY_dead_diaeresis; buffer[1] = GDK_KEY_a;