Adjust normalizer for out-of-order marks

We are going to implement Unicode Arabic Mark Ordering Algorithm:

  http://www.unicode.org/reports/tr53/tr53-1.pdf

which will reorder marks out of their sorted ccc order.  Adjust
normalizer to stop combining as soon as dangerous ordering is
detected.
This commit is contained in:
Behdad Esfahbod 2017-10-04 13:20:33 +02:00
parent a252ad61f0
commit 7f9e7f8689

View File

@ -369,46 +369,58 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan,
buffer->clear_output ();
count = buffer->len;
unsigned int starter = 0;
bool combine = true;
buffer->next_glyph ();
while (buffer->idx < count && !buffer->in_error)
{
hb_codepoint_t composed, glyph;
if (/* We don't try to compose a non-mark character with it's preceding starter.
if (combine &&
/* We don't try to compose a non-mark character with it's preceding starter.
* This is both an optimization to avoid trying to compose every two neighboring
* glyphs in most scripts AND a desired feature for Hangul. Apparently Hangul
* fonts are not designed to mix-and-match pre-composed syllables and Jamo. */
HB_UNICODE_GENERAL_CATEGORY_IS_MARK (_hb_glyph_info_get_general_category (&buffer->cur())) &&
/* If there's anything between the starter and this char, they should have CCC
* smaller than this character's. */
(starter == buffer->out_len - 1 ||
_hb_glyph_info_get_modified_combining_class (&buffer->prev()) < _hb_glyph_info_get_modified_combining_class (&buffer->cur())) &&
/* And compose. */
c.compose (&c,
buffer->out_info[starter].codepoint,
buffer->cur().codepoint,
&composed) &&
/* And the font has glyph for the composite. */
font->get_nominal_glyph (composed, &glyph))
HB_UNICODE_GENERAL_CATEGORY_IS_MARK (_hb_glyph_info_get_general_category (&buffer->cur())))
{
/* Composes. */
buffer->next_glyph (); /* Copy to out-buffer. */
if (unlikely (buffer->in_error))
return;
buffer->merge_out_clusters (starter, buffer->out_len);
buffer->out_len--; /* Remove the second composable. */
/* Modify starter and carry on. */
buffer->out_info[starter].codepoint = composed;
buffer->out_info[starter].glyph_index() = glyph;
_hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer);
if (/* If there's anything between the starter and this char, they should have CCC
* smaller than this character's. */
(starter == buffer->out_len - 1 ||
_hb_glyph_info_get_modified_combining_class (&buffer->prev()) < _hb_glyph_info_get_modified_combining_class (&buffer->cur())) &&
/* And compose. */
c.compose (&c,
buffer->out_info[starter].codepoint,
buffer->cur().codepoint,
&composed) &&
/* And the font has glyph for the composite. */
font->get_nominal_glyph (composed, &glyph))
{
/* Composes. */
buffer->next_glyph (); /* Copy to out-buffer. */
if (unlikely (buffer->in_error))
return;
buffer->merge_out_clusters (starter, buffer->out_len);
buffer->out_len--; /* Remove the second composable. */
/* Modify starter and carry on. */
buffer->out_info[starter].codepoint = composed;
buffer->out_info[starter].glyph_index() = glyph;
_hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer);
continue;
continue;
}
else if (/* We sometimes custom-tailor the sorted order of marks. In that case, stop
* trying to combine as soon as combining-class drops. */
starter < buffer->out_len - 1 &&
_hb_glyph_info_get_modified_combining_class (&buffer->prev()) > _hb_glyph_info_get_modified_combining_class (&buffer->cur()))
combine = false;
}
/* Blocked, or doesn't compose. */
buffer->next_glyph ();
if (_hb_glyph_info_get_modified_combining_class (&buffer->prev()) == 0)
{
starter = buffer->out_len - 1;
combine = true;
}
}
buffer->swap_buffers ();