Improve the script itemization algorithm to match Unicode 8.0
Override preceding Common-s with a subsequent non-Inherited, non-Common script. This produces longer script runs, which automagically improves the shaping quality (as we don't lose the context anymore), the shaping performance (as we typically shape fewer runs), and the fallback font selection (when the font supports more than just a single language/script). Task-number: QTBUG-29930 Change-Id: I1c55af30bd397871d7f1f6e062605517f5a7e5a1 Reviewed-by: Lars Knoll <lars.knoll@theqtcompany.com> Reviewed-by: Eskil Abrahamsen Blomfeldt <eskil.abrahamsen-blomfeldt@qt.io>
This commit is contained in:
parent
e81f52ecc7
commit
3df159ba17
@ -685,10 +685,10 @@ Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
|
||||
Q_CORE_EXPORT void initScripts(const ushort *string, int length, uchar *scripts)
|
||||
{
|
||||
int sor = 0;
|
||||
int eor = -1;
|
||||
int eor = 0;
|
||||
uchar script = QChar::Script_Common;
|
||||
for (int i = 0; i < length; ++i) {
|
||||
eor = i;
|
||||
|
||||
for (int i = 0; i < length; ++i, eor = i) {
|
||||
uint ucs4 = string[i];
|
||||
if (QChar::isHighSurrogate(ucs4) && i + 1 < length) {
|
||||
ushort low = string[i + 1];
|
||||
@ -700,60 +700,37 @@ Q_CORE_EXPORT void initScripts(const ushort *string, int length, uchar *scripts)
|
||||
|
||||
const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ucs4);
|
||||
|
||||
if (Q_LIKELY(prop->script == script || prop->script <= QChar::Script_Inherited))
|
||||
uchar nscript = prop->script;
|
||||
|
||||
if (Q_LIKELY(nscript == script || nscript <= QChar::Script_Common))
|
||||
continue;
|
||||
|
||||
// inherit preceding Common-s
|
||||
if (Q_UNLIKELY(script <= QChar::Script_Common)) {
|
||||
// also covers a case where the base character of Common script followed
|
||||
// by one or more combining marks of non-Inherited, non-Common script
|
||||
script = nscript;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Never break between a combining mark (gc= Mc, Mn or Me) and its base character.
|
||||
// Thus, a combining mark — whatever its script property value is — should inherit
|
||||
// the script property value of its base character.
|
||||
static const int test = (FLAG(QChar::Mark_NonSpacing) | FLAG(QChar::Mark_SpacingCombining) | FLAG(QChar::Mark_Enclosing));
|
||||
if (Q_UNLIKELY(FLAG(prop->category) & test)) {
|
||||
// In cases where the base character itself has the Common script property value,
|
||||
// and it is followed by one or more combining marks with a specific script property value,
|
||||
// it may be even better for processing to let the base acquire the script property value
|
||||
// from the first mark. This approach can be generalized by treating all the characters
|
||||
// of a combining character sequence as having the script property value
|
||||
// of the first non-Inherited, non-Common character in the sequence if there is one,
|
||||
// and otherwise treating all the characters as having the Common script property value.
|
||||
if (Q_LIKELY(script > QChar::Script_Common || prop->script <= QChar::Script_Common))
|
||||
continue;
|
||||
if (Q_UNLIKELY(FLAG(prop->category) & test))
|
||||
continue;
|
||||
|
||||
script = QChar::Script(prop->script);
|
||||
}
|
||||
Q_ASSERT(script > QChar::Script_Common);
|
||||
Q_ASSERT(sor < eor);
|
||||
::memset(scripts + sor, script, (eor - sor) * sizeof(uchar));
|
||||
sor = eor;
|
||||
|
||||
#if 0 // ### Disabled due to regressions. The font selection algorithm is not prepared for this change.
|
||||
if (Q_LIKELY(script != QChar::Script_Common)) {
|
||||
// override preceding Common-s
|
||||
while (sor > 0 && scripts[sor - 1] == QChar::Script_Common)
|
||||
--sor;
|
||||
} else {
|
||||
// see if we are inheriting preceding run
|
||||
if (sor > 0)
|
||||
script = scripts[sor - 1];
|
||||
}
|
||||
#endif
|
||||
|
||||
while (sor < eor)
|
||||
scripts[sor++] = script;
|
||||
|
||||
script = prop->script;
|
||||
script = nscript;
|
||||
}
|
||||
eor = length;
|
||||
|
||||
#if 0 // ### Disabled due to regressions. The font selection algorithm is not prepared for this change.
|
||||
if (Q_LIKELY(script != QChar::Script_Common)) {
|
||||
// override preceding Common-s
|
||||
while (sor > 0 && scripts[sor - 1] == QChar::Script_Common)
|
||||
--sor;
|
||||
} else {
|
||||
// see if we are inheriting preceding run
|
||||
if (sor > 0)
|
||||
script = scripts[sor - 1];
|
||||
}
|
||||
#endif
|
||||
|
||||
while (sor < eor)
|
||||
scripts[sor++] = script;
|
||||
Q_ASSERT(script >= QChar::Script_Common);
|
||||
Q_ASSERT(eor == length);
|
||||
::memset(scripts + sor, script, (eor - sor) * sizeof(uchar));
|
||||
}
|
||||
|
||||
} // namespace QUnicodeTools
|
||||
|
@ -1225,29 +1225,21 @@ void tst_QTextScriptEngine::thaiWithZWJ()
|
||||
QTextLayout layout(s, font);
|
||||
QTextEngine *e = layout.engine();
|
||||
e->itemize();
|
||||
QCOMPARE(e->layoutData->items.size(), 11);
|
||||
QCOMPARE(e->layoutData->items.size(), 3);
|
||||
|
||||
for (int item = 0; item < e->layoutData->items.size(); ++item)
|
||||
e->shape(item);
|
||||
|
||||
QCOMPARE(e->layoutData->items[0].num_glyphs, ushort(7)); // Thai: The ZWJ and ZWNJ characters are inherited, so should be part of the thai script
|
||||
QCOMPARE(e->layoutData->items[1].num_glyphs, ushort(1)); // Common: The smart quotes cannot be handled by thai, so should be a separate item
|
||||
QCOMPARE(e->layoutData->items[2].num_glyphs, ushort(1)); // Thai: Thai character
|
||||
QCOMPARE(e->layoutData->items[3].num_glyphs, ushort(1)); // Common: Ellipsis
|
||||
QCOMPARE(e->layoutData->items[4].num_glyphs, ushort(1)); // Thai: Thai character
|
||||
QCOMPARE(e->layoutData->items[5].num_glyphs, ushort(1)); // Common: Smart quote
|
||||
QCOMPARE(e->layoutData->items[6].num_glyphs, ushort(1)); // Thai: Thai character
|
||||
QCOMPARE(e->layoutData->items[7].num_glyphs, ushort(1)); // Common: \xA0 = non-breaking space. Could be useful to have in thai, but not currently implemented
|
||||
QCOMPARE(e->layoutData->items[8].num_glyphs, ushort(1)); // Thai: Thai character
|
||||
QCOMPARE(e->layoutData->items[9].num_glyphs, ushort(1)); // Japanese: Kanji for tree
|
||||
QCOMPARE(e->layoutData->items[10].num_glyphs, ushort(2)); // Thai: Thai character followed by superscript "a" which is of inherited type
|
||||
QCOMPARE(e->layoutData->items[0].num_glyphs, ushort(15)); // Thai, Inherited and Common
|
||||
QCOMPARE(e->layoutData->items[1].num_glyphs, ushort(1)); // Japanese: Kanji for tree
|
||||
QCOMPARE(e->layoutData->items[2].num_glyphs, ushort(2)); // Thai: Thai character followed by superscript "a" which is of inherited type
|
||||
|
||||
//A quick sanity check - check all the characters are individual clusters
|
||||
unsigned short *logClusters = e->layoutData->logClustersPtr;
|
||||
for (int i = 0; i < 7; i++)
|
||||
for (int i = 0; i < 15; i++)
|
||||
QCOMPARE(logClusters[i], ushort(i));
|
||||
for (int i = 0; i < 10; i++)
|
||||
QCOMPARE(logClusters[i+7], ushort(0));
|
||||
for (int i = 0; i < 3; i++)
|
||||
QCOMPARE(logClusters[i+15], ushort(0));
|
||||
|
||||
// A thai implementation could either remove the ZWJ and ZWNJ characters, or hide them.
|
||||
// The current implementation hides them, so we test for that.
|
||||
|
Loading…
Reference in New Issue
Block a user