Move Unicode script itemization code from text engine to UnicodeTools

This is still the same trivial implementation; the only difference is that
it now properly handles surrogate pairs and combining marks.

This temporarily makes QTextEngine::itemize() insignificantly slower due to
the use of an intermediate buffer, until the refactoring is done.

Change-Id: I7987d6306b0b5cdb21b837968e292dd70abfe223
Reviewed-by: Eskil Abrahamsen Blomfeldt <eskil.abrahamsen-blomfeldt@digia.com>
This commit is contained in:
Konstantin Ritt 2013-03-12 18:37:07 +02:00 committed by The Qt Project
parent a8e933a74c
commit c20422af13
4 changed files with 71 additions and 34 deletions

View File

@ -53,39 +53,24 @@ public:
static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int length, QCharAttributes *attributes)
{
QVarLengthArray<QUnicodeTools::ScriptItem> scriptItems;
const ushort *string = reinterpret_cast<const ushort *>(chars);
const ushort *unicode = string;
// correctly assign script, isTab and isObject to the script analysis
const ushort *uc = unicode;
const ushort *e = uc + length;
uchar script = QChar::Script_Common;
uchar lastScript = QChar::Script_Common;
const ushort *start = uc;
while (uc < e) {
int s = QChar::script(*uc);
if (s != QChar::Script_Inherited)
script = s;
if (*uc == QChar::ObjectReplacementCharacter || *uc == QChar::LineSeparator || *uc == 9)
script = QChar::Script_Common;
if (script != lastScript) {
if (uc != start) {
QVarLengthArray<QUnicodeTools::ScriptItem> scriptItems;
{
QVarLengthArray<uchar> scripts(length);
QUnicodeTools::initScripts(string, length, scripts.data());
int start = 0;
for (int i = start + 1; i <= length; ++i) {
if (i == length || scripts[i] != scripts[start]) {
QUnicodeTools::ScriptItem item;
item.position = start - string;
item.script = lastScript;
item.position = start;
item.script = scripts[start];
scriptItems.append(item);
start = uc;
start = i;
}
lastScript = script;
}
++uc;
}
if (uc != start) {
QUnicodeTools::ScriptItem item;
item.position = start - string;
item.script = lastScript;
scriptItems.append(item);
}
QUnicodeTools::CharAttributeOptions options = 0;

View File

@ -635,6 +635,51 @@ Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
}
}
// ----------------------------------------------------------------------------
//
// The Unicode script property. See http://www.unicode.org/reports/tr24/ (some very old version)
//
// ----------------------------------------------------------------------------
// Writes one script value into scripts[] for each of the `length` UTF-16 code
// units of `string`, splitting the text into runs of a single script.
//
// - The first run starts out as QChar::Script_Common.
// - A valid high/low surrogate pair is looked up as one code point, and a run
//   boundary is only ever placed at the first unit of a code point, so both
//   halves of a pair receive the same script.
// - Code points whose script is Script_Inherited, and combining marks
//   (Mn, Mc, Me) regardless of their own script, never start a new run; they
//   take the script of the run they occur in.
//
// `scripts` must have room for at least `length` entries.
Q_CORE_EXPORT void initScripts(const ushort *string, int length, uchar *scripts)
{
    // General categories that must stay attached to their base character.
    static const int combiningMarkMask = FLAG(QChar::Mark_NonSpacing)
                                         | FLAG(QChar::Mark_SpacingCombining)
                                         | FLAG(QChar::Mark_Enclosing);

    int runStart = 0;                       // first code unit not yet written
    uchar runScript = QChar::Script_Common; // script of the run being collected

    int pos = 0;
    while (pos < length) {
        const int charStart = pos;          // index of this code point's first unit

        // Decode one code point, consuming the low surrogate when it is present.
        uint codePoint = string[pos++];
        if (QChar::isHighSurrogate(codePoint) && pos < length) {
            const ushort trail = string[pos];
            if (QChar::isLowSurrogate(trail)) {
                codePoint = QChar::surrogateToUcs4(codePoint, trail);
                ++pos;
            }
        }

        const QUnicodeTables::Properties *props = QUnicodeTables::properties(codePoint);

        // Same script as the current run, or inherited: the run simply continues.
        const bool continuesRun = props->script == runScript
                                  || props->script == QChar::Script_Inherited;
        if (Q_LIKELY(continuesRun))
            continue;

        // A combining mark is never separated from its base character,
        // whatever its own script property says.
        if (Q_UNLIKELY(FLAG(props->category) & combiningMarkMask))
            continue;

        // Script change: flush the finished run, then open a new one here.
        for (; runStart < charStart; ++runStart)
            scripts[runStart] = runScript;
        runScript = props->script;
    }

    // Flush the trailing run.
    for (; runStart < length; ++runStart)
        scripts[runStart] = runScript;
}
} // namespace QUnicodeTools
QT_END_NAMESPACE

View File

@ -96,6 +96,9 @@ Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
const ScriptItem *items, int numItems,
QCharAttributes *attributes, CharAttributeOptions options = DefaultOptionsCompat);
// Fills scripts[0 .. length) with one script value per UTF-16 code unit of
// `string`. Surrogate pairs are resolved to a single code point; characters
// with an inherited script and combining marks keep the script of the run
// they appear in. `scripts` must provide room for `length` entries.
Q_CORE_EXPORT void initScripts(const ushort *string, int length, uchar *scripts);
} // namespace QUnicodeTools
QT_END_NAMESPACE

View File

@ -1309,6 +1309,8 @@ void QTextEngine::itemize() const
if (!length)
return;
const ushort *string = reinterpret_cast<const ushort *>(layoutData->string.unicode());
bool ignore = ignoreBidi;
bool rtl = isRightToLeft();
@ -1342,9 +1344,15 @@ void QTextEngine::itemize() const
layoutData->hasBidi = bidiItemize(const_cast<QTextEngine *>(this), analysis, control);
}
const ushort *uc = reinterpret_cast<const ushort *>(layoutData->string.unicode());
{
QVarLengthArray<uchar> scripts(length);
QUnicodeTools::initScripts(string, length, scripts.data());
for (int i = 0; i < length; ++i)
analysis[i].script = scripts.at(i);
}
const ushort *uc = string;
const ushort *e = uc + length;
uchar lastScript = QChar::Script_Common;
while (uc < e) {
switch (*uc) {
case QChar::ObjectReplacementCharacter:
@ -1374,13 +1382,9 @@ void QTextEngine::itemize() const
}
// fall through
default:
analysis->script = QChar::script(*uc);
if (analysis->script == QChar::Script_Inherited)
analysis->script = lastScript;
analysis->flags = QScriptAnalysis::None;
break;
}
lastScript = analysis->script;
analysis->script = hbscript_to_script(script_to_hbscript(analysis->script)); // retain the old behavior
++uc;
++analysis;