Update the Unicode Text Breaking Algorithm implementation
to make it conformant to the Unicode 6.1 specifications #14 and #29. The most important changes are: * The implementation has been reworked from scratch to fix all known bugs; * Separate-out the grapheme and the line breaking implementation to eliminate an overhead due to calculating unnecessary breaks; * Stop using deprecated SG class in favor of resolving pairs of surrogates; * A proper support for SMP code points; * Support for extended grapheme clusters (a drop-in replacement for the legacy grapheme clusters as of Unicode 5.1); * The hardcoded tailoring of UBA has been eliminated which breaks the 7 years-old lineBreaking test. Some later, we'll investigate if such a tailoring is still needed. Change-Id: I9f5867b3cec753b4fc120bc5a7e20f9a73d89370 Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
This commit is contained in:
parent
f32a7f1e21
commit
d64cb5f707
@ -116,8 +116,9 @@ static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int
|
||||
\reentrant
|
||||
|
||||
QTextBoundaryFinder allows to find Unicode text boundaries in a
|
||||
string, similar to the Unicode text boundary specification (see
|
||||
http://www.unicode.org/reports/tr29/tr29-11.html).
|
||||
string, accordingly to the Unicode text boundary specification (see
|
||||
\l{http://www.unicode.org/reports/tr14/}{Unicode Standard Annex #14} and
|
||||
\l{http://www.unicode.org/reports/tr29/}{Unicode Standard Annex #29}).
|
||||
|
||||
QTextBoundaryFinder can operate on a QString in four possible
|
||||
modes depending on the value of \a BoundaryType.
|
||||
@ -127,14 +128,18 @@ static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int
|
||||
Grapheme clusters. The two unicode characters 'A' + diaeresis do
|
||||
for example form one grapheme cluster as the user thinks of them
|
||||
as one character, yet it is in this case represented by two
|
||||
unicode code points.
|
||||
unicode code points
|
||||
(see \l{http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries}).
|
||||
|
||||
Word boundaries are there to locate the start and end of what a
|
||||
language considers to be a word.
|
||||
language considers to be a word
|
||||
(see \l{http://www.unicode.org/reports/tr29/#Word_Boundaries}).
|
||||
|
||||
Line break boundaries give possible places where a line break
|
||||
might happen and sentence boundaries will show the beginning and
|
||||
end of whole sentences.
|
||||
end of whole sentences
|
||||
(see \l{http://www.unicode.org/reports/tr29/#Sentence_Boundaries} and
|
||||
\l{http://www.unicode.org/reports/tr14/}).
|
||||
|
||||
The first position in a string is always a valid boundary and
|
||||
refers to the position before the first character. The last
|
||||
|
@ -43,6 +43,8 @@
|
||||
|
||||
#include "qunicodetables_p.h"
|
||||
|
||||
#define FLAG(x) (1 << (x))
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
Q_AUTOTEST_EXPORT int qt_initcharattributes_default_algorithm_only = 0;
|
||||
@ -51,330 +53,374 @@ namespace QUnicodeTools {
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// The line breaking algorithm. See http://www.unicode.org/reports/tr14/tr14-19.html
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// The text boundaries determination algorithm. See http://www.unicode.org/reports/tr29/tr29-11.html
|
||||
// The text boundaries determination algorithm. See http://www.unicode.org/reports/tr29/tr29-19.html
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
|
||||
/* The Unicode algorithm does in our opinion allow line breaks at some
|
||||
places they shouldn't be allowed. The following changes were thus
|
||||
made in comparison to the Unicode reference:
|
||||
namespace GB {
|
||||
|
||||
EX->AL from DB to IB
|
||||
SY->AL from DB to IB
|
||||
SY->PO from DB to IB
|
||||
SY->PR from DB to IB
|
||||
SY->OP from DB to IB
|
||||
AL->PR from DB to IB
|
||||
AL->PO from DB to IB
|
||||
PR->PR from DB to IB
|
||||
PO->PO from DB to IB
|
||||
PR->PO from DB to IB
|
||||
PO->PR from DB to IB
|
||||
HY->PO from DB to IB
|
||||
HY->PR from DB to IB
|
||||
HY->OP from DB to IB
|
||||
NU->EX from PB to IB
|
||||
EX->PO from DB to IB
|
||||
*/
|
||||
|
||||
// The following line break classes are not treated by the table:
|
||||
// AI, BK, CB, CR, LF, NL, SA, SG, SP, XX
|
||||
|
||||
enum LineBreakRule {
|
||||
ProhibitedBreak, // PB in table
|
||||
DirectBreak, // DB in table
|
||||
IndirectBreak, // IB in table
|
||||
CombiningIndirectBreak, // CI in table
|
||||
CombiningProhibitedBreak // CP in table
|
||||
};
|
||||
#define DB DirectBreak
|
||||
#define IB IndirectBreak
|
||||
#define CI CombiningIndirectBreak
|
||||
#define CP CombiningProhibitedBreak
|
||||
#define PB ProhibitedBreak
|
||||
static const uchar lineBreakTable[QUnicodeTables::LineBreak_JT + 1][QUnicodeTables::LineBreak_JT + 1] = {
|
||||
/* OP CL QU GL NS EX SY IS PR PO NU AL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT */
|
||||
/* OP */ { PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, CP, PB, PB, PB, PB, PB, PB },
|
||||
/* CL */ { DB, PB, IB, IB, PB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* QU */ { PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||
/* GL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||
/* NS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* EX */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* SY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* IS */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* PR */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, DB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||
/* PO */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* NU */ { IB, PB, IB, IB, IB, IB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* AL */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* ID */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* IN */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* HY */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* BA */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* BB */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||
/* B2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, DB, PB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* ZW */ { DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, PB, DB, DB, DB, DB, DB, DB, DB },
|
||||
/* CM */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* WJ */ { IB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||
/* H2 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
|
||||
/* H3 */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB },
|
||||
/* JL */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, DB },
|
||||
/* JV */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
|
||||
/* JT */ { DB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB }
|
||||
};
|
||||
#undef DB
|
||||
#undef IB
|
||||
#undef CI
|
||||
#undef CP
|
||||
#undef PB
|
||||
|
||||
static const uchar graphemeBreakTable[QUnicodeTables::GraphemeBreakLVT + 1][QUnicodeTables::GraphemeBreakLVT + 1] = {
|
||||
// Other, CR, LF, Control, Extend, L, V, T, LV, LVT
|
||||
{ true , true , true , true , true , true , true , true , true , true }, // Other,
|
||||
{ true , true , true , true , true , true , true , true , true , true }, // CR,
|
||||
{ true , false, true , true , true , true , true , true , true , true }, // LF,
|
||||
{ true , true , true , true , true , true , true , true , true , true }, // Control,
|
||||
{ false, true , true , true , false, false, false, false, false, false }, // Extend,
|
||||
{ true , true , true , true , true , false, true , true , true , true }, // L,
|
||||
{ true , true , true , true , true , false, false, true , false, true }, // V,
|
||||
{ true , true , true , true , true , true , false, false, false, false }, // T,
|
||||
{ true , true , true , true , true , false, true , true , true , true }, // LV,
|
||||
{ true , true , true , true , true , false, true , true , true , true }, // LVT
|
||||
static const uchar breakTable[QUnicodeTables::GraphemeBreakLVT + 1][QUnicodeTables::GraphemeBreakLVT + 1] = {
|
||||
// Other CR LF Control Extend Prepend S-Mark L V T LV LVT
|
||||
{ true , true , true , true , false, true , false, true , true , true , true , true }, // Other
|
||||
{ true , true , false, true , true , true , true , true , true , true , true , true }, // CR
|
||||
{ true , true , true , true , true , true , true , true , true , true , true , true }, // LF
|
||||
{ true , true , true , true , true , true , true , true , true , true , true , true }, // Control
|
||||
{ true , true , true , true , false, true , false, true , true , true , true , true }, // Extend
|
||||
{ false, true , true , true , false, false, false, false, false, false, false, false }, // Prepend
|
||||
{ true , true , true , true , false, true , false, true , true , true , true , true }, // SpacingMark
|
||||
{ true , true , true , true , false, true , false, false, false, true , false, false }, // L
|
||||
{ true , true , true , true , false, true , false, true , false, false, true , true }, // V
|
||||
{ true , true , true , true , false, true , false, true , true , false, true , true }, // T
|
||||
{ true , true , true , true , false, true , false, true , false, false, true , true }, // LV
|
||||
{ true , true , true , true , false, true , false, true , true , false, true , true }, // LVT
|
||||
};
|
||||
|
||||
static void calcGraphemeAndLineBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
|
||||
} // namespace GB
|
||||
|
||||
static void getGraphemeBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
|
||||
{
|
||||
// ##### can this fail if the first char is a surrogate?
|
||||
const QUnicodeTables::Properties *prop = QUnicodeTables::properties(string[0]);
|
||||
QUnicodeTables::GraphemeBreak grapheme = (QUnicodeTables::GraphemeBreak) prop->graphemeBreak;
|
||||
QUnicodeTables::LineBreakClass cls = (QUnicodeTables::LineBreakClass) prop->line_break_class;
|
||||
// handle case where input starts with an LF
|
||||
if (cls == QUnicodeTables::LineBreak_LF)
|
||||
cls = QUnicodeTables::LineBreak_BK;
|
||||
|
||||
attributes[0].charStop = true;
|
||||
|
||||
int lcls = cls;
|
||||
for (quint32 i = 1; i < len; ++i) {
|
||||
attributes[i].charStop = true;
|
||||
|
||||
QUnicodeTables::GraphemeBreak lcls = QUnicodeTables::GraphemeBreakLF; // to meet GB1
|
||||
for (quint32 i = 0; i != len; ++i) {
|
||||
quint32 pos = i;
|
||||
uint ucs4 = string[i];
|
||||
prop = QUnicodeTables::properties(ucs4);
|
||||
QUnicodeTables::GraphemeBreak ngrapheme = (QUnicodeTables::GraphemeBreak) prop->graphemeBreak;
|
||||
QUnicodeTables::LineBreakClass ncls = (QUnicodeTables::LineBreakClass) prop->line_break_class;
|
||||
attributes[i].charStop = graphemeBreakTable[ngrapheme][grapheme];
|
||||
// handle surrogates
|
||||
if (ncls == QUnicodeTables::LineBreak_SG) {
|
||||
if (QChar::isHighSurrogate(string[i]) && i < len - 1 && QChar::isLowSurrogate(string[i+1])) {
|
||||
continue;
|
||||
} else if (QChar::isLowSurrogate(string[i]) && QChar::isHighSurrogate(string[i-1])) {
|
||||
ucs4 = QChar::surrogateToUcs4(string[i-1], string[i]);
|
||||
prop = QUnicodeTables::properties(ucs4);
|
||||
ngrapheme = (QUnicodeTables::GraphemeBreak) prop->graphemeBreak;
|
||||
ncls = (QUnicodeTables::LineBreakClass) prop->line_break_class;
|
||||
attributes[i].charStop = false;
|
||||
} else {
|
||||
ncls = QUnicodeTables::LineBreak_AL;
|
||||
if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
|
||||
ushort low = string[i + 1];
|
||||
if (QChar::isLowSurrogate(low)) {
|
||||
ucs4 = QChar::surrogateToUcs4(ucs4, low);
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ucs4);
|
||||
QUnicodeTables::GraphemeBreak cls = (QUnicodeTables::GraphemeBreak) prop->graphemeBreak;
|
||||
|
||||
attributes[pos].charStop = GB::breakTable[lcls][cls];
|
||||
|
||||
lcls = cls;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
namespace WB {
|
||||
|
||||
enum Action {
|
||||
NoBreak = 0,
|
||||
Break = 1,
|
||||
Lookup = 2
|
||||
};
|
||||
|
||||
static const uchar breakTable[QUnicodeTables::WordBreakExtendNumLet + 1][QUnicodeTables::WordBreakExtendNumLet + 1] = {
|
||||
// Other CR LF Newline Format Katakana ALetter MidNumLet MidLetter MidNum Numeric ExtendNumLet
|
||||
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break }, // Other
|
||||
{ Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break }, // CR
|
||||
{ Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // LF
|
||||
{ Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Newline
|
||||
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break }, // Format
|
||||
{ Break , Break , Break , Break , NoBreak, NoBreak, Break , Break , Break , Break , Break , NoBreak }, // Katakana
|
||||
{ Break , Break , Break , Break , NoBreak, Break , NoBreak, Lookup , Lookup , Break , NoBreak, NoBreak }, // ALetter
|
||||
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break }, // MidNumLet
|
||||
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break }, // MidLetter
|
||||
{ Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break }, // MidNum
|
||||
{ Break , Break , Break , Break , NoBreak, Break , NoBreak, Lookup , Break , Lookup , NoBreak, NoBreak }, // Numeric
|
||||
{ Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , NoBreak, NoBreak }, // ExtendNumLet
|
||||
};
|
||||
|
||||
} // namespace WB
|
||||
|
||||
static void getWordBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
|
||||
{
|
||||
QUnicodeTables::WordBreak cls = QUnicodeTables::WordBreakLF; // to meet WB1
|
||||
for (quint32 i = 0; i != len; ++i) {
|
||||
quint32 pos = i;
|
||||
uint ucs4 = string[i];
|
||||
if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
|
||||
ushort low = string[i + 1];
|
||||
if (QChar::isLowSurrogate(low)) {
|
||||
ucs4 = QChar::surrogateToUcs4(ucs4, low);
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ucs4);
|
||||
QUnicodeTables::WordBreak ncls = (QUnicodeTables::WordBreak) prop->wordBreak;
|
||||
|
||||
uchar action = WB::breakTable[cls][ncls];
|
||||
if (ncls == QUnicodeTables::WordBreakFormat) {
|
||||
// WB4: X(Extend|Format)* -> X
|
||||
if (action != WB::Break)
|
||||
continue;
|
||||
} else {
|
||||
if (action == WB::Lookup) {
|
||||
action = WB::Break;
|
||||
for (quint32 lookahead = i + 1; lookahead < len; ++lookahead) {
|
||||
ucs4 = string[lookahead];
|
||||
if (QChar::isHighSurrogate(ucs4) && lookahead + 1 != len) {
|
||||
ushort low = string[lookahead + 1];
|
||||
if (QChar::isLowSurrogate(low)) {
|
||||
ucs4 = QChar::surrogateToUcs4(ucs4, low);
|
||||
++lookahead;
|
||||
}
|
||||
}
|
||||
|
||||
prop = QUnicodeTables::properties(ucs4);
|
||||
QUnicodeTables::WordBreak tcls = (QUnicodeTables::WordBreak) prop->wordBreak;
|
||||
if (tcls == QUnicodeTables::WordBreakFormat)
|
||||
continue;
|
||||
if (tcls == cls) {
|
||||
i = lookahead;
|
||||
ncls = tcls;
|
||||
action = WB::NoBreak;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
cls = ncls;
|
||||
if (action == WB::Break)
|
||||
attributes[pos].wordBoundary = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
namespace SB {
|
||||
|
||||
enum State {
|
||||
Initial,
|
||||
Upper,
|
||||
UpATerm,
|
||||
ATerm,
|
||||
ATermC,
|
||||
ACS,
|
||||
STerm,
|
||||
STermC,
|
||||
SCS,
|
||||
BAfterC,
|
||||
BAfter,
|
||||
Break,
|
||||
Lookup
|
||||
};
|
||||
|
||||
static const uchar breakTable[BAfter + 1][QUnicodeTables::SentenceBreakClose + 1] = {
|
||||
// Other CR LF Sep Format Sp Lower Upper OLetter Numeric ATerm SContinue STerm Close
|
||||
{ Initial, BAfterC, BAfter , BAfter , Initial, Initial, Initial, Upper , Initial, Initial, ATerm , Initial, STerm , Initial }, // Initial
|
||||
{ Initial, BAfterC, BAfter , BAfter , Upper , Initial, Initial, Upper , Initial, Initial, UpATerm, STerm , STerm , Initial }, // Upper
|
||||
|
||||
{ Lookup , BAfterC, BAfter , BAfter , UpATerm, ACS , Initial, Upper , Break , Initial, ATerm , STerm , STerm , ATermC }, // UpATerm
|
||||
{ Lookup , BAfterC, BAfter , BAfter , ATerm , ACS , Initial, Break , Break , Initial, ATerm , STerm , STerm , ATermC }, // ATerm
|
||||
{ Lookup , BAfterC, BAfter , BAfter , ATermC , ACS , Initial, Break , Break , Lookup , ATerm , STerm , STerm , ATermC }, // ATermC
|
||||
{ Lookup , BAfterC, BAfter , BAfter , ACS , ACS , Initial, Break , Break , Lookup , ATerm , STerm , STerm , Lookup }, // ACS
|
||||
|
||||
{ Break , BAfterC, BAfter , BAfter , STerm , SCS , Break , Break , Break , Break , ATerm , STerm , STerm , STermC }, // STerm,
|
||||
{ Break , BAfterC, BAfter , BAfter , STermC , SCS , Break , Break , Break , Break , ATerm , STerm , STerm , STermC }, // STermC
|
||||
{ Break , BAfterC, BAfter , BAfter , SCS , SCS , Break , Break , Break , Break , ATerm , STerm , STerm , Break }, // SCS
|
||||
{ Break , Break , BAfter , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // BAfterC
|
||||
{ Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // BAfter
|
||||
};
|
||||
|
||||
} // namespace SB
|
||||
|
||||
static void getSentenceBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
|
||||
{
|
||||
uchar state = SB::BAfter; // to meet SB1
|
||||
for (quint32 i = 0; i != len; ++i) {
|
||||
quint32 pos = i;
|
||||
uint ucs4 = string[i];
|
||||
if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
|
||||
ushort low = string[i + 1];
|
||||
if (QChar::isLowSurrogate(low)) {
|
||||
ucs4 = QChar::surrogateToUcs4(ucs4, low);
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ucs4);
|
||||
QUnicodeTables::SentenceBreak ncls = (QUnicodeTables::SentenceBreak) prop->sentenceBreak;
|
||||
|
||||
Q_ASSERT(state <= SB::BAfter);
|
||||
state = SB::breakTable[state][ncls];
|
||||
if (state == SB::Lookup) { // SB8
|
||||
state = SB::Break;
|
||||
for (quint32 lookahead = i + 1; lookahead < len; ++lookahead) {
|
||||
ucs4 = string[lookahead];
|
||||
if (QChar::isHighSurrogate(ucs4) && lookahead + 1 != len) {
|
||||
ushort low = string[lookahead + 1];
|
||||
if (QChar::isLowSurrogate(low)) {
|
||||
ucs4 = QChar::surrogateToUcs4(ucs4, low);
|
||||
++lookahead;
|
||||
}
|
||||
}
|
||||
|
||||
prop = QUnicodeTables::properties(ucs4);
|
||||
QUnicodeTables::SentenceBreak tcls = (QUnicodeTables::SentenceBreak) prop->sentenceBreak;
|
||||
switch (tcls) {
|
||||
case QUnicodeTables::SentenceBreakOther:
|
||||
case QUnicodeTables::SentenceBreakFormat:
|
||||
case QUnicodeTables::SentenceBreakSp:
|
||||
case QUnicodeTables::SentenceBreakNumeric:
|
||||
case QUnicodeTables::SentenceBreakSContinue:
|
||||
case QUnicodeTables::SentenceBreakClose:
|
||||
continue;
|
||||
case QUnicodeTables::SentenceBreakLower:
|
||||
i = lookahead;
|
||||
state = SB::Initial;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (state == SB::Break) {
|
||||
attributes[pos].sentenceBoundary = true;
|
||||
state = SB::breakTable[SB::Initial][ncls];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// The line breaking algorithm. See http://www.unicode.org/reports/tr14/tr14-28.html
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
|
||||
namespace LB {
|
||||
|
||||
// The following line break classes are not treated by the pair table
|
||||
// and must be resolved outside:
|
||||
// AI, BK, CB, CJ, CR, LF, NL, SA, SG, SP, XX
|
||||
|
||||
enum Action {
|
||||
ProhibitedBreak, PB = ProhibitedBreak,
|
||||
DirectBreak, DB = DirectBreak,
|
||||
IndirectBreak, IB = IndirectBreak,
|
||||
CombiningIndirectBreak, CI = CombiningIndirectBreak,
|
||||
CombiningProhibitedBreak, CP = CombiningProhibitedBreak
|
||||
};
|
||||
|
||||
static const uchar breakTable[QUnicodeTables::LineBreak_JT + 1][QUnicodeTables::LineBreak_JT + 1] = {
|
||||
/* OP CL CP QU GL NS EX SY IS PR PO NU AL HL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT */
|
||||
/* OP */ { PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, CP, PB, PB, PB, PB, PB, PB },
|
||||
/* CL */ { DB, PB, PB, IB, IB, PB, PB, PB, PB, IB, IB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* CP */ { DB, PB, PB, IB, IB, PB, PB, PB, PB, IB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* QU */ { PB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||
/* GL */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||
/* NS */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* EX */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* SY */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* IS */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* PR */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, IB, DB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||
/* PO */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* NU */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* AL */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* HL */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* ID */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* IN */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* HY */ { DB, PB, PB, IB, DB, IB, PB, PB, PB, DB, DB, IB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* BA */ { DB, PB, PB, IB, DB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* BB */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||
/* B2 */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, PB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* ZW */ { DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, PB, DB, DB, DB, DB, DB, DB, DB },
|
||||
/* CM */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||
/* WJ */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||
/* H2 */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
|
||||
/* H3 */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB },
|
||||
/* JL */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, DB },
|
||||
/* JV */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB },
|
||||
/* JT */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB }
|
||||
};
|
||||
|
||||
} // namespace LB
|
||||
|
||||
static void getLineBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
|
||||
{
|
||||
uint lucs4 = 0;
|
||||
QUnicodeTables::LineBreakClass lcls = QUnicodeTables::LineBreak_LF; // to meet LB10
|
||||
QUnicodeTables::LineBreakClass cls = lcls;
|
||||
for (quint32 i = 0; i != len; ++i) {
|
||||
quint32 pos = i;
|
||||
uint ucs4 = string[i];
|
||||
if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
|
||||
ushort low = string[i + 1];
|
||||
if (QChar::isLowSurrogate(low)) {
|
||||
ucs4 = QChar::surrogateToUcs4(ucs4, low);
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ucs4);
|
||||
QUnicodeTables::LineBreakClass ncls = (QUnicodeTables::LineBreakClass) prop->line_break_class;
|
||||
|
||||
if (ncls == QUnicodeTables::LineBreak_SA) {
|
||||
// LB1: resolve SA to AL, except of those that have Category Mn or Mc be resolved to CM
|
||||
static const int test = FLAG(QChar::Mark_NonSpacing) | FLAG(QChar::Mark_SpacingCombining);
|
||||
if (FLAG(prop->category) & test)
|
||||
ncls = QUnicodeTables::LineBreak_CM;
|
||||
}
|
||||
if (ncls == QUnicodeTables::LineBreak_CM) {
|
||||
// LB10: treat CM that follows SP, BK, CR, LF, NL, or ZW as AL
|
||||
if (lcls == QUnicodeTables::LineBreak_ZW || lcls >= QUnicodeTables::LineBreak_SP)
|
||||
ncls = QUnicodeTables::LineBreak_AL;
|
||||
}
|
||||
|
||||
HB_LineBreakType lineBreakType = HB_NoBreak;
|
||||
|
||||
if (cls >= QUnicodeTables::LineBreak_CR) {
|
||||
if (cls > QUnicodeTables::LineBreak_CR || ncls != QUnicodeTables::LineBreak_LF)
|
||||
if (lcls >= QUnicodeTables::LineBreak_CR) {
|
||||
// LB4: BK!, LB5: (CRxLF|CR|LF|NL)!
|
||||
if (lcls > QUnicodeTables::LineBreak_CR || ncls != QUnicodeTables::LineBreak_LF)
|
||||
lineBreakType = HB_ForcedBreak;
|
||||
goto next;
|
||||
}
|
||||
|
||||
if (ncls == QUnicodeTables::LineBreak_SP)
|
||||
goto next_no_cls_update;
|
||||
if (ncls >= QUnicodeTables::LineBreak_CR)
|
||||
goto next;
|
||||
|
||||
{
|
||||
int tcls = ncls;
|
||||
// for south east asian chars that require a complex (dictionary analysis), the unicode
|
||||
// standard recommends to treat them as AL. thai_attributes and other attribute methods that
|
||||
// do dictionary analysis can override
|
||||
if (tcls >= QUnicodeTables::LineBreak_SA)
|
||||
tcls = QUnicodeTables::LineBreak_AL;
|
||||
if (cls >= QUnicodeTables::LineBreak_SA)
|
||||
cls = QUnicodeTables::LineBreak_AL;
|
||||
|
||||
int brk = lineBreakTable[cls][tcls];
|
||||
switch (brk) {
|
||||
case DirectBreak:
|
||||
lineBreakType = HB_Break;
|
||||
if (string[i-1] == 0xad) // soft hyphen
|
||||
lineBreakType = HB_SoftHyphen;
|
||||
break;
|
||||
case IndirectBreak:
|
||||
lineBreakType = (lcls == QUnicodeTables::LineBreak_SP) ? HB_Break : HB_NoBreak;
|
||||
break;
|
||||
case CombiningIndirectBreak:
|
||||
lineBreakType = HB_NoBreak;
|
||||
if (lcls == QUnicodeTables::LineBreak_SP){
|
||||
if (i > 1)
|
||||
attributes[i-2].lineBreakType = HB_Break;
|
||||
} else {
|
||||
goto next_no_cls_update;
|
||||
}
|
||||
break;
|
||||
case CombiningProhibitedBreak:
|
||||
lineBreakType = HB_NoBreak;
|
||||
if (lcls != QUnicodeTables::LineBreak_SP)
|
||||
goto next_no_cls_update;
|
||||
case ProhibitedBreak:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (ncls >= QUnicodeTables::LineBreak_SP) {
|
||||
if (ncls > QUnicodeTables::LineBreak_SP)
|
||||
goto next; // LB6: x(BK|CR|LF|NL)
|
||||
goto next_no_cls_update; // LB7: xSP
|
||||
}
|
||||
|
||||
// for South East Asian chars that require a complex analysis, the Unicode
|
||||
// standard recommends to treat them as AL. tailoring that do dictionary analysis can override
|
||||
if (cls >= QUnicodeTables::LineBreak_SA)
|
||||
cls = QUnicodeTables::LineBreak_AL;
|
||||
|
||||
switch (LB::breakTable[cls][ncls < QUnicodeTables::LineBreak_SA ? ncls : QUnicodeTables::LineBreak_AL]) {
|
||||
case LB::DirectBreak:
|
||||
lineBreakType = HB_Break;
|
||||
if (lucs4 == 0x00ad) // soft hyphen
|
||||
lineBreakType = HB_SoftHyphen;
|
||||
break;
|
||||
case LB::IndirectBreak:
|
||||
if (lcls == QUnicodeTables::LineBreak_SP)
|
||||
lineBreakType = HB_Break;
|
||||
break;
|
||||
case LB::CombiningIndirectBreak:
|
||||
if (lcls != QUnicodeTables::LineBreak_SP)
|
||||
goto next_no_cls_update;
|
||||
lineBreakType = HB_Break;
|
||||
break;
|
||||
case LB::CombiningProhibitedBreak:
|
||||
if (lcls != QUnicodeTables::LineBreak_SP)
|
||||
goto next_no_cls_update;
|
||||
break;
|
||||
case LB::ProhibitedBreak:
|
||||
// nothing to do
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
next:
|
||||
cls = ncls;
|
||||
lucs4 = ucs4;
|
||||
next_no_cls_update:
|
||||
lcls = ncls;
|
||||
grapheme = ngrapheme;
|
||||
attributes[i-1].lineBreakType = lineBreakType;
|
||||
if (lineBreakType != HB_NoBreak)
|
||||
attributes[pos].lineBreakType = lineBreakType;
|
||||
}
|
||||
|
||||
for (quint32 i = len - 1; i > 0; --i)
|
||||
attributes[i].lineBreakType = attributes[i - 1].lineBreakType;
|
||||
attributes[0].lineBreakType = HB_NoBreak; // LB2
|
||||
}
|
||||
|
||||
|
||||
enum WordBreakRule { NoBreak = 0, Break = 1, Middle = 2 };
|
||||
|
||||
static const uchar wordBreakTable[QUnicodeTables::WordBreakExtendNumLet + 1][QUnicodeTables::WordBreakExtendNumLet + 1] = {
|
||||
// Other Format Katakana ALetter MidLetter MidNum Numeric ExtendNumLet
|
||||
{ Break , Break , Break , Break , Break , Break , Break , Break }, // Other
|
||||
{ Break , Break , Break , Break , Break , Break , Break , Break }, // Format
|
||||
{ Break , Break , NoBreak, Break , Break , Break , Break , NoBreak }, // Katakana
|
||||
{ Break , Break , Break , NoBreak, Middle , Break , NoBreak, NoBreak }, // ALetter
|
||||
{ Break , Break , Break , Break , Break , Break , Break , Break }, // MidLetter
|
||||
{ Break , Break , Break , Break , Break , Break , Break , Break }, // MidNum
|
||||
{ Break , Break , Break , NoBreak, Break , Middle , NoBreak, NoBreak }, // Numeric
|
||||
{ Break , Break , NoBreak, NoBreak, Break , Break , NoBreak, NoBreak }, // ExtendNumLet
|
||||
};
|
||||
|
||||
static void calcWordBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
|
||||
{
|
||||
quint32 brk = QUnicodeTables::wordBreakClass(string[0]);
|
||||
|
||||
attributes[0].wordBoundary = true;
|
||||
|
||||
for (quint32 i = 1; i < len; ++i) {
|
||||
if (!attributes[i].charStop) {
|
||||
attributes[i].wordBoundary = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
quint32 nbrk = QUnicodeTables::wordBreakClass(string[i]);
|
||||
if (nbrk == QUnicodeTables::WordBreakFormat) {
|
||||
attributes[i].wordBoundary = (QUnicodeTables::sentenceBreakClass(string[i-1]) == QUnicodeTables::SentenceBreakSep);
|
||||
continue;
|
||||
}
|
||||
|
||||
WordBreakRule rule = (WordBreakRule)wordBreakTable[brk][nbrk];
|
||||
if (rule == Middle) {
|
||||
rule = Break;
|
||||
quint32 lookahead = i + 1;
|
||||
while (lookahead < len) {
|
||||
quint32 testbrk = QUnicodeTables::wordBreakClass(string[lookahead]);
|
||||
if (testbrk == QUnicodeTables::WordBreakFormat
|
||||
&& QUnicodeTables::sentenceBreakClass(string[lookahead]) != QUnicodeTables::SentenceBreakSep) {
|
||||
++lookahead;
|
||||
continue;
|
||||
}
|
||||
if (testbrk == brk) {
|
||||
rule = NoBreak;
|
||||
while (i < lookahead)
|
||||
attributes[i++].wordBoundary = false;
|
||||
nbrk = testbrk;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
attributes[i].wordBoundary = (rule == Break);
|
||||
brk = nbrk;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
enum SentenceBreakState {
|
||||
SB_Initial,
|
||||
SB_Upper,
|
||||
SB_UpATerm,
|
||||
SB_ATerm,
|
||||
SB_ATermC,
|
||||
SB_ACS,
|
||||
SB_STerm,
|
||||
SB_STermC,
|
||||
SB_SCS,
|
||||
SB_BAfter,
|
||||
SB_Break,
|
||||
SB_Lookup
|
||||
};
|
||||
|
||||
static const uchar sentenceBreakTable[SB_Lookup + 1][QUnicodeTables::SentenceBreakClose + 1] = {
|
||||
// Other Sep Format Sp Lower Upper OLetter Numeric ATerm STerm Close
|
||||
{ SB_Initial, SB_BAfter , SB_Initial, SB_Initial, SB_Initial, SB_Upper , SB_Initial, SB_Initial, SB_ATerm , SB_STerm , SB_Initial }, // SB_Initial,
|
||||
{ SB_Initial, SB_BAfter , SB_Upper , SB_Initial, SB_Initial, SB_Upper , SB_Initial, SB_Initial, SB_UpATerm, SB_STerm , SB_Initial }, // SB_Upper
|
||||
|
||||
{ SB_Lookup , SB_BAfter , SB_UpATerm, SB_ACS , SB_Initial, SB_Upper , SB_Break , SB_Initial, SB_ATerm , SB_STerm , SB_ATermC }, // SB_UpATerm
|
||||
{ SB_Lookup , SB_BAfter , SB_ATerm , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Initial, SB_ATerm , SB_STerm , SB_ATermC }, // SB_ATerm
|
||||
{ SB_Lookup , SB_BAfter , SB_ATermC , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Lookup , SB_ATerm , SB_STerm , SB_ATermC }, // SB_ATermC,
|
||||
{ SB_Lookup , SB_BAfter , SB_ACS , SB_ACS , SB_Initial, SB_Break , SB_Break , SB_Lookup , SB_ATerm , SB_STerm , SB_Lookup }, // SB_ACS,
|
||||
|
||||
{ SB_Break , SB_BAfter , SB_STerm , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_STermC }, // SB_STerm,
|
||||
{ SB_Break , SB_BAfter , SB_STermC , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_STermC }, // SB_STermC,
|
||||
{ SB_Break , SB_BAfter , SB_SCS , SB_SCS , SB_Break , SB_Break , SB_Break , SB_Break , SB_ATerm , SB_STerm , SB_Break }, // SB_SCS,
|
||||
{ SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break , SB_Break }, // SB_BAfter,
|
||||
};
|
||||
|
||||
static void calcSentenceBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
|
||||
{
|
||||
quint32 brk = sentenceBreakTable[SB_Initial][QUnicodeTables::sentenceBreakClass(string[0])];
|
||||
attributes[0].sentenceBoundary = true;
|
||||
for (quint32 i = 1; i < len; ++i) {
|
||||
if (!attributes[i].charStop) {
|
||||
attributes[i].sentenceBoundary = false;
|
||||
continue;
|
||||
}
|
||||
brk = sentenceBreakTable[brk][QUnicodeTables::sentenceBreakClass(string[i])];
|
||||
if (brk == SB_Lookup) {
|
||||
brk = SB_Break;
|
||||
quint32 lookahead = i + 1;
|
||||
while (lookahead < len) {
|
||||
quint32 sbrk = QUnicodeTables::sentenceBreakClass(string[lookahead]);
|
||||
if (sbrk != QUnicodeTables::SentenceBreakOther
|
||||
&& sbrk != QUnicodeTables::SentenceBreakNumeric
|
||||
&& sbrk != QUnicodeTables::SentenceBreakClose) {
|
||||
break;
|
||||
} else if (sbrk == QUnicodeTables::SentenceBreakLower) {
|
||||
brk = SB_Initial;
|
||||
break;
|
||||
}
|
||||
++lookahead;
|
||||
}
|
||||
if (brk == SB_Initial) {
|
||||
while (i < lookahead)
|
||||
attributes[i++].sentenceBoundary = false;
|
||||
}
|
||||
}
|
||||
if (brk == SB_Break) {
|
||||
attributes[i].sentenceBoundary = true;
|
||||
brk = sentenceBreakTable[SB_Initial][QUnicodeTables::sentenceBreakClass(string[i])];
|
||||
} else {
|
||||
attributes[i].sentenceBoundary = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void getWhiteSpaces(const ushort *string, quint32 len, HB_CharAttributes *attributes)
|
||||
{
|
||||
for (quint32 i = 0; i != len; ++i) {
|
||||
@ -400,18 +446,17 @@ Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
|
||||
if (length <= 0)
|
||||
return;
|
||||
|
||||
if (!(options & DontClearAttributes)) {
|
||||
if (!(options & DontClearAttributes))
|
||||
::memset(attributes, 0, length * sizeof(HB_CharAttributes));
|
||||
if (options & (WordBreaks | SentenceBreaks))
|
||||
options |= GraphemeBreaks;
|
||||
}
|
||||
|
||||
if (options & (GraphemeBreaks | LineBreaks))
|
||||
calcGraphemeAndLineBreaks(string, length, attributes);
|
||||
if (options & GraphemeBreaks)
|
||||
getGraphemeBreaks(string, length, attributes);
|
||||
if (options & WordBreaks)
|
||||
calcWordBreaks(string, length, attributes);
|
||||
getWordBreaks(string, length, attributes);
|
||||
if (options & SentenceBreaks)
|
||||
calcSentenceBreaks(string, length, attributes);
|
||||
getSentenceBreaks(string, length, attributes);
|
||||
if (options & LineBreaks)
|
||||
getLineBreaks(string, length, attributes);
|
||||
if (options & WhiteSpaces)
|
||||
getWhiteSpaces(string, length, attributes);
|
||||
|
||||
|
@ -209,7 +209,7 @@ void tst_QTextLayout::cleanup()
|
||||
|
||||
void tst_QTextLayout::lineBreaking()
|
||||
{
|
||||
#if defined(Q_WS_X11)
|
||||
#if 0
|
||||
struct Breaks {
|
||||
const char *utf8;
|
||||
uchar breaks[32];
|
||||
@ -286,8 +286,6 @@ void tst_QTextLayout::lineBreaking()
|
||||
QCOMPARE(b->breaks[i], (uchar)0xff);
|
||||
++b;
|
||||
}
|
||||
#else
|
||||
QSKIP("This test can not be run on non-X11 platforms");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user