Line Breaking Algorithm: don't break inside numeric expressions
Change-Id: I8362663454e4c6604ecb6289ae8009d47c78aeb1
Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
This commit is contained in:
parent
48d99e74fc
commit
c8dd0de1a6
@ -286,6 +286,77 @@ static void getSentenceBreaks(const ushort *string, quint32 len, HB_CharAttribut
|
|||||||
|
|
||||||
namespace LB {
|
namespace LB {
|
||||||
|
|
||||||
|
namespace NS { // Number Sequence
|
||||||
|
|
||||||
|
// LB25 recommends to not break lines inside numbers of the form
|
||||||
|
// described by the following regular expression:
|
||||||
|
// (PR|PO)?(OP|HY)?NU(NU|SY|IS)*(CL|CP)?(PR|PO)?
|
||||||
|
|
||||||
|
enum Action {
|
||||||
|
None,
|
||||||
|
Start,
|
||||||
|
Continue,
|
||||||
|
Break
|
||||||
|
};
|
||||||
|
|
||||||
|
enum Class {
|
||||||
|
XX,
|
||||||
|
PRPO,
|
||||||
|
OPHY,
|
||||||
|
NU,
|
||||||
|
SYIS,
|
||||||
|
CLCP
|
||||||
|
};
|
||||||
|
|
||||||
|
static const uchar actionTable[CLCP + 1][CLCP + 1] = {
|
||||||
|
// XX PRPO OPHY NU SYIS CLCP
|
||||||
|
{ None , Start , Start , Start , None , None }, // XX
|
||||||
|
{ None , Start , Continue, Continue, None , None }, // PRPO
|
||||||
|
{ None , Start , Start , Continue, None , None }, // OPHY
|
||||||
|
{ Break , Break , Break , Continue, Continue, Continue }, // NU
|
||||||
|
{ Break , Break , Break , Continue, Continue, Continue }, // SYIS
|
||||||
|
{ Break , Continue, Break , Break , Break , Break }, // CLCP
|
||||||
|
};
|
||||||
|
|
||||||
|
inline Class toClass(QUnicodeTables::LineBreakClass lbc, QChar::Category category)
|
||||||
|
{
|
||||||
|
switch (lbc) {
|
||||||
|
case QUnicodeTables::LineBreak_AL:// case QUnicodeTables::LineBreak_AI:
|
||||||
|
// resolve AI math symbols in numerical context to IS
|
||||||
|
if (category == QChar::Symbol_Math)
|
||||||
|
return SYIS;
|
||||||
|
break;
|
||||||
|
case QUnicodeTables::LineBreak_PR: case QUnicodeTables::LineBreak_PO:
|
||||||
|
return PRPO;
|
||||||
|
case QUnicodeTables::LineBreak_OP: case QUnicodeTables::LineBreak_HY:
|
||||||
|
return OPHY;
|
||||||
|
case QUnicodeTables::LineBreak_NU:
|
||||||
|
return NU;
|
||||||
|
case QUnicodeTables::LineBreak_SY: case QUnicodeTables::LineBreak_IS:
|
||||||
|
return SYIS;
|
||||||
|
case QUnicodeTables::LineBreak_CL: case QUnicodeTables::LineBreak_CP:
|
||||||
|
return CLCP;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return XX;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace NS
|
||||||
|
|
||||||
|
/* In order to support the tailored implementation of LB25 properly
|
||||||
|
the following changes were made in the pair table to allow breaks
|
||||||
|
where the numeric expression doesn't match the template (i.e. [^NU](IS|SY)NU):
|
||||||
|
CL->PO from IB to DB
|
||||||
|
CP->PO from IB to DB
|
||||||
|
CL->PR from IB to DB
|
||||||
|
CP->PR from IB to DB
|
||||||
|
PO->OP from IB to DB
|
||||||
|
PR->OP from IB to DB
|
||||||
|
IS->NU from IB to DB
|
||||||
|
SY->NU from IB to DB
|
||||||
|
*/
|
||||||
|
|
||||||
// The following line break classes are not treated by the pair table
|
// The following line break classes are not treated by the pair table
|
||||||
// and must be resolved outside:
|
// and must be resolved outside:
|
||||||
// AI, BK, CB, CJ, CR, LF, NL, SA, SG, SP, XX
|
// AI, BK, CB, CJ, CR, LF, NL, SA, SG, SP, XX
|
||||||
@ -301,16 +372,16 @@ enum Action {
|
|||||||
static const uchar breakTable[QUnicodeTables::LineBreak_JT + 1][QUnicodeTables::LineBreak_JT + 1] = {
|
static const uchar breakTable[QUnicodeTables::LineBreak_JT + 1][QUnicodeTables::LineBreak_JT + 1] = {
|
||||||
/* OP CL CP QU GL NS EX SY IS PR PO NU AL HL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT */
|
/* OP CL CP QU GL NS EX SY IS PR PO NU AL HL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT */
|
||||||
/* OP */ { PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, CP, PB, PB, PB, PB, PB, PB },
|
/* OP */ { PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, CP, PB, PB, PB, PB, PB, PB },
|
||||||
/* CL */ { DB, PB, PB, IB, IB, PB, PB, PB, PB, IB, IB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
/* CL */ { DB, PB, PB, IB, IB, PB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||||
/* CP */ { DB, PB, PB, IB, IB, PB, PB, PB, PB, IB, IB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
/* CP */ { DB, PB, PB, IB, IB, PB, PB, PB, PB, DB, DB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||||
/* QU */ { PB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
/* QU */ { PB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||||
/* GL */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
/* GL */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||||
/* NS */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
/* NS */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||||
/* EX */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
/* EX */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||||
/* SY */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
/* SY */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||||
/* IS */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
/* IS */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||||
/* PR */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, IB, DB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, IB },
|
/* PR */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, IB, DB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, IB },
|
||||||
/* PO */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
/* PO */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||||
/* NU */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
/* NU */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||||
/* AL */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
/* AL */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||||
/* HL */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
/* HL */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB },
|
||||||
@ -334,6 +405,9 @@ static const uchar breakTable[QUnicodeTables::LineBreak_JT + 1][QUnicodeTables::
|
|||||||
|
|
||||||
static void getLineBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
|
static void getLineBreaks(const ushort *string, quint32 len, HB_CharAttributes *attributes)
|
||||||
{
|
{
|
||||||
|
quint32 nestart = 0;
|
||||||
|
LB::NS::Class nelast = LB::NS::XX;
|
||||||
|
|
||||||
uint lucs4 = 0;
|
uint lucs4 = 0;
|
||||||
QUnicodeTables::LineBreakClass lcls = QUnicodeTables::LineBreak_LF; // to meet LB10
|
QUnicodeTables::LineBreakClass lcls = QUnicodeTables::LineBreak_LF; // to meet LB10
|
||||||
QUnicodeTables::LineBreakClass cls = lcls;
|
QUnicodeTables::LineBreakClass cls = lcls;
|
||||||
@ -363,6 +437,27 @@ static void getLineBreaks(const ushort *string, quint32 len, HB_CharAttributes *
|
|||||||
ncls = QUnicodeTables::LineBreak_AL;
|
ncls = QUnicodeTables::LineBreak_AL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ncls != QUnicodeTables::LineBreak_CM) {
|
||||||
|
// LB25: do not break lines inside numbers
|
||||||
|
LB::NS::Class necur = LB::NS::toClass(ncls, (QChar::Category)prop->category);
|
||||||
|
switch (LB::NS::actionTable[nelast][necur]) {
|
||||||
|
case LB::NS::Break:
|
||||||
|
// do not change breaks before and after the expression
|
||||||
|
for (quint32 j = nestart + 1; j < pos; ++j)
|
||||||
|
attributes[j].lineBreakType = HB_NoBreak;
|
||||||
|
// fall through
|
||||||
|
case LB::NS::None:
|
||||||
|
nelast = LB::NS::XX; // reset state
|
||||||
|
break;
|
||||||
|
case LB::NS::Start:
|
||||||
|
nestart = i;
|
||||||
|
// fall through
|
||||||
|
default:
|
||||||
|
nelast = necur;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
HB_LineBreakType lineBreakType = HB_NoBreak;
|
HB_LineBreakType lineBreakType = HB_NoBreak;
|
||||||
|
|
||||||
if (lcls >= QUnicodeTables::LineBreak_CR) {
|
if (lcls >= QUnicodeTables::LineBreak_CR) {
|
||||||
@ -417,6 +512,12 @@ static void getLineBreaks(const ushort *string, quint32 len, HB_CharAttributes *
|
|||||||
attributes[pos].lineBreakType = lineBreakType;
|
attributes[pos].lineBreakType = lineBreakType;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (LB::NS::actionTable[nelast][LB::NS::XX] == LB::NS::Break) {
|
||||||
|
// LB25: do not break lines inside numbers
|
||||||
|
for (quint32 j = nestart + 1; j < len; ++j)
|
||||||
|
attributes[j].lineBreakType = HB_NoBreak;
|
||||||
|
}
|
||||||
|
|
||||||
attributes[0].lineBreakType = HB_NoBreak; // LB2
|
attributes[0].lineBreakType = HB_NoBreak; // LB2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -288,31 +288,6 @@ void tst_QTextBoundaryFinder::lineBoundariesDefault()
|
|||||||
if (testString.contains(QChar::ObjectReplacementCharacter))
|
if (testString.contains(QChar::ObjectReplacementCharacter))
|
||||||
QSKIP("QTextBoundaryFinder doesn't handle Object Replacement character");
|
QSKIP("QTextBoundaryFinder doesn't handle Object Replacement character");
|
||||||
|
|
||||||
{ // The Line Break tests use tailoring of numbers described in Example 7
|
|
||||||
// of Section 8.2 Examples of Customization
|
|
||||||
QChar s[][2] = {
|
|
||||||
{ 0x007D, 0x0025 }, // CL x PO
|
|
||||||
{ 0x007D, 0x0024 }, // CL x PR
|
|
||||||
{ 0x0029, 0x0025 }, // CP x PO
|
|
||||||
{ 0x0029, 0x0024 }, // CP x PR
|
|
||||||
{ 0x002C, 0x0030 }, // IS x NU
|
|
||||||
{ 0x002E, 0x0031 }, // IS x NU
|
|
||||||
{ 0x002E, 0x0032 }, // IS x NU
|
|
||||||
{ 0x002E, 0x0033 }, // IS x NU
|
|
||||||
{ 0x002E, 0x0034 }, // IS x NU
|
|
||||||
{ 0x0025, 0x0028 }, // PO x OP
|
|
||||||
{ 0x0024, 0x0028 }, // PR x OP
|
|
||||||
{ 0x005C, 0x0028 }, // PR x OP
|
|
||||||
{ 0x005C, 0x007B }, // PR x OP
|
|
||||||
{ 0x002F, 0x0030 }, // SY x NU
|
|
||||||
};
|
|
||||||
QChar cm(0x0308);
|
|
||||||
for (int i = 0; i < int(sizeof(s) / sizeof(QChar)) / 2; ++i) {
|
|
||||||
if (testString.contains(QString(s[i], 2)) || testString.contains(QString(s[i], 2).insert(1, cm)))
|
|
||||||
QSKIP("QTextBoundaryFinder doesn't handle numerical expressions");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
expectedBreakPositions.prepend(0); // ### QTBF generates a boundary at start of text
|
expectedBreakPositions.prepend(0); // ### QTBF generates a boundary at start of text
|
||||||
doTestData(testString, expectedBreakPositions, QTextBoundaryFinder::Line, true);
|
doTestData(testString, expectedBreakPositions, QTextBoundaryFinder::Line, true);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user