QTextBoundaryFinder: don't break after uppercase followed by comma

[ChangeLog][QtCore][QTextBoundaryFinder] Sentence breaking now
no longer breaks between uppercase letters and comma.
This is a deviation from the Unicode specification,
but produces less surprising behavior.

Fixes: QTBUG-75857
Change-Id: If1e78b3be3f20250d01100353ea7da6110985f82
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
This commit is contained in:
wangChuan 2019-05-21 16:08:25 +08:00
parent 1632786f00
commit b0493998c3
2 changed files with 8 additions and 1 deletions

View File

@ -327,7 +327,7 @@ static const uchar breakTable[BAfter + 1][QUnicodeTables::NumSentenceBreakClasse
// Any CR LF Sep Extend Sp Lower Upper OLetter Numeric ATerm SContinue STerm Close
{ Initial, BAfterC, BAfter , BAfter , Initial, Initial, Lower , Upper , Initial, Initial, ATerm , Initial, STerm , Initial }, // Initial
{ Initial, BAfterC, BAfter , BAfter , Lower , Initial, Initial, Initial, Initial, Initial, LUATerm, Initial, STerm , Initial }, // Lower
{ Initial, BAfterC, BAfter , BAfter , Upper , Initial, Initial, Upper , Initial, Initial, LUATerm, STerm , STerm , Initial }, // Upper
{ Initial, BAfterC, BAfter , BAfter , Upper , Initial, Initial, Upper , Initial, Initial, LUATerm, Initial, STerm , Initial }, // Upper
{ Lookup , BAfterC, BAfter , BAfter , LUATerm, ACS , Initial, Upper , Break , Initial, ATerm , STerm , STerm , ATermC }, // LUATerm
{ Lookup , BAfterC, BAfter , BAfter , ATerm , ACS , Initial, Break , Break , Initial, ATerm , STerm , STerm , ATermC }, // ATerm

View File

@ -539,6 +539,13 @@ void tst_QTextBoundaryFinder::sentenceBoundaries_manual_data()
QTest::newRow("data3") << testString << expectedBreakPositions;
}
{
QString testString(QString::fromUtf8("Doing TEST, doing another test."));
QList<int> expectedBreakPositions;
expectedBreakPositions << 0 << 31;
QTest::newRow("data4") << testString << expectedBreakPositions;
}
}
void tst_QTextBoundaryFinder::sentenceBoundaries_manual()