diff --git a/src/corelib/text/qunicodetools.cpp b/src/corelib/text/qunicodetools.cpp index f160c980cb..45538e5d9e 100644 --- a/src/corelib/text/qunicodetools.cpp +++ b/src/corelib/text/qunicodetools.cpp @@ -165,9 +165,50 @@ static void getGraphemeBreaks(const char16_t *string, qsizetype len, QCharAttrib QUnicodeTables::GraphemeBreakClass cls = (QUnicodeTables::GraphemeBreakClass) prop->graphemeBreakClass; bool shouldBreak = GB::shouldBreakBetweenClasses(lcls, cls); + bool handled = false; switch (state) { case GB::State::Normal: + break; // will deal with it below + + case GB::State::GB11_ExtPicExt: + Q_ASSERT(lcls == QUnicodeTables::GraphemeBreak_Extend); + if (cls == QUnicodeTables::GraphemeBreak_Extend) { + // keep going in the current state + Q_ASSERT(!shouldBreak); // GB9, do not break before Extend + handled = true; + } else if (cls == QUnicodeTables::GraphemeBreak_ZWJ) { + state = GB::State::GB11_ExtPicExtZWJ; + Q_ASSERT(!shouldBreak); // GB9, do not break before ZWJ + handled = true; + } else { + state = GB::State::Normal; + } + break; + + case GB::State::GB11_ExtPicExtZWJ: + Q_ASSERT(lcls == QUnicodeTables::GraphemeBreak_ZWJ); + if (cls == QUnicodeTables::GraphemeBreak_Extended_Pictographic) { + shouldBreak = false; + handled = true; + } + + state = GB::State::Normal; + break; + + case GB::State::GB12_13_RI: + Q_ASSERT(lcls == QUnicodeTables::GraphemeBreak_RegionalIndicator); + if (cls == QUnicodeTables::GraphemeBreak_RegionalIndicator) { + shouldBreak = false; + handled = true; + } + + state = GB::State::Normal; + break; + } + + if (!handled) { + Q_ASSERT(state == GB::State::Normal); if (lcls == QUnicodeTables::GraphemeBreak_Extended_Pictographic) { // GB11 if (cls == QUnicodeTables::GraphemeBreak_Extend) { state = GB::State::GB11_ExtPicExt; @@ -179,35 +220,6 @@ static void getGraphemeBreaks(const char16_t *string, qsizetype len, QCharAttrib } else if (cls == QUnicodeTables::GraphemeBreak_RegionalIndicator) { // GB12, GB13 state = GB::State::GB12_13_RI; } - - break; - case GB::State::GB11_ExtPicExt: - Q_ASSERT(lcls == QUnicodeTables::GraphemeBreak_Extend); - if (cls == QUnicodeTables::GraphemeBreak_Extend) { - // keep going in the current state - Q_ASSERT(!shouldBreak); // GB9, do not break before Extend - } else if (cls == QUnicodeTables::GraphemeBreak_ZWJ) { - state = GB::State::GB11_ExtPicExtZWJ; - Q_ASSERT(!shouldBreak); // GB9, do not break before ZWJ - } - - break; - - case GB::State::GB11_ExtPicExtZWJ: - Q_ASSERT(lcls == QUnicodeTables::GraphemeBreak_ZWJ); - if (cls == QUnicodeTables::GraphemeBreak_Extended_Pictographic) - shouldBreak = false; - - state = GB::State::Normal; - break; - - case GB::State::GB12_13_RI: - Q_ASSERT(lcls == QUnicodeTables::GraphemeBreak_RegionalIndicator); - if (cls == QUnicodeTables::GraphemeBreak_RegionalIndicator) - shouldBreak = false; - - state = GB::State::Normal; - break; } if (shouldBreak) diff --git a/tests/auto/corelib/text/qtextboundaryfinder/tst_qtextboundaryfinder.cpp b/tests/auto/corelib/text/qtextboundaryfinder/tst_qtextboundaryfinder.cpp index b48b751ea0..f9b597f604 100644 --- a/tests/auto/corelib/text/qtextboundaryfinder/tst_qtextboundaryfinder.cpp +++ b/tests/auto/corelib/text/qtextboundaryfinder/tst_qtextboundaryfinder.cpp @@ -51,6 +51,9 @@ private slots: void lineBoundariesDefault(); #endif + void graphemeBoundaries_manual_data(); + void graphemeBoundaries_manual(); + void wordBoundaries_manual_data(); void wordBoundaries_manual(); void sentenceBoundaries_manual_data(); @@ -286,6 +289,104 @@ void tst_QTextBoundaryFinder::lineBoundariesDefault() } #endif // QT_BUILD_INTERNAL +void tst_QTextBoundaryFinder::graphemeBoundaries_manual_data() +{ + QTest::addColumn<QString>("testString"); + QTest::addColumn<QList<int>>("expectedBreakPositions"); + + { + // QTBUG-94951 + QChar s[] = { QChar(0x2764), // U+2764 HEAVY BLACK HEART + QChar(0xFE0F), // U+FE0F VARIATION SELECTOR-16 + QChar(0xD83D), QChar(0xDCF2), // U+1F4F2 MOBILE PHONE WITH RIGHTWARDS ARROW AT LEFT + QChar(0xD83D), QChar(0xDCE9), // U+1F4E9 ENVELOPE WITH DOWNWARDS ARROW ABOVE + }; + QString testString(s, sizeof(s)/sizeof(s[0])); + + QList<int> expectedBreakPositions{0, 2, 4, 6}; + QTest::newRow("+EXTPICxEXT+EXTPIC+EXTPIC+") << testString << expectedBreakPositions; + } + + { + QChar s[] = { QChar(0x2764), // U+2764 HEAVY BLACK HEART + QChar(0xFE0F), // U+FE0F VARIATION SELECTOR-16 + QChar(0x2764), // U+2764 HEAVY BLACK HEART + QChar(0xFE0F), // U+FE0F VARIATION SELECTOR-16 + }; + QString testString(s, sizeof(s)/sizeof(s[0])); + + QList<int> expectedBreakPositions{0, 2, 4}; + QTest::newRow("+EXTPICxEXT+EXTPICxEXT+") << testString << expectedBreakPositions; + } + + { + QChar s[] = { QChar(0x2764), // U+2764 HEAVY BLACK HEART + QChar(0xFE0F), // U+FE0F VARIATION SELECTOR-16 + QChar(0xFE0F), // U+FE0F VARIATION SELECTOR-16 + QChar(0xFE0F), // U+FE0F VARIATION SELECTOR-16 + QChar(0x2764), // U+2764 HEAVY BLACK HEART + QChar(0xFE0F), // U+FE0F VARIATION SELECTOR-16 + QChar(0xFE0F), // U+FE0F VARIATION SELECTOR-16 + }; + QString testString(s, sizeof(s)/sizeof(s[0])); + + QList<int> expectedBreakPositions{0, 4, 7}; + QTest::newRow("+EXTPICxEXTxEXTxEXT+EXTPICxEXTxEXT+") << testString << expectedBreakPositions; + } + + { + QChar s[] = { QChar(0x2764), // U+2764 HEAVY BLACK HEART + QChar(0xFE0F), // U+FE0F VARIATION SELECTOR-16 + QChar(0xFE0F), // U+FE0F VARIATION SELECTOR-16 + QChar(0x200D), // U+200D ZERO WIDTH JOINER + QChar(0xD83D), QChar(0xDCF2), // U+1F4F2 MOBILE PHONE WITH RIGHTWARDS ARROW AT LEFT + QChar(0xFE0F), // U+FE0F VARIATION SELECTOR-16 + }; + QString testString(s, sizeof(s)/sizeof(s[0])); + + QList<int> expectedBreakPositions{0, 7}; + QTest::newRow("+EXTPICxEXTxEXTxZWJxEXTPICxEXTxEXT+") << testString << expectedBreakPositions; + } + + { + QChar s[] = { QChar(0x2764), // U+2764 HEAVY BLACK HEART + QChar(0xFE0F), // U+FE0F VARIATION SELECTOR-16 + QChar(0xFE0F), // U+FE0F VARIATION SELECTOR-16 + QChar(0x200D), // U+200D ZERO WIDTH JOINER + QChar(0x0041), // U+0041 LATIN CAPITAL LETTER A + QChar(0xD83D), QChar(0xDCF2), // U+1F4F2 MOBILE PHONE WITH RIGHTWARDS ARROW AT LEFT + }; + QString testString(s, sizeof(s)/sizeof(s[0])); + + QList<int> expectedBreakPositions{0, 4, 5, 7}; + QTest::newRow("+EXTPICxEXTxEXTxZWJ+Any+EXTPIC+") << testString << expectedBreakPositions; + } + + { + QChar s[] = { QChar(0x2764), // U+2764 HEAVY BLACK HEART + QChar(0xFE0F), // U+FE0F VARIATION SELECTOR-16 + QChar(0xD83C), QChar(0xDDEA), // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E + QChar(0xD83C), QChar(0xDDFA), // U+1F1FA REGIONAL INDICATOR SYMBOL LETTER U + QChar(0xD83C), QChar(0xDDEA), // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E + QChar(0xD83C), QChar(0xDDFA), // U+1F1FA REGIONAL INDICATOR SYMBOL LETTER U + QChar(0xD83C), QChar(0xDDEA), // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E + QChar(0x0041), // U+0041 LATIN CAPITAL LETTER A + }; + QString testString(s, sizeof(s)/sizeof(s[0])); + + QList<int> expectedBreakPositions{0, 2, 6, 10, 12, 13}; + QTest::newRow("+EXTPICxEXT+RIxRI+RIxRI+RI+ANY+") << testString << expectedBreakPositions; + } +} + +void tst_QTextBoundaryFinder::graphemeBoundaries_manual() +{ + QFETCH(QString, testString); + QFETCH(QList<int>, expectedBreakPositions); + + doTestData(testString, expectedBreakPositions, QTextBoundaryFinder::Grapheme); +} + void tst_QTextBoundaryFinder::wordBoundaries_manual_data() { QTest::addColumn<QString>("testString");