diff --git a/src/corelib/text/qbytearray.cpp b/src/corelib/text/qbytearray.cpp index a1b0f30d01..8a7db6ebe4 100644 --- a/src/corelib/text/qbytearray.cpp +++ b/src/corelib/text/qbytearray.cpp @@ -51,6 +51,7 @@ #include "qstringalgorithms_p.h" #include "qscopedpointer.h" #include "qbytearray_p.h" +#include "qstringconverter_p.h" #include #include @@ -426,6 +427,14 @@ int QtPrivate::compareMemory(QByteArrayView lhs, QByteArrayView rhs) return lhs.size() == rhs.size() ? 0 : lhs.size() > rhs.size() ? 1 : -1; } +/*! + \internal +*/ +bool QtPrivate::isValidUtf8(QByteArrayView s) noexcept +{ + return QUtf8::isValidUtf8(s).isValidUtf8; +} + // the CRC table below is created by the following piece of code #if 0 static void createCRC16Table() // build CRC16 lookup table @@ -2783,6 +2792,15 @@ bool QByteArray::isLower() const return true; } +/*! + \fn QByteArray::isValidUtf8() const + + Returns \c true if this byte array contains valid UTF-8 encoded data, + or \c false otherwise. + + \since 6.3 +*/ + /*! Returns a byte array that contains the first \a len bytes of this byte array. diff --git a/src/corelib/text/qbytearray.h b/src/corelib/text/qbytearray.h index 4f29018f32..0a604f1594 100644 --- a/src/corelib/text/qbytearray.h +++ b/src/corelib/text/qbytearray.h @@ -200,6 +200,11 @@ public: bool isUpper() const; bool isLower() const; + [[nodiscard]] bool isValidUtf8() const noexcept + { + return QtPrivate::isValidUtf8(qToByteArrayViewIgnoringNull(*this)); + } + void truncate(qsizetype pos); void chop(qsizetype n); diff --git a/src/corelib/text/qbytearrayalgorithms.h b/src/corelib/text/qbytearrayalgorithms.h index a78e6e1709..b669f065b9 100644 --- a/src/corelib/text/qbytearrayalgorithms.h +++ b/src/corelib/text/qbytearrayalgorithms.h @@ -74,6 +74,8 @@ qsizetype count(QByteArrayView haystack, QByteArrayView needle) noexcept; [[nodiscard]] Q_CORE_EXPORT Q_DECL_PURE_FUNCTION QByteArrayView trimmed(QByteArrayView s) noexcept; +[[nodiscard]] Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isValidUtf8(QByteArrayView s) noexcept; + } // namespace QtPrivate /***************************************************************************** diff --git a/src/corelib/text/qbytearrayview.h b/src/corelib/text/qbytearrayview.h index 1cd5b0333e..11db03c62f 100644 --- a/src/corelib/text/qbytearrayview.h +++ b/src/corelib/text/qbytearrayview.h @@ -288,6 +288,8 @@ public: inline int compare(QByteArrayView a, Qt::CaseSensitivity cs = Qt::CaseSensitive) const noexcept; + [[nodiscard]] inline bool isValidUtf8() const noexcept { return QtPrivate::isValidUtf8(*this); } + // // STL compatibility API: // diff --git a/src/corelib/text/qbytearrayview.qdoc b/src/corelib/text/qbytearrayview.qdoc index 624b056744..96bb1ccb56 100644 --- a/src/corelib/text/qbytearrayview.qdoc +++ b/src/corelib/text/qbytearrayview.qdoc @@ -376,6 +376,15 @@ \sa operator==() */ +/*! + \fn QByteArrayView::isValidUtf8() const + + Returns \c true if this byte array view contains valid UTF-8 encoded data, + or \c false otherwise. + + \since 6.3 +*/ + /*! \fn QByteArrayView::const_iterator QByteArrayView::begin() const diff --git a/src/corelib/text/qutf8stringview.h b/src/corelib/text/qutf8stringview.h index eeab604fa8..a6930c2e0f 100644 --- a/src/corelib/text/qutf8stringview.h +++ b/src/corelib/text/qutf8stringview.h @@ -285,6 +285,11 @@ public: constexpr void chop(qsizetype n) { verify(n); m_size -= n; } + [[nodiscard]] inline bool isValidUtf8() const noexcept + { + return QByteArrayView(reinterpret_cast(data()), size()).isValidUtf8(); + } + // // STL compatibility API: // diff --git a/src/corelib/text/qutf8stringview.qdoc b/src/corelib/text/qutf8stringview.qdoc index 683af4e423..deac0882fe 100644 --- a/src/corelib/text/qutf8stringview.qdoc +++ b/src/corelib/text/qutf8stringview.qdoc @@ -678,6 +678,15 @@ \sa sliced(), first(), last(), chopped(), truncate() */ +/*! + \fn QUtf8StringView::isValidUtf8() const + + Returns \c true if this string contains valid UTF-8 encoded data, + or \c false otherwise. + + \since 6.3 +*/ + /*! \fn template qToUtf8StringViewIgnoringNull(const QStringLike &s); \relates QUtf8StringView diff --git a/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp b/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp index 2fdb3ad0b5..b42019bf04 100644 --- a/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp +++ b/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp @@ -898,6 +898,19 @@ private Q_SLOTS: void indexOf_regexp_QString() { indexOf_contains_lastIndexOf_count_regexp_impl(); } void indexOf_regexp_QStringView_data() { indexOf_contains_lastIndexOf_count_regexp_data(); } void indexOf_regexp_QStringView() { indexOf_contains_lastIndexOf_count_regexp_impl(); } + +private: + void isValidUtf8_data(); + template + void isValidUtf8_impl() const; + +private Q_SLOTS: + void isValidUtf8_QByteArray_data() { isValidUtf8_data(); } + void isValidUtf8_QByteArray() { isValidUtf8_impl(); } + void isValidUtf8_QByteArrayView_data() { isValidUtf8_data(); } + void isValidUtf8_QByteArrayView() { isValidUtf8_impl(); } + void isValidUtf8_QUtf8StringView_data() { isValidUtf8_data(); } + void isValidUtf8_QUtf8StringView() { isValidUtf8_impl(); } }; namespace { @@ -2855,6 +2868,76 @@ void tst_QStringApiSymmetry::indexOf_contains_lastIndexOf_count_regexp_impl() co } } +void tst_QStringApiSymmetry::isValidUtf8_data() +{ + QTest::addColumn("ba"); + QTest::addColumn("valid"); + + int row = 0; + QTest::addRow("valid-%02d", row++) << QByteArray() << true; + QTest::addRow("valid-%02d", row++) << QByteArray("ascii") << true; + QTest::addRow("valid-%02d", row++) + << QByteArray("\xc2\xa2\xe0\xa4\xb9\xf0\x90\x8d\x88") << true; // U+00A2 U+0939 U+10348 + QTest::addRow("valid-%02d", row++) << QByteArray("\xf4\x8f\xbf\xbf") << true; // U+10FFFF + + row = 0; + QTest::addRow("overlong-%02d", row++) << QByteArray("\xc0\x00") << false; + QTest::addRow("overlong-%02d", row++) << QByteArray("\xc1\xff") << false; + QTest::addRow("overlong-%02d", row++) << QByteArray("\xe0\x00\x00") << false; + QTest::addRow("overlong-%02d", row++) << QByteArray("\xe0\xa0\x7f") << false; + QTest::addRow("overlong-%02d", row++) << QByteArray("\xf0\x00\x00\x00") << false; + QTest::addRow("overlong-%02d", row++) << QByteArray("\xf0\x90\x80\x7f") << false; + + row = 0; + QTest::addRow("short-%02d", row++) << QByteArray("\xc2") << false; + QTest::addRow("short-%02d", row++) << QByteArray("x\xc2") << false; + QTest::addRow("short-%02d", row++) << QByteArray("x\xc2y") << false; + QTest::addRow("short-%02d", row++) << QByteArray("\xc2y") << false; + QTest::addRow("short-%02d", row++) << QByteArray("\xe0\xa4") << false; + QTest::addRow("short-%02d", row++) << QByteArray("x\xe0\xa4") << false; + QTest::addRow("short-%02d", row++) << QByteArray("x\xe0\xa4y") << false; + QTest::addRow("short-%02d", row++) << QByteArray("\xe0\xa4y") << false; + QTest::addRow("short-%02d", row++) << QByteArray("\xe0") << false; + QTest::addRow("short-%02d", row++) << QByteArray("x\xe0") << false; + QTest::addRow("short-%02d", row++) << QByteArray("x\xe0y") << false; + QTest::addRow("short-%02d", row++) << QByteArray("\xe0y") << false; + QTest::addRow("short-%02d", row++) << QByteArray("\xf4\x8f\xbf") << false; + QTest::addRow("short-%02d", row++) << QByteArray("x\xf4\x8f\xbf") << false; + QTest::addRow("short-%02d", row++) << QByteArray("x\xf4\x8f\xbfy") << false; + QTest::addRow("short-%02d", row++) << QByteArray("\xf4\x8f\xbfy") << false; + QTest::addRow("short-%02d", row++) << QByteArray("\xf4\x8f") << false; + QTest::addRow("short-%02d", row++) << QByteArray("x\xf4\x8f") << false; + QTest::addRow("short-%02d", row++) << QByteArray("x\xf4\x8fy") << false; + QTest::addRow("short-%02d", row++) << QByteArray("\xf4\x8fy") << false; + QTest::addRow("short-%02d", row++) << QByteArray("\xf4") << false; + QTest::addRow("short-%02d", row++) << QByteArray("x\xf4") << false; + QTest::addRow("short-%02d", row++) << QByteArray("x\xf4y") << false; + QTest::addRow("short-%02d", row++) << QByteArray("\xf4y") << false; + + row = 0; + QTest::addRow("surrogates-%02d", row++) << QByteArray("\xed\x9f\xc0\xee\x80\x7f") << false; + QTest::addRow("surrogates-%02d", row++) << QByteArray("\xed\x9f\xc0") << false; + QTest::addRow("surrogates-%02d", row++) << QByteArray("\xee\x80\x7f") << false; + QTest::addRow("surrogates-%02d", row++) << QByteArray("\xee\x80\x7f\xed\x9f\xc0") << false; + + row = 0; + QTest::addRow("other-%02d", row++) << QByteArray("\xf4\x8f\xbf\xc0") << false; + QTest::addRow("other-%02d", row++) << QByteArray("\xf7\x80\x80\x80") << false; + QTest::addRow("other-%02d", row++) << QByteArray("\xfd\xbf\xbf\xbf\xbf") << false; + QTest::addRow("other-%02d", row++) << QByteArray("\xfe\xbf\xbf\xbf\xbf\xbf") << false; + QTest::addRow("other-%02d", row++) << QByteArray("\xff\xbf\xbf\xbf\xbf\xbf\xbf") << false; + QTest::addRow("other-%02d", row++) << QByteArray("\x80") << false; + QTest::addRow("other-%02d", row++) << QByteArray("\xbf") << false; +} + +template +void tst_QStringApiSymmetry::isValidUtf8_impl() const +{ + QFETCH(QByteArray, ba); + const String string(ba); + QTEST(string.isValidUtf8(), "valid"); +} + QTEST_APPLESS_MAIN(tst_QStringApiSymmetry) #include "tst_qstringapisymmetry.moc"