Add isValidUtf8() methods to QUtf8StringView and QByteArray{,View}

The new methods return true if the string contains valid UTF-8
encoded data, or false otherwise.

[ChangeLog][QtCore][QByteArray] Added isValidUtf8() method.

[ChangeLog][QtCore][QByteArrayView] Added isValidUtf8() method.

[ChangeLog][QtCore][QUtf8StringView] Added isValidUtf8() method.

Task-number: QTBUG-92021
Change-Id: I5d0cb613265d98b1f189c5f5cc09c1f7db302272
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
This commit is contained in:
Ievgenii Meshcheriakov 2021-08-26 13:04:58 +02:00
parent 9e770e3572
commit fe46cd59ce
8 changed files with 133 additions and 0 deletions

View File

@ -51,6 +51,7 @@
#include "qstringalgorithms_p.h"
#include "qscopedpointer.h"
#include "qbytearray_p.h"
#include "qstringconverter_p.h"
#include <qdatastream.h>
#include <qmath.h>
@ -426,6 +427,14 @@ int QtPrivate::compareMemory(QByteArrayView lhs, QByteArrayView rhs)
return lhs.size() == rhs.size() ? 0 : lhs.size() > rhs.size() ? 1 : -1;
}
/*!
\internal
*/
bool QtPrivate::isValidUtf8(QByteArrayView s) noexcept
{
    // Run the UTF-8 validator over the view and report the isValidUtf8
    // member of the result it returns.
    const auto verdict = QUtf8::isValidUtf8(s);
    return verdict.isValidUtf8;
}
// the CRC table below is created by the following piece of code
#if 0
static void createCRC16Table() // build CRC16 lookup table
@ -2783,6 +2792,15 @@ bool QByteArray::isLower() const
return true;
}
/*!
\fn QByteArray::isValidUtf8() const
Returns \c true if this byte array contains valid UTF-8 encoded data,
or \c false otherwise.
\since 6.3
*/
/*!
Returns a byte array that contains the first \a len bytes of this byte
array.

View File

@ -200,6 +200,11 @@ public:
bool isUpper() const;
bool isLower() const;
[[nodiscard]] bool isValidUtf8() const noexcept
{
    // Validate through a view of this array; the actual check is done by
    // QtPrivate::isValidUtf8().
    const auto bytes = qToByteArrayViewIgnoringNull(*this);
    return QtPrivate::isValidUtf8(bytes);
}
void truncate(qsizetype pos);
void chop(qsizetype n);

View File

@ -74,6 +74,8 @@ qsizetype count(QByteArrayView haystack, QByteArrayView needle) noexcept;
[[nodiscard]] Q_CORE_EXPORT Q_DECL_PURE_FUNCTION QByteArrayView trimmed(QByteArrayView s) noexcept;
[[nodiscard]] Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isValidUtf8(QByteArrayView s) noexcept;
} // namespace QtPrivate
/*****************************************************************************

View File

@ -288,6 +288,8 @@ public:
inline int compare(QByteArrayView a, Qt::CaseSensitivity cs = Qt::CaseSensitive) const noexcept;
[[nodiscard]] inline bool isValidUtf8() const noexcept
{
    // Delegates to the out-of-line QtPrivate::isValidUtf8() declared in
    // qbytearrayalgorithms.h, passing this view by value.
    return QtPrivate::isValidUtf8(*this);
}
//
// STL compatibility API:
//

View File

@ -376,6 +376,15 @@
\sa operator==()
*/
/*!
\fn QByteArrayView::isValidUtf8() const
Returns \c true if this byte array view contains valid UTF-8 encoded data,
or \c false otherwise.
\since 6.3
*/
/*!
\fn QByteArrayView::const_iterator QByteArrayView::begin() const

View File

@ -285,6 +285,11 @@ public:
constexpr void chop(qsizetype n)
{
    // NOTE(review): verify() is defined outside this excerpt; presumably it
    // asserts that n is within the view's bounds - confirm.
    verify(n);
    // Drop the last n code units by reducing the stored size.
    m_size -= n;
}
[[nodiscard]] inline bool isValidUtf8() const noexcept
{
return QByteArrayView(reinterpret_cast<const char *>(data()), size()).isValidUtf8();
}
//
// STL compatibility API:
//

View File

@ -678,6 +678,15 @@
\sa sliced(), first(), last(), chopped(), truncate()
*/
/*!
\fn QUtf8StringView::isValidUtf8() const
Returns \c true if this string contains valid UTF-8 encoded data,
or \c false otherwise.
\since 6.3
*/
/*!
\fn template <typename QStringLike> qToUtf8StringViewIgnoringNull(const QStringLike &s);
\relates QUtf8StringView

View File

@ -898,6 +898,19 @@ private Q_SLOTS:
// Each slot instantiates the shared regexp test body for one string type;
// each *_data slot calls the shared data-table provider.
void indexOf_regexp_QString() { indexOf_contains_lastIndexOf_count_regexp_impl<QString>(); }
void indexOf_regexp_QStringView_data() { indexOf_contains_lastIndexOf_count_regexp_data(); }
void indexOf_regexp_QStringView() { indexOf_contains_lastIndexOf_count_regexp_impl<QStringView>(); }
private:
// Shared helpers for the isValidUtf8() tests: isValidUtf8_data() fills the
// data table, isValidUtf8_impl<String>() runs the check for one string type.
void isValidUtf8_data();
template<typename String>
void isValidUtf8_impl() const;
private Q_SLOTS:
// One <name>_data / <name> slot pair per tested type. Every _data slot calls
// the same table provider; every test slot instantiates the shared body with
// the type under test (QByteArray, QByteArrayView, QUtf8StringView).
void isValidUtf8_QByteArray_data() { isValidUtf8_data(); }
void isValidUtf8_QByteArray() { isValidUtf8_impl<QByteArray>(); }
void isValidUtf8_QByteArrayView_data() { isValidUtf8_data(); }
void isValidUtf8_QByteArrayView() { isValidUtf8_impl<QByteArrayView>(); }
void isValidUtf8_QUtf8StringView_data() { isValidUtf8_data(); }
void isValidUtf8_QUtf8StringView() { isValidUtf8_impl<QUtf8StringView>(); }
};
namespace {
@ -2855,6 +2868,76 @@ void tst_QStringApiSymmetry::indexOf_contains_lastIndexOf_count_regexp_impl() co
}
}
// Fills the data table shared by all isValidUtf8_<String>() test slots.
// Column "ba" holds the raw bytes under test, column "valid" the expected
// result of isValidUtf8(). Row numbering restarts at 00 for every category
// (valid, overlong, short, surrogates, other).
void tst_QStringApiSymmetry::isValidUtf8_data()
{
    QTest::addColumn<QByteArray>("ba");
    QTest::addColumn<bool>("valid");

    // Well-formed input: empty, pure ASCII, 2/3/4-byte sequences and the
    // highest encodable code point.
    int row = 0;
    QTest::addRow("valid-%02d", row++) << QByteArray() << true;
    QTest::addRow("valid-%02d", row++) << QByteArray("ascii") << true;
    QTest::addRow("valid-%02d", row++)
        << QByteArray("\xc2\xa2\xe0\xa4\xb9\xf0\x90\x8d\x88") << true; // U+00A2 U+0939 U+10348
    QTest::addRow("valid-%02d", row++) << QByteArray("\xf4\x8f\xbf\xbf") << true; // U+10FFFF

    // Lead bytes that could only start overlong encodings, followed by bytes
    // that are not valid continuation bytes.
    // Inputs with embedded NUL bytes are given an explicit length:
    // QByteArray(const char *) stops at the first NUL and would silently
    // shrink them to the lone lead byte, duplicating the "short" rows below.
    row = 0;
    QTest::addRow("overlong-%02d", row++) << QByteArray("\xc0\x00", 2) << false;
    QTest::addRow("overlong-%02d", row++) << QByteArray("\xc1\xff") << false;
    QTest::addRow("overlong-%02d", row++) << QByteArray("\xe0\x00\x00", 3) << false;
    QTest::addRow("overlong-%02d", row++) << QByteArray("\xe0\xa0\x7f") << false;
    QTest::addRow("overlong-%02d", row++) << QByteArray("\xf0\x00\x00\x00", 4) << false;
    QTest::addRow("overlong-%02d", row++) << QByteArray("\xf0\x90\x80\x7f") << false;

    // Truncated multi-byte sequences: at the start, at the end, and in the
    // middle of the data, for 2-, 3- and 4-byte lead bytes.
    row = 0;
    QTest::addRow("short-%02d", row++) << QByteArray("\xc2") << false;
    QTest::addRow("short-%02d", row++) << QByteArray("x\xc2") << false;
    QTest::addRow("short-%02d", row++) << QByteArray("x\xc2y") << false;
    QTest::addRow("short-%02d", row++) << QByteArray("\xc2y") << false;

    QTest::addRow("short-%02d", row++) << QByteArray("\xe0\xa4") << false;
    QTest::addRow("short-%02d", row++) << QByteArray("x\xe0\xa4") << false;
    QTest::addRow("short-%02d", row++) << QByteArray("x\xe0\xa4y") << false;
    QTest::addRow("short-%02d", row++) << QByteArray("\xe0\xa4y") << false;

    QTest::addRow("short-%02d", row++) << QByteArray("\xe0") << false;
    QTest::addRow("short-%02d", row++) << QByteArray("x\xe0") << false;
    QTest::addRow("short-%02d", row++) << QByteArray("x\xe0y") << false;
    QTest::addRow("short-%02d", row++) << QByteArray("\xe0y") << false;

    QTest::addRow("short-%02d", row++) << QByteArray("\xf4\x8f\xbf") << false;
    QTest::addRow("short-%02d", row++) << QByteArray("x\xf4\x8f\xbf") << false;
    QTest::addRow("short-%02d", row++) << QByteArray("x\xf4\x8f\xbfy") << false;
    QTest::addRow("short-%02d", row++) << QByteArray("\xf4\x8f\xbfy") << false;

    QTest::addRow("short-%02d", row++) << QByteArray("\xf4\x8f") << false;
    QTest::addRow("short-%02d", row++) << QByteArray("x\xf4\x8f") << false;
    QTest::addRow("short-%02d", row++) << QByteArray("x\xf4\x8fy") << false;
    QTest::addRow("short-%02d", row++) << QByteArray("\xf4\x8fy") << false;

    QTest::addRow("short-%02d", row++) << QByteArray("\xf4") << false;
    QTest::addRow("short-%02d", row++) << QByteArray("x\xf4") << false;
    QTest::addRow("short-%02d", row++) << QByteArray("x\xf4y") << false;
    QTest::addRow("short-%02d", row++) << QByteArray("\xf4y") << false;

    // Malformed sequences bordering the surrogate range (U+D800..U+DFFF).
    // NOTE(review): none of these rows is an actual encoded surrogate
    // (ED A0 80 .. ED BF BF); each is rejected because of its out-of-range
    // final byte. Consider adding real surrogate encodings as well.
    row = 0;
    QTest::addRow("surrogates-%02d", row++) << QByteArray("\xed\x9f\xc0\xee\x80\x7f") << false;
    QTest::addRow("surrogates-%02d", row++) << QByteArray("\xed\x9f\xc0") << false;
    QTest::addRow("surrogates-%02d", row++) << QByteArray("\xee\x80\x7f") << false;
    QTest::addRow("surrogates-%02d", row++) << QByteArray("\xee\x80\x7f\xed\x9f\xc0") << false;

    // Miscellaneous: bad final continuation byte, value above U+10FFFF,
    // lead bytes that UTF-8 never uses, and lone continuation bytes.
    row = 0;
    QTest::addRow("other-%02d", row++) << QByteArray("\xf4\x8f\xbf\xc0") << false;
    QTest::addRow("other-%02d", row++) << QByteArray("\xf7\x80\x80\x80") << false;
    QTest::addRow("other-%02d", row++) << QByteArray("\xfd\xbf\xbf\xbf\xbf") << false;
    QTest::addRow("other-%02d", row++) << QByteArray("\xfe\xbf\xbf\xbf\xbf\xbf") << false;
    QTest::addRow("other-%02d", row++) << QByteArray("\xff\xbf\xbf\xbf\xbf\xbf\xbf") << false;
    QTest::addRow("other-%02d", row++) << QByteArray("\x80") << false;
    QTest::addRow("other-%02d", row++) << QByteArray("\xbf") << false;
}
// Shared body of the isValidUtf8_<String>() test slots: constructs a String
// from the current data row's "ba" bytes and compares its isValidUtf8()
// result with the row's "valid" column.
template<typename String>
void tst_QStringApiSymmetry::isValidUtf8_impl() const
{
    // Fetch the raw bytes of the current row into a local QByteArray.
    QFETCH(QByteArray, ba);
    // String may be an owning or a view type; either way it is built from ba,
    // which stays alive until the end of this function.
    const String string(ba);
    // QTEST looks up the "valid" column itself and compares it to the result.
    QTEST(string.isValidUtf8(), "valid");
}
QTEST_APPLESS_MAIN(tst_QStringApiSymmetry)
#include "tst_qstringapisymmetry.moc"