Introduce QString(View)::isValidUtf16
QString(View)s can be built or manipulated in ways that make them contain/refer to improperly encoded UTF-16 data. The problem is that we don't have public APIs to check whether a string contains valid UTF-16. This knowledge is valuable if the string is to be fed into algorithms, regular expressions, etc. that expect validated input (e.g. QRegularExpression can be faster if it can assume valid UTF-16; otherwise it has to employ extra checks). Add a function that does the validation. [ChangeLog][QtCore][QStringView] Added QStringView::isValidUtf16. [ChangeLog][QtCore][QString] Added QString::isValidUtf16. Change-Id: Idd699183f6ec08013046c76c6a5a7c524b6c6fbc Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
parent
556712f511
commit
a2ddd96ac8
@ -591,6 +591,20 @@ bool QtPrivate::isLatin1(QStringView s) noexcept
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reports whether the UTF-16 data referenced by \a s can be decoded from
// start to end. Iterates the view with QStringIterator and returns false as
// soon as a decode step yields the sentinel value; returns true when the
// loop finishes without seeing it (which includes the case where the loop
// body never runs).
bool QtPrivate::isValidUtf16(QStringView s) noexcept
|
||||
{
|
||||
// Sentinel handed to QStringIterator::next(). UINT_MAX lies above the
// largest Unicode code point (0x10FFFF), so it cannot be confused with a
// successfully decoded value.
// NOTE(review): presumably next() returns its argument when the upcoming
// code unit(s) do not form a valid code point -- confirm against QStringIterator.
Q_CONSTEXPR uint InvalidCodePoint = UINT_MAX;
|
||||
|
||||
QStringIterator i(s);
|
||||
while (i.hasNext()) {
|
||||
uint c = i.next(InvalidCodePoint);
|
||||
// First undecodable position decides the result; no need to scan further.
if (c == InvalidCodePoint)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// conversion between Latin 1 and UTF-16
|
||||
void qt_from_latin1(ushort *dst, const char *str, size_t size) noexcept
|
||||
{
|
||||
@ -9046,6 +9060,21 @@ bool QString::isRightToLeft() const
|
||||
return QtPrivate::isRightToLeft(QStringView(*this));
|
||||
}
|
||||
|
||||
/*!
|
||||
\fn bool QString::isValidUtf16() const noexcept
|
||||
\since 5.15
|
||||
|
||||
Returns \c true if the string contains valid UTF-16 encoded data,
|
||||
or \c false otherwise.
|
||||
|
||||
Note that this function does not perform any special validation of the
|
||||
data; it merely checks if it can be successfully decoded from UTF-16.
|
||||
The data is assumed to be in host byte order; the presence of a BOM
|
||||
is meaningless.
|
||||
|
||||
\sa QStringView::isValidUtf16()
|
||||
*/
|
||||
|
||||
/*! \fn QChar *QString::data()
|
||||
|
||||
Returns a pointer to the data stored in the QString. The pointer
|
||||
|
@ -919,6 +919,8 @@ public:
|
||||
|
||||
bool isSimpleText() const;
|
||||
bool isRightToLeft() const;
|
||||
// Inline convenience wrapper: builds a QStringView over this string and
// returns the result of QStringView::isValidUtf16() on it.
Q_REQUIRED_RESULT bool isValidUtf16() const noexcept
|
||||
{ return QStringView(*this).isValidUtf16(); }
|
||||
|
||||
QString(int size, Qt::Initialization);
|
||||
Q_DECL_CONSTEXPR inline QString(QStringDataPtr dd) : d(dd.ptr) {}
|
||||
|
@ -99,6 +99,7 @@ Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isAscii(QLatin1String
|
||||
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isAscii(QStringView s) noexcept;
|
||||
Q_REQUIRED_RESULT Q_DECL_CONSTEXPR inline bool isLatin1(QLatin1String s) noexcept;
|
||||
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isLatin1(QStringView s) noexcept;
|
||||
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isValidUtf16(QStringView s) noexcept;
|
||||
|
||||
} // namespace QtPrivate
|
||||
|
||||
|
@ -864,6 +864,21 @@ QT_BEGIN_NAMESPACE
|
||||
\sa QString::isRightToLeft()
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn bool QStringView::isValidUtf16() const noexcept
|
||||
\since 5.15
|
||||
|
||||
Returns \c true if the string contains valid UTF-16 encoded data,
|
||||
or \c false otherwise.
|
||||
|
||||
Note that this function does not perform any special validation of the
|
||||
data; it merely checks if it can be successfully decoded from UTF-16.
|
||||
The data is assumed to be in host byte order; the presence of a BOM
|
||||
is meaningless.
|
||||
|
||||
\sa QString::isValidUtf16()
|
||||
*/
|
||||
|
||||
/*!
|
||||
\fn QStringView::toWCharArray(wchar_t *array) const
|
||||
\since 5.14
|
||||
|
@ -294,6 +294,8 @@ public:
|
||||
|
||||
Q_REQUIRED_RESULT bool isRightToLeft() const noexcept
|
||||
{ return QtPrivate::isRightToLeft(*this); }
|
||||
// Inline wrapper: passes this view to QtPrivate::isValidUtf16() and returns
// its result.
Q_REQUIRED_RESULT bool isValidUtf16() const noexcept
|
||||
{ return QtPrivate::isValidUtf16(*this); }
|
||||
|
||||
Q_REQUIRED_RESULT inline int toWCharArray(wchar_t *array) const; // defined in qstring.h
|
||||
|
||||
|
@ -596,6 +596,8 @@ private slots:
|
||||
void assignQChar();
|
||||
void isRightToLeft_data();
|
||||
void isRightToLeft();
|
||||
void isValidUtf16_data();
|
||||
void isValidUtf16();
|
||||
void unicodeStrings();
|
||||
};
|
||||
|
||||
@ -7025,6 +7027,52 @@ void tst_QString::isRightToLeft()
|
||||
QCOMPARE(unicode.isRightToLeft(), rtl);
|
||||
}
|
||||
|
||||
// Data table for tst_QString::isValidUtf16(): each row supplies an input
// string ("string") and the result isValidUtf16() is expected to give for it
// ("valid"). Rows are grouped by name prefix ("valid-", "stray-high-",
// "stray-low-") and numbered within each group via the running `row` counter.
void tst_QString::isValidUtf16_data()
|
||||
{
|
||||
QTest::addColumn<QString>("string");
|
||||
QTest::addColumn<bool>("valid");
|
||||
|
||||
int row = 0;
|
||||
// Inputs expected to be reported as valid: null and empty strings, ASCII,
// non-ASCII BMP characters, characters outside the BMP (stored as surrogate
// pairs), and strings embedding U+0000 or U+FFFF.
QTest::addRow("valid-%02d", row++) << QString() << true;
|
||||
QTest::addRow("valid-%02d", row++) << QString("") << true;
|
||||
QTest::addRow("valid-%02d", row++) << QString("abc def") << true;
|
||||
QTest::addRow("valid-%02d", row++) << QString("àbç") << true;
|
||||
QTest::addRow("valid-%02d", row++) << QString("ßẞ") << true;
|
||||
QTest::addRow("valid-%02d", row++) << QString("𝐀𝐁𝐂abc𝐃𝐄𝐅def") << true;
|
||||
QTest::addRow("valid-%02d", row++) << QString("abc𝐀𝐁𝐂def𝐃𝐄𝐅") << true;
|
||||
QTest::addRow("valid-%02d", row++) << (QString("abc") + QChar(0x0000) + QString("def")) << true;
|
||||
QTest::addRow("valid-%02d", row++) << (QString("abc") + QChar(0xFFFF) + QString("def")) << true;
|
||||
// check that BOM presence doesn't make any difference
|
||||
QTest::addRow("valid-%02d", row++) << (QString() + QChar(0xFEFF) + QString("abc𝐀𝐁𝐂def𝐃𝐄𝐅")) << true;
|
||||
QTest::addRow("valid-%02d", row++) << (QString() + QChar(0xFFFE) + QString("abc𝐀𝐁𝐂def𝐃𝐄𝐅")) << true;
|
||||
|
||||
// Restart numbering for the next group of row names.
row = 0;
|
||||
// Unpaired high surrogate (0xD800), single or doubled, at the start, in the
// middle and at the end of the string: all expected to be invalid.
QTest::addRow("stray-high-%02d", row++) << (QString() + QChar(0xD800)) << false;
|
||||
QTest::addRow("stray-high-%02d", row++) << (QString() + QString("abc") + QChar(0xD800)) << false;
|
||||
QTest::addRow("stray-high-%02d", row++) << (QString() + QChar(0xD800) + QString("def")) << false;
|
||||
QTest::addRow("stray-high-%02d", row++) << (QString() + QString("abc") + QChar(0xD800) + QString("def")) << false;
|
||||
QTest::addRow("stray-high-%02d", row++) << (QString() + QChar(0xD800) + QChar(0xD800)) << false;
|
||||
QTest::addRow("stray-high-%02d", row++) << (QString() + QString("abc") + QChar(0xD800) + QChar(0xD800)) << false;
|
||||
QTest::addRow("stray-high-%02d", row++) << (QString() + QChar(0xD800) + QChar(0xD800) + QString("def")) << false;
|
||||
QTest::addRow("stray-high-%02d", row++) << (QString() + QString("abc") + QChar(0xD800) + QChar(0xD800) + QString("def")) << false;
|
||||
|
||||
// Restart numbering for the next group of row names.
row = 0;
|
||||
// Same placements as above, but with an unpaired low surrogate (0xDC00).
QTest::addRow("stray-low-%02d", row++) << (QString() + QChar(0xDC00)) << false;
|
||||
QTest::addRow("stray-low-%02d", row++) << (QString() + QString("abc") + QChar(0xDC00)) << false;
|
||||
QTest::addRow("stray-low-%02d", row++) << (QString() + QChar(0xDC00) + QString("def")) << false;
|
||||
QTest::addRow("stray-low-%02d", row++) << (QString() + QString("abc") + QChar(0xDC00) + QString("def")) << false;
|
||||
QTest::addRow("stray-low-%02d", row++) << (QString() + QChar(0xDC00) + QChar(0xDC00)) << false;
|
||||
QTest::addRow("stray-low-%02d", row++) << (QString() + QString("abc") + QChar(0xDC00) + QChar(0xDC00)) << false;
|
||||
QTest::addRow("stray-low-%02d", row++) << (QString() + QChar(0xDC00) + QChar(0xDC00) + QString("def")) << false;
|
||||
QTest::addRow("stray-low-%02d", row++) << (QString() + QString("abc") + QChar(0xDC00) + QChar(0xDC00) + QString("def")) << false;
|
||||
}
|
||||
|
||||
// Data-driven test body: fetches the "string" column of the current row and
// checks string.isValidUtf16() against that row's "valid" column.
void tst_QString::isValidUtf16()
|
||||
{
|
||||
QFETCH(QString, string);
|
||||
QTEST(string.isValidUtf16(), "valid");
|
||||
}
|
||||
|
||||
QTEST_APPLESS_MAIN(tst_QString)
|
||||
|
||||
#include "tst_qstring.moc"
|
||||
|
Loading…
Reference in New Issue
Block a user