Make sure that QUrl::FullyDecoded mode uses U+FFFD for bad UTF-8
It's a good practice to always replace bad UTF-8 sequences with the replacement character. It could be considered a security issue too. Change-Id: I9e7d72e4c4102cdb8334449b5e7f882228a9048f Reviewed-by: David Faure (KDE) <faure@kde.org>
This commit is contained in:
parent
1fa5ea7a6a
commit
7b964c77fa
@ -507,6 +507,27 @@ non_trivial:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*!
|
||||
\since 5.0
|
||||
\internal
|
||||
|
||||
This function decodes a percent-encoded string located from \a begin to \a
|
||||
end, by appending each character to \a appendTo. It returns the number of
|
||||
characters appended. Each percent-encoded sequence is decoded as follows:
|
||||
|
||||
\list
|
||||
\li from %00 to %7F: the exact decoded value is appended;
|
||||
\li from %80 to %FF: QChar::ReplacementCharacter is appended;
|
||||
\li bad encoding: original input is copied to the output, undecoded.
|
||||
\endlist
|
||||
|
||||
Given the above, it's important for the input to already have all UTF-8
|
||||
percent sequences decoded by qt_urlRecode (that is, the input should not
|
||||
have been processed with QUrl::EncodeUnicode).
|
||||
|
||||
The input should also be a valid percent-encoded sequence (the output of
|
||||
qt_urlRecode is always valid).
|
||||
*/
|
||||
static int decode(QString &appendTo, const ushort *begin, const ushort *end)
|
||||
{
|
||||
const int origSize = appendTo.size();
|
||||
@ -537,6 +558,8 @@ static int decode(QString &appendTo, const ushort *begin, const ushort *end)
|
||||
|
||||
++input;
|
||||
*output++ = decodeNibble(input[0]) << 4 | decodeNibble(input[1]);
|
||||
if (output[-1] >= 0x80)
|
||||
output[-1] = QChar::ReplacementCharacter;
|
||||
input += 2;
|
||||
}
|
||||
|
||||
|
@ -1035,6 +1035,15 @@ void tst_QUrlInternal::encodingRecodeInvalidUtf8()
|
||||
if (!qt_urlRecode(output, input.constData(), input.constData() + input.length(), QUrl::FullyEncoded))
|
||||
output += input;
|
||||
QCOMPARE(output, QTest::currentDataTag() + input);
|
||||
|
||||
// verify for security reasons that all bad UTF-8 data got replaced by QChar::ReplacementCharacter
|
||||
output = QTest::currentDataTag();
|
||||
if (!qt_urlRecode(output, input.constData(), input.constData() + input.length(), QUrl::FullyEncoded))
|
||||
output += input;
|
||||
for (int i = strlen(QTest::currentDataTag()); i < output.length(); ++i) {
|
||||
QVERIFY2(output.at(i).unicode() < 0x80 || output.at(i) == QChar::ReplacementCharacter,
|
||||
qPrintable(QString("Character at i == %1 was U+%2").arg(i).arg(output.at(i).unicode(), 4, 16, QLatin1Char('0'))));
|
||||
}
|
||||
}
|
||||
|
||||
void tst_QUrlInternal::recodeByteArray_data()
|
||||
|
Loading…
Reference in New Issue
Block a user