Make sure that QUrl::FullyDecoded mode uses U+FFFD for bad UTF-8

It's good practice to always replace bad UTF-8 sequences with the
replacement character. Leaving them in the decoded output could be
considered a security issue too.

Change-Id: I9e7d72e4c4102cdb8334449b5e7f882228a9048f
Reviewed-by: David Faure (KDE) <faure@kde.org>
This commit is contained in:
Thiago Macieira 2013-07-19 20:16:47 -07:00 committed by The Qt Project
parent 1fa5ea7a6a
commit 7b964c77fa
2 changed files with 32 additions and 0 deletions

View File

@ -507,6 +507,27 @@ non_trivial:
return 0;
}
/*!
\since 5.0
\internal
This function decodes a percent-encoded string located from \a begin to \a
end, by appending each character to \a appendTo. It returns the number of
characters appended. Each percent-encoded sequence is decoded as follows:
\list
\li from %00 to %7F: the exact decoded value is appended;
\li from %80 to %FF: QChar::ReplacementCharacter is appended;
\li bad encoding: original input is copied to the output, undecoded.
\endlist
Given the above, it's important for the input to already have all UTF-8
percent sequences decoded by qt_urlRecode (that is, the input should not
have been processed with QUrl::EncodeUnicode).
The input should also be a valid percent-encoded sequence (the output of
qt_urlRecode is always valid).
*/
static int decode(QString &appendTo, const ushort *begin, const ushort *end)
{
const int origSize = appendTo.size();
@ -537,6 +558,8 @@ static int decode(QString &appendTo, const ushort *begin, const ushort *end)
++input;
*output++ = decodeNibble(input[0]) << 4 | decodeNibble(input[1]);
if (output[-1] >= 0x80)
output[-1] = QChar::ReplacementCharacter;
input += 2;
}

View File

@ -1035,6 +1035,15 @@ void tst_QUrlInternal::encodingRecodeInvalidUtf8()
if (!qt_urlRecode(output, input.constData(), input.constData() + input.length(), QUrl::FullyEncoded))
output += input;
QCOMPARE(output, QTest::currentDataTag() + input);
// verify for security reasons that all bad UTF-8 data got replaced by QChar::ReplacementCharacter
output = QTest::currentDataTag();
if (!qt_urlRecode(output, input.constData(), input.constData() + input.length(), QUrl::FullyEncoded))
output += input;
for (int i = strlen(QTest::currentDataTag()); i < output.length(); ++i) {
QVERIFY2(output.at(i).unicode() < 0x80 || output.at(i) == QChar::ReplacementCharacter,
qPrintable(QString("Character at i == %1 was U+%2").arg(i).arg(output.at(i).unicode(), 4, 16, QLatin1Char('0'))));
}
}
void tst_QUrlInternal::recodeByteArray_data()