From 7b964c77fa15fa3fbf18538ebcdf6b09ffbbbd0e Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Fri, 19 Jul 2013 20:16:47 -0700 Subject: [PATCH] Make sure that QUrl::FullyDecoded mode uses U+FFFD for bad UTF-8 It's a good practice to always replace bad UTF-8 sequences with the replacement character. It could be considered a security issue too. Change-Id: I9e7d72e4c4102cdb8334449b5e7f882228a9048f Reviewed-by: David Faure (KDE) --- src/corelib/io/qurlrecode.cpp | 23 +++++++++++++++++++ .../io/qurlinternal/tst_qurlinternal.cpp | 9 ++++++++ 2 files changed, 32 insertions(+) diff --git a/src/corelib/io/qurlrecode.cpp b/src/corelib/io/qurlrecode.cpp index e4f37574a3..7e77b9c251 100644 --- a/src/corelib/io/qurlrecode.cpp +++ b/src/corelib/io/qurlrecode.cpp @@ -507,6 +507,27 @@ non_trivial: return 0; } +/*! + \since 5.0 + \internal + + This function decodes a percent-encoded string located from \a begin to \a + end, by appending each character to \a appendTo. It returns the number of + characters appended. Each percent-encoded sequence is decoded as follows: + + \list + \li from %00 to %7F: the exact decoded value is appended; + \li from %80 to %FF: QChar::ReplacementCharacter is appended; + \li bad encoding: original input is copied to the output, undecoded. + \endlist + + Given the above, it's important for the input to already have all UTF-8 + percent sequences decoded by qt_urlRecode (that is, the input should not + have been processed with QUrl::EncodeUnicode). + + The input should also be a valid percent-encoded sequence (the output of + qt_urlRecode is always valid). +*/ static int decode(QString &appendTo, const ushort *begin, const ushort *end) { const int origSize = appendTo.size(); @@ -537,6 +558,8 @@ static int decode(QString &appendTo, const ushort *begin, const ushort *end) ++input; *output++ = decodeNibble(input[0]) << 4 | decodeNibble(input[1]); + if (output[-1] >= 0x80) + output[-1] = QChar::ReplacementCharacter; input += 2; } diff --git a/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp b/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp index b39b34e494..05d5f94e3d 100644 --- a/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp +++ b/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp @@ -1035,6 +1035,15 @@ void tst_QUrlInternal::encodingRecodeInvalidUtf8() if (!qt_urlRecode(output, input.constData(), input.constData() + input.length(), QUrl::FullyEncoded)) output += input; QCOMPARE(output, QTest::currentDataTag() + input); + + // verify for security reasons that all bad UTF-8 data got replaced by QChar::ReplacementCharacter + output = QTest::currentDataTag(); + if (!qt_urlRecode(output, input.constData(), input.constData() + input.length(), QUrl::FullyEncoded)) + output += input; + for (int i = strlen(QTest::currentDataTag()); i < output.length(); ++i) { + QVERIFY2(output.at(i).unicode() < 0x80 || output.at(i) == QChar::ReplacementCharacter, + qPrintable(QString("Character at i == %1 was U+%2").arg(i).arg(output.at(i).unicode(), 4, 16, QLatin1Char('0')))); + } } void tst_QUrlInternal::recodeByteArray_data()