Make QUrl::setScheme only parse in strict mode (no decoding)

The URI RFC defines schemes as containing only a very restricted set
of characters, none of which require encoding, so don't even
try. Testing this behaviour in some web browsers indicate that they do
not accept percent-encoded schemes either.

Change-Id: I692dd20e1aac7e8a1bcb276cb5113b5802393d38
Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
This commit is contained in:
Thiago Macieira 2012-07-31 14:28:48 +02:00 committed by Qt by Nokia
parent f893d9ec41
commit e1038794b1
3 changed files with 14 additions and 19 deletions

View File

@ -656,7 +656,7 @@ inline void QUrlPrivate::appendQuery(QString &appendTo, QUrl::FormattingOptions
// setXXX functions
bool QUrlPrivate::setScheme(const QString &value, int len, bool decoded)
bool QUrlPrivate::setScheme(const QString &value, int len)
{
// schemes are strictly RFC-compliant:
// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
@ -687,19 +687,6 @@ bool QUrlPrivate::setScheme(const QString &value, int len, bool decoded)
if (p[i] == '+' || p[i] == '-' || p[i] == '.')
continue;
if (p[i] == '%') {
// found a percent-encoded sign
// if we haven't decoded yet, decode and try again
errorSupplement = '%';
if (decoded)
return false;
QString decodedScheme;
if (qt_urlRecode(decodedScheme, value.constData(), value.constData() + len, 0, 0) == 0)
return false;
return setScheme(decodedScheme, decodedScheme.length(), true);
}
// found something else
errorSupplement = p[i];
return false;
@ -1581,7 +1568,7 @@ void QUrl::setUrl(const QString &url, ParsingMode parsingMode)
/*!
Sets the scheme of the URL to \a scheme. As a scheme can only
contain ASCII characters, no conversion or encoding is done on the
contain ASCII characters, no conversion or decoding is done on the
input. It must also start with an ASCII letter.
The scheme describes the type (or protocol) of the URL. It's

View File

@ -122,7 +122,7 @@ public:
void appendFragment(QString &appendTo, QUrl::FormattingOptions options) const;
// the "end" parameters are like STL iterators: they point to one past the last valid element
bool setScheme(const QString &value, int len, bool decoded = false);
bool setScheme(const QString &value, int len);
bool setAuthority(const QString &auth, int from, int end);
void setUserInfo(const QString &userInfo, int from, int end);
void setUserName(const QString &value, int from, int end);

View File

@ -1833,9 +1833,8 @@ void tst_QUrl::strictParser_data()
QTest::addColumn<QString>("input");
QTest::addColumn<QString>("needle");
// cannot test bad schemes here, as they are parsed as paths instead
//QTest::newRow("invalid-scheme") << "ht%://example.com" << "Invalid scheme";
//QTest::newRow("empty-scheme") << ":/" << "Empty scheme";
QTest::newRow("invalid-scheme") << "ht%://example.com" << "Invalid scheme";
QTest::newRow("empty-scheme") << ":/" << "Empty scheme";
QTest::newRow("invalid-user1") << "http://bad<user_name>@ok-hostname" << "Invalid user name";
QTest::newRow("invalid-user2") << "http://bad%@ok-hostname" << "Invalid user name";
@ -1870,6 +1869,7 @@ void tst_QUrl::strictParser()
QFETCH(QString, needle);
QUrl url(input, QUrl::StrictMode);
QEXPECT_FAIL("empty-scheme", "QUrl does not forbid paths with a colon before the first slash yet", Abort);
QVERIFY(!url.isValid());
QVERIFY(!url.errorString().isEmpty());
if (!url.errorString().contains(needle))
@ -3038,6 +3038,9 @@ void tst_QUrl::setComponents_data()
QTest::newRow("invalid-scheme-2") << QUrl("http://example.com")
<< int(Scheme) << "http%40" << Tolerant << false
<< PrettyDecoded << "" << "";
QTest::newRow("invalid-scheme-3") << QUrl("http://example.com")
<< int(Scheme) << "http%61" << Strict << false
<< PrettyDecoded << "" << "";
QTest::newRow("invalid-host-1") << QUrl("http://example.com")
<< int(Host) << "-not-valid-" << Tolerant << false
@ -3047,6 +3050,10 @@ void tst_QUrl::setComponents_data()
<< PrettyDecoded << "" << "";
// -- test decoded behaviour --
// '%' characters are not permitted in the scheme, this tests that it fails to set anything
QTest::newRow("invalid-scheme-encode") << QUrl("http://example.com")
<< int(Scheme) << "http%61" << Decoded << false
<< PrettyDecoded << "" << "";
QTest::newRow("userinfo-encode") << QUrl("http://example.com")
<< int(UserInfo) << "h%61llo:world@" << Decoded << true
<< PrettyDecoded << "h%2561llo:world@" << "http://h%2561llo:world%40@example.com";
@ -3104,6 +3111,7 @@ void tst_QUrl::setComponents()
switch (component) {
case Scheme:
// scheme is only parsed in strict mode
copy.setScheme(newValue);
QCOMPARE(copy.scheme(), output);
break;