QUrl stringprep: fix handling of prohibited characters
RFC 3454 says about prohibited characters (section 2, "Preparation Overview"):

    3) Prohibit -- Check for any characters that are not allowed in the
    output. If any are found, return an error. This is described in
    section 5.

In other words, we mustn't simply strip the output of prohibited characters. We must generate an error if they are present. We do that by clearing the data.

We already had tests for prohibited output, but they were indistinguishable from being stripped. So instead add some extra characters so that we can tell whether the label was cleared.

Change-Id: I2d95217c27be5e2d54deed0036cb009e3b7f4886
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
This commit is contained in:
parent
8631227519
commit
736a052d93
@ -1502,18 +1502,20 @@ static bool isMappedToNothing(uint uc)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void stripProhibitedOutput(QString *str, int from)
|
static bool containsProhibitedOuptut(const QString *str, int from)
|
||||||
{
|
{
|
||||||
ushort *out = (ushort *)str->data() + from;
|
const ushort *in = reinterpret_cast<const ushort *>(str->begin() + from);
|
||||||
const ushort *in = out;
|
|
||||||
const ushort *end = (ushort *)str->data() + str->size();
|
const ushort *end = (ushort *)str->data() + str->size();
|
||||||
while (in < end) {
|
for ( ; in < end; ++in) {
|
||||||
uint uc = *in;
|
uint uc = *in;
|
||||||
if (QChar(uc).isHighSurrogate() && in < end - 1) {
|
if (QChar(uc).isHighSurrogate() && in < end - 1) {
|
||||||
ushort low = *(in + 1);
|
ushort low = *(in + 1);
|
||||||
if (QChar(low).isLowSurrogate()) {
|
if (QChar(low).isLowSurrogate()) {
|
||||||
++in;
|
++in;
|
||||||
uc = QChar::surrogateToUcs4(uc, low);
|
uc = QChar::surrogateToUcs4(uc, low);
|
||||||
|
} else {
|
||||||
|
// unpaired surrogates are prohibited
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (uc <= 0xFFFF) {
|
if (uc <= 0xFFFF) {
|
||||||
@ -1538,7 +1540,7 @@ static void stripProhibitedOutput(QString *str, int from)
|
|||||||
|| (uc >= 0xFDD0 && uc <= 0xFDEF)
|
|| (uc >= 0xFDD0 && uc <= 0xFDEF)
|
||||||
|| uc == 0xFEFF
|
|| uc == 0xFEFF
|
||||||
|| (uc >= 0xFFF9 && uc <= 0xFFFF))) {
|
|| (uc >= 0xFFF9 && uc <= 0xFFFF))) {
|
||||||
*out++ = *in;
|
continue;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (!((uc >= 0x1D173 && uc <= 0x1D17A)
|
if (!((uc >= 0x1D173 && uc <= 0x1D17A)
|
||||||
@ -1562,14 +1564,12 @@ static void stripProhibitedOutput(QString *str, int from)
|
|||||||
|| (uc >= 0xFFFFE && uc <= 0xFFFFF)
|
|| (uc >= 0xFFFFE && uc <= 0xFFFFF)
|
||||||
|| (uc >= 0x100000 && uc <= 0x10FFFD)
|
|| (uc >= 0x100000 && uc <= 0x10FFFD)
|
||||||
|| (uc >= 0x10FFFE && uc <= 0x10FFFF))) {
|
|| (uc >= 0x10FFFE && uc <= 0x10FFFF))) {
|
||||||
*out++ = QChar::highSurrogate(uc);
|
continue;
|
||||||
*out++ = QChar::lowSurrogate(uc);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
++in;
|
return true;
|
||||||
}
|
}
|
||||||
if (in != out)
|
return false;
|
||||||
str->truncate(out - str->utf16());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool isBidirectionalRorAL(uint uc)
|
static bool isBidirectionalRorAL(uint uc)
|
||||||
@ -2084,7 +2084,10 @@ Q_AUTOTEST_EXPORT void qt_nameprep(QString *source, int from)
|
|||||||
firstNonAscii > from ? firstNonAscii - 1 : from);
|
firstNonAscii > from ? firstNonAscii - 1 : from);
|
||||||
|
|
||||||
// Strip prohibited output
|
// Strip prohibited output
|
||||||
stripProhibitedOutput(source, firstNonAscii);
|
if (containsProhibitedOuptut(source, firstNonAscii)) {
|
||||||
|
source->resize(from);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Check for valid bidirectional characters
|
// Check for valid bidirectional characters
|
||||||
bool containsLCat = false;
|
bool containsLCat = false;
|
||||||
|
@ -351,7 +351,7 @@ void tst_QUrlInternal::nameprep_testsuite_data()
|
|||||||
<< QString() << 0 << 0;
|
<< QString() << 0 << 0;
|
||||||
|
|
||||||
QTest::newRow("Non-ASCII multibyte space character U+1680")
|
QTest::newRow("Non-ASCII multibyte space character U+1680")
|
||||||
<< QString::fromUtf8("\xE1\x9A\x80")
|
<< QString::fromUtf8("x\xE1\x9A\x80x")
|
||||||
<< QString()
|
<< QString()
|
||||||
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
||||||
|
|
||||||
@ -376,12 +376,12 @@ void tst_QUrlInternal::nameprep_testsuite_data()
|
|||||||
<< QString() << 0 << 0;
|
<< QString() << 0 << 0;
|
||||||
|
|
||||||
QTest::newRow("Non-ASCII 8bit control character U+0085")
|
QTest::newRow("Non-ASCII 8bit control character U+0085")
|
||||||
<< QString::fromUtf8("\xC2\x85")
|
<< QString::fromUtf8("x\xC2\x85x")
|
||||||
<< QString()
|
<< QString()
|
||||||
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
||||||
|
|
||||||
QTest::newRow("Non-ASCII multibyte control character U+180E")
|
QTest::newRow("Non-ASCII multibyte control character U+180E")
|
||||||
<< QString::fromUtf8("\xE1\xA0\x8E")
|
<< QString::fromUtf8("x\xE1\xA0\x8Ex")
|
||||||
<< QString()
|
<< QString()
|
||||||
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
||||||
|
|
||||||
@ -391,47 +391,47 @@ void tst_QUrlInternal::nameprep_testsuite_data()
|
|||||||
<< QString() << 0 << 0;
|
<< QString() << 0 << 0;
|
||||||
|
|
||||||
QTest::newRow("Non-ASCII control character U+1D175")
|
QTest::newRow("Non-ASCII control character U+1D175")
|
||||||
<< QString::fromUtf8("\xF0\x9D\x85\xB5")
|
<< QString::fromUtf8("x\xF0\x9D\x85\xB5x")
|
||||||
<< QString()
|
<< QString()
|
||||||
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
||||||
|
|
||||||
QTest::newRow("Plane 0 private use character U+F123")
|
QTest::newRow("Plane 0 private use character U+F123")
|
||||||
<< QString::fromUtf8("\xEF\x84\xA3")
|
<< QString::fromUtf8("x\xEF\x84\xA3x")
|
||||||
<< QString()
|
<< QString()
|
||||||
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
||||||
|
|
||||||
QTest::newRow("Plane 15 private use character U+F1234")
|
QTest::newRow("Plane 15 private use character U+F1234")
|
||||||
<< QString::fromUtf8("\xF3\xB1\x88\xB4")
|
<< QString::fromUtf8("x\xF3\xB1\x88\xB4x")
|
||||||
<< QString()
|
<< QString()
|
||||||
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
||||||
|
|
||||||
QTest::newRow("Plane 16 private use character U+10F234")
|
QTest::newRow("Plane 16 private use character U+10F234")
|
||||||
<< QString::fromUtf8("\xF4\x8F\x88\xB4")
|
<< QString::fromUtf8("x\xF4\x8F\x88\xB4x")
|
||||||
<< QString()
|
<< QString()
|
||||||
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
||||||
|
|
||||||
QTest::newRow("Non-character code point U+8FFFE")
|
QTest::newRow("Non-character code point U+8FFFE")
|
||||||
<< QString::fromUtf8("\xF2\x8F\xBF\xBE")
|
<< QString::fromUtf8("x\xF2\x8F\xBF\xBEx")
|
||||||
<< QString()
|
<< QString()
|
||||||
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
||||||
|
|
||||||
QTest::newRow("Non-character code point U+10FFFF")
|
QTest::newRow("Non-character code point U+10FFFF")
|
||||||
<< QString::fromUtf8("\xF4\x8F\xBF\xBF")
|
<< QString::fromUtf8("x\xF4\x8F\xBF\xBFx")
|
||||||
<< QString()
|
<< QString()
|
||||||
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
||||||
|
|
||||||
QTest::newRow("Surrogate code U+DF42")
|
QTest::newRow("Surrogate code U+DF42")
|
||||||
<< QString::fromUtf8("\xED\xBD\x82")
|
<< QString::fromUtf8("x\xED\xBD\x82x")
|
||||||
<< QString()
|
<< QString()
|
||||||
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
||||||
|
|
||||||
QTest::newRow("Non-plain text character U+FFFD")
|
QTest::newRow("Non-plain text character U+FFFD")
|
||||||
<< QString::fromUtf8("\xEF\xBF\xBD")
|
<< QString::fromUtf8("x\xEF\xBF\xBDx")
|
||||||
<< QString()
|
<< QString()
|
||||||
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
||||||
|
|
||||||
QTest::newRow("Ideographic description character U+2FF5")
|
QTest::newRow("Ideographic description character U+2FF5")
|
||||||
<< QString::fromUtf8("\xE2\xBF\xB5")
|
<< QString::fromUtf8("x\xE2\xBF\xB5x")
|
||||||
<< QString()
|
<< QString()
|
||||||
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
||||||
|
|
||||||
@ -441,22 +441,22 @@ void tst_QUrlInternal::nameprep_testsuite_data()
|
|||||||
<< QString() << 0 << 0;
|
<< QString() << 0 << 0;
|
||||||
|
|
||||||
QTest::newRow("Left-to-right mark U+200E")
|
QTest::newRow("Left-to-right mark U+200E")
|
||||||
<< QString::fromUtf8("\xE2\x80\x8E")
|
<< QString::fromUtf8("x\xE2\x80\x8Ex")
|
||||||
<< QString::fromUtf8("\xCC\x81")
|
<< QString()
|
||||||
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
||||||
|
|
||||||
QTest::newRow("Deprecated U+202A")
|
QTest::newRow("Deprecated U+202A")
|
||||||
<< QString::fromUtf8("\xE2\x80\xAA")
|
<< QString::fromUtf8("x\xE2\x80\xAA")
|
||||||
<< QString::fromUtf8("\xCC\x81")
|
<< QString()
|
||||||
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
||||||
|
|
||||||
QTest::newRow("Language tagging character U+E0001")
|
QTest::newRow("Language tagging character U+E0001")
|
||||||
<< QString::fromUtf8("\xF3\xA0\x80\x81")
|
<< QString::fromUtf8("x\xF3\xA0\x80\x81x")
|
||||||
<< QString::fromUtf8("\xCC\x81")
|
<< QString()
|
||||||
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
||||||
|
|
||||||
QTest::newRow("Language tagging character U+E0042")
|
QTest::newRow("Language tagging character U+E0042")
|
||||||
<< QString::fromUtf8("\xF3\xA0\x81\x82")
|
<< QString::fromUtf8("x\xF3\xA0\x81\x82x")
|
||||||
<< QString()
|
<< QString()
|
||||||
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
<< QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
|
||||||
|
|
||||||
@ -512,12 +512,6 @@ void tst_QUrlInternal::nameprep_testsuite()
|
|||||||
QFETCH(QString, out);
|
QFETCH(QString, out);
|
||||||
QFETCH(QString, profile);
|
QFETCH(QString, profile);
|
||||||
|
|
||||||
QEXPECT_FAIL("Left-to-right mark U+200E",
|
|
||||||
"Investigate further", Continue);
|
|
||||||
QEXPECT_FAIL("Deprecated U+202A",
|
|
||||||
"Investigate further", Continue);
|
|
||||||
QEXPECT_FAIL("Language tagging character U+E0001",
|
|
||||||
"Investigate further", Continue);
|
|
||||||
qt_nameprep(&in, 0);
|
qt_nameprep(&in, 0);
|
||||||
QCOMPARE(in, out);
|
QCOMPARE(in, out);
|
||||||
}
|
}
|
||||||
@ -549,9 +543,9 @@ void tst_QUrlInternal::nameprep_highcodes_data()
|
|||||||
<< QString() << 0 << 0;
|
<< QString() << 0 << 0;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
QChar st[] = { 'D', 0xdb40, 0xdc20, 'o', 0xd834, 0xdd7a, '\'', 0x2060, 'h' };
|
QChar st[] = { 'D', 'o', '\'', 0x2060, 'h' };
|
||||||
QChar se[] = { 'd', 'o', '\'', 'h' };
|
QChar se[] = { 'd', 'o', '\'', 'h' };
|
||||||
QTest::newRow("highcodes (D, U+E0020, o, U+1D17A, ', U+2060, h)")
|
QTest::newRow("highcodes (D, o, ', U+2060, h)")
|
||||||
<< QString(st, sizeof(st)/sizeof(st[0]))
|
<< QString(st, sizeof(st)/sizeof(st[0]))
|
||||||
<< QString(se, sizeof(se)/sizeof(se[0]))
|
<< QString(se, sizeof(se)/sizeof(se[0]))
|
||||||
<< QString() << 0 << 0;
|
<< QString() << 0 << 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user