Allow non-character codes in utf8 strings
Changed how the UTF-8 codec handles non-character code points. Non-character code points are now accepted in QStrings, QUrls and QJson strings. Unit tests were adapted accordingly. For more info about non-character code points, see: http://www.unicode.org/versions/corrigendum9.html [ChangeLog][QtCore][QUtf8] UTF-8 now accepts non-character Unicode code points; these are no longer replaced by the replacement character [ChangeLog][QtCore][QUrl] QUrl now fully accepts non-character Unicode code points; they are percent-encoded and can also be pretty-decoded [ChangeLog][QtCore][QJson] The Writer and the Parser now fully accept non-character Unicode code points. Change-Id: I77cf4f0e6210741eac8082912a0b6118eced4f77 Task-number: QTBUG-33229 Reviewed-by: Lars Knoll <lars.knoll@digia.com> Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
parent
e8853506bf
commit
add2bf739a
@ -106,14 +106,6 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conve
|
|||||||
if (u < 0x0800) {
|
if (u < 0x0800) {
|
||||||
*cursor++ = 0xc0 | ((uchar) (u >> 6));
|
*cursor++ = 0xc0 | ((uchar) (u >> 6));
|
||||||
} else {
|
} else {
|
||||||
// is it one of the Unicode non-characters?
|
|
||||||
if (QChar::isNonCharacter(u)) {
|
|
||||||
*cursor++ = replacement;
|
|
||||||
++ch;
|
|
||||||
++invalid;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (QChar::requiresSurrogates(u)) {
|
if (QChar::requiresSurrogates(u)) {
|
||||||
*cursor++ = 0xf0 | ((uchar) (u >> 18));
|
*cursor++ = 0xf0 | ((uchar) (u >> 18));
|
||||||
*cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f);
|
*cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f);
|
||||||
@ -180,15 +172,14 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte
|
|||||||
--need;
|
--need;
|
||||||
if (!need) {
|
if (!need) {
|
||||||
// utf-8 bom composes into 0xfeff code point
|
// utf-8 bom composes into 0xfeff code point
|
||||||
bool nonCharacter;
|
|
||||||
if (!headerdone && uc == 0xfeff) {
|
if (!headerdone && uc == 0xfeff) {
|
||||||
// don't do anything, just skip the BOM
|
// don't do anything, just skip the BOM
|
||||||
} else if (!(nonCharacter = QChar::isNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc <= QChar::LastValidCodePoint) {
|
} else if (QChar::requiresSurrogates(uc) && uc <= QChar::LastValidCodePoint) {
|
||||||
// surrogate pair
|
// surrogate pair
|
||||||
Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length());
|
Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length());
|
||||||
*qch++ = QChar::highSurrogate(uc);
|
*qch++ = QChar::highSurrogate(uc);
|
||||||
*qch++ = QChar::lowSurrogate(uc);
|
*qch++ = QChar::lowSurrogate(uc);
|
||||||
} else if ((uc < min_uc) || QChar::isSurrogate(uc) || nonCharacter || uc > QChar::LastValidCodePoint) {
|
} else if ((uc < min_uc) || QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint) {
|
||||||
// error: overlong sequence, UTF16 surrogate or non-character
|
// error: overlong sequence, UTF16 surrogate or code point beyond QChar::LastValidCodePoint
|
||||||
*qch++ = replacement;
|
*qch++ = replacement;
|
||||||
++invalid;
|
++invalid;
|
||||||
|
@ -304,7 +304,7 @@ static bool encodedUtf8ToUtf16(QString &result, ushort *&output, const ushort *b
|
|||||||
// we've decoded something; safety-check it
|
// we've decoded something; safety-check it
|
||||||
if (uc < min_uc)
|
if (uc < min_uc)
|
||||||
return false;
|
return false;
|
||||||
if (QChar::isSurrogate(uc) || QChar::isNonCharacter(uc) || uc > QChar::LastValidCodePoint)
|
if (QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (!QChar::requiresSurrogates(uc)) {
|
if (!QChar::requiresSurrogates(uc)) {
|
||||||
|
@ -853,7 +853,7 @@ static inline bool scanUtf8Char(const char *&json, const char *end, uint *result
|
|||||||
uc = (uc << 6) | (ch & 0x3f);
|
uc = (uc << 6) | (ch & 0x3f);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (uc < min_uc || QChar::isNonCharacter(uc) ||
|
if (uc < min_uc ||
|
||||||
QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint) {
|
QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -138,13 +138,6 @@ static QByteArray escapedString(const QString &s)
|
|||||||
if (u < 0x0800) {
|
if (u < 0x0800) {
|
||||||
*cursor++ = 0xc0 | ((uchar) (u >> 6));
|
*cursor++ = 0xc0 | ((uchar) (u >> 6));
|
||||||
} else {
|
} else {
|
||||||
// is it one of the Unicode non-characters?
|
|
||||||
if (QChar::isNonCharacter(u)) {
|
|
||||||
*cursor++ = replacement;
|
|
||||||
++ch;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (QChar::requiresSurrogates(u)) {
|
if (QChar::requiresSurrogates(u)) {
|
||||||
*cursor++ = 0xf0 | ((uchar) (u >> 18));
|
*cursor++ = 0xf0 | ((uchar) (u >> 18));
|
||||||
*cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f);
|
*cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f);
|
||||||
|
@ -66,9 +66,9 @@ private slots:
|
|||||||
void codecForLocale();
|
void codecForLocale();
|
||||||
|
|
||||||
void asciiToIscii() const;
|
void asciiToIscii() const;
|
||||||
void flagCodepointFFFF() const;
|
void nonFlaggedCodepointFFFF() const;
|
||||||
void flagF7808080() const;
|
void flagF7808080() const;
|
||||||
void flagEFBFBF() const;
|
void nonFlaggedEFBFBF() const;
|
||||||
void decode0D() const;
|
void decode0D() const;
|
||||||
void aliasForUTF16() const;
|
void aliasForUTF16() const;
|
||||||
void mibForTSCII() const;
|
void mibForTSCII() const;
|
||||||
@ -409,9 +409,9 @@ void tst_QTextCodec::asciiToIscii() const
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void tst_QTextCodec::flagCodepointFFFF() const
|
void tst_QTextCodec::nonFlaggedCodepointFFFF() const
|
||||||
{
|
{
|
||||||
// This is an invalid Unicode codepoint.
|
// Check that the code point U+FFFF (a non-character, encoded in UTF-8 as 0xEF 0xBF 0xBF) is not flagged
|
||||||
const QChar ch(0xFFFF);
|
const QChar ch(0xFFFF);
|
||||||
QString input(ch);
|
QString input(ch);
|
||||||
|
|
||||||
@ -419,12 +419,11 @@ void tst_QTextCodec::flagCodepointFFFF() const
|
|||||||
QVERIFY(codec);
|
QVERIFY(codec);
|
||||||
|
|
||||||
const QByteArray asDecoded(codec->fromUnicode(input));
|
const QByteArray asDecoded(codec->fromUnicode(input));
|
||||||
QCOMPARE(asDecoded, QByteArray("?"));
|
QCOMPARE(asDecoded, QByteArray("\357\277\277"));
|
||||||
|
|
||||||
QByteArray ffff("\357\277\277");
|
QByteArray ffff("\357\277\277");
|
||||||
QTextCodec::ConverterState state(QTextCodec::ConvertInvalidToNull);
|
QTextCodec::ConverterState state(QTextCodec::ConvertInvalidToNull);
|
||||||
QVERIFY(codec->toUnicode(ffff.constData(), ffff.length(), &state) == QChar(0));
|
QVERIFY(codec->toUnicode(ffff.constData(), ffff.length(), &state) == QByteArray::fromHex("EFBFBF"));
|
||||||
QVERIFY(codec->toUnicode(ffff) == QChar(0xfffd));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void tst_QTextCodec::flagF7808080() const
|
void tst_QTextCodec::flagF7808080() const
|
||||||
@ -460,13 +459,16 @@ void tst_QTextCodec::flagF7808080() const
|
|||||||
QVERIFY(codec->toUnicode(input.constData(), input.length(), &state) == QChar(0));
|
QVERIFY(codec->toUnicode(input.constData(), input.length(), &state) == QChar(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
void tst_QTextCodec::flagEFBFBF() const
|
void tst_QTextCodec::nonFlaggedEFBFBF() const
|
||||||
{
|
{
|
||||||
QByteArray invalidInput;
|
/* Check that the codec does NOT flag EFBFBF.
|
||||||
invalidInput.resize(3);
|
* This is a regression test; see QTBUG-33229
|
||||||
invalidInput[0] = char(0xEF);
|
*/
|
||||||
invalidInput[1] = char(0xBF);
|
QByteArray validInput;
|
||||||
invalidInput[2] = char(0xBF);
|
validInput.resize(3);
|
||||||
|
validInput[0] = char(0xEF);
|
||||||
|
validInput[1] = char(0xBF);
|
||||||
|
validInput[2] = char(0xBF);
|
||||||
|
|
||||||
const QTextCodec *const codec = QTextCodec::codecForMib(106); // UTF-8
|
const QTextCodec *const codec = QTextCodec::codecForMib(106); // UTF-8
|
||||||
QVERIFY(codec);
|
QVERIFY(codec);
|
||||||
@ -474,21 +476,20 @@ void tst_QTextCodec::flagEFBFBF() const
|
|||||||
{
|
{
|
||||||
//QVERIFY(!codec->canEncode(QChar(0xFFFF)));
|
//QVERIFY(!codec->canEncode(QChar(0xFFFF)));
|
||||||
QTextCodec::ConverterState state(QTextCodec::ConvertInvalidToNull);
|
QTextCodec::ConverterState state(QTextCodec::ConvertInvalidToNull);
|
||||||
QVERIFY(codec->toUnicode(invalidInput.constData(), invalidInput.length(), &state) == QChar(0));
|
QVERIFY(codec->toUnicode(validInput.constData(), validInput.length(), &state) == QByteArray::fromHex("EFBFBF"));
|
||||||
|
|
||||||
QByteArray start("<?pi ");
|
QByteArray start("<?pi ");
|
||||||
start.append(invalidInput);
|
start.append(validInput);
|
||||||
start.append("?>");
|
start.append("?>");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* When 0xEFBFBF is preceded by what seems to be an arbitrary character,
|
// Check that 0xEFBFBF is correctly decoded when preceded by an arbitrary character
|
||||||
* QTextCodec fails to flag it. */
|
|
||||||
{
|
{
|
||||||
QByteArray start("B");
|
QByteArray start("B");
|
||||||
start.append(invalidInput);
|
start.append(validInput);
|
||||||
|
|
||||||
QTextCodec::ConverterState state(QTextCodec::ConvertInvalidToNull);
|
QTextCodec::ConverterState state(QTextCodec::ConvertInvalidToNull);
|
||||||
QVERIFY(codec->toUnicode(start.constData(), start.length(), &state) == QString::fromLatin1("B\0", 2));
|
QVERIFY(codec->toUnicode(start.constData(), start.length(), &state) == QByteArray("B").append(QByteArray::fromHex("EFBFBF")));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -674,13 +675,12 @@ void tst_QTextCodec::utf8Codec_data()
|
|||||||
str = QChar(0x7ff);
|
str = QChar(0x7ff);
|
||||||
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.2") << utf8 << str << -1;
|
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.2") << utf8 << str << -1;
|
||||||
|
|
||||||
// 2.2.3 U+000FFFF
|
// 2.2.3 U+000FFFF - non-character code
|
||||||
utf8.clear();
|
utf8.clear();
|
||||||
utf8 += char(0xef);
|
utf8 += char(0xef);
|
||||||
utf8 += char(0xbf);
|
utf8 += char(0xbf);
|
||||||
utf8 += char(0xbf);
|
utf8 += char(0xbf);
|
||||||
str.clear();
|
str = QString::fromUtf8(utf8);
|
||||||
str += QChar::ReplacementCharacter;
|
|
||||||
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.3") << utf8 << str << -1;
|
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.3") << utf8 << str << -1;
|
||||||
|
|
||||||
// 2.2.4 U+001FFFFF
|
// 2.2.4 U+001FFFFF
|
||||||
@ -1535,20 +1535,22 @@ void tst_QTextCodec::utf8Codec_data()
|
|||||||
str += QChar(QChar::ReplacementCharacter);
|
str += QChar(QChar::ReplacementCharacter);
|
||||||
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.8") << utf8 << str << -1;
|
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.8") << utf8 << str << -1;
|
||||||
|
|
||||||
// 5.3.1
|
// 5.3.1 - non-character code
|
||||||
utf8.clear();
|
utf8.clear();
|
||||||
utf8 += char(0xef);
|
utf8 += char(0xef);
|
||||||
utf8 += char(0xbf);
|
utf8 += char(0xbf);
|
||||||
utf8 += char(0xbe);
|
utf8 += char(0xbe);
|
||||||
str = QChar(QChar::ReplacementCharacter);
|
//str = QChar(QChar::ReplacementCharacter);
|
||||||
|
str = QString::fromUtf8(utf8);
|
||||||
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.3.1") << utf8 << str << -1;
|
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.3.1") << utf8 << str << -1;
|
||||||
|
|
||||||
// 5.3.2
|
// 5.3.2 - non-character code
|
||||||
utf8.clear();
|
utf8.clear();
|
||||||
utf8 += char(0xef);
|
utf8 += char(0xef);
|
||||||
utf8 += char(0xbf);
|
utf8 += char(0xbf);
|
||||||
utf8 += char(0xbf);
|
utf8 += char(0xbf);
|
||||||
str = QChar(QChar::ReplacementCharacter);
|
//str = QChar(QChar::ReplacementCharacter);
|
||||||
|
str = QString::fromUtf8(utf8);
|
||||||
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.3.2") << utf8 << str << -1;
|
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.3.2") << utf8 << str << -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -233,8 +233,9 @@ void tst_Utf8::nonCharacters_data()
|
|||||||
QTest::addColumn<QByteArray>("utf8");
|
QTest::addColumn<QByteArray>("utf8");
|
||||||
QTest::addColumn<QString>("utf16");
|
QTest::addColumn<QString>("utf16");
|
||||||
|
|
||||||
// Unicode has a couple of "non-characters" that one can use internally,
|
// Unicode has a couple of "non-characters" that one can use internally
|
||||||
// but are not allowed to be used for text interchange.
|
// These characters may be used for interchange;
|
||||||
|
// see: http://www.unicode.org/versions/corrigendum9.html
|
||||||
//
|
//
|
||||||
// Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
|
// Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
|
||||||
// U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
|
// U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
|
||||||
@ -279,20 +280,17 @@ void tst_Utf8::nonCharacters()
|
|||||||
decoder->toUnicode(utf8);
|
decoder->toUnicode(utf8);
|
||||||
|
|
||||||
// Only enforce correctness on our UTF-8 decoder
|
// Only enforce correctness on our UTF-8 decoder
|
||||||
// The system's UTF-8 codec is sometimes buggy
|
|
||||||
// GNU libc's iconv is known to accept U+FFFF and U+FFFE encoded as UTF-8
|
|
||||||
// OS X's iconv is known to accept those, plus surrogates and codepoints above U+10FFFF
|
|
||||||
if (!useLocale)
|
if (!useLocale)
|
||||||
QVERIFY(decoder->hasFailure());
|
QVERIFY(!decoder->hasFailure());
|
||||||
else if (!decoder->hasFailure())
|
else if (decoder->hasFailure())
|
||||||
qWarning("System codec does not report failure when it should. Should report bug upstream.");
|
qWarning("System codec reports failure when it shouldn't. Should report bug upstream.");
|
||||||
|
|
||||||
QSharedPointer<QTextEncoder> encoder(codec->makeEncoder());
|
QSharedPointer<QTextEncoder> encoder(codec->makeEncoder());
|
||||||
encoder->fromUnicode(utf16);
|
encoder->fromUnicode(utf16);
|
||||||
if (!useLocale)
|
if (!useLocale)
|
||||||
QVERIFY(encoder->hasFailure());
|
QVERIFY(!encoder->hasFailure());
|
||||||
else if (!encoder->hasFailure())
|
else if (encoder->hasFailure())
|
||||||
qWarning("System codec does not report failure when it should. Should report bug upstream.");
|
qWarning("System codec reports failure when it shouldn't. Should report bug upstream.");
|
||||||
}
|
}
|
||||||
|
|
||||||
QTEST_MAIN(tst_Utf8)
|
QTEST_MAIN(tst_Utf8)
|
||||||
|
@ -129,8 +129,8 @@ void loadInvalidUtf8Rows()
|
|||||||
|
|
||||||
void loadNonCharactersRows()
|
void loadNonCharactersRows()
|
||||||
{
|
{
|
||||||
// Unicode has a couple of "non-characters" that one can use internally,
|
// Unicode has a couple of "non-characters" that one can use internally
|
||||||
// but are not allowed to be used for text interchange.
|
// These characters are allowed for text-interchange (see http://www.unicode.org/versions/corrigendum9.html)
|
||||||
//
|
//
|
||||||
// Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
|
// Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
|
||||||
// U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
|
// U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
|
||||||
|
@ -964,8 +964,10 @@ void tst_QUrlInternal::encodingRecode_data()
|
|||||||
addUtf8Data("utf8-string-2", "\xDF\xBF\xE0\xA0\x80""A");
|
addUtf8Data("utf8-string-2", "\xDF\xBF\xE0\xA0\x80""A");
|
||||||
addUtf8Data("utf8-string-3", "\xE0\xA0\x80\xDF\xBF...");
|
addUtf8Data("utf8-string-3", "\xE0\xA0\x80\xDF\xBF...");
|
||||||
|
|
||||||
|
QTest::newRow("encode-unicode-noncharacter") << QString(QChar(0xffff)) << F(QUrl::FullyEncoded) << "%EF%BF%BF";
|
||||||
|
QTest::newRow("decode-unicode-noncharacter") << QString(QChar(0xffff)) << F(QUrl::PrettyDecoded) << QString::fromUtf8("\xEF\xBF\xBF");
|
||||||
|
|
||||||
// special cases: stuff we can encode, but not decode
|
// special cases: stuff we can encode, but not decode
|
||||||
QTest::newRow("unicode-noncharacter") << QString(QChar(0xffff)) << F(QUrl::FullyEncoded) << "%EF%BF%BF";
|
|
||||||
QTest::newRow("unicode-lo-surrogate") << QString(QChar(0xD800)) << F(QUrl::FullyEncoded) << "%ED%A0%80";
|
QTest::newRow("unicode-lo-surrogate") << QString(QChar(0xD800)) << F(QUrl::FullyEncoded) << "%ED%A0%80";
|
||||||
QTest::newRow("unicode-hi-surrogate") << QString(QChar(0xDC00)) << F(QUrl::FullyEncoded) << "%ED%B0%80";
|
QTest::newRow("unicode-hi-surrogate") << QString(QChar(0xDC00)) << F(QUrl::FullyEncoded) << "%ED%B0%80";
|
||||||
|
|
||||||
@ -1011,9 +1013,6 @@ void tst_QUrlInternal::encodingRecodeInvalidUtf8_data()
|
|||||||
extern void loadInvalidUtf8Rows();
|
extern void loadInvalidUtf8Rows();
|
||||||
loadInvalidUtf8Rows();
|
loadInvalidUtf8Rows();
|
||||||
|
|
||||||
extern void loadNonCharactersRows();
|
|
||||||
loadNonCharactersRows();
|
|
||||||
|
|
||||||
QTest::newRow("utf8-mix-4") << QByteArray("\xE0.A2\x80");
|
QTest::newRow("utf8-mix-4") << QByteArray("\xE0.A2\x80");
|
||||||
QTest::newRow("utf8-mix-5") << QByteArray("\xE0\xA2.80");
|
QTest::newRow("utf8-mix-5") << QByteArray("\xE0\xA2.80");
|
||||||
QTest::newRow("utf8-mix-6") << QByteArray("\xE0\xA2\x33");
|
QTest::newRow("utf8-mix-6") << QByteArray("\xE0\xA2\x33");
|
||||||
|
@ -47,7 +47,8 @@
|
|||||||
#include "qjsondocument.h"
|
#include "qjsondocument.h"
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
|
||||||
#define INVALID_UNICODE "\357\277\277" // "\uffff"
|
#define INVALID_UNICODE "\xCE\xBA\xE1"
|
||||||
|
#define UNICODE_NON_CHARACTER "\xEF\xBF\xBF"
|
||||||
#define UNICODE_DJE "\320\202" // Character from the Serbian Cyrillic alphabet
|
#define UNICODE_DJE "\320\202" // Character from the Serbian Cyrillic alphabet
|
||||||
|
|
||||||
class tst_QtJson: public QObject
|
class tst_QtJson: public QObject
|
||||||
@ -1305,6 +1306,19 @@ void tst_QtJson::fromJson()
|
|||||||
QCOMPARE(array.at(0).toBool(), true);
|
QCOMPARE(array.at(0).toBool(), true);
|
||||||
QCOMPARE(doc.toJson(), json);
|
QCOMPARE(doc.toJson(), json);
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
// regression test: check that Unicode non-characters (here U+FFFF) are correctly decoded
|
||||||
|
QByteArray json = "[\n \"" UNICODE_NON_CHARACTER "\"\n]\n";
|
||||||
|
QJsonDocument doc = QJsonDocument::fromJson(json);
|
||||||
|
QVERIFY(!doc.isEmpty());
|
||||||
|
QCOMPARE(doc.isArray(), true);
|
||||||
|
QCOMPARE(doc.isObject(), false);
|
||||||
|
QJsonArray array = doc.array();
|
||||||
|
QCOMPARE(array.size(), 1);
|
||||||
|
QCOMPARE(array.at(0).type(), QJsonValue::String);
|
||||||
|
QCOMPARE(array.at(0).toString(), QString::fromUtf8(UNICODE_NON_CHARACTER));
|
||||||
|
QCOMPARE(doc.toJson(), json);
|
||||||
|
}
|
||||||
{
|
{
|
||||||
QByteArray json = "[]";
|
QByteArray json = "[]";
|
||||||
QJsonDocument doc = QJsonDocument::fromJson(json);
|
QJsonDocument doc = QJsonDocument::fromJson(json);
|
||||||
@ -1532,7 +1546,7 @@ void tst_QtJson::fromJsonErrors()
|
|||||||
QJsonDocument doc = QJsonDocument::fromJson(json, &error);
|
QJsonDocument doc = QJsonDocument::fromJson(json, &error);
|
||||||
QVERIFY(doc.isEmpty());
|
QVERIFY(doc.isEmpty());
|
||||||
QCOMPARE(error.error, QJsonParseError::IllegalUTF8String);
|
QCOMPARE(error.error, QJsonParseError::IllegalUTF8String);
|
||||||
QCOMPARE(error.offset, 13);
|
QCOMPARE(error.offset, 14);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
QJsonParseError error;
|
QJsonParseError error;
|
||||||
@ -1556,7 +1570,7 @@ void tst_QtJson::fromJsonErrors()
|
|||||||
QJsonDocument doc = QJsonDocument::fromJson(json, &error);
|
QJsonDocument doc = QJsonDocument::fromJson(json, &error);
|
||||||
QVERIFY(doc.isEmpty());
|
QVERIFY(doc.isEmpty());
|
||||||
QCOMPARE(error.error, QJsonParseError::IllegalUTF8String);
|
QCOMPARE(error.error, QJsonParseError::IllegalUTF8String);
|
||||||
QCOMPARE(error.offset, 14);
|
QCOMPARE(error.offset, 15);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
QJsonParseError error;
|
QJsonParseError error;
|
||||||
@ -1702,6 +1716,7 @@ void tst_QtJson::parseStrings()
|
|||||||
"abc\\tabc",
|
"abc\\tabc",
|
||||||
"abc\\u0019abc",
|
"abc\\u0019abc",
|
||||||
"abc" UNICODE_DJE "abc",
|
"abc" UNICODE_DJE "abc",
|
||||||
|
UNICODE_NON_CHARACTER
|
||||||
};
|
};
|
||||||
int size = sizeof(strings)/sizeof(const char *);
|
int size = sizeof(strings)/sizeof(const char *);
|
||||||
|
|
||||||
@ -1728,7 +1743,8 @@ void tst_QtJson::parseStrings()
|
|||||||
Pairs pairs [] = {
|
Pairs pairs [] = {
|
||||||
{ "abc\\/abc", "abc/abc" },
|
{ "abc\\/abc", "abc/abc" },
|
||||||
{ "abc\\u0402abc", "abc" UNICODE_DJE "abc" },
|
{ "abc\\u0402abc", "abc" UNICODE_DJE "abc" },
|
||||||
{ "abc\\u0065abc", "abceabc" }
|
{ "abc\\u0065abc", "abceabc" },
|
||||||
|
{ "abc\\uFFFFabc", "abc" UNICODE_NON_CHARACTER "abc" }
|
||||||
};
|
};
|
||||||
size = sizeof(pairs)/sizeof(Pairs);
|
size = sizeof(pairs)/sizeof(Pairs);
|
||||||
|
|
||||||
|
@ -315,8 +315,6 @@ void tst_QXmlSimpleReader::testGoodXmlFile()
|
|||||||
QVERIFY(file.open(QIODevice::ReadOnly));
|
QVERIFY(file.open(QIODevice::ReadOnly));
|
||||||
Parser parser;
|
Parser parser;
|
||||||
|
|
||||||
// static int i = 0;
|
|
||||||
// qWarning("Test nr: " + QString::number(i)); ++i;
|
|
||||||
QEXPECT_FAIL("xmldocs/valid/sa/089.xml", "", Continue);
|
QEXPECT_FAIL("xmldocs/valid/sa/089.xml", "", Continue);
|
||||||
QVERIFY(parser.parseFile(&file));
|
QVERIFY(parser.parseFile(&file));
|
||||||
|
|
||||||
@ -326,7 +324,6 @@ void tst_QXmlSimpleReader::testGoodXmlFile()
|
|||||||
ref_stream.setCodec("UTF-8");
|
ref_stream.setCodec("UTF-8");
|
||||||
QString ref_file_contents = ref_stream.readAll();
|
QString ref_file_contents = ref_stream.readAll();
|
||||||
|
|
||||||
QEXPECT_FAIL("xmldocs/valid/sa/089.xml", "", Continue);
|
|
||||||
QCOMPARE(parser.result(), ref_file_contents);
|
QCOMPARE(parser.result(), ref_file_contents);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -355,8 +352,6 @@ void tst_QXmlSimpleReader::testBadXmlFile()
|
|||||||
QVERIFY(file.open(QIODevice::ReadOnly));
|
QVERIFY(file.open(QIODevice::ReadOnly));
|
||||||
Parser parser;
|
Parser parser;
|
||||||
|
|
||||||
// static int i = 0;
|
|
||||||
// qWarning("Test nr: " + QString::number(++i));
|
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/030.xml", "", Continue);
|
QEXPECT_FAIL("xmldocs/not-wf/sa/030.xml", "", Continue);
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/031.xml", "", Continue);
|
QEXPECT_FAIL("xmldocs/not-wf/sa/031.xml", "", Continue);
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/032.xml", "", Continue);
|
QEXPECT_FAIL("xmldocs/not-wf/sa/032.xml", "", Continue);
|
||||||
@ -381,22 +376,17 @@ void tst_QXmlSimpleReader::testBadXmlFile()
|
|||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/132.xml", "", Continue);
|
QEXPECT_FAIL("xmldocs/not-wf/sa/132.xml", "", Continue);
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/142.xml", "", Continue);
|
QEXPECT_FAIL("xmldocs/not-wf/sa/142.xml", "", Continue);
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/143.xml", "", Continue);
|
QEXPECT_FAIL("xmldocs/not-wf/sa/143.xml", "", Continue);
|
||||||
|
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/144.xml", "", Continue);
|
QEXPECT_FAIL("xmldocs/not-wf/sa/144.xml", "", Continue);
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/145.xml", "", Continue);
|
QEXPECT_FAIL("xmldocs/not-wf/sa/145.xml", "", Continue);
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/146.xml", "", Abort);
|
QEXPECT_FAIL("xmldocs/not-wf/sa/146.xml", "", Abort);
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/160.xml", "", Continue);
|
QEXPECT_FAIL("xmldocs/not-wf/sa/160.xml", "", Continue);
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/162.xml", "", Continue);
|
QEXPECT_FAIL("xmldocs/not-wf/sa/162.xml", "", Continue);
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/166.xml", "", Continue);
|
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/167.xml", "", Continue);
|
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/168.xml", "", Continue);
|
QEXPECT_FAIL("xmldocs/not-wf/sa/168.xml", "", Continue);
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/169.xml", "", Continue);
|
QEXPECT_FAIL("xmldocs/not-wf/sa/169.xml", "", Continue);
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/170.xml", "", Continue);
|
QEXPECT_FAIL("xmldocs/not-wf/sa/170.xml", "", Continue);
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/171.xml", "", Abort);
|
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/172.xml", "", Abort);
|
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/173.xml", "", Abort);
|
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/174.xml", "", Abort);
|
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/175.xml", "", Abort);
|
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/177.xml", "", Abort);
|
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/180.xml", "", Continue);
|
QEXPECT_FAIL("xmldocs/not-wf/sa/180.xml", "", Continue);
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/181.xml", "", Continue);
|
QEXPECT_FAIL("xmldocs/not-wf/sa/181.xml", "", Continue);
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/182.xml", "", Continue);
|
QEXPECT_FAIL("xmldocs/not-wf/sa/182.xml", "", Continue);
|
||||||
@ -411,12 +401,7 @@ void tst_QXmlSimpleReader::testBadXmlFile()
|
|||||||
ref_stream.setCodec("UTF-8");
|
ref_stream.setCodec("UTF-8");
|
||||||
QString ref_file_contents = ref_stream.readAll();
|
QString ref_file_contents = ref_stream.readAll();
|
||||||
|
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/144.xml", "", Continue);
|
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/145.xml", "", Continue);
|
QEXPECT_FAIL("xmldocs/not-wf/sa/145.xml", "", Continue);
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/146.xml", "", Continue);
|
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/167.xml", "", Continue);
|
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/166.xml", "", Continue);
|
|
||||||
QEXPECT_FAIL("xmldocs/not-wf/sa/170.xml", "", Continue);
|
|
||||||
|
|
||||||
QCOMPARE(parser.result(), ref_file_contents);
|
QCOMPARE(parser.result(), ref_file_contents);
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
setDocumentLocator(locator={columnNumber=1, lineNumber=1})
|
setDocumentLocator(locator={columnNumber=1, lineNumber=1})
|
||||||
startDocument()
|
startDocument()
|
||||||
startElement(namespaceURI="", localName="doc", qName="doc", atts=[])
|
startElement(namespaceURI="", localName="doc", qName="doc", atts=[])
|
||||||
characters(ch="<22><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>")
|
characters(ch="<22>")
|
||||||
endElement(namespaceURI="", localName="doc", qName="doc")
|
endElement(namespaceURI="", localName="doc", qName="doc")
|
||||||
endDocument()
|
endDocument()
|
||||||
|
@ -1980,16 +1980,15 @@ int fromUtf8_qt47(ushort *dst, const char *chars, int len)
|
|||||||
--need;
|
--need;
|
||||||
if (!need) {
|
if (!need) {
|
||||||
// utf-8 bom composes into 0xfeff code point
|
// utf-8 bom composes into 0xfeff code point
|
||||||
bool nonCharacter;
|
|
||||||
if (!headerdone && uc == 0xfeff) {
|
if (!headerdone && uc == 0xfeff) {
|
||||||
// don't do anything, just skip the BOM
|
// don't do anything, just skip the BOM
|
||||||
} else if (!(nonCharacter = QChar::isNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc <= QChar::LastValidCodePoint) {
|
} else if (QChar::requiresSurrogates(uc) && uc <= QChar::LastValidCodePoint) {
|
||||||
// surrogate pair
|
// surrogate pair
|
||||||
//Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length());
|
//Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length());
|
||||||
*qch++ = QChar::highSurrogate(uc);
|
*qch++ = QChar::highSurrogate(uc);
|
||||||
*qch++ = QChar::lowSurrogate(uc);
|
*qch++ = QChar::lowSurrogate(uc);
|
||||||
} else if ((uc < min_uc) || QChar::isSurrogate(uc) || nonCharacter || uc > QChar::LastValidCodePoint) {
|
} else if ((uc < min_uc) || QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint) {
|
||||||
// error: overlong sequence, UTF16 surrogate or non-character
|
// error: overlong sequence or UTF16 surrogate
|
||||||
*qch++ = replacement;
|
*qch++ = replacement;
|
||||||
++invalid;
|
++invalid;
|
||||||
} else {
|
} else {
|
||||||
@ -2086,16 +2085,15 @@ int fromUtf8_qt47_stateless(ushort *dst, const char *chars, int len)
|
|||||||
--need;
|
--need;
|
||||||
if (!need) {
|
if (!need) {
|
||||||
// utf-8 bom composes into 0xfeff code point
|
// utf-8 bom composes into 0xfeff code point
|
||||||
bool nonCharacter;
|
|
||||||
if (!headerdone && uc == 0xfeff) {
|
if (!headerdone && uc == 0xfeff) {
|
||||||
// don't do anything, just skip the BOM
|
// don't do anything, just skip the BOM
|
||||||
} else if (!(nonCharacter = QChar::isNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc <= QChar::LastValidCodePoint) {
|
} else if (QChar::requiresSurrogates(uc) && uc <= QChar::LastValidCodePoint) {
|
||||||
// surrogate pair
|
// surrogate pair
|
||||||
//Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length());
|
//Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length());
|
||||||
*qch++ = QChar::highSurrogate(uc);
|
*qch++ = QChar::highSurrogate(uc);
|
||||||
*qch++ = QChar::lowSurrogate(uc);
|
*qch++ = QChar::lowSurrogate(uc);
|
||||||
} else if ((uc < min_uc) || QChar::isSurrogate(uc) || nonCharacter || uc > QChar::LastValidCodePoint) {
|
} else if ((uc < min_uc) || QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint) {
|
||||||
// error: overlong sequence, UTF16 surrogate or non-character
|
// error: overlong sequence or UTF16 surrogate
|
||||||
*qch++ = replacement;
|
*qch++ = replacement;
|
||||||
++invalid;
|
++invalid;
|
||||||
} else {
|
} else {
|
||||||
@ -2214,7 +2212,7 @@ static inline void extract_utf8_multibyte(ushort *&dst, const char *&chars, qptr
|
|||||||
chars += 2;
|
chars += 2;
|
||||||
len -= 2;
|
len -= 2;
|
||||||
if (!trusted &&
|
if (!trusted &&
|
||||||
(ucs < 0x800 || QChar::isNonCharacter(ucs) || QChar::isSurrogate(ucs)))
|
(ucs < 0x800 || QChar::isSurrogate(ucs)))
|
||||||
dst[counter] = QChar::ReplacementCharacter;
|
dst[counter] = QChar::ReplacementCharacter;
|
||||||
else
|
else
|
||||||
dst[counter] = ucs;
|
dst[counter] = ucs;
|
||||||
@ -2245,7 +2243,7 @@ static inline void extract_utf8_multibyte(ushort *&dst, const char *&chars, qptr
|
|||||||
// dst[counter] will correspond to chars[counter..counter+2], so adjust
|
// dst[counter] will correspond to chars[counter..counter+2], so adjust
|
||||||
chars += 3;
|
chars += 3;
|
||||||
len -= 3;
|
len -= 3;
|
||||||
if (trusted || (QChar::requiresSurrogates(ucs) && ucs <= QChar::LastValidCodePoint && !QChar::isNonCharacter(ucs))) {
|
if (trusted || (QChar::requiresSurrogates(ucs) && ucs <= QChar::LastValidCodePoint)) {
|
||||||
dst[counter + 0] = QChar::highSurrogate(ucs);
|
dst[counter + 0] = QChar::highSurrogate(ucs);
|
||||||
dst[counter + 1] = QChar::lowSurrogate(ucs);
|
dst[counter + 1] = QChar::lowSurrogate(ucs);
|
||||||
counter += 2;
|
counter += 2;
|
||||||
|
Loading…
Reference in New Issue
Block a user