Use the new UTF-8 codec in QJsonDocument

The encoder is in qjsonwriter.cpp, which requires special handling for
ASCII due to the use of escape sequences. The decoder is in
qjsonparser.cpp, which only scan one character at a time.

As a side-effect, the JSON parser now reports the UTF-8 error in the
first character with error, instead of the last. This is probably what
should have been expected.

Change-Id: I52e5bc30d71466b6a36098b4150c61b2e385d8e9
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Thiago Macieira 2013-10-19 23:04:47 -04:00 committed by The Qt Project
parent 8dd47e34b9
commit cd750c86d6
4 changed files with 25 additions and 81 deletions

View File

@ -108,6 +108,11 @@ struct QUtf8BaseTraits
{ *ptr++ = uc; }
};
struct QUtf8BaseTraitsNoAscii : public QUtf8BaseTraits
{
static const bool skipAsciiHandling = true;
};
namespace QUtf8Functions
{
/// returns 0 on success; errors can only happen if \a u is a surrogate:

View File

@ -1,6 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies).
** Copyright (C) 2013 Intel Corporation
** Contact: http://www.qt-project.org/legal
**
** This file is part of the QtCore module of the Qt Toolkit.
@ -45,6 +46,7 @@
#include <qdebug.h>
#include "qjsonparser_p.h"
#include "qjson_p.h"
#include "private/qutfcodec_p.h"
//#define PARSER_DEBUG
#ifdef PARSER_DEBUG
@ -820,45 +822,16 @@ static inline bool scanEscapeSequence(const char *&json, const char *end, uint *
static inline bool scanUtf8Char(const char *&json, const char *end, uint *result)
{
int need;
uint min_uc;
uint uc;
uchar ch = *json++;
if (ch < 128) {
*result = ch;
return true;
} else if ((ch & 0xe0) == 0xc0) {
uc = ch & 0x1f;
need = 1;
min_uc = 0x80;
} else if ((ch & 0xf0) == 0xe0) {
uc = ch & 0x0f;
need = 2;
min_uc = 0x800;
} else if ((ch&0xf8) == 0xf0) {
uc = ch & 0x07;
need = 3;
min_uc = 0x10000;
} else {
const uchar *&src = reinterpret_cast<const uchar *&>(json);
const uchar *uend = reinterpret_cast<const uchar *>(end);
uchar b = *src++;
int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, result, src, uend);
if (res < 0) {
// decoding error, backtrack the character we read above
--json;
return false;
}
if (json >= end - need)
return false;
for (int i = 0; i < need; ++i) {
ch = *json++;
if ((ch&0xc0) != 0x80)
return false;
uc = (uc << 6) | (ch & 0x3f);
}
if (uc < min_uc ||
QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint) {
return false;
}
*result = uc;
return true;
}

View File

@ -1,6 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies).
** Copyright (C) 2013 Intel Corporation
** Contact: http://www.qt-project.org/legal
**
** This file is part of the QtCore module of the Qt Toolkit.
@ -41,6 +42,7 @@
#include "qjsonwriter_p.h"
#include "qjson_p.h"
#include "private/qutfcodec_p.h"
QT_BEGIN_NAMESPACE
@ -59,15 +61,12 @@ static QByteArray escapedString(const QString &s)
const uchar replacement = '?';
QByteArray ba(s.length(), Qt::Uninitialized);
uchar *cursor = (uchar *)ba.data();
uchar *cursor = reinterpret_cast<uchar *>(const_cast<char *>(ba.constData()));
const uchar *ba_end = cursor + ba.length();
const ushort *src = reinterpret_cast<const ushort *>(s.constBegin());
const ushort *const end = reinterpret_cast<const ushort *>(s.constEnd());
const QChar *ch = (const QChar *)s.constData();
const QChar *end = ch + s.length();
int surrogate_high = -1;
while (ch < end) {
while (src != end) {
if (cursor >= ba_end - 6) {
// ensure we have enough space
int pos = cursor - (const uchar *)ba.constData();
@ -76,29 +75,7 @@ static QByteArray escapedString(const QString &s)
ba_end = (const uchar *)ba.constData() + ba.length();
}
uint u = ch->unicode();
if (surrogate_high >= 0) {
if (ch->isLowSurrogate()) {
u = QChar::surrogateToUcs4(surrogate_high, u);
surrogate_high = -1;
} else {
// high surrogate without low
*cursor = replacement;
++ch;
surrogate_high = -1;
continue;
}
} else if (ch->isLowSurrogate()) {
// low surrogate without high
*cursor = replacement;
++ch;
continue;
} else if (ch->isHighSurrogate()) {
surrogate_high = u;
++ch;
continue;
}
uint u = *src++;
if (u < 0x80) {
if (u < 0x20 || u == 0x22 || u == 0x5c) {
*cursor++ = '\\';
@ -135,20 +112,9 @@ static QByteArray escapedString(const QString &s)
*cursor++ = (uchar)u;
}
} else {
if (u < 0x0800) {
*cursor++ = 0xc0 | ((uchar) (u >> 6));
} else {
if (QChar::requiresSurrogates(u)) {
*cursor++ = 0xf0 | ((uchar) (u >> 18));
*cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f);
} else {
*cursor++ = 0xe0 | (((uchar) (u >> 12)) & 0x3f);
}
*cursor++ = 0x80 | (((uchar) (u >> 6)) & 0x3f);
}
*cursor++ = 0x80 | ((uchar) (u&0x3f));
if (QUtf8Functions::toUtf8<QUtf8BaseTraits>(u, cursor, src, end) < 0)
*cursor++ = replacement;
}
++ch;
}
ba.resize(cursor - (const uchar *)ba.constData());

View File

@ -1546,7 +1546,7 @@ void tst_QtJson::fromJsonErrors()
QJsonDocument doc = QJsonDocument::fromJson(json, &error);
QVERIFY(doc.isEmpty());
QCOMPARE(error.error, QJsonParseError::IllegalUTF8String);
QCOMPARE(error.offset, 14);
QCOMPARE(error.offset, 12);
}
{
QJsonParseError error;
@ -1570,7 +1570,7 @@ void tst_QtJson::fromJsonErrors()
QJsonDocument doc = QJsonDocument::fromJson(json, &error);
QVERIFY(doc.isEmpty());
QCOMPARE(error.error, QJsonParseError::IllegalUTF8String);
QCOMPARE(error.offset, 15);
QCOMPARE(error.offset, 13);
}
{
QJsonParseError error;