QCborStreamReader: use QByteArray directly if possible

QIODevice represents considreable overhead, even with just QBuffer, for
parsing simple things. Benchmarking showed it was spending 25% of the
parsing time inside one QIODevice function or another. So this commit
accomplishes two things:

1) it increases the buffer size from 9 bytes to up to 256, which should
   reduce the number of calls into the QIODevice
2) if the source data is a QByteArray, then use it directly and bypass
   the QIODevice, thus increasing performance considerably

Change-Id: I56b444f9d6274221a3b7fffd150c531c9d28e54b
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
This commit is contained in:
Thiago Macieira 2018-01-22 19:35:56 -08:00
parent adf50269e7
commit 91e1356335
2 changed files with 289 additions and 73 deletions

View File

@ -1840,54 +1840,93 @@ bool QCborStreamWriter::endMap()
class QCborStreamReaderPrivate
{
public:
enum {
// 9 bytes is the maximum size for any integer, floating point or
// length in CBOR.
MaxCborIndividualSize = 9,
IdealIoBufferSize = 256
};
struct ChunkParameters {
qsizetype offset;
qsizetype size;
};
QIODevice *device;
QByteArray buffer;
QStack<CborValue> containerStack;
CborParser parser;
CborValue currentElement;
QStack<CborValue> containerStack;
QCborError lastError = {};
bool deleteDevice = false;
QByteArray::size_type bufferStart;
bool corrupt = false;
quint8 buflen = 0;
char buffer[sizeof(qint64) + 1];
QCborStreamReaderPrivate(const QByteArray &data)
: device(new QBuffer), deleteDevice(true)
: device(nullptr), buffer(data)
{
static_cast<QBuffer *>(device)->setData(data);
device->open(QIODevice::ReadWrite | QIODevice::Unbuffered);
initDecoder();
}
QCborStreamReaderPrivate(QIODevice *device)
: device(device)
{
initDecoder();
setDevice(device);
}
~QCborStreamReaderPrivate()
{
if (deleteDevice)
delete device;
}
void setDevice(QIODevice *dev)
{
buffer.clear();
device = dev;
initDecoder();
}
void initDecoder()
{
containerStack.clear();
bufferStart = 0;
if (device) {
buffer.clear();
buffer.reserve(IdealIoBufferSize); // sets the CapacityReserved flag
}
preread();
if (CborError err = cbor_parser_init_reader(nullptr, &parser, &currentElement, this))
handleError(err);
}
char *bufferPtr()
{
Q_ASSERT(buffer.isDetached());
return const_cast<char *>(buffer.constData()) + bufferStart;
}
void preread()
{
// Read up to 9 bytes into our internal buffer
// (this is enough for all CBOR types except strings).
buflen = device->peek(buffer, sizeof(buffer));
if (device && buffer.size() - bufferStart < MaxCborIndividualSize) {
// load more, but only if there's more to be read
qint64 avail = device->bytesAvailable();
Q_ASSERT(avail >= buffer.size());
if (avail == buffer.size())
return;
if (bufferStart)
device->skip(bufferStart); // skip what we've already parsed
if (buffer.size() != IdealIoBufferSize)
buffer.resize(IdealIoBufferSize);
bufferStart = 0;
qint64 read = device->peek(bufferPtr(), IdealIoBufferSize);
if (read < 0)
buffer.clear();
else if (read != IdealIoBufferSize)
buffer.truncate(read);
}
}
void handleError(CborError err) Q_DECL_NOTHROW
@ -1902,6 +1941,27 @@ public:
lastError = { QCborError::Code(err) };
}
void updateBufferAfterString(ChunkParameters params)
{
Q_ASSERT(device);
bufferStart += params.offset;
qsizetype newStart = bufferStart + params.size;
qsizetype remainingInBuffer = buffer.size() - newStart;
if (remainingInBuffer <= 0) {
// We've read from the QIODevice more than what was in the buffer.
buffer.truncate(0);
} else {
// There's still data buffered, but we need to move it around.
char *ptr = buffer.data();
memmove(ptr, ptr + newStart, remainingInBuffer);
buffer.truncate(remainingInBuffer);
}
bufferStart = 0;
}
ChunkParameters getStringChunkParameters();
};
@ -1912,16 +1972,20 @@ void qt_cbor_stream_set_error(QCborStreamReaderPrivate *d, QCborError error)
static inline bool qt_cbor_decoder_can_read(void *token, size_t len)
{
Q_ASSERT(len <= QCborStreamReaderPrivate::MaxCborIndividualSize);
auto self = static_cast<QCborStreamReaderPrivate *>(token);
qint64 avail = self->device->bytesAvailable();
return avail >= 0 && len <= quint64(avail);
qint64 avail = self->buffer.size() - self->bufferStart;
return len <= quint64(avail);
}
static void qt_cbor_decoder_advance(void *token, size_t len)
{
Q_ASSERT(len <= QCborStreamReaderPrivate::MaxCborIndividualSize);
auto self = static_cast<QCborStreamReaderPrivate *>(token);
Q_ASSERT(len <= self->buflen);
self->device->skip(qint64(len));
Q_ASSERT(len <= size_t(self->buffer.size() - self->bufferStart));
self->bufferStart += int(len);
self->preread();
}
@ -1929,17 +1993,17 @@ static void *qt_cbor_decoder_read(void *token, void *userptr, size_t offset, siz
{
Q_ASSERT(len == 1 || len == 2 || len == 4 || len == 8);
Q_ASSERT(offset == 0 || offset == 1);
auto self = static_cast<QCborStreamReaderPrivate *>(token);
auto self = static_cast<const QCborStreamReaderPrivate *>(token);
// we must have pre-read the data
Q_ASSERT(len <= self->buflen);
return memcpy(userptr, self->buffer + offset, len);
Q_ASSERT(len + offset <= size_t(self->buffer.size() - self->bufferStart));
return memcpy(userptr, self->buffer.constData() + self->bufferStart + offset, len);
}
static CborError qt_cbor_decoder_transfer_string(void *token, const void **userptr, size_t offset, size_t len)
{
auto self = static_cast<QCborStreamReaderPrivate *>(token);
Q_ASSERT(offset <= sizeof(self->buffer));
Q_ASSERT(offset <= size_t(self->buffer.size()));
Q_STATIC_ASSERT(sizeof(size_t) >= sizeof(QByteArray::size_type));
Q_STATIC_ASSERT(sizeof(size_t) == sizeof(qsizetype));
@ -1950,13 +2014,12 @@ static CborError qt_cbor_decoder_transfer_string(void *token, const void **userp
|| add_overflow<qsizetype>(offset, len, &total))
return CborErrorDataTooLarge;
qint64 avail = self->device->bytesAvailable();
if (total > avail)
return CborErrorUnexpectedEOF;
// our string transfer is just saving the offset to the userptr
*userptr = reinterpret_cast<void *>(offset);
return CborNoError;
qint64 avail = (self->device ? self->device->bytesAvailable() : self->buffer.size()) -
self->bufferStart;
return total > avail ? CborErrorUnexpectedEOF : CborNoError;
}
QCborStreamReaderPrivate::ChunkParameters QCborStreamReaderPrivate::getStringChunkParameters()
@ -2000,7 +2063,7 @@ inline void QCborStreamReader::preparse()
// Null and Undefined).
if (type_ == CborBooleanType || type_ == CborNullType || type_ == CborUndefinedType) {
type_ = SimpleType;
value64 = quint8(d->buffer[0]) - CborSimpleType;
value64 = quint8(d->buffer.at(d->bufferStart)) - CborSimpleType;
} else {
// Using internal TinyCBOR API!
value64 = _cbor_value_extract_int64_helper(&d->currentElement);
@ -2088,11 +2151,7 @@ QCborStreamReader::~QCborStreamReader()
*/
void QCborStreamReader::setDevice(QIODevice *device)
{
if (d->deleteDevice)
delete d->device;
d->device = device;
d->deleteDevice = false;
d->initDecoder();
d->setDevice(device);
preparse();
}
@ -2103,8 +2162,7 @@ void QCborStreamReader::setDevice(QIODevice *device)
*/
QIODevice *QCborStreamReader::device() const
{
// don't return our own, internal QBuffer
return d->deleteDevice ? nullptr : d->device;
return d->device;
}
/*!
@ -2137,15 +2195,10 @@ void QCborStreamReader::addData(const QByteArray &data)
*/
void QCborStreamReader::addData(const char *data, qsizetype len)
{
if (d->deleteDevice) {
if (len > 0) {
auto buf = static_cast<QBuffer *>(d->device);
qint64 currentPos = buf->pos();
buf->seek(buf->size());
buf->write(data, len);
buf->seek(currentPos);
reparse();
}
if (!d->device) {
if (len > 0)
d->buffer.append(data, len);
reparse();
} else {
qWarning("QCborStreamReader: addData() with device()");
}
@ -2180,10 +2233,7 @@ void QCborStreamReader::reparse()
*/
void QCborStreamReader::clear()
{
auto buf = new QBuffer;
buf->setData(QByteArray());
setDevice(buf);
d->deleteDevice = true;
setDevice(nullptr);
}
/*!
@ -2201,7 +2251,8 @@ void QCborStreamReader::clear()
*/
void QCborStreamReader::reset()
{
d->device->reset();
if (d->device)
d->device->reset();
d->lastError = {};
d->initDecoder();
preparse();
@ -2228,7 +2279,7 @@ QCborError QCborStreamReader::lastError()
*/
qint64 QCborStreamReader::currentOffset() const
{
return d->device->pos();
return (d->device ? d->device->pos() : 0) + d->bufferStart;
}
/*!
@ -2749,22 +2800,31 @@ QCborStreamReader::readStringChunk(char *ptr, qsizetype maxlen)
// Read the chunk into the user's buffer.
qsizetype toRead = qMin(maxlen, params.size);
qsizetype left = params.size - maxlen;
qint64 actuallyRead;
// This first skip can't fail because we've already read this many bytes.
d->device->skip(params.offset);
if (d->device) {
// This first skip can't fail because we've already read this many bytes.
d->device->skip(d->bufferStart + params.offset);
actuallyRead = d->device->read(ptr, toRead);
qint64 actuallyRead = d->device->read(ptr, toRead);
if (actuallyRead != toRead) {
actuallyRead = -1;
} else if (left) {
qint64 skipped = d->device->skip(left);
if (skipped != left)
if (actuallyRead != toRead) {
actuallyRead = -1;
}
} else if (left) {
qint64 skipped = d->device->skip(left);
if (skipped != left)
actuallyRead = -1;
}
if (actuallyRead < 0) {
d->handleError(CborErrorIO);
return result;
if (actuallyRead < 0) {
d->handleError(CborErrorIO);
return result;
}
d->updateBufferAfterString(params);
} else {
actuallyRead = toRead;
memcpy(ptr, d->buffer.constData() + d->bufferStart + params.offset, toRead);
d->bufferStart += QByteArray::size_type(params.offset + params.size);
}
d->preread();

View File

@ -45,7 +45,10 @@ class tst_QCborStreamReader : public QObject
Q_OBJECT
private Q_SLOTS:
void initTestCase_data();
void basics();
void clear_data();
void clear();
void integers_data();
void integers();
void fixed_data();
@ -113,11 +116,27 @@ QT_END_NAMESPACE
// Get the data from TinyCBOR (see src/3rdparty/tinycbor/tests/parser/data.cpp)
#include "data.cpp"
void tst_QCborStreamReader::initTestCase_data()
{
QTest::addColumn<bool>("useDevice");
QTest::newRow("QByteArray") << false;
QTest::newRow("QBuffer") << true;
}
void tst_QCborStreamReader::basics()
{
QFETCH_GLOBAL(bool, useDevice);
QBuffer buffer;
QCborStreamReader reader;
QCOMPARE(reader.device(), nullptr);
if (useDevice) {
buffer.open(QIODevice::ReadOnly);
reader.setDevice(&buffer);
QVERIFY(reader.device() != nullptr);
} else {
QCOMPARE(reader.device(), nullptr);
}
QCOMPARE(reader.currentOffset(), 0);
QCOMPARE(reader.lastError(), QCborError::EndOfFile);
@ -149,11 +168,15 @@ void tst_QCborStreamReader::basics()
QVERIFY(reader.isLengthKnown()); // well, it's not unknown
QCOMPARE(reader.length(), ~0ULL);
reader.addData(QByteArray());
reader.reparse();
if (useDevice) {
reader.reparse();
QVERIFY(reader.device() != nullptr);
} else {
reader.addData(QByteArray());
QCOMPARE(reader.device(), nullptr);
}
// nothing changes, we added nothing
QCOMPARE(reader.device(), nullptr);
QCOMPARE(reader.currentOffset(), 0);
QCOMPARE(reader.lastError(), QCborError::EndOfFile);
@ -183,6 +206,51 @@ void tst_QCborStreamReader::basics()
QCOMPARE(reader.parentContainerType(), QCborStreamReader::Invalid);
QVERIFY(!reader.hasNext());
QVERIFY(!reader.next());
reader.clear();
QCOMPARE(reader.device(), nullptr);
QCOMPARE(reader.currentOffset(), 0);
QCOMPARE(reader.lastError(), QCborError::EndOfFile);
}
void tst_QCborStreamReader::clear_data()
{
QTest::addColumn<QByteArray>("data");
QTest::addColumn<QCborError>("firstError");
QTest::addColumn<int>("offsetAfterSkip");
QTest::newRow("invalid") << QByteArray(512, '\xff') << QCborError{QCborError::UnexpectedBreak} << 0;
QTest::newRow("valid") << QByteArray(512, '\0') << QCborError{QCborError::NoError} << 0;
QTest::newRow("skipped") << QByteArray(512, '\0') << QCborError{QCborError::NoError} << 1;
}
void tst_QCborStreamReader::clear()
{
QFETCH_GLOBAL(bool, useDevice);
QFETCH(QByteArray, data);
QFETCH(QCborError, firstError);
QFETCH(int, offsetAfterSkip);
QBuffer buffer(&data);
QCborStreamReader reader(data);
if (useDevice) {
buffer.open(QIODevice::ReadOnly);
reader.setDevice(&buffer);
}
QCOMPARE(reader.isValid(), !firstError);
QCOMPARE(reader.currentOffset(), 0);
QCOMPARE(reader.lastError(), firstError);
if (offsetAfterSkip) {
reader.next();
QVERIFY(!reader.isValid());
QCOMPARE(reader.currentOffset(), 1);
QCOMPARE(reader.lastError(), QCborError::NoError);
}
reader.clear();
QCOMPARE(reader.device(), nullptr);
QCOMPARE(reader.currentOffset(), 0);
QCOMPARE(reader.lastError(), QCborError::EndOfFile);
}
void tst_QCborStreamReader::integers_data()
@ -192,6 +260,7 @@ void tst_QCborStreamReader::integers_data()
void tst_QCborStreamReader::integers()
{
QFETCH_GLOBAL(bool, useDevice);
QFETCH(QByteArray, data);
QFETCH(bool, isNegative);
QFETCH(quint64, expectedRaw);
@ -199,7 +268,12 @@ void tst_QCborStreamReader::integers()
QFETCH(bool, inInt64Range);
quint64 absolute = (isNegative ? expectedRaw + 1 : expectedRaw);
QBuffer buffer(&data);
QCborStreamReader reader(data);
if (useDevice) {
buffer.open(QIODevice::ReadOnly);
reader.setDevice(&buffer);
}
QVERIFY(reader.isValid());
QCOMPARE(reader.lastError(), QCborError::NoError);
QVERIFY(reader.isInteger());
@ -525,11 +599,17 @@ void tst_QCborStreamReader::fixed_data()
void tst_QCborStreamReader::fixed()
{
QFETCH_GLOBAL(bool, useDevice);
QFETCH(QByteArray, data);
QFETCH(QString, expected);
removeIndicators(expected);
QBuffer buffer(&data);
QCborStreamReader reader(data);
if (useDevice) {
buffer.open(QIODevice::ReadOnly);
reader.setDevice(&buffer);
}
QVERIFY(reader.isValid());
QCOMPARE(reader.lastError(), QCborError::NoError);
QCOMPARE(parseOne(reader), expected);
@ -560,9 +640,17 @@ void tst_QCborStreamReader::strings()
QFETCH(QByteArray, data);
QFETCH(QString, expected);
QFETCH_GLOBAL(bool, useDevice);
bool isChunked = expected.startsWith('(');
QBuffer buffer(&data), controlBuffer(&data);
QCborStreamReader reader(data), controlReader(data);
if (useDevice) {
buffer.open(QIODevice::ReadOnly);
controlBuffer.open(QIODevice::ReadOnly);
reader.setDevice(&buffer);
controlReader.setDevice(&controlBuffer);
}
QVERIFY(reader.isString() || reader.isByteArray());
QCOMPARE(reader.isLengthKnown(), !isChunked);
@ -618,11 +706,17 @@ void tst_QCborStreamReader::emptyContainers_data()
void tst_QCborStreamReader::emptyContainers()
{
QFETCH_GLOBAL(bool, useDevice);
QFETCH(QByteArray, data);
QFETCH(QString, expected);
removeIndicators(expected);
QBuffer buffer(&data);
QCborStreamReader reader(data);
if (useDevice) {
buffer.open(QIODevice::ReadOnly);
reader.setDevice(&buffer);
}
QVERIFY(reader.isValid());
QCOMPARE(reader.lastError(), QCborError::NoError);
if (reader.isLengthKnown())
@ -649,7 +743,15 @@ void tst_QCborStreamReader::arrays_data()
static void checkContainer(int len, const QByteArray &data, const QString &expected)
{
QFETCH_GLOBAL(bool, useDevice);
QByteArray copy = data;
QBuffer buffer(&copy);
QCborStreamReader reader(data);
if (useDevice) {
buffer.open(QIODevice::ReadOnly);
reader.setDevice(&buffer);
}
QVERIFY(reader.isValid());
QCOMPARE(reader.lastError(), QCborError::NoError);
if (len >= 0) {
@ -749,8 +851,15 @@ void tst_QCborStreamReader::next()
{
QFETCH(QByteArray, data);
auto doit = [](const QByteArray &data) {
auto doit = [](QByteArray data) {
QFETCH_GLOBAL(bool, useDevice);
QBuffer buffer(&data);
QCborStreamReader reader(data);
if (useDevice) {
buffer.open(QIODevice::ReadOnly);
reader.setDevice(&buffer);
}
return reader.next();
};
@ -774,9 +883,15 @@ void tst_QCborStreamReader::validation_data()
void tst_QCborStreamReader::validation()
{
QFETCH_GLOBAL(bool, useDevice);
QFETCH(QByteArray, data);
QBuffer buffer(&data);
QCborStreamReader reader(data);
if (useDevice) {
buffer.open(QIODevice::ReadOnly);
reader.setDevice(&buffer);
}
parseOne(reader);
QVERIFY(reader.lastError() != QCborError::NoError);
@ -830,9 +945,16 @@ void tst_QCborStreamReader::recursionLimit_data()
void tst_QCborStreamReader::recursionLimit()
{
QFETCH_GLOBAL(bool, useDevice);
QFETCH(QByteArray, data);
QCborStreamReader reader('\x81' + data);
data.prepend('\x81');
QBuffer buffer(&data);
QCborStreamReader reader(data);
if (useDevice) {
buffer.open(QIODevice::ReadOnly);
reader.setDevice(&buffer);
}
// verify that it works normally:
QVERIFY(reader.enterContainer());
@ -854,20 +976,38 @@ void tst_QCborStreamReader::addData_singleElement_data()
void tst_QCborStreamReader::addData_singleElement()
{
QFETCH_GLOBAL(bool, useDevice);
QFETCH(QByteArray, data);
QFETCH(QString, expected);
removeIndicators(expected);
QByteArray growing;
QBuffer buffer(&growing);
QCborStreamReader reader;
if (useDevice) {
buffer.open(QIODevice::ReadOnly);
reader.setDevice(&buffer);
}
for (int i = 0; i < data.size() - 1; ++i) {
// add one byte from the data
reader.addData(data.constData() + i, 1);
if (useDevice) {
growing.append(data.at(i));
reader.reparse();
} else {
reader.addData(data.constData() + i, 1);
}
parseOne(reader);
QCOMPARE(reader.lastError(), QCborError::EndOfFile);
}
// add the last byte
reader.addData(data.right(1));
if (useDevice) {
growing.append(data.right(1));
reader.reparse();
} else {
reader.addData(data.right(1));
}
QCOMPARE(reader.lastError(), QCborError::NoError);
QCOMPARE(parseOne(reader), expected);
}
@ -878,13 +1018,22 @@ void tst_QCborStreamReader::addData_complex()
QFETCH(QString, expected);
removeIndicators(expected);
// transform tags (parseNonRecursive() can't produce the usual form)
// transform tags (parseNonRecursive can't produce the usual form)
expected.replace(QRegularExpression(R"/((\d+)\(([^)]+)\))/"), "Tag:\\1:\\2");
auto doit = [](const QByteArray &data) {
QFETCH_GLOBAL(bool, useDevice);
// start with one byte
int added = 1;
QCborStreamReader reader(data.constData(), 1);
QByteArray growing = data.left(added);
QBuffer buffer(&growing);
QCborStreamReader reader(growing);
if (useDevice) {
buffer.open(QIODevice::ReadOnly);
reader.setDevice(&buffer);
}
QString result;
bool printingStringChunks = false;
forever {
@ -897,7 +1046,14 @@ void tst_QCborStreamReader::addData_complex()
// add more data
if (added == data.size())
return QStringLiteral("Couldn't parse even with all data");
reader.addData(data.constData() + added++, 1);
if (useDevice) {
growing.append(data.at(added));
reader.reparse();
} else {
reader.addData(data.constData() + added, 1);
}
++added;
}
}
};