diff --git a/src/network/access/qdecompresshelper.cpp b/src/network/access/qdecompresshelper.cpp index 451684bf1b..f3227efe8a 100644 --- a/src/network/access/qdecompresshelper.cpp +++ b/src/network/access/qdecompresshelper.cpp @@ -237,7 +237,13 @@ void QDecompressHelper::setCountingBytesEnabled(bool shouldCount) qint64 QDecompressHelper::uncompressedSize() const { Q_ASSERT(countDecompressed); - return uncompressedBytes; + // Use the 'totalUncompressedBytes' from the countHelper if it exceeds the amount of bytes + // that we know about. + auto totalUncompressed = + countHelper && countHelper->totalUncompressedBytes > totalUncompressedBytes + ? countHelper->totalUncompressedBytes + : totalUncompressedBytes; + return totalUncompressed - totalBytesRead; } /*! @@ -262,10 +268,9 @@ void QDecompressHelper::feed(QByteArray &&data) { Q_ASSERT(contentEncoding != None); totalCompressedBytes += data.size(); - if (!countInternal(data)) + compressedDataBuffer.append(std::move(data)); + if (!countInternal(compressedDataBuffer[compressedDataBuffer.bufferCount() - 1])) clear(); // If our counting brother failed then so will we :| - else - compressedDataBuffer.append(std::move(data)); } /*! @@ -276,10 +281,9 @@ void QDecompressHelper::feed(const QByteDataBuffer &buffer) { Q_ASSERT(contentEncoding != None); totalCompressedBytes += buffer.byteAmount(); + compressedDataBuffer.append(buffer); if (!countInternal(buffer)) clear(); // If our counting brother failed then so will we :| - else - compressedDataBuffer.append(buffer); } /*! @@ -290,10 +294,10 @@ void QDecompressHelper::feed(QByteDataBuffer &&buffer) { Q_ASSERT(contentEncoding != None); totalCompressedBytes += buffer.byteAmount(); - if (!countInternal(buffer)) + const QByteDataBuffer copy(buffer); + compressedDataBuffer.append(std::move(buffer)); + if (!countInternal(copy)) clear(); // If our counting brother failed then so will we :| - else - compressedDataBuffer.append(std::move(buffer)); } /*! @@ -303,19 +307,34 @@ void QDecompressHelper::feed(QByteDataBuffer &&buffer) This lets us know the final size, unfortunately at the cost of increased computation. - Potential @future improvement: - Decompress XX MiB/KiB before starting the count. - For smaller files the extra decompression can then be avoided. + To save on some of the computation we will store the data until + we reach \c MaxDecompressedDataBufferSize stored. In this case the + "penalty" is completely removed from users who read the data on + readyRead rather than waiting for it all to be received. And + any file smaller than \c MaxDecompressedDataBufferSize will + avoid this issue as well. */ bool QDecompressHelper::countInternal() { Q_ASSERT(countDecompressed); + while (hasDataInternal() + && decompressedDataBuffer.byteAmount() < MaxDecompressedDataBufferSize) { + const qsizetype toRead = 256 * 1024; + QByteArray buffer(toRead, Qt::Uninitialized); + qsizetype bytesRead = readInternal(buffer.data(), buffer.size()); + if (bytesRead == -1) + return false; + buffer.truncate(bytesRead); + decompressedDataBuffer.append(std::move(buffer)); + } + if (!hasDataInternal()) + return true; // handled all the data so far, just return + while (countHelper->hasData()) { std::array temp; qsizetype bytesRead = countHelper->read(temp.data(), temp.size()); if (bytesRead == -1) return false; - uncompressedBytes += bytesRead; } return true; } @@ -358,13 +377,45 @@ bool QDecompressHelper::countInternal(const QByteDataBuffer &buffer) qsizetype QDecompressHelper::read(char *data, qsizetype maxSize) { + if (maxSize <= 0) + return 0; + if (!isValid()) return -1; - qsizetype bytesRead = -1; if (!hasData()) return 0; + qsizetype cachedRead = 0; + if (!decompressedDataBuffer.isEmpty()) { + cachedRead = decompressedDataBuffer.read(data, maxSize); + data += cachedRead; + maxSize -= cachedRead; + } + + qsizetype bytesRead = readInternal(data, maxSize); + if (bytesRead == -1) + return -1; + totalBytesRead += bytesRead + cachedRead; + return bytesRead + cachedRead; +} + +/*! + \internal + Like read() but without attempting to read the + cached/already-decompressed data. +*/ +qsizetype QDecompressHelper::readInternal(char *data, qsizetype maxSize) +{ + Q_ASSERT(isValid()); + + if (maxSize <= 0) + return 0; + + if (!hasDataInternal()) + return 0; + + qsizetype bytesRead = -1; switch (contentEncoding) { case None: Q_UNREACHABLE(); @@ -382,8 +433,6 @@ qsizetype QDecompressHelper::read(char *data, qsizetype maxSize) } if (bytesRead == -1) clear(); - else if (countDecompressed) - uncompressedBytes -= bytesRead; totalUncompressedBytes += bytesRead; if (isPotentialArchiveBomb()) @@ -449,6 +498,16 @@ bool QDecompressHelper::isPotentialArchiveBomb() const read 0 bytes. This most likely means the decompression is done. */ bool QDecompressHelper::hasData() const +{ + return hasDataInternal() || !decompressedDataBuffer.isEmpty(); +} + +/*! + \internal + Like hasData() but internally the buffer of decompressed data is + not interesting. +*/ +bool QDecompressHelper::hasDataInternal() const { return encodedBytesAvailable() || decoderHasData; } @@ -497,11 +556,12 @@ void QDecompressHelper::clear() contentEncoding = None; compressedDataBuffer.clear(); + decompressedDataBuffer.clear(); decoderHasData = false; countDecompressed = false; countHelper.reset(); - uncompressedBytes = 0; + totalBytesRead = 0; totalUncompressedBytes = 0; totalCompressedBytes = 0; } diff --git a/src/network/access/qdecompresshelper_p.h b/src/network/access/qdecompresshelper_p.h index 33241e14f1..b0b60b2119 100644 --- a/src/network/access/qdecompresshelper_p.h +++ b/src/network/access/qdecompresshelper_p.h @@ -98,6 +98,8 @@ public: private: bool isPotentialArchiveBomb() const; + bool hasDataInternal() const; + qsizetype readInternal(char *data, qsizetype maxSize); bool countInternal(); bool countInternal(const QByteArray &data); @@ -111,16 +113,18 @@ private: qsizetype readZstandard(char *data, qsizetype maxSize); QByteDataBuffer compressedDataBuffer; + QByteDataBuffer decompressedDataBuffer; + const qsizetype MaxDecompressedDataBufferSize = 10 * 1024 * 1024; bool decoderHasData = false; bool countDecompressed = false; std::unique_ptr countHelper; - qint64 uncompressedBytes = 0; // Used for calculating the ratio qint64 archiveBombCheckThreshold = 10 * 1024 * 1024; qint64 totalUncompressedBytes = 0; qint64 totalCompressedBytes = 0; + qint64 totalBytesRead = 0; ContentEncoding contentEncoding = None; diff --git a/tests/auto/network/access/qdecompresshelper/tst_qdecompresshelper.cpp b/tests/auto/network/access/qdecompresshelper/tst_qdecompresshelper.cpp index c9849fdcdf..321d373a78 100644 --- a/tests/auto/network/access/qdecompresshelper/tst_qdecompresshelper.cpp +++ b/tests/auto/network/access/qdecompresshelper/tst_qdecompresshelper.cpp @@ -347,20 +347,24 @@ void tst_QDecompressHelper::decompressBigData_data() QTest::addColumn("encoding"); QTest::addColumn("path"); QTest::addColumn("size"); + QTest::addColumn("countAhead"); qint64 fourGiB = 4ll * 1024ll * 1024ll * 1024ll; qint64 fiveGiB = 5ll * 1024ll * 1024ll * 1024ll; - QTest::newRow("gzip-4G") << QByteArray("gzip") << QString(":/4G.gz") << fourGiB; + // Only use countAhead on one of these since they share codepath anyway + QTest::newRow("gzip-counted-4G") << QByteArray("gzip") << QString(":/4G.gz") << fourGiB << true; QTest::newRow("deflate-5G") << QByteArray("deflate") << QString(":/5GiB.txt.inflate") - << fiveGiB; + << fiveGiB << false; #if QT_CONFIG(brotli) - QTest::newRow("brotli-4G") << QByteArray("br") << (srcDir + "/4G.br") << fourGiB; + QTest::newRow("brotli-4G") << QByteArray("br") << (srcDir + "/4G.br") << fourGiB << false; + QTest::newRow("brotli-counted-4G") << QByteArray("br") << (srcDir + "/4G.br") << fourGiB << true; #endif #if QT_CONFIG(zstd) - QTest::newRow("zstandard-4G") << QByteArray("zstd") << (":/4G.zst") << fourGiB; + QTest::newRow("zstandard-4G") << QByteArray("zstd") << (":/4G.zst") << fourGiB << false; + QTest::newRow("zstandard-counted-4G") << QByteArray("zstd") << (":/4G.zst") << fourGiB << true; #endif } @@ -373,16 +377,20 @@ void tst_QDecompressHelper::decompressBigData() const qint64 third = file.bytesAvailable() / 3; QDecompressHelper helper; + QFETCH(bool, countAhead); + helper.setCountingBytesEnabled(countAhead); helper.setDecompressedSafetyCheckThreshold(-1); QFETCH(QByteArray, encoding); helper.setEncoding(encoding); - QByteArray output(32 * 1024, Qt::Uninitialized); + // The size of 'output' should be at least QDecompressHelper::MaxDecompressedDataBufferSize + 1 + QByteArray output(10 * 1024 * 1024 + 1, Qt::Uninitialized); qint64 totalSize = 0; while (!file.atEnd()) { helper.feed(file.read(third)); while (helper.hasData()) { qsizetype bytesRead = helper.read(output.data(), output.size()); + QVERIFY(bytesRead >= 0); QVERIFY(bytesRead <= output.size()); totalSize += bytesRead; const auto isZero = [](char c) { return c == '\0'; }; @@ -449,10 +457,10 @@ void tst_QDecompressHelper::bigZlib() helper.feed(compressedData.mid(firstHalf.size())); // We need the whole thing in one go... which is why this test is not available for 32-bit - qint64 expected = 5ll * 1024ll * 1024ll * 1024ll; - // This can be replaced with QByteArray after the qsizetype change is merged - std::unique_ptr output(new char[expected]); - qsizetype size = helper.read(output.get(), expected); + const qint64 expected = 5ll * 1024ll * 1024ll * 1024ll; + // Request a few more byte than what is available, to verify exact size + QByteArray output(expected + 42, Qt::Uninitialized); + const qsizetype size = helper.read(output.data(), output.size()); QCOMPARE(size, expected); #endif }