QDecompressHelper: Skip double-decompression if download is small
To retain backwards compatibility with some QNetworkReply usage, namely connecting solely to finished-signal and allocating a buffer to read into, but without storing the entire decompressed file in memory until read, we may decompress the file twice. With this patch users can now avoid this double decompression if the amount of buffered data stays below 10 MiB. This means any file smaller than 10 MiB will never need to be decompressed twice to know the size of it. On top of that, if the data is handled as it arrives (e.g. in readyRead) and the buffer is kept below 10 MiB it won't need to decompress twice either. This is active as long as "countDecompressed" is true, though it currently always is in QNetworkAccessManger, with a future goal to make it possible to control with public API. Since it requires the user to potentially adapt their usage of QNetworkReply. In this patch we also stop tracking the amount of unhandled uncompressed bytes (uncompressedBytes) in favor of tracking the total amount of bytes which has been read() by the user of QDecompressHelper (totalBytesRead), since we can more intuitively work out the total amount of unread bytes using this value. Change-Id: Ie3d8d6e39a18343fcf9b610f45c7fe7e4cd4e474 Reviewed-by: Timur Pocheptsov <timur.pocheptsov@qt.io>
This commit is contained in:
parent
5889985c8c
commit
b41d5f6293
@ -237,7 +237,13 @@ void QDecompressHelper::setCountingBytesEnabled(bool shouldCount)
|
|||||||
qint64 QDecompressHelper::uncompressedSize() const
|
qint64 QDecompressHelper::uncompressedSize() const
|
||||||
{
|
{
|
||||||
Q_ASSERT(countDecompressed);
|
Q_ASSERT(countDecompressed);
|
||||||
return uncompressedBytes;
|
// Use the 'totalUncompressedBytes' from the countHelper if it exceeds the amount of bytes
|
||||||
|
// that we know about.
|
||||||
|
auto totalUncompressed =
|
||||||
|
countHelper && countHelper->totalUncompressedBytes > totalUncompressedBytes
|
||||||
|
? countHelper->totalUncompressedBytes
|
||||||
|
: totalUncompressedBytes;
|
||||||
|
return totalUncompressed - totalBytesRead;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
@ -262,10 +268,9 @@ void QDecompressHelper::feed(QByteArray &&data)
|
|||||||
{
|
{
|
||||||
Q_ASSERT(contentEncoding != None);
|
Q_ASSERT(contentEncoding != None);
|
||||||
totalCompressedBytes += data.size();
|
totalCompressedBytes += data.size();
|
||||||
if (!countInternal(data))
|
|
||||||
clear(); // If our counting brother failed then so will we :|
|
|
||||||
else
|
|
||||||
compressedDataBuffer.append(std::move(data));
|
compressedDataBuffer.append(std::move(data));
|
||||||
|
if (!countInternal(compressedDataBuffer[compressedDataBuffer.bufferCount() - 1]))
|
||||||
|
clear(); // If our counting brother failed then so will we :|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
@ -276,10 +281,9 @@ void QDecompressHelper::feed(const QByteDataBuffer &buffer)
|
|||||||
{
|
{
|
||||||
Q_ASSERT(contentEncoding != None);
|
Q_ASSERT(contentEncoding != None);
|
||||||
totalCompressedBytes += buffer.byteAmount();
|
totalCompressedBytes += buffer.byteAmount();
|
||||||
|
compressedDataBuffer.append(buffer);
|
||||||
if (!countInternal(buffer))
|
if (!countInternal(buffer))
|
||||||
clear(); // If our counting brother failed then so will we :|
|
clear(); // If our counting brother failed then so will we :|
|
||||||
else
|
|
||||||
compressedDataBuffer.append(buffer);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
@ -290,10 +294,10 @@ void QDecompressHelper::feed(QByteDataBuffer &&buffer)
|
|||||||
{
|
{
|
||||||
Q_ASSERT(contentEncoding != None);
|
Q_ASSERT(contentEncoding != None);
|
||||||
totalCompressedBytes += buffer.byteAmount();
|
totalCompressedBytes += buffer.byteAmount();
|
||||||
if (!countInternal(buffer))
|
const QByteDataBuffer copy(buffer);
|
||||||
clear(); // If our counting brother failed then so will we :|
|
|
||||||
else
|
|
||||||
compressedDataBuffer.append(std::move(buffer));
|
compressedDataBuffer.append(std::move(buffer));
|
||||||
|
if (!countInternal(copy))
|
||||||
|
clear(); // If our counting brother failed then so will we :|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
@ -303,19 +307,34 @@ void QDecompressHelper::feed(QByteDataBuffer &&buffer)
|
|||||||
This lets us know the final size, unfortunately at the cost of
|
This lets us know the final size, unfortunately at the cost of
|
||||||
increased computation.
|
increased computation.
|
||||||
|
|
||||||
Potential @future improvement:
|
To save on some of the computation we will store the data until
|
||||||
Decompress XX MiB/KiB before starting the count.
|
we reach \c MaxDecompressedDataBufferSize stored. In this case the
|
||||||
For smaller files the extra decompression can then be avoided.
|
"penalty" is completely removed from users who read the data on
|
||||||
|
readyRead rather than waiting for it all to be received. And
|
||||||
|
any file smaller than \c MaxDecompressedDataBufferSize will
|
||||||
|
avoid this issue as well.
|
||||||
*/
|
*/
|
||||||
bool QDecompressHelper::countInternal()
|
bool QDecompressHelper::countInternal()
|
||||||
{
|
{
|
||||||
Q_ASSERT(countDecompressed);
|
Q_ASSERT(countDecompressed);
|
||||||
|
while (hasDataInternal()
|
||||||
|
&& decompressedDataBuffer.byteAmount() < MaxDecompressedDataBufferSize) {
|
||||||
|
const qsizetype toRead = 256 * 1024;
|
||||||
|
QByteArray buffer(toRead, Qt::Uninitialized);
|
||||||
|
qsizetype bytesRead = readInternal(buffer.data(), buffer.size());
|
||||||
|
if (bytesRead == -1)
|
||||||
|
return false;
|
||||||
|
buffer.truncate(bytesRead);
|
||||||
|
decompressedDataBuffer.append(std::move(buffer));
|
||||||
|
}
|
||||||
|
if (!hasDataInternal())
|
||||||
|
return true; // handled all the data so far, just return
|
||||||
|
|
||||||
while (countHelper->hasData()) {
|
while (countHelper->hasData()) {
|
||||||
std::array<char, 1024> temp;
|
std::array<char, 1024> temp;
|
||||||
qsizetype bytesRead = countHelper->read(temp.data(), temp.size());
|
qsizetype bytesRead = countHelper->read(temp.data(), temp.size());
|
||||||
if (bytesRead == -1)
|
if (bytesRead == -1)
|
||||||
return false;
|
return false;
|
||||||
uncompressedBytes += bytesRead;
|
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -358,13 +377,45 @@ bool QDecompressHelper::countInternal(const QByteDataBuffer &buffer)
|
|||||||
|
|
||||||
qsizetype QDecompressHelper::read(char *data, qsizetype maxSize)
|
qsizetype QDecompressHelper::read(char *data, qsizetype maxSize)
|
||||||
{
|
{
|
||||||
|
if (maxSize <= 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
if (!isValid())
|
if (!isValid())
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
qsizetype bytesRead = -1;
|
|
||||||
if (!hasData())
|
if (!hasData())
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
qsizetype cachedRead = 0;
|
||||||
|
if (!decompressedDataBuffer.isEmpty()) {
|
||||||
|
cachedRead = decompressedDataBuffer.read(data, maxSize);
|
||||||
|
data += cachedRead;
|
||||||
|
maxSize -= cachedRead;
|
||||||
|
}
|
||||||
|
|
||||||
|
qsizetype bytesRead = readInternal(data, maxSize);
|
||||||
|
if (bytesRead == -1)
|
||||||
|
return -1;
|
||||||
|
totalBytesRead += bytesRead + cachedRead;
|
||||||
|
return bytesRead + cachedRead;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
\internal
|
||||||
|
Like read() but without attempting to read the
|
||||||
|
cached/already-decompressed data.
|
||||||
|
*/
|
||||||
|
qsizetype QDecompressHelper::readInternal(char *data, qsizetype maxSize)
|
||||||
|
{
|
||||||
|
Q_ASSERT(isValid());
|
||||||
|
|
||||||
|
if (maxSize <= 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (!hasDataInternal())
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
qsizetype bytesRead = -1;
|
||||||
switch (contentEncoding) {
|
switch (contentEncoding) {
|
||||||
case None:
|
case None:
|
||||||
Q_UNREACHABLE();
|
Q_UNREACHABLE();
|
||||||
@ -382,8 +433,6 @@ qsizetype QDecompressHelper::read(char *data, qsizetype maxSize)
|
|||||||
}
|
}
|
||||||
if (bytesRead == -1)
|
if (bytesRead == -1)
|
||||||
clear();
|
clear();
|
||||||
else if (countDecompressed)
|
|
||||||
uncompressedBytes -= bytesRead;
|
|
||||||
|
|
||||||
totalUncompressedBytes += bytesRead;
|
totalUncompressedBytes += bytesRead;
|
||||||
if (isPotentialArchiveBomb())
|
if (isPotentialArchiveBomb())
|
||||||
@ -449,6 +498,16 @@ bool QDecompressHelper::isPotentialArchiveBomb() const
|
|||||||
read 0 bytes. This most likely means the decompression is done.
|
read 0 bytes. This most likely means the decompression is done.
|
||||||
*/
|
*/
|
||||||
bool QDecompressHelper::hasData() const
|
bool QDecompressHelper::hasData() const
|
||||||
|
{
|
||||||
|
return hasDataInternal() || !decompressedDataBuffer.isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
\internal
|
||||||
|
Like hasData() but internally the buffer of decompressed data is
|
||||||
|
not interesting.
|
||||||
|
*/
|
||||||
|
bool QDecompressHelper::hasDataInternal() const
|
||||||
{
|
{
|
||||||
return encodedBytesAvailable() || decoderHasData;
|
return encodedBytesAvailable() || decoderHasData;
|
||||||
}
|
}
|
||||||
@ -497,11 +556,12 @@ void QDecompressHelper::clear()
|
|||||||
contentEncoding = None;
|
contentEncoding = None;
|
||||||
|
|
||||||
compressedDataBuffer.clear();
|
compressedDataBuffer.clear();
|
||||||
|
decompressedDataBuffer.clear();
|
||||||
decoderHasData = false;
|
decoderHasData = false;
|
||||||
|
|
||||||
countDecompressed = false;
|
countDecompressed = false;
|
||||||
countHelper.reset();
|
countHelper.reset();
|
||||||
uncompressedBytes = 0;
|
totalBytesRead = 0;
|
||||||
totalUncompressedBytes = 0;
|
totalUncompressedBytes = 0;
|
||||||
totalCompressedBytes = 0;
|
totalCompressedBytes = 0;
|
||||||
}
|
}
|
||||||
|
@ -98,6 +98,8 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
bool isPotentialArchiveBomb() const;
|
bool isPotentialArchiveBomb() const;
|
||||||
|
bool hasDataInternal() const;
|
||||||
|
qsizetype readInternal(char *data, qsizetype maxSize);
|
||||||
|
|
||||||
bool countInternal();
|
bool countInternal();
|
||||||
bool countInternal(const QByteArray &data);
|
bool countInternal(const QByteArray &data);
|
||||||
@ -111,16 +113,18 @@ private:
|
|||||||
qsizetype readZstandard(char *data, qsizetype maxSize);
|
qsizetype readZstandard(char *data, qsizetype maxSize);
|
||||||
|
|
||||||
QByteDataBuffer compressedDataBuffer;
|
QByteDataBuffer compressedDataBuffer;
|
||||||
|
QByteDataBuffer decompressedDataBuffer;
|
||||||
|
const qsizetype MaxDecompressedDataBufferSize = 10 * 1024 * 1024;
|
||||||
bool decoderHasData = false;
|
bool decoderHasData = false;
|
||||||
|
|
||||||
bool countDecompressed = false;
|
bool countDecompressed = false;
|
||||||
std::unique_ptr<QDecompressHelper> countHelper;
|
std::unique_ptr<QDecompressHelper> countHelper;
|
||||||
qint64 uncompressedBytes = 0;
|
|
||||||
|
|
||||||
// Used for calculating the ratio
|
// Used for calculating the ratio
|
||||||
qint64 archiveBombCheckThreshold = 10 * 1024 * 1024;
|
qint64 archiveBombCheckThreshold = 10 * 1024 * 1024;
|
||||||
qint64 totalUncompressedBytes = 0;
|
qint64 totalUncompressedBytes = 0;
|
||||||
qint64 totalCompressedBytes = 0;
|
qint64 totalCompressedBytes = 0;
|
||||||
|
qint64 totalBytesRead = 0;
|
||||||
|
|
||||||
ContentEncoding contentEncoding = None;
|
ContentEncoding contentEncoding = None;
|
||||||
|
|
||||||
|
@ -347,20 +347,24 @@ void tst_QDecompressHelper::decompressBigData_data()
|
|||||||
QTest::addColumn<QByteArray>("encoding");
|
QTest::addColumn<QByteArray>("encoding");
|
||||||
QTest::addColumn<QString>("path");
|
QTest::addColumn<QString>("path");
|
||||||
QTest::addColumn<qint64>("size");
|
QTest::addColumn<qint64>("size");
|
||||||
|
QTest::addColumn<bool>("countAhead");
|
||||||
|
|
||||||
qint64 fourGiB = 4ll * 1024ll * 1024ll * 1024ll;
|
qint64 fourGiB = 4ll * 1024ll * 1024ll * 1024ll;
|
||||||
qint64 fiveGiB = 5ll * 1024ll * 1024ll * 1024ll;
|
qint64 fiveGiB = 5ll * 1024ll * 1024ll * 1024ll;
|
||||||
|
|
||||||
QTest::newRow("gzip-4G") << QByteArray("gzip") << QString(":/4G.gz") << fourGiB;
|
// Only use countAhead on one of these since they share codepath anyway
|
||||||
|
QTest::newRow("gzip-counted-4G") << QByteArray("gzip") << QString(":/4G.gz") << fourGiB << true;
|
||||||
QTest::newRow("deflate-5G") << QByteArray("deflate") << QString(":/5GiB.txt.inflate")
|
QTest::newRow("deflate-5G") << QByteArray("deflate") << QString(":/5GiB.txt.inflate")
|
||||||
<< fiveGiB;
|
<< fiveGiB << false;
|
||||||
|
|
||||||
#if QT_CONFIG(brotli)
|
#if QT_CONFIG(brotli)
|
||||||
QTest::newRow("brotli-4G") << QByteArray("br") << (srcDir + "/4G.br") << fourGiB;
|
QTest::newRow("brotli-4G") << QByteArray("br") << (srcDir + "/4G.br") << fourGiB << false;
|
||||||
|
QTest::newRow("brotli-counted-4G") << QByteArray("br") << (srcDir + "/4G.br") << fourGiB << true;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if QT_CONFIG(zstd)
|
#if QT_CONFIG(zstd)
|
||||||
QTest::newRow("zstandard-4G") << QByteArray("zstd") << (":/4G.zst") << fourGiB;
|
QTest::newRow("zstandard-4G") << QByteArray("zstd") << (":/4G.zst") << fourGiB << false;
|
||||||
|
QTest::newRow("zstandard-counted-4G") << QByteArray("zstd") << (":/4G.zst") << fourGiB << true;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -373,16 +377,20 @@ void tst_QDecompressHelper::decompressBigData()
|
|||||||
const qint64 third = file.bytesAvailable() / 3;
|
const qint64 third = file.bytesAvailable() / 3;
|
||||||
|
|
||||||
QDecompressHelper helper;
|
QDecompressHelper helper;
|
||||||
|
QFETCH(bool, countAhead);
|
||||||
|
helper.setCountingBytesEnabled(countAhead);
|
||||||
helper.setDecompressedSafetyCheckThreshold(-1);
|
helper.setDecompressedSafetyCheckThreshold(-1);
|
||||||
QFETCH(QByteArray, encoding);
|
QFETCH(QByteArray, encoding);
|
||||||
helper.setEncoding(encoding);
|
helper.setEncoding(encoding);
|
||||||
|
|
||||||
QByteArray output(32 * 1024, Qt::Uninitialized);
|
// The size of 'output' should be at least QDecompressHelper::MaxDecompressedDataBufferSize + 1
|
||||||
|
QByteArray output(10 * 1024 * 1024 + 1, Qt::Uninitialized);
|
||||||
qint64 totalSize = 0;
|
qint64 totalSize = 0;
|
||||||
while (!file.atEnd()) {
|
while (!file.atEnd()) {
|
||||||
helper.feed(file.read(third));
|
helper.feed(file.read(third));
|
||||||
while (helper.hasData()) {
|
while (helper.hasData()) {
|
||||||
qsizetype bytesRead = helper.read(output.data(), output.size());
|
qsizetype bytesRead = helper.read(output.data(), output.size());
|
||||||
|
QVERIFY(bytesRead >= 0);
|
||||||
QVERIFY(bytesRead <= output.size());
|
QVERIFY(bytesRead <= output.size());
|
||||||
totalSize += bytesRead;
|
totalSize += bytesRead;
|
||||||
const auto isZero = [](char c) { return c == '\0'; };
|
const auto isZero = [](char c) { return c == '\0'; };
|
||||||
@ -449,10 +457,10 @@ void tst_QDecompressHelper::bigZlib()
|
|||||||
helper.feed(compressedData.mid(firstHalf.size()));
|
helper.feed(compressedData.mid(firstHalf.size()));
|
||||||
|
|
||||||
// We need the whole thing in one go... which is why this test is not available for 32-bit
|
// We need the whole thing in one go... which is why this test is not available for 32-bit
|
||||||
qint64 expected = 5ll * 1024ll * 1024ll * 1024ll;
|
const qint64 expected = 5ll * 1024ll * 1024ll * 1024ll;
|
||||||
// This can be replaced with QByteArray after the qsizetype change is merged
|
// Request a few more byte than what is available, to verify exact size
|
||||||
std::unique_ptr<char[]> output(new char[expected]);
|
QByteArray output(expected + 42, Qt::Uninitialized);
|
||||||
qsizetype size = helper.read(output.get(), expected);
|
const qsizetype size = helper.read(output.data(), output.size());
|
||||||
QCOMPARE(size, expected);
|
QCOMPARE(size, expected);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user