Port qCompress() to zstream/deflate()
The zlib convenience API we've been using so far has two problems:

- On Windows-64, where sizeof(long) == 4, the use of ulong for sizes meant that we could not compress data compressible on other 64-bit platforms (Unix). While zstream also uses ulong, being a stream API, it allows feeding data in chunks. The total_in and total_out members are only required for gzip compression and are otherwise just informational. They're unsigned, so their overflow does not cause UB. In summary, using zstream + deflate() allows us to compress more than 4GiB of data even on Windows-64.

- On all platforms, we always allocated the output buffer in such a way as to accommodate the pathological case of random, incompressible data, so the output buffer was larger than the input. Using zstream + deflate(), we can start with a smaller buffer, then let zlib pick up where it left off when it ran out of output buffer space, saving memory in the common case that compression meaningfully reduces the size.

To avoid the first few rounds of reallocations, we continue to use zlib's compressBound() for input less than 256KiB.

This completely fixes the compression side of QTBUG-106542 and QTBUG-104972.

Pick-to: 6.4 6.3 6.2
Fixes: QTBUG-104972
Fixes: QTBUG-106542
Change-Id: Ia7e6c38403906b35462480fd611b482f05a5c59c
Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
This commit is contained in:
parent
fdd0a1bc17
commit
2fd990b386
@ -506,15 +506,6 @@ quint16 qChecksum(QByteArrayView data, Qt::ChecksumType standard)
|
||||
The default value is -1, which specifies zlib's default
|
||||
compression.
|
||||
|
||||
//![compress-limit-note]
|
||||
\note The maximum size of data that this function can consume is limited by
|
||||
what the platform's \c{unsigned long} can represent (a Zlib limitation).
|
||||
That means that data > 4GiB can be compressed and decompressed on a 64-bit
|
||||
Unix system, but not on a 64-bit Windows system. Portable code should
|
||||
therefore avoid using qCompress()/qUncompress() to compress more than 4GiB
|
||||
of input.
|
||||
//![compress-limit-note]
|
||||
|
||||
\sa qUncompress(const QByteArray &data)
|
||||
*/
|
||||
|
||||
@ -526,8 +517,6 @@ quint16 qChecksum(QByteArrayView data, Qt::ChecksumType standard)
|
||||
|
||||
Compresses the first \a nbytes of \a data at compression level
|
||||
\a compressionLevel and returns the compressed data in a new byte array.
|
||||
|
||||
\include qbytearray.cpp compress-limit-note
|
||||
*/
|
||||
|
||||
#ifndef QT_NO_COMPRESS
|
||||
@ -686,30 +675,40 @@ QByteArray qCompress(const uchar* data, qsizetype nbytes, int compressionLevel)
|
||||
if (compressionLevel < -1 || compressionLevel > 9)
|
||||
compressionLevel = -1;
|
||||
|
||||
ulong len = nbytes + nbytes / 100 + 13;
|
||||
QByteArray bazip;
|
||||
int res;
|
||||
do {
|
||||
bazip.resize(len + HeaderSize);
|
||||
res = ::compress2(reinterpret_cast<uchar *>(bazip.data()) + HeaderSize, &len,
|
||||
data, nbytes,
|
||||
compressionLevel);
|
||||
|
||||
switch (res) {
|
||||
case Z_OK:
|
||||
bazip.resize(len + HeaderSize);
|
||||
qToBigEndian(qt_saturate<CompressSizeHint_t>(nbytes), bazip.data());
|
||||
break;
|
||||
case Z_MEM_ERROR:
|
||||
return tooMuchData(ZLibOp::Compression);
|
||||
|
||||
case Z_BUF_ERROR:
|
||||
len *= 2;
|
||||
break;
|
||||
QArrayDataPointer out = [&] {
|
||||
constexpr qsizetype SingleAllocLimit = 256 * 1024; // the maximum size for which we use
|
||||
// zlib's compressBound() to guarantee
|
||||
// the output buffer size is sufficient
|
||||
// to hold the result
|
||||
qsizetype capacity = HeaderSize;
|
||||
if (nbytes < SingleAllocLimit) {
|
||||
// use maximum size
|
||||
capacity += compressBound(uLong(nbytes)); // cannot overflow (both times)!
|
||||
return QArrayDataPointer{QTypedArrayData<char>::allocate(capacity)};
|
||||
}
|
||||
} while (res == Z_BUF_ERROR);
|
||||
|
||||
return bazip;
|
||||
// for larger buffers, assume it compresses optimally, and
|
||||
// grow geometrically from there:
|
||||
constexpr qsizetype MaxCompressionFactor = 1024; // max theoretical factor is 1032
|
||||
// cf. http://www.zlib.org/zlib_tech.html,
|
||||
// but use a nearby power-of-two (faster)
|
||||
capacity += std::max(qsizetype(compressBound(uLong(SingleAllocLimit))),
|
||||
nbytes / MaxCompressionFactor);
|
||||
return QArrayDataPointer{QTypedArrayData<char>::allocate(capacity, QArrayData::Grow)};
|
||||
}();
|
||||
|
||||
if (out.data() == nullptr) // allocation failed
|
||||
return tooMuchData(ZLibOp::Compression);
|
||||
|
||||
qToBigEndian(qt_saturate<CompressSizeHint_t>(nbytes), out.data());
|
||||
out.size = HeaderSize;
|
||||
|
||||
return xxflate(ZLibOp::Compression, std::move(out), {data, nbytes},
|
||||
[=] (z_stream *zs) { return deflateInit(zs, compressionLevel); },
|
||||
[] (z_stream *zs, size_t inputLeft) {
|
||||
return deflate(zs, inputLeft ? Z_NO_FLUSH : Z_FINISH);
|
||||
},
|
||||
[] (z_stream *zs) { deflateEnd(zs); });
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -357,6 +357,19 @@ void tst_QByteArray::qUncompress4GiBPlus()
|
||||
QSKIP("Failed to allocate enough memory.");
|
||||
}
|
||||
|
||||
QCOMPARE(c.size(), 4 * GiB + 1);
|
||||
QCOMPARE(std::string_view{c}.find_first_not_of('X'),
|
||||
std::string_view::npos);
|
||||
|
||||
// re-compress once
|
||||
// (produces 18MiB, we shouldn't use much more than that in allocated capacity)
|
||||
c = ::qCompress(c);
|
||||
QVERIFY(!c.isNull());
|
||||
|
||||
// and un-compress again, to make sure compression worked (we
|
||||
// can't compare with compressed_3x, because zlib may change):
|
||||
c = ::qUncompress(c);
|
||||
|
||||
QCOMPARE(c.size(), 4 * GiB + 1);
|
||||
QCOMPARE(std::string_view{c}.find_first_not_of('X'),
|
||||
std::string_view::npos);
|
||||
|
Loading…
Reference in New Issue
Block a user