From 93cf628a08e82256abc77d1fd144bda78b7ee1df Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 2 May 2018 12:56:37 -0700 Subject: [PATCH] introduce LZ4_decoderRingBufferSize() fuzzer : fix and robustify ring buffer tests --- lib/lz4.c | 33 +++++++++++++++---- lib/lz4.h | 56 +++++++++++++++++++++----------- tests/fuzzer.c | 87 ++++++++++++++++++++++++++++++-------------------- 3 files changed, 115 insertions(+), 61 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index b46910f..71fe8f3 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -1431,7 +1431,7 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( /* strictly "less than" on input, to re-enter the loop with at least one byte */ likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend))) { - /* Copy the literals. */ + /* Copy the literals */ memcpy(op, ip, endOnInput ? 16 : 8); op += length; ip += length; @@ -1688,12 +1688,11 @@ int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream) return 0; } -/*! - * LZ4_setStreamDecode() : - * Use this function to instruct where to find the dictionary. - * This function is not necessary if previous data is still available where it was decoded. - * Loading a size of 0 is allowed (same effect as no dictionary). - * Return : 1 if OK, 0 if error +/*! LZ4_setStreamDecode() : + * Use this function to instruct where to find the dictionary. + * This function is not necessary if previous data is still available where it was decoded. + * Loading a size of 0 is allowed (same effect as no dictionary). + * @return : 1 if OK, 0 if error */ int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize) { @@ -1705,6 +1704,26 @@ int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dicti return 1; } +/*! LZ4_decoderRingBufferSize() : + * when setting a ring buffer for streaming decompression (optional scenario), + * provides the minimum size of this ring buffer + * to be compatible with any source respecting maxBlockSize condition. + * Note : in a ring buffer scenario, + * blocks are presumed decompressed next to each other + * up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize), + * at which stage it resumes from beginning of ring buffer. + * @return : minimum ring buffer size, + * or 0 if there is an error (invalid maxBlockSize). + */ + +int LZ4_decoderRingBufferSize(int maxBlockSize) +{ + if (maxBlockSize < 0) return 0; + if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0; + if (maxBlockSize < 16) maxBlockSize = 16; + return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize); +} + /* *_continue() : These decoding functions allow decompression of multiple blocks in "streaming" mode. diff --git a/lib/lz4.h b/lib/lz4.h index 2745260..410f480 100644 --- a/lib/lz4.h +++ b/lib/lz4.h @@ -293,45 +293,62 @@ LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxD * Streaming Decompression Functions * Bufferless synchronous API ************************************************/ -typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* incomplete type (defined later) */ +typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */ /*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() : - * creation / destruction of streaming decompression tracking structure. - * A tracking structure can be re-used multiple times sequentially. */ + * creation / destruction of streaming decompression tracking context. + * A tracking context can be re-used multiple times. + */ LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void); LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream); /*! LZ4_setStreamDecode() : - * An LZ4_streamDecode_t structure can be allocated once and re-used multiple times. + * An LZ4_streamDecode_t context can be allocated once and re-used multiple times. * Use this function to start decompression of a new stream of blocks. * A dictionary can optionnally be set. Use NULL or size 0 for a reset order. + * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression. * @return : 1 if OK, 0 if error */ LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize); +/*! LZ4_decoderRingBufferSize() : v1.8.2 + * Note : in a ring buffer scenario (optional), + * blocks are presumed decompressed next to each other + * up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize), + * at which stage it resumes from beginning of ring buffer. + * When setting such a ring buffer for streaming decompression, + * provides the minimum size of this ring buffer + * to be compatible with any source respecting maxBlockSize condition. + * @return : minimum ring buffer size, + * or 0 if there is an error (invalid maxBlockSize). + */ +LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize); +#define LZ4_DECODER_RING_BUFFER_SIZE(mbs) (65536 + 14 + (mbs)) /* for static allocation; mbs presumed valid */ + /*! LZ4_decompress_*_continue() : * These decoding functions allow decompression of consecutive blocks in "streaming" mode. * A block is an unsplittable entity, it must be presented entirely to a decompression function. - * Decompression functions only accept one block at a time. + * Decompression functions only accepts one block at a time. * The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded. - * If less than 64KB of data has been decoded all the data must be present. + * If less than 64KB of data has been decoded, all the data must be present. * - * Special : if application sets a ring buffer for decompression, it must respect one of the following conditions : + * Special : if decompression side sets a ring buffer, it must respect one of the following conditions : + * - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize). + * maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes. + * In which case, encoding and decoding buffers do not need to be synchronized. + * Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize. + * - Synchronized mode : + * Decompression buffer size is _exactly_ the same as compression buffer size, + * and follows exactly same update rule (block boundaries at same positions), + * and decoding function is provided with exact decompressed size of each block (exception for last block of the stream), + * _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB). * - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes. - * maxBlockSize is the maximum size of any single block. It is implementation dependent, and can have any value (presumed > 16 bytes). * In which case, encoding and decoding buffers do not need to be synchronized, * and encoding ring buffer can have any size, including small ones ( < 64 KB). - * - Decompression buffer size is _at least_ 64 KB + 8 bytes + maxBlockSize. - * In which case, encoding and decoding buffers do not need to be synchronized, - * and encoding ring buffer can have any size, including larger than decoding buffer. - * - Decompression buffer size is exactly the same as compression buffer size, - * and follows exactly same update rule (block boundaries at same positions). - * If the decoding function is provided with the exact decompressed size of each block, - * then decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB). - * If the decoding function only knows the compressed size, - * then buffer size must be a minimum of 64 KB + 8 bytes + maxBlockSize. - * Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer, - * and indicate where it is saved using LZ4_setStreamDecode() before decompressing next block. + * + * Whenever these conditions are not possible, + * save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression, + * then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block. */ LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity); LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize); @@ -341,6 +358,7 @@ LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecod * These decoding functions work the same as * a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue() * They are stand-alone, and don't need an LZ4_streamDecode_t structure. + * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression. */ LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize); LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize); diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 1fbda8a..28f14bf 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -47,6 +47,7 @@ #include /* fgets, sscanf */ #include /* strcmp */ #include /* clock_t, clock, CLOCKS_PER_SEC */ +#include #define LZ4_STATIC_LINKING_ONLY #define LZ4_HC_STATIC_LINKING_ONLY #include "lz4hc.h" @@ -953,6 +954,7 @@ static void FUZ_unitTests(int compressionLevel) const unsigned cycleNb= 0; char testInput[testInputSize]; char testCompressed[testCompressedSize]; + size_t const testVerifySize = testInputSize; char testVerify[testInputSize]; char ringBuffer[ringBufferSize]; U32 randState = 1; @@ -1197,19 +1199,28 @@ static void FUZ_unitTests(int compressionLevel) } } - /* small decoder-side ring buffer test */ + /* Ring buffer test : Non synchronized decoder */ + /* This test uses minimum amount of memory required to setup a decoding ring buffer + * while being unsynchronized with encoder + * (no assumption done on how the data is encoded, it just follows LZ4 format specification). + * This size is documented in lz4.h, and is LZ4_decoderRingBufferSize(maxBlockSize). + */ { XXH64_state_t xxhOrig; XXH64_state_t xxhNewSafe, xxhNewFast; LZ4_streamDecode_t decodeStateSafe, decodeStateFast; - const U32 maxMessageSizeLog = 12; - const U32 maxMessageSizeMask = (1< maxMessageSize. We just want to fill the decoding ring buffer once. */ + assert(messageSize < dBufferSize); XXH64_update(&xxhOrig, testInput + iNext, messageSize); crcOrig = XXH64_digest(&xxhOrig); compressedSize = LZ4_compress_HC_continue(&sHC, testInput + iNext, testCompressed, messageSize, testCompressedSize-ringBufferSize); FUZ_CHECKTEST(compressedSize==0, "LZ4_compress_HC_continue() compression failed"); - result = LZ4_decompress_safe_continue(&decodeStateSafe, testCompressed, testVerify + dNext, compressedSize, messageSize); - FUZ_CHECKTEST(result!=(int)messageSize, "64K D.ringBuffer : LZ4_decompress_safe_continue() test failed"); + result = LZ4_decompress_safe_continue(&decodeStateSafe, testCompressed, ringBufferSafe + dNext, compressedSize, messageSize); + FUZ_CHECKTEST(result!=messageSize, "64K D.ringBuffer : LZ4_decompress_safe_continue() test failed"); - XXH64_update(&xxhNewSafe, testVerify + dNext, messageSize); + XXH64_update(&xxhNewSafe, ringBufferSafe + dNext, messageSize); { U64 const crcNew = XXH64_digest(&xxhNewSafe); FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_continue() decompression corruption"); } - result = LZ4_decompress_fast_continue(&decodeStateFast, testCompressed, testVerify + dNext, messageSize); + result = LZ4_decompress_fast_continue(&decodeStateFast, testCompressed, ringBufferFast + dNext, messageSize); FUZ_CHECKTEST(result!=compressedSize, "64K D.ringBuffer : LZ4_decompress_fast_continue() test failed"); - XXH64_update(&xxhNewFast, testVerify + dNext, messageSize); + XXH64_update(&xxhNewFast, ringBufferFast + dNext, messageSize); { U64 const crcNew = XXH64_digest(&xxhNewFast); FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_fast_continue() decompression corruption"); } - /* prepare next message */ + /* prepare second message */ dNext += messageSize; totalMessageSize += messageSize; - messageSize = BSIZE2; - iNext = 132000; - memcpy(testInput + iNext, testInput + 8, messageSize); - if (dNext > dBufferSize) dNext = 0; + messageSize = maxMessageSize; + iNext = BSIZE1+1; + assert(BSIZE1 >= 65535); + memcpy(testInput + iNext, testInput + (BSIZE1-65535), messageSize); /* will generate a match at max distance == 65535 */ + FUZ_CHECKTEST(dNext+messageSize <= dBufferSize, "Ring buffer test : second message should require restarting from beginning"); + dNext = 0; while (totalMessageSize < 9 MB) { XXH64_update(&xxhOrig, testInput + iNext, messageSize); @@ -1256,32 +1269,36 @@ static void FUZ_unitTests(int compressionLevel) compressedSize = LZ4_compress_HC_continue(&sHC, testInput + iNext, testCompressed, messageSize, testCompressedSize-ringBufferSize); FUZ_CHECKTEST(compressedSize==0, "LZ4_compress_HC_continue() compression failed"); + DISPLAYLEVEL(5, "compressed %i bytes to %i bytes \n", messageSize, compressedSize); -#if 1 /* Because the ring buffer is small, decompression overwrites part of the output which - * is first used as dictionary. Hence only one decompression function can be tested. */ - result = LZ4_decompress_safe_continue(&decodeStateSafe, testCompressed, testVerify + dNext, compressedSize, messageSize); - FUZ_CHECKTEST(result!=(int)messageSize, "64K D.ringBuffer : LZ4_decompress_safe_continue() test failed"); - XXH64_update(&xxhNewSafe, testVerify + dNext, messageSize); + /* test LZ4_decompress_safe_continue */ + assert(dNext < dBufferSize); + assert(dBufferSize - dNext >= maxMessageSize); + result = LZ4_decompress_safe_continue(&decodeStateSafe, + testCompressed, ringBufferSafe + dNext, + compressedSize, dBufferSize - dNext); /* works without knowing messageSize, but under assumption that messageSize < maxMessageSize */ + FUZ_CHECKTEST(result!=messageSize, "D.ringBuffer : LZ4_decompress_safe_continue() test failed"); + XXH64_update(&xxhNewSafe, ringBufferSafe + dNext, messageSize); { U64 const crcNew = XXH64_digest(&xxhNewSafe); - if (crcOrig != crcNew) FUZ_findDiff(testInput + iNext, testVerify + dNext); - FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_continue() decompression corruption during small decoder-side ring buffer test"); + if (crcOrig != crcNew) FUZ_findDiff(testInput + iNext, ringBufferSafe + dNext); + FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_continue() decompression corruption during D.ringBuffer test"); } -#else - result = LZ4_decompress_fast_continue(&decodeStateFast, testCompressed, testVerify + dNext, messageSize); - FUZ_CHECKTEST(result!=compressedSize, "64K D.ringBuffer : LZ4_decompress_fast_continue() test failed"); - XXH64_update(&xxhNewFast, testVerify + dNext, messageSize); + /* test LZ4_decompress_fast_continue in its own buffer ringBufferFast */ + result = LZ4_decompress_fast_continue(&decodeStateFast, testCompressed, ringBufferFast + dNext, messageSize); + FUZ_CHECKTEST(result!=compressedSize, "D.ringBuffer : LZ4_decompress_fast_continue() test failed"); + XXH64_update(&xxhNewFast, ringBufferFast + dNext, messageSize); { U64 const crcNew = XXH64_digest(&xxhNewFast); - if (crcOrig != crcNew) FUZ_findDiff(testInput + iNext, testVerify + dNext); - FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_fast_continue() decompression corruption during small decoder-side ring buffer test"); + if (crcOrig != crcNew) FUZ_findDiff(testInput + iNext, ringBufferFast + dNext); + FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_fast_continue() decompression corruption during D.ringBuffer test"); } -#endif + /* prepare next message */ dNext += messageSize; totalMessageSize += messageSize; messageSize = (FUZ_rand(&randState) & maxMessageSizeMask) + 1; iNext = (FUZ_rand(&randState) & 65535); - if (dNext > dBufferSize) dNext = 0; + if (dNext + maxMessageSize > dBufferSize) dNext = 0; } } }