From f2ae385c2f57b692e3283ead684c54b46067d966 Mon Sep 17 00:00:00 2001 From: Oleg Khabinov Date: Tue, 18 Sep 2018 11:29:31 -0700 Subject: [PATCH] Rename initCheck to dirtyContext and use it in LZ4_resetStream_fast() to check if full reset is needed. --- lib/lz4.c | 40 ++++++++++++++++++++++++++++++++-------- lib/lz4.h | 29 +++++++++++++++++------------ tests/fuzzer.c | 11 ++++++++--- 3 files changed, 57 insertions(+), 23 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index d0fa3e8..aed2bb2 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -610,6 +610,15 @@ LZ4_FORCE_INLINE void LZ4_prepareTable( LZ4_stream_t_internal* const cctx, const int inputSize, const tableType_t tableType) { + /* If compression failed during the previous step, then the context + * is marked as dirty, therefore, it has to be fully reset. + */ + if (cctx->dirtyContext) { + DEBUGLOG(5, "LZ4_prepareTable: Full reset for %p", cctx); + MEM_INIT(cctx, 0, sizeof(LZ4_stream_t_internal)); + return; + } + /* If the table hasn't been used, it's guaranteed to be zeroed out, and is * therefore safe to use no matter what mode we're in. Otherwise, we figure * out if it's safe to leave as is or whether it needs to be reset. @@ -660,6 +669,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic( const dictIssue_directive dictIssue, const U32 acceleration) { + int result; const BYTE* ip = (const BYTE*) source; U32 const startIndex = cctx->currentOffset; @@ -695,9 +705,9 @@ LZ4_FORCE_INLINE int LZ4_compress_generic( DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, tableType=%u", inputSize, tableType); /* Init conditions */ - if (outputLimited == fillOutput && maxOutputSize < 1) return 0; /* Impossible to store anything */ - if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported inputSize, too large (or negative) */ - if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0; /* Size too large (not within 64K limit) */ + if (outputLimited == fillOutput && maxOutputSize < 1) goto _failure; /* Impossible to store anything */ + if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) goto _failure; /* Unsupported inputSize, too large (or negative) */ + if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) goto _failure; /* Size too large (not within 64K limit) */ if (tableType==byPtr) assert(dictDirective==noDict); /* only supported use case with byPtr */ assert(acceleration >= 1); @@ -814,7 +824,8 @@ LZ4_FORCE_INLINE int LZ4_compress_generic( token = op++; if ((outputLimited == limitedOutput) && /* Check output buffer overflow */ (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit))) - return 0; + goto _failure; + if ((outputLimited == fillOutput) && (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) { op--; @@ -887,7 +898,7 @@ _next_match: if ((outputLimited) && /* Check output buffer overflow */ (unlikely(op + (1 + LASTLITERALS) + (matchCode>>8) > olimit)) ) { if (outputLimited == limitedOutput) - return 0; + goto _failure; if (outputLimited == fillOutput) { /* Match description too long : reduce it */ U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 2 - 1 - LASTLITERALS) * 255; @@ -985,7 +996,7 @@ _last_literals: lastRun -= (lastRun+240)/255; } if (outputLimited == limitedOutput) - return 0; + goto _failure; } if (lastRun >= RUN_MASK) { size_t accumulator = lastRun - RUN_MASK; @@ -1004,7 +1015,14 @@ _last_literals: *inputConsumed = (int) (((const char*)ip)-source); } DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, (int)(((char*)op) - dest)); - return (int)(((char*)op) - dest); + result = (int)(((char*)op) - dest); + assert(result > 0); + return result; + +_failure: + /* Mark stream as having dirty context, so, it has to be fully reset */ + cctx->dirtyContext = 1; + return 0; } @@ -1233,6 +1251,12 @@ int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) } void LZ4_attach_dictionary(LZ4_stream_t *working_stream, const LZ4_stream_t *dictionary_stream) { + /* Calling LZ4_resetStream_fast() here makes sure that changes will not be + * erased by subsequent calls to LZ4_resetStream_fast() in case stream was + * marked as having dirty context, e.g. requiring full reset. + */ + LZ4_resetStream_fast(working_stream); + if (dictionary_stream != NULL) { /* If the current offset is zero, we will never look in the * external dictionary context, since there is no value a table @@ -1276,7 +1300,7 @@ int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, ch DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize); - if (streamPtr->initCheck) return 0; /* Uninitialized structure detected */ + if (streamPtr->dirtyContext) return 0; /* Uninitialized structure detected */ LZ4_renormDictT(streamPtr, inputSize); /* avoid index overflow */ if (acceleration < 1) acceleration = ACCELERATION_DEFAULT; diff --git a/lib/lz4.h b/lib/lz4.h index 4d30857..346f0dc 100644 --- a/lib/lz4.h +++ b/lib/lz4.h @@ -414,19 +414,22 @@ LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int or #endif /*! LZ4_resetStream_fast() : - * Use this, like LZ4_resetStream(), to prepare a context for a new chain of - * calls to a streaming API (e.g., LZ4_compress_fast_continue()). + * Use this to prepare a context for a new chain of calls to a streaming API + * (e.g., LZ4_compress_fast_continue()). + * + * Note: + * To stay on the safe side, when LZ4_stream_t is used for the first time, + * it should be either created using LZ4_createStream() or + * initialized using LZ4_resetStream(). * * Note: * Using this in advance of a non-streaming-compression function is redundant, * since they all perform their own custom reset internally. * * Differences from LZ4_resetStream(): - * When an LZ4_stream_t is known to be in a internally coherent state, - * it can often be prepared for a new compression with almost no work, - * only sometimes falling back to the full, expensive reset - * that is always required when the stream is in an indeterminate state - * (i.e., the reset performed by LZ4_resetStream()). + * When an LZ4_stream_t is known to be in an internally coherent state, + * it will be prepared for a new compression with almost no work. + * Otherwise, it will fall back to the full, expensive reset. * * LZ4_streams are guaranteed to be in a valid state when: * - returned from LZ4_createStream() @@ -439,9 +442,11 @@ LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int or * call that fully reset the state (e.g., LZ4_compress_fast_extState()) and * that returned success * - * When a stream isn't known to be in a valid state, it is not safe to pass to - * any fastReset or streaming function. It must first be cleansed by the full - * LZ4_resetStream(). + * Note: + * A stream that was used in a compression call that did not return success + * (e.g., LZ4_compress_fast_continue()), can still be passed to this function, + * however, it's history is not preserved because of previous compression + * failure. */ LZ4LIB_STATIC_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr); @@ -506,7 +511,7 @@ typedef struct LZ4_stream_t_internal LZ4_stream_t_internal; struct LZ4_stream_t_internal { uint32_t hashTable[LZ4_HASH_SIZE_U32]; uint32_t currentOffset; - uint16_t initCheck; + uint16_t dirtyContext; uint16_t tableType; const uint8_t* dictionary; const LZ4_stream_t_internal* dictCtx; @@ -526,7 +531,7 @@ typedef struct LZ4_stream_t_internal LZ4_stream_t_internal; struct LZ4_stream_t_internal { unsigned int hashTable[LZ4_HASH_SIZE_U32]; unsigned int currentOffset; - unsigned short initCheck; + unsigned short dirtyContext; unsigned short tableType; const unsigned char* dictionary; const LZ4_stream_t_internal* dictCtx; diff --git a/tests/fuzzer.c b/tests/fuzzer.c index b29e82e..de434f9 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -744,6 +744,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c LZ4_attach_dictionary(&LZ4_stream, &LZ4dict); blockContinueCompressedSize = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, (int)compressedBufferSize, 1); FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_fast_continue using extDictCtx failed"); + FUZ_CHECKTEST(LZ4_stream.internal_donotuse.dirtyContext, "context should be good"); /* In the future, it might be desirable to let extDictCtx mode's * output diverge from the output generated by regular extDict mode. @@ -754,19 +755,21 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c FUZ_CHECKTEST(XXH32(compressedBuffer, blockContinueCompressedSize, 0) != expectedCrc, "LZ4_compress_fast_continue using extDictCtx produced different output"); FUZ_DISPLAYTEST("LZ4_compress_fast_continue() after LZ4_attach_dictionary(), but output buffer is 1 byte too short"); - LZ4_resetStream(&LZ4_stream); + LZ4_resetStream_fast(&LZ4_stream); LZ4_attach_dictionary(&LZ4_stream, &LZ4dict); ret = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, blockContinueCompressedSize-1, 1); FUZ_CHECKTEST(ret>0, "LZ4_compress_fast_continue using extDictCtx should fail : one missing byte for output buffer : %i written, %i buffer", ret, blockContinueCompressedSize); + FUZ_CHECKTEST(!LZ4_stream.internal_donotuse.dirtyContext, "context should be dirty"); FUZ_DISPLAYTEST(); - LZ4_resetStream(&LZ4_stream); + LZ4_resetStream_fast(&LZ4_stream); LZ4_attach_dictionary(&LZ4_stream, &LZ4dict); ret = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, blockContinueCompressedSize, 1); FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_limitedOutput_compressed size is different (%i != %i)", ret, blockContinueCompressedSize); FUZ_CHECKTEST(ret<=0, "LZ4_compress_fast_continue using extDictCtx should work : enough size available within output buffer"); FUZ_CHECKTEST(ret != expectedSize, "LZ4_compress_fast_continue using extDictCtx produced different-sized output"); FUZ_CHECKTEST(XXH32(compressedBuffer, ret, 0) != expectedCrc, "LZ4_compress_fast_continue using extDictCtx produced different output"); + FUZ_CHECKTEST(LZ4_stream.internal_donotuse.dirtyContext, "context should be good"); FUZ_DISPLAYTEST(); LZ4_resetStream_fast(&LZ4_stream); @@ -776,6 +779,7 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c FUZ_CHECKTEST(ret<=0, "LZ4_compress_fast_continue using extDictCtx with re-used context should work : enough size available within output buffer"); FUZ_CHECKTEST(ret != expectedSize, "LZ4_compress_fast_continue using extDictCtx produced different-sized output"); FUZ_CHECKTEST(XXH32(compressedBuffer, ret, 0) != expectedCrc, "LZ4_compress_fast_continue using extDictCtx produced different output"); + FUZ_CHECKTEST(LZ4_stream.internal_donotuse.dirtyContext, "context should be good"); } /* Decompress with dictionary as external */ @@ -990,6 +994,7 @@ static void FUZ_unitTests(int compressionLevel) LZ4_resetStream(&streamingState); result = LZ4_compress_fast_continue(&streamingState, testInput, testCompressed, testCompressedSize, testCompressedSize-1, 1); FUZ_CHECKTEST(result==0, "LZ4_compress_fast_continue() compression failed!"); + FUZ_CHECKTEST(streamingState.internal_donotuse.dirtyContext, "dirtyContext flag is set") result = LZ4_decompress_safe(testCompressed, testVerify, result, testCompressedSize); FUZ_CHECKTEST(result!=(int)testCompressedSize, "LZ4_decompress_safe() decompression failed"); @@ -1012,7 +1017,7 @@ static void FUZ_unitTests(int compressionLevel) XXH64_reset(&xxhOrig, 0); XXH64_reset(&xxhNewSafe, 0); XXH64_reset(&xxhNewFast, 0); - LZ4_resetStream(&streamingState); + LZ4_resetStream_fast(&streamingState); LZ4_setStreamDecode(&decodeStateSafe, NULL, 0); LZ4_setStreamDecode(&decodeStateFast, NULL, 0);