From 534f8fa5d6504c00ac7e956a7891ed438af2e212 Mon Sep 17 00:00:00 2001 From: remittor Date: Tue, 7 Mar 2017 17:11:48 +0300 Subject: [PATCH 1/6] lz4hc: Cleanup function LZ4HC_compress_hashChain --- lib/lz4hc.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 5d4ea3e..2518300 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -398,7 +398,8 @@ _Search3: if (start2 + ml2 < mflimit) ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts); - else ml3 = ml2; + else + ml3 = ml2; if (ml3 == ml2) { /* No better match : 2 sequences to encode */ /* ip & ref are known; Now for ml */ @@ -474,12 +475,21 @@ _Search3: } /* Encode Last Literals */ - { int lastRun = (int)(iend - anchor); - if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */ - if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } - else *op++ = (BYTE)(lastRun< oend)) return 0; /* Check output limit */ + if (lastRunSize >= RUN_MASK) { + size_t accumulator = lastRunSize - RUN_MASK; + *op++ = (RUN_MASK << ML_BITS); + for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRunSize << ML_BITS); + } + memcpy(op, anchor, lastRunSize); + op += lastRunSize; } /* End */ From 45b592b7ecf7afc686a4b9d0ddfcae2f7229f55e Mon Sep 17 00:00:00 2001 From: remittor Date: Wed, 8 Mar 2017 00:30:54 +0300 Subject: [PATCH 2/6] lz4hc: Cleanup function LZ4HC_encodeSequence --- lib/lz4hc.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 2518300..d3cc831 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -261,10 +261,10 @@ FORCE_INLINE int LZ4HC_encodeSequence ( const BYTE** anchor, int matchLength, const BYTE* const match, - limitedOutput_directive limitedOutputBuffer, + limitedOutput_directive limit, BYTE* oend) { - int length; + size_t length; BYTE* token; #if LZ4HC_DEBUG @@ -272,11 +272,18 @@ FORCE_INLINE int LZ4HC_encodeSequence ( #endif /* Encode Literal length */ - length = (int)(*ip - *anchor); + length = (size_t)(*ip - *anchor); token = (*op)++; - if ((limitedOutputBuffer) && ((*op + (length>>8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1; /* Check output limit */ - if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK< 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } - else *token = (BYTE)(length<> 8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1; /* Check output limit */ + if (length >= RUN_MASK) { + size_t len; + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for(; len >= 255 ; len -= 255) *(*op)++ = 255; + *(*op)++ = (BYTE)len; + } else { + *token = (BYTE)(length << ML_BITS); + } /* Copy Literals */ LZ4_wildCopy(*op, *anchor, (*op) + length); @@ -286,13 +293,13 @@ FORCE_INLINE int LZ4HC_encodeSequence ( LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2; /* Encode MatchLength */ - length = (int)(matchLength-MINMATCH); - if ((limitedOutputBuffer) && (*op + (length>>8) + (1 + LASTLITERALS) > oend)) return 1; /* Check output limit */ - if (length>=(int)ML_MASK) { + length = (size_t)(matchLength - MINMATCH); + if ((limit) && (*op + (length >> 8) + (1 + LASTLITERALS) > oend)) return 1; /* Check output limit */ + if (length >= ML_MASK) { *token += ML_MASK; length -= ML_MASK; - for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } - if (length > 254) { length-=255; *(*op)++ = 255; } + for(; length >= 510 ; length -= 510) { *(*op)++ = 255; *(*op)++ = 255; } + if (length >= 255) { length -= 255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; } else { *token += (BYTE)(length); From f007153e3f0c401d51d6ba5f6ecf27b223591ef2 Mon Sep 17 00:00:00 2001 From: remittor Date: Wed, 8 Mar 2017 11:11:15 +0300 Subject: [PATCH 3/6] lz4hc: Add LZ4_compressHC_destSize and LZ4_compress_HC_continue_destSize --- lib/lz4hc.c | 74 +++++++++++++++++++++++++++++++++++++++++++++-------- lib/lz4hc.h | 2 ++ 2 files changed, 66 insertions(+), 10 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index d3cc831..abea72e 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -248,7 +248,11 @@ FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( } -typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive; +typedef enum { + noLimit = 0, + limitedOutput = 1, + limitedDestSize = 2, +} limitedOutput_directive; #define LZ4HC_DEBUG 0 #if LZ4HC_DEBUG @@ -318,18 +322,21 @@ static int LZ4HC_compress_hashChain ( LZ4HC_CCtx_internal* const ctx, const char* const source, char* const dest, - int const inputSize, + int* srcSizePtr, int const maxOutputSize, unsigned maxNbAttempts, limitedOutput_directive limit ) { + const int inputSize = *srcSizePtr; + const BYTE* ip = (const BYTE*) source; const BYTE* anchor = ip; const BYTE* const iend = ip + inputSize; const BYTE* const mflimit = iend - MFLIMIT; const BYTE* const matchlimit = (iend - LASTLITERALS); + BYTE* optr = (BYTE*) dest; BYTE* op = (BYTE*) dest; BYTE* const oend = op + maxOutputSize; @@ -343,7 +350,12 @@ static int LZ4HC_compress_hashChain ( const BYTE* ref0; /* init */ + *srcSizePtr = 0; + if (limit && maxOutputSize < 1) return 0; /* Impossible to store anything */ + if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */ + ctx->end += inputSize; + if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */ ip++; @@ -363,7 +375,8 @@ _Search2: else ml2 = ml; if (ml2 == ml) { /* No better match */ - if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; + optr = op; + if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow; continue; } @@ -412,9 +425,11 @@ _Search3: /* ip & ref are known; Now for ml */ if (start2 < ip+ml) ml = (int)(start2 - ip); /* Now, encode 2 sequences */ - if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; + optr = op; + if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow; ip = start2; - if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0; + optr = op; + if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) goto _dest_overflow; continue; } @@ -432,7 +447,8 @@ _Search3: } } - if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; + optr = op; + if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow; ip = start3; ref = ref3; ml = ml3; @@ -468,7 +484,8 @@ _Search3: ml = (int)(start2 - ip); } } - if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; + optr = op; + if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow; ip = start2; ref = ref2; @@ -481,12 +498,21 @@ _Search3: goto _Search3; } +_last_literals: /* Encode Last Literals */ { size_t lastRunSize, litLength, totalSize; lastRunSize = (size_t)(iend - anchor); /* literals */ litLength = (lastRunSize + 255 - RUN_MASK) / 255; totalSize = 1 + litLength + lastRunSize; - if ((limit == limitedOutput) && (op + totalSize > oend)) return 0; /* Check output limit */ + if (limit && (op + totalSize > oend)) { + if (limit == limitedOutput) return 0; /* Check output limit */ + /* adapt lastRunSize to fill 'dst' */ + lastRunSize = (size_t)(oend - op) - 1; + litLength = (lastRunSize + 255 - RUN_MASK) / 255; + lastRunSize -= litLength; + } + ip = anchor + lastRunSize; + if (lastRunSize >= RUN_MASK) { size_t accumulator = lastRunSize - RUN_MASK; *op++ = (RUN_MASK << ML_BITS); @@ -500,7 +526,15 @@ _Search3: } /* End */ + *srcSizePtr = (int) (((const char*)ip) - source); return (int) (((char*)op)-dest); + +_dest_overflow: + if (limit == limitedDestSize) { + op = optr; /* restore correct out pointer */ + goto _last_literals; + } + return 0; } static int LZ4HC_getSearchNum(int compressionLevel) @@ -522,16 +556,17 @@ static int LZ4HC_compress_generic ( limitedOutput_directive limit ) { + int srcSize = inputSize; if (compressionLevel < 1) compressionLevel = LZ4HC_CLEVEL_DEFAULT; if (compressionLevel > 9) { switch (compressionLevel) { - case 10: return LZ4HC_compress_hashChain(ctx, source, dest, inputSize, maxOutputSize, 1 << (16-1), limit); + case 10: return LZ4HC_compress_hashChain(ctx, source, dest, &srcSize, maxOutputSize, 1 << (16-1), limit); case 11: ctx->searchNum = LZ4HC_getSearchNum(compressionLevel); return LZ4HC_compress_optimal(ctx, source, dest, inputSize, maxOutputSize, limit, 128, 0); default: case 12: ctx->searchNum = LZ4HC_getSearchNum(compressionLevel); return LZ4HC_compress_optimal(ctx, source, dest, inputSize, maxOutputSize, limit, LZ4_OPT_NUM, 1); } } - return LZ4HC_compress_hashChain(ctx, source, dest, inputSize, maxOutputSize, 1 << (compressionLevel-1), limit); + return LZ4HC_compress_hashChain(ctx, source, dest, &srcSize, maxOutputSize, 1 << (compressionLevel-1), limit); } @@ -657,6 +692,17 @@ int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* sourc return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, noLimit); } +int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int* sourceSizePtr, int targetDestSize) +{ + LZ4HC_CCtx_internal* ctxPtr = &LZ4_streamHCPtr->internal_donotuse; + unsigned maxNbAttempts = 1 << (ctxPtr->compressionLevel - 1); + + /* destSize: always auto-init */ + LZ4HC_init(ctxPtr, (const BYTE*) source); + + return LZ4HC_compress_hashChain(ctxPtr, source, dest, sourceSizePtr, targetDestSize, maxNbAttempts, limitedDestSize); +} + /* dictionary saving */ @@ -735,3 +781,11 @@ char* LZ4_slideInputBufferHC(void* LZ4HC_Data) int const dictSize = LZ4_saveDictHC((LZ4_streamHC_t*)LZ4HC_Data, (char*)(hc4->inputBuffer), 64 KB); return (char*)(hc4->inputBuffer + dictSize); } + +int LZ4_compressHC_destSize(void* LZ4HC_Data, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int compressionLevel) +{ + LZ4HC_CCtx_internal * const ctx = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse; + unsigned maxNbAttempts = 1 << (compressionLevel - 1); + LZ4HC_init(ctx, (const BYTE*) source); + return LZ4HC_compress_hashChain(ctx, source, dest, sourceSizePtr, targetDestSize, maxNbAttempts, limitedDestSize); +} diff --git a/lib/lz4hc.h b/lib/lz4hc.h index 1036fd0..13112f9 100644 --- a/lib/lz4hc.h +++ b/lib/lz4hc.h @@ -99,6 +99,7 @@ LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionL LZ4LIB_API int LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize); LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, const char* src, char* dst, int srcSize, int maxDstSize); +LZ4LIB_API int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int maxDstSize); LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize); @@ -219,6 +220,7 @@ LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC2_con LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel); LZ4_DEPRECATED("use LZ4_createStreamHC() instead") int LZ4_sizeofStreamStateHC(void); LZ4_DEPRECATED("use LZ4_resetStreamHC() instead") int LZ4_resetStreamStateHC(void* state, char* inputBuffer); +LZ4_DEPRECATED("use LZ4_compress_HC_continue_destSize() instead") int LZ4_compressHC_destSize(void* LZ4HC_Data, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int compressionLevel); #if defined (__cplusplus) From 66b26a389f210548c49a69983ff4ffd2250bf0d0 Mon Sep 17 00:00:00 2001 From: remittor Date: Wed, 8 Mar 2017 11:13:28 +0300 Subject: [PATCH 4/6] tests: fuzzer: Add test for LZ4_compressHC_destSize --- tests/fuzzer.c | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/tests/fuzzer.c b/tests/fuzzer.c index b129c96..6da0ea5 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -32,6 +32,7 @@ # pragma warning(disable : 4310) /* disable: C4310: constant char value > 127 */ #endif +#define LZ4_DISABLE_DEPRECATE_WARNINGS /*-************************************ * Dependencies @@ -348,6 +349,41 @@ static int FUZ_test(U32 seed, U32 nbCycles, const U32 startCycle, const double c DISPLAYLEVEL(5, " \n"); } + /* Test compression HC destSize */ + FUZ_DISPLAYTEST; + { int srcSize = blockSize; + int const targetSize = srcSize * ((FUZ_rand(&randState) & 127)+1) >> 7; + char endCheck = FUZ_rand(&randState) & 255; + void * ctx = LZ4_createHC(block); + FUZ_CHECKTEST(ctx==NULL, "LZ4_createHC() allocation failed"); + compressedBuffer[targetSize] = endCheck; + ret = LZ4_compressHC_destSize(ctx, block, compressedBuffer, &srcSize, targetSize, compressionLevel); + LZ4_freeHC(ctx); + FUZ_CHECKTEST(ret > targetSize, "LZ4_compressHC_destSize() result larger than dst buffer !"); + FUZ_CHECKTEST(compressedBuffer[targetSize] != endCheck, "LZ4_compressHC_destSize() overwrite dst buffer !"); + FUZ_CHECKTEST(srcSize > blockSize, "LZ4_compressHC_destSize() fed more than src buffer !"); + DISPLAYLEVEL(5, "destSize : %7i/%7i; content%7i/%7i ", ret, targetSize, srcSize, blockSize); + if (targetSize>0) { + /* check correctness */ + U32 const crcBase = XXH32(block, srcSize, 0); + char const canary = FUZ_rand(&randState) & 255; + FUZ_CHECKTEST((ret==0), "LZ4_compressHC_destSize() compression failed"); + FUZ_DISPLAYTEST; + compressedSize = ret; + decodedBuffer[srcSize] = canary; + ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, srcSize); + FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe() failed on data compressed by LZ4_compressHC_destSize"); + FUZ_CHECKTEST(ret!=srcSize, "LZ4_decompress_safe() failed : did not fully decompressed data"); + FUZ_CHECKTEST(decodedBuffer[srcSize] != canary, "LZ4_decompress_safe() overwrite dst buffer !"); + { U32 const crcDec = XXH32(decodedBuffer, srcSize, 0); + FUZ_CHECKTEST(crcDec!=crcBase, "LZ4_decompress_safe() corrupted decoded data"); } + + DISPLAYLEVEL(5, " OK \n"); + } + else + DISPLAYLEVEL(5, " \n"); + } + /* Test compression HC */ FUZ_DISPLAYTEST; ret = LZ4_compress_HC(block, compressedBuffer, blockSize, (int)compressedBufferSize, compressionLevel); @@ -688,7 +724,7 @@ static void FUZ_unitTests(int compressionLevel) crcOrig = XXH64(testInput, testCompressedSize, 0); LZ4_resetStream(&streamingState); result = LZ4_compress_fast_continue(&streamingState, testInput, testCompressed, testCompressedSize, testCompressedSize-1, 1); - FUZ_CHECKTEST(result==0, "LZ4_compress_limitedOutput_continue() compression failed"); + FUZ_CHECKTEST(result==0, "LZ4_compress_fast_continue() compression failed!"); result = LZ4_decompress_safe(testCompressed, testVerify, result, testCompressedSize); FUZ_CHECKTEST(result!=(int)testCompressedSize, "LZ4_decompress_safe() decompression failed"); @@ -718,7 +754,7 @@ static void FUZ_unitTests(int compressionLevel) memcpy (ringBuffer + rNext, testInput + iNext, messageSize); result = LZ4_compress_fast_continue(&streamingState, ringBuffer + rNext, testCompressed, messageSize, testCompressedSize-ringBufferSize, 1); - FUZ_CHECKTEST(result==0, "LZ4_compress_limitedOutput_continue() compression failed"); + FUZ_CHECKTEST(result==0, "LZ4_compress_fast_continue() compression failed"); result = LZ4_decompress_safe_continue(&decodeState, testCompressed, testVerify + dNext, result, messageSize); FUZ_CHECKTEST(result!=(int)messageSize, "ringBuffer : LZ4_decompress_safe() test failed"); From baa155088b2700526cb97e97b6e2ce4199bf264c Mon Sep 17 00:00:00 2001 From: remittor Date: Wed, 8 Mar 2017 18:49:55 +0300 Subject: [PATCH 5/6] lz4hc: Fix LZ4HC_compress_hashChain for full support destSize variant --- lib/lz4hc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index abea72e..618ed8a 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -338,7 +338,7 @@ static int LZ4HC_compress_hashChain ( BYTE* optr = (BYTE*) dest; BYTE* op = (BYTE*) dest; - BYTE* const oend = op + maxOutputSize; + BYTE* oend = op + maxOutputSize; int ml, ml2, ml3, ml0; const BYTE* ref = NULL; @@ -355,6 +355,7 @@ static int LZ4HC_compress_hashChain ( if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */ ctx->end += inputSize; + if (limit == limitedDestSize) oend -= LASTLITERALS; /* Hack for support limitations LZ4 decompressor */ if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */ ip++; @@ -372,7 +373,8 @@ static int LZ4HC_compress_hashChain ( _Search2: if (ip+ml < mflimit) ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2, maxNbAttempts); - else ml2 = ml; + else + ml2 = ml; if (ml2 == ml) { /* No better match */ optr = op; @@ -504,6 +506,7 @@ _last_literals: lastRunSize = (size_t)(iend - anchor); /* literals */ litLength = (lastRunSize + 255 - RUN_MASK) / 255; totalSize = 1 + litLength + lastRunSize; + if (limit == limitedDestSize) oend += LASTLITERALS; /* restore correct value */ if (limit && (op + totalSize > oend)) { if (limit == limitedOutput) return 0; /* Check output limit */ /* adapt lastRunSize to fill 'dst' */ From 36842ebb1978a3e500572ead564e60e513c4ad0a Mon Sep 17 00:00:00 2001 From: remittor Date: Thu, 9 Mar 2017 12:19:24 +0300 Subject: [PATCH 6/6] lz4hc: Fix LZ4HC_compress_hashChain for backward compatibility --- lib/lz4hc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 618ed8a..290e575 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -351,7 +351,7 @@ static int LZ4HC_compress_hashChain ( /* init */ *srcSizePtr = 0; - if (limit && maxOutputSize < 1) return 0; /* Impossible to store anything */ + if (limit == limitedDestSize && maxOutputSize < 1) return 0; /* Impossible to store anything */ if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */ ctx->end += inputSize; @@ -509,7 +509,7 @@ _last_literals: if (limit == limitedDestSize) oend += LASTLITERALS; /* restore correct value */ if (limit && (op + totalSize > oend)) { if (limit == limitedOutput) return 0; /* Check output limit */ - /* adapt lastRunSize to fill 'dst' */ + /* adapt lastRunSize to fill 'dest' */ lastRunSize = (size_t)(oend - op) - 1; litLength = (lastRunSize + 255 - RUN_MASK) / 255; lastRunSize -= litLength;