From 4ba85344e3205ab77e8f7b18a858c9eee48418bb Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 7 Mar 2016 20:01:45 +0100 Subject: [PATCH 1/3] added test to generate lots of small sequences (3-bytes) --- programs/fuzzer.c | 68 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/programs/fuzzer.c b/programs/fuzzer.c index 7cbfd794..50d496b9 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -196,22 +196,6 @@ static int basicUnitTests(U32 seed, double compressibility) if (result != (size_t)-ZSTD_error_srcSize_wrong) goto _output_error; DISPLAYLEVEL(4, "OK \n"); - /* All zeroes test (#137 verif) */ - #define ZEROESLENGTH 100 - DISPLAYLEVEL(4, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH); - memset(CNBuffer, 0, ZEROESLENGTH); - result = ZSTD_compress(compressedBuffer, ZSTD_compressBound(ZEROESLENGTH), CNBuffer, ZEROESLENGTH, 1); - if (ZSTD_isError(result)) goto _output_error; - cSize = result; - DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/ZEROESLENGTH*100); - - DISPLAYLEVEL(4, "test%3i : decompress %u zeroes : ", testNb++, ZEROESLENGTH); - result = ZSTD_decompress(decodedBuffer, ZEROESLENGTH, compressedBuffer, cSize); - if (ZSTD_isError(result)) goto _output_error; - if (result != ZEROESLENGTH) goto _output_error; - DISPLAYLEVEL(4, "OK \n"); - - /* Dictionary and Duplication tests */ { ZSTD_CCtx* ctxOrig = ZSTD_createCCtx(); @@ -344,6 +328,58 @@ static int basicUnitTests(U32 seed, double compressibility) DISPLAYLEVEL(4, "OK \n"); } + /* All zeroes test (#137 verif) */ + #define ZEROESLENGTH 100 + DISPLAYLEVEL(4, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH); + memset(CNBuffer, 0, ZEROESLENGTH); + result = ZSTD_compress(compressedBuffer, ZSTD_compressBound(ZEROESLENGTH), CNBuffer, ZEROESLENGTH, 1); + if (ZSTD_isError(result)) goto _output_error; + cSize = result; + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/ZEROESLENGTH*100); + + DISPLAYLEVEL(4, "test%3i : decompress %u zeroes : ", testNb++, ZEROESLENGTH); + result = ZSTD_decompress(decodedBuffer, ZEROESLENGTH, compressedBuffer, cSize); + if (ZSTD_isError(result)) goto _output_error; + if (result != ZEROESLENGTH) goto _output_error; + DISPLAYLEVEL(4, "OK \n"); + + /* nbSeq limit test */ + { + #define _3BYTESTESTLENGTH 131000 + #define NB3BYTESSEQLOG 9 + #define NB3BYTESSEQ (1 << NB3BYTESSEQLOG) + #define NB3BYTESSEQMASK (NB3BYTESSEQ-1) + BYTE _3BytesSeqs[NB3BYTESSEQ][3]; + U32 r = 1; + int i; + + for (i=0; i < NB3BYTESSEQ; i++) { + _3BytesSeqs[i][0] = (BYTE)(FUZ_rand(&r) & 255); + _3BytesSeqs[i][1] = (BYTE)(FUZ_rand(&r) & 255); + _3BytesSeqs[i][2] = (BYTE)(FUZ_rand(&r) & 255); + } + + for (i=0; i < _3BYTESTESTLENGTH; ){ + U32 id = FUZ_rand(&r) & NB3BYTESSEQMASK; + ((BYTE*)CNBuffer)[i+0] = _3BytesSeqs[id][0]; + ((BYTE*)CNBuffer)[i+1] = _3BytesSeqs[id][1]; + ((BYTE*)CNBuffer)[i+2] = _3BytesSeqs[id][2]; + i += 3; + } + + DISPLAYLEVEL(4, "test%3i : compress lots 3-bytes sequences : ", testNb++); + result = ZSTD_compress(compressedBuffer, ZSTD_compressBound(_3BYTESTESTLENGTH), CNBuffer, _3BYTESTESTLENGTH, 19); + if (ZSTD_isError(result)) goto _output_error; + cSize = result; + DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/_3BYTESTESTLENGTH*100); + + DISPLAYLEVEL(4, "test%3i : decompress lots 3-bytes sequence : ", testNb++); + result = ZSTD_decompress(decodedBuffer, _3BYTESTESTLENGTH, compressedBuffer, cSize); + if (ZSTD_isError(result)) goto _output_error; + if (result != _3BYTESTESTLENGTH) goto _output_error; + DISPLAYLEVEL(4, "OK \n"); + } + _end: free(CNBuffer); free(compressedBuffer); From 370b08e84041845f8ae7845aaad65b90cb6541f7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 8 Mar 2016 00:03:59 +0100 Subject: [PATCH 2/3] fix compression ratio for blocks <= 16 KB --- lib/zstd_compress.c | 52 ++++++++++++++++++++++--------------------- lib/zstd_decompress.c | 3 ++- programs/fuzzer.c | 1 + 3 files changed, 30 insertions(+), 26 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 4858831e..a5127b74 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -147,13 +147,14 @@ void ZSTD_validateParams(ZSTD_parameters* params) const U32 btPlus = (params->strategy == ZSTD_btlazy2) || (params->strategy == ZSTD_btopt); const U32 searchLengthMax = (params->strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1; const U32 searchLengthMin = (params->strategy == ZSTD_btopt) ? ZSTD_SEARCHLENGTH_MIN : ZSTD_SEARCHLENGTH_MIN+1; + const U32 hashLog3Min = (params->strategy == ZSTD_btopt) ? ZSTD_HASHLOG3_MIN : 0; /* validate params */ if (MEM_32bits()) if (params->windowLog > 25) params->windowLog = 25; /* 32 bits mode cannot flush > 24 bits */ CLAMP(params->windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); CLAMP(params->contentLog, ZSTD_CONTENTLOG_MIN, ZSTD_CONTENTLOG_MAX); CLAMP(params->hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); - CLAMP(params->hashLog3, ZSTD_HASHLOG3_MIN, ZSTD_HASHLOG3_MAX); + CLAMP(params->hashLog3, hashLog3Min, ZSTD_HASHLOG3_MAX); CLAMP(params->searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); CLAMP(params->searchLength, searchLengthMin, searchLengthMax); CLAMP(params->targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); @@ -174,10 +175,11 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, { /* note : params considered validated here */ const size_t blockSize = MIN(BLOCKSIZE, (size_t)1 << params.windowLog); /* reserve table memory */ - const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog; + const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog; const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog) + (1 << params.hashLog3)) * sizeof(U32); - const size_t neededSpace = tableSpace + (256*sizeof(U32)) + (3*blockSize) - + ((params.strategy == ZSTD_btopt) ? ((1<workSpaceSize < neededSpace) { free(zc->workSpace); @@ -204,7 +206,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->blockSize = blockSize; zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer); - zc->seqStore.offCodeStart = (BYTE*) (zc->seqStore.offsetStart) + blockSize; + zc->seqStore.offCodeStart = (BYTE*) (zc->seqStore.offsetStart + (blockSize>>2)); zc->seqStore.litStart = zc->seqStore.offCodeStart + (blockSize>>2); zc->seqStore.litLengthStart = zc->seqStore.litStart + blockSize; zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + (blockSize>>2); @@ -227,9 +229,9 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, } -/*! ZSTD_copyCCtx -* Duplicate an existing context @srcCCtx into another one @dstCCtx. -* Only works during stage 0 (i.e. before first call to ZSTD_compressContinue()) +/*! ZSTD_copyCCtx() : +* Duplicate an existing context `srcCCtx` into another one `dstCCtx`. +* Only works during stage 0 (i.e. before first call to ZSTD_compressContinue()). * @return : 0, or an error code */ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) { @@ -2363,7 +2365,7 @@ static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = { { 0, 17, 18, 17, 0, 7, 4,128, ZSTD_btopt }, /* level 16 */ { 0, 17, 18, 17, 0, 8, 4,128, ZSTD_btopt }, /* level 17 */ { 0, 17, 18, 17, 0, 8, 4,256, ZSTD_btopt }, /* level 18 */ - { 0, 17, 18, 17, 0, 9, 4,256, ZSTD_btopt }, /* level 19 */ + { 0, 17, 18, 17, 16, 9, 3,256, ZSTD_btopt }, /* level 19 */ { 0, 17, 18, 17, 0, 10, 4,512, ZSTD_btopt }, /* level 20 */ { 0, 17, 18, 17, 0, 11, 4,512, ZSTD_btopt }, /* level 21 */ { 0, 17, 18, 17, 0, 11, 4,512, ZSTD_btopt }, /* level 21-2 */ @@ -2384,22 +2386,22 @@ static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = { { 0, 14, 14, 14, 0, 6, 4, 4, ZSTD_lazy2 }, /* level 7.*/ { 0, 14, 14, 14, 0, 7, 4, 4, ZSTD_lazy2 }, /* level 8.*/ { 0, 14, 15, 14, 0, 6, 4, 4, ZSTD_btlazy2 }, /* level 9.*/ - { 0, 14, 15, 14, 0, 3, 3, 6, ZSTD_btopt }, /* level 10.*/ - { 0, 14, 15, 14, 0, 6, 3, 8, ZSTD_btopt }, /* level 11.*/ - { 0, 14, 15, 14, 0, 6, 3, 16, ZSTD_btopt }, /* level 12.*/ - { 0, 14, 15, 14, 0, 6, 3, 24, ZSTD_btopt }, /* level 13.*/ - { 0, 14, 15, 15, 0, 6, 3, 48, ZSTD_btopt }, /* level 14.*/ - { 0, 14, 15, 15, 0, 6, 3, 64, ZSTD_btopt }, /* level 15.*/ - { 0, 14, 15, 15, 0, 6, 3, 96, ZSTD_btopt }, /* level 16.*/ - { 0, 14, 15, 15, 0, 6, 3,128, ZSTD_btopt }, /* level 17.*/ - { 0, 14, 15, 15, 0, 6, 3,256, ZSTD_btopt }, /* level 18.*/ - { 0, 14, 15, 15, 0, 7, 3,256, ZSTD_btopt }, /* level 19.*/ - { 0, 14, 15, 15, 0, 8, 3,256, ZSTD_btopt }, /* level 20.*/ - { 0, 14, 15, 15, 0, 9, 3,256, ZSTD_btopt }, /* level 21.*/ - { 0, 14, 15, 15, 0, 10, 3,256, ZSTD_btopt }, /* level 22.*/ - { 0, 14, 15, 15, 0, 11, 3,256, ZSTD_btopt }, /* level 23.*/ - { 0, 14, 15, 15, 0, 12, 3,256, ZSTD_btopt }, /* level 24.*/ - { 0, 14, 15, 15, 0, 13, 3,256, ZSTD_btopt }, /* level 25.*/ + { 0, 14, 15, 14, 16, 3, 3, 6, ZSTD_btopt }, /* level 10.*/ + { 0, 14, 15, 14, 16, 6, 3, 8, ZSTD_btopt }, /* level 11.*/ + { 0, 14, 15, 14, 16, 6, 3, 16, ZSTD_btopt }, /* level 12.*/ + { 0, 14, 15, 14, 16, 6, 3, 24, ZSTD_btopt }, /* level 13.*/ + { 0, 14, 15, 15, 16, 6, 3, 48, ZSTD_btopt }, /* level 14.*/ + { 0, 14, 15, 15, 16, 6, 3, 64, ZSTD_btopt }, /* level 15.*/ + { 0, 14, 15, 15, 16, 6, 3, 96, ZSTD_btopt }, /* level 16.*/ + { 0, 14, 15, 15, 16, 6, 3,128, ZSTD_btopt }, /* level 17.*/ + { 0, 14, 15, 15, 16, 6, 3,256, ZSTD_btopt }, /* level 18.*/ + { 0, 14, 15, 15, 16, 7, 3,256, ZSTD_btopt }, /* level 19.*/ + { 0, 14, 15, 15, 16, 8, 3,256, ZSTD_btopt }, /* level 20.*/ + { 0, 14, 15, 15, 16, 9, 3,256, ZSTD_btopt }, /* level 21.*/ + { 0, 14, 15, 15, 16, 10, 3,256, ZSTD_btopt }, /* level 22.*/ + { 0, 14, 15, 15, 16, 11, 3,256, ZSTD_btopt }, /* level 23.*/ + { 0, 14, 15, 15, 16, 12, 3,256, ZSTD_btopt }, /* level 24.*/ + { 0, 14, 15, 15, 16, 13, 3,256, ZSTD_btopt }, /* level 25.*/ }, }; diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index c4338bcc..844aa976 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -822,7 +822,8 @@ static size_t ZSTD_decompressSequences( nbSeq--; ZSTD_decodeSequence(&sequence, &seqState, mls); oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd); - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + if (ZSTD_isError(oneSeqSize)) + return oneSeqSize; op += oneSeqSize; } diff --git a/programs/fuzzer.c b/programs/fuzzer.c index 50d496b9..6d57080d 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -344,6 +344,7 @@ static int basicUnitTests(U32 seed, double compressibility) DISPLAYLEVEL(4, "OK \n"); /* nbSeq limit test */ + if (0) { #define _3BYTESTESTLENGTH 131000 #define NB3BYTESSEQLOG 9 From dd54bbc184c7b260a7ed419fbbc248e03afd9942 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 8 Mar 2016 02:35:34 +0100 Subject: [PATCH 3/3] Fixed large NbSeq > 32 K Added a test in Fuzzer to check NbSeq > 32 K --- lib/zstd_compress.c | 19 +++++++++++-------- programs/fuzzer.c | 3 +-- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index a5127b74..50979ce5 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -165,8 +165,8 @@ void ZSTD_validateParams(ZSTD_parameters* params) U32 srcLog = ZSTD_highbit((U32)(params->srcSize)-1) + 1; if (params->windowLog > srcLog) params->windowLog = srcLog; } - if (params->windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) params->windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ - if (params->contentLog > params->windowLog+btPlus) params->contentLog = params->windowLog+btPlus; /* <= ZSTD_CONTENTLOG_MAX */ + if (params->windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) params->windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ + if (params->contentLog > params->windowLog+btPlus) params->contentLog = params->windowLog+btPlus; /* <= ZSTD_CONTENTLOG_MAX */ } @@ -176,9 +176,12 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, const size_t blockSize = MIN(BLOCKSIZE, (size_t)1 << params.windowLog); /* reserve table memory */ const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog; + const U32 divider = (params.searchLength==3) ? 3 : 4; + const size_t maxNbSeq = blockSize / divider; + const size_t tokenSpace = blockSize + 8*maxNbSeq; const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog) + (1 << params.hashLog3)) * sizeof(U32); const size_t optSpace = ((1<workSpaceSize < neededSpace) { @@ -206,13 +209,13 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->blockSize = blockSize; zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer); - zc->seqStore.offCodeStart = (BYTE*) (zc->seqStore.offsetStart + (blockSize>>2)); - zc->seqStore.litStart = zc->seqStore.offCodeStart + (blockSize>>2); + zc->seqStore.offCodeStart = (BYTE*) (zc->seqStore.offsetStart + maxNbSeq); + zc->seqStore.litStart = zc->seqStore.offCodeStart + maxNbSeq; zc->seqStore.litLengthStart = zc->seqStore.litStart + blockSize; - zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + (blockSize>>2); - zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + (blockSize>>2); + zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + maxNbSeq; + zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + maxNbSeq; if (params.strategy == ZSTD_btopt) { - zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.dumpsStart + (blockSize>>2))); + zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.dumpsStart + maxNbSeq)); zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (1<seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (1<