diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 7707c28b..741784fd 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -505,7 +505,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, *nbSeqPtr = nbSeq; /* FSE table descriptors */ - RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */ + RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */ { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c index 9910d3c5..df40862b 100644 --- a/tests/decodecorpus.c +++ b/tests/decodecorpus.c @@ -840,16 +840,16 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr, { unsigned max = MaxLL; size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */ assert(!HIST_isError(mostFrequent)); - if (mostFrequent == nbSeq) { - /* do RLE if we have the chance */ - *op++ = llCodeTable[0]; - FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); - LLtype = set_rle; - } else if (frame->stats.fseInit && !(RAND(seed) & 3) && + if (frame->stats.fseInit && !(RAND(seed) & 3) && isSymbolSubset(llCodeTable, nbSeq, frame->stats.litlengthSymbolSet, 35)) { /* maybe do repeat mode if we're allowed to */ LLtype = set_repeat; + } else if (mostFrequent == nbSeq) { + /* do RLE if we have the chance */ + *op++ = llCodeTable[0]; + FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); + LLtype = set_rle; } else if (!(RAND(seed) & 3)) { /* maybe use the default distribution */ FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); @@ -872,14 +872,14 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr, { unsigned max = MaxOff; size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */ assert(!HIST_isError(mostFrequent)); - if (mostFrequent == nbSeq) { - *op++ = ofCodeTable[0]; - FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); - Offtype = set_rle; - } else if (frame->stats.fseInit && !(RAND(seed) & 3) && + if (frame->stats.fseInit && !(RAND(seed) & 3) && isSymbolSubset(ofCodeTable, nbSeq, frame->stats.offsetSymbolSet, 28)) { Offtype = set_repeat; + } else if (mostFrequent == nbSeq) { + *op++ = ofCodeTable[0]; + FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); + Offtype = set_rle; } else if (!(RAND(seed) & 3)) { FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); Offtype = set_basic; @@ -900,14 +900,14 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr, { unsigned max = MaxML; size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */ assert(!HIST_isError(mostFrequent)); - if (mostFrequent == nbSeq) { - *op++ = *mlCodeTable; - FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); - MLtype = set_rle; - } else if (frame->stats.fseInit && !(RAND(seed) & 3) && + if (frame->stats.fseInit && !(RAND(seed) & 3) && isSymbolSubset(mlCodeTable, nbSeq, frame->stats.matchlengthSymbolSet, 52)) { MLtype = set_repeat; + } else if (mostFrequent == nbSeq) { + *op++ = *mlCodeTable; + FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); + MLtype = set_rle; } else if (!(RAND(seed) & 3)) { /* sometimes do default distribution */ FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 55c14ad5..97d4e33e 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -1184,6 +1184,58 @@ static int basicUnitTests(U32 seed, double compressibility) } DISPLAYLEVEL(3, "OK \n"); + /* Small Sequence Section bug */ + DISPLAYLEVEL(3, "test%3i : decompress blocks with small sequences section : ", testNb++); + { /* This test consists of 3 blocks. Each block has one sequence. + The sequence has literal length of 10, match length of 10 and offset of 10. + The sequence value and compression mode for the blocks are following: + The order of values are ll, ml, of. + - First block : (10, 7, 13) (rle, rle, rle) + - size of sequences section: 6 bytes (1 byte for nbSeq, 1 byte for encoding mode, 3 bytes for rle, 1 byte bitstream) + - Second block : (10, 7, 1) (repeat, repeat, rle) + - size of sequences section: 4 bytes (1 byte for nbSeq, 1 byte for encoding mode, 1 bytes for rle, 1 byte bitstream) + - Third block : (10, 7, 1) (repeat, repeat, repeat) + - size of sequences section: 3 bytes (1 byte for nbSeq, 1 byte for encoding mode, 1 byte bitstream) */ + + unsigned char compressed[] = { + 0x28, 0xb5, 0x2f, 0xfd, 0x24, 0x3c, 0x35, 0x01, 0x00, 0xf0, 0x85, 0x08, + 0xc2, 0xc4, 0x70, 0xcf, 0xd7, 0xc0, 0x96, 0x7e, 0x4c, 0x6b, 0xa9, 0x8b, + 0xbc, 0xc5, 0xb6, 0xd9, 0x7f, 0x4c, 0xf1, 0x05, 0xa6, 0x54, 0xef, 0xac, + 0x69, 0x94, 0x89, 0x1c, 0x03, 0x44, 0x0a, 0x07, 0x00, 0xb4, 0x04, 0x80, + 0x40, 0x0a, 0xa4 + }; + unsigned int compressedSize = 51; + unsigned char decompressed[] = { + 0x85, 0x08, 0xc2, 0xc4, 0x70, 0xcf, 0xd7, 0xc0, 0x96, 0x7e, 0x85, 0x08, + 0xc2, 0xc4, 0x70, 0xcf, 0xd7, 0xc0, 0x96, 0x7e, 0x4c, 0x6b, 0xa9, 0x8b, + 0xbc, 0xc5, 0xb6, 0xd9, 0x7f, 0x4c, 0x4c, 0x6b, 0xa9, 0x8b, 0xbc, 0xc5, + 0xb6, 0xd9, 0x7f, 0x4c, 0xf1, 0x05, 0xa6, 0x54, 0xef, 0xac, 0x69, 0x94, + 0x89, 0x1c, 0xf1, 0x05, 0xa6, 0x54, 0xef, 0xac, 0x69, 0x94, 0x89, 0x1c + }; + unsigned int decompressedSize = 60; + + ZSTD_DStream* const zds = ZSTD_createDStream(); + if (zds==NULL) goto _output_error; + + CHECK_Z( ZSTD_initDStream(zds) ); + inBuff.src = compressed; + inBuff.size = compressedSize; + inBuff.pos = 0; + outBuff.dst = decodedBuffer; + outBuff.size = CNBufferSize; + outBuff.pos = 0; + + CHECK(ZSTD_decompressStream(zds, &outBuff, &inBuff) != 0, + "Decompress did not reach the end of frame"); + CHECK(inBuff.pos != inBuff.size, "Decompress did not fully consume input"); + CHECK(outBuff.pos != decompressedSize, "Decompressed size does not match"); + CHECK(memcmp(outBuff.dst, decompressed, decompressedSize) != 0, + "Decompressed data does not match"); + + ZSTD_freeDStream(zds); + } + DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : dictionary + uncompressible block + reusing tables checks offset table validity: ", testNb++); { ZSTD_CDict* const cdict = ZSTD_createCDict_advanced( dictionary.start, dictionary.filled,