Merge pull request #1664 from ephiepark/dev

decodecorpus
This commit is contained in:
Yann Collet 2019-07-01 14:13:49 -07:00 committed by GitHub
commit 4d611ca405
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 69 additions and 17 deletions

View File

@ -505,7 +505,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
*nbSeqPtr = nbSeq;
/* FSE table descriptors */
RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */
RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);

View File

@ -840,16 +840,16 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
{ unsigned max = MaxLL;
size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */
assert(!HIST_isError(mostFrequent));
if (mostFrequent == nbSeq) {
/* do RLE if we have the chance */
*op++ = llCodeTable[0];
FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
LLtype = set_rle;
} else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
if (frame->stats.fseInit && !(RAND(seed) & 3) &&
isSymbolSubset(llCodeTable, nbSeq,
frame->stats.litlengthSymbolSet, 35)) {
/* maybe do repeat mode if we're allowed to */
LLtype = set_repeat;
} else if (mostFrequent == nbSeq) {
/* do RLE if we have the chance */
*op++ = llCodeTable[0];
FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
LLtype = set_rle;
} else if (!(RAND(seed) & 3)) {
/* maybe use the default distribution */
FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
@ -872,14 +872,14 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
{ unsigned max = MaxOff;
size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */
assert(!HIST_isError(mostFrequent));
if (mostFrequent == nbSeq) {
*op++ = ofCodeTable[0];
FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
Offtype = set_rle;
} else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
if (frame->stats.fseInit && !(RAND(seed) & 3) &&
isSymbolSubset(ofCodeTable, nbSeq,
frame->stats.offsetSymbolSet, 28)) {
Offtype = set_repeat;
} else if (mostFrequent == nbSeq) {
*op++ = ofCodeTable[0];
FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
Offtype = set_rle;
} else if (!(RAND(seed) & 3)) {
FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
Offtype = set_basic;
@ -900,14 +900,14 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
{ unsigned max = MaxML;
size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */
assert(!HIST_isError(mostFrequent));
if (mostFrequent == nbSeq) {
*op++ = *mlCodeTable;
FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
MLtype = set_rle;
} else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
if (frame->stats.fseInit && !(RAND(seed) & 3) &&
isSymbolSubset(mlCodeTable, nbSeq,
frame->stats.matchlengthSymbolSet, 52)) {
MLtype = set_repeat;
} else if (mostFrequent == nbSeq) {
*op++ = *mlCodeTable;
FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
MLtype = set_rle;
} else if (!(RAND(seed) & 3)) {
/* sometimes do default distribution */
FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));

View File

@ -1184,6 +1184,58 @@ static int basicUnitTests(U32 seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");
/* Small Sequence Section bug */
DISPLAYLEVEL(3, "test%3i : decompress blocks with small sequences section : ", testNb++);
{ /* This test consists of 3 blocks. Each block has one sequence.
The sequence has literal length of 10, match length of 10 and offset of 10.
The sequence value and compression mode for the blocks are following:
The order of values are ll, ml, of.
- First block : (10, 7, 13) (rle, rle, rle)
- size of sequences section: 6 bytes (1 byte for nbSeq, 1 byte for encoding mode, 3 bytes for rle, 1 byte bitstream)
- Second block : (10, 7, 1) (repeat, repeat, rle)
- size of sequences section: 4 bytes (1 byte for nbSeq, 1 byte for encoding mode, 1 bytes for rle, 1 byte bitstream)
- Third block : (10, 7, 1) (repeat, repeat, repeat)
- size of sequences section: 3 bytes (1 byte for nbSeq, 1 byte for encoding mode, 1 byte bitstream) */
unsigned char compressed[] = {
0x28, 0xb5, 0x2f, 0xfd, 0x24, 0x3c, 0x35, 0x01, 0x00, 0xf0, 0x85, 0x08,
0xc2, 0xc4, 0x70, 0xcf, 0xd7, 0xc0, 0x96, 0x7e, 0x4c, 0x6b, 0xa9, 0x8b,
0xbc, 0xc5, 0xb6, 0xd9, 0x7f, 0x4c, 0xf1, 0x05, 0xa6, 0x54, 0xef, 0xac,
0x69, 0x94, 0x89, 0x1c, 0x03, 0x44, 0x0a, 0x07, 0x00, 0xb4, 0x04, 0x80,
0x40, 0x0a, 0xa4
};
unsigned int compressedSize = 51;
unsigned char decompressed[] = {
0x85, 0x08, 0xc2, 0xc4, 0x70, 0xcf, 0xd7, 0xc0, 0x96, 0x7e, 0x85, 0x08,
0xc2, 0xc4, 0x70, 0xcf, 0xd7, 0xc0, 0x96, 0x7e, 0x4c, 0x6b, 0xa9, 0x8b,
0xbc, 0xc5, 0xb6, 0xd9, 0x7f, 0x4c, 0x4c, 0x6b, 0xa9, 0x8b, 0xbc, 0xc5,
0xb6, 0xd9, 0x7f, 0x4c, 0xf1, 0x05, 0xa6, 0x54, 0xef, 0xac, 0x69, 0x94,
0x89, 0x1c, 0xf1, 0x05, 0xa6, 0x54, 0xef, 0xac, 0x69, 0x94, 0x89, 0x1c
};
unsigned int decompressedSize = 60;
ZSTD_DStream* const zds = ZSTD_createDStream();
if (zds==NULL) goto _output_error;
CHECK_Z( ZSTD_initDStream(zds) );
inBuff.src = compressed;
inBuff.size = compressedSize;
inBuff.pos = 0;
outBuff.dst = decodedBuffer;
outBuff.size = CNBufferSize;
outBuff.pos = 0;
CHECK(ZSTD_decompressStream(zds, &outBuff, &inBuff) != 0,
"Decompress did not reach the end of frame");
CHECK(inBuff.pos != inBuff.size, "Decompress did not fully consume input");
CHECK(outBuff.pos != decompressedSize, "Decompressed size does not match");
CHECK(memcmp(outBuff.dst, decompressed, decompressedSize) != 0,
"Decompressed data does not match");
ZSTD_freeDStream(zds);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : dictionary + uncompressible block + reusing tables checks offset table validity: ", testNb++);
{ ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(
dictionary.start, dictionary.filled,