Merge pull request #1346 from terrelln/small-block-fse

[zstd] Fix resetting FSE tables
This commit is contained in:
Yann Collet 2018-09-28 17:34:27 -07:00 committed by GitHub
commit 918603c9c7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 60 additions and 20 deletions

View File

@ -2349,13 +2349,15 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
const void* src, size_t srcSize)
{
ZSTD_matchState_t* const ms = &zc->blockState.matchState;
size_t cSize;
DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%zu, dictLimit=%u, nextToUpdate=%u)",
dstCapacity, ms->window.dictLimit, ms->nextToUpdate);
assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.searchLength);
return 0; /* don't even attempt compression below a certain srcSize */
cSize = 0;
goto out; /* don't even attempt compression below a certain srcSize */
}
ZSTD_resetSeqStore(&(zc->seqStore));
ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; /* required for optimal parser to read stats from dictionary */
@ -2417,27 +2419,27 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
} }
/* encode sequences and literals */
{ size_t const cSize = ZSTD_compressSequences(&zc->seqStore,
&zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
&zc->appliedParams,
dst, dstCapacity,
srcSize, zc->entropyWorkspace, zc->bmi2);
if (!ZSTD_isError(cSize) && cSize != 0) {
/* confirm repcodes and entropy tables */
ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
zc->blockState.prevCBlock = zc->blockState.nextCBlock;
zc->blockState.nextCBlock = tmp;
}
cSize = ZSTD_compressSequences(&zc->seqStore,
&zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
&zc->appliedParams,
dst, dstCapacity,
srcSize, zc->entropyWorkspace, zc->bmi2);
/* We check that dictionaries have offset codes available for the first
* block. After the first block, the offcode table might not have large
* enough codes to represent the offsets in the data.
*/
if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
return cSize;
out:
if (!ZSTD_isError(cSize) && cSize != 0) {
/* confirm repcodes and entropy tables when emitting a compressed block */
ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
zc->blockState.prevCBlock = zc->blockState.nextCBlock;
zc->blockState.nextCBlock = tmp;
}
/* We check that dictionaries have offset codes available for the first
* block. After the first block, the offcode table might not have large
* enough codes to represent the offsets in the data.
*/
if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
return cSize;
}

View File

@ -1073,6 +1073,44 @@ static int basicUnitTests(U32 seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : dictionary + small blocks + reusing tables checks offset table validity: ", testNb++);
/* Scenario: reference a CDict built from `dictionary`, emit a long run of tiny
 * flushed inputs, then compress 128-byte slices read directly out of the
 * dictionary buffer, and finally verify that everything written to `out`
 * decompresses with the same dictionary.
 * NOTE(review): the return value of ZSTD_createCDict_advanced() is not checked
 * in this block before it is passed to ZSTD_CCtx_refCDict() - confirm a NULL
 * cdict is handled acceptably there.
 * NOTE(review): cdict is released only on the fall-through path at the bottom;
 * if CHECK()/CHECK_Z() jump to a cleanup label on failure (their definitions
 * are not visible here), cdict would not be freed on that path - confirm. */
{ ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(
dictionary.start, dictionary.filled,
ZSTD_dlm_byRef, ZSTD_dct_fullDict,
ZSTD_getCParams(3, 0, dictionary.filled),
ZSTD_defaultCMem);
/* One output buffer receives every compress call below; out.pos is later used
 * as the compressed size handed to the decompressor. */
ZSTD_outBuffer out = {compressedBuffer, compressedBufferSize, 0};
int remainingInput = 256 * 1024; /* byte budget consumed by the small-input loop */
int offset;
ZSTD_CCtx_reset(zc);
CHECK_Z(ZSTD_CCtx_resetParameters(zc));
CHECK_Z(ZSTD_CCtx_refCDict(zc, cdict));
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_p_checksumFlag, 1));
/* Write a bunch of 6 byte blocks */
/* Each iteration submits the same 6 bytes of 0xAA with ZSTD_e_flush and
 * subtracts 6 from the budget, until the budget is no longer positive. */
while (remainingInput > 0) {
char testBuffer[6] = "\xAA\xAA\xAA\xAA\xAA\xAA";
const size_t kSmallBlockSize = sizeof(testBuffer);
ZSTD_inBuffer in = {testBuffer, kSmallBlockSize, 0};
CHECK_Z(ZSTD_compress_generic(zc, &out, &in, ZSTD_e_flush));
/* Presumably CHECK() reports a failure when its condition is true, i.e. when
 * the call above left input unread - verify against the macro definition. */
CHECK(in.pos != in.size, "input not fully consumed");
remainingInput -= kSmallBlockSize;
}
/* Write several very long offset matches into the dictionary */
/* Feeds 128 bytes starting at dictionary offsets 1024, 896, ..., 128, 0.
 * Every call uses ZSTD_e_continue except the last one (offset == 0), which
 * uses ZSTD_e_end.
 * NOTE(review): `dictionary.start + offset` relies on the pointee type of
 * dictionary.start, which is not visible here; if it is a void pointer this
 * arithmetic is a compiler extension - confirm. */
for (offset = 1024; offset >= 0; offset -= 128) {
ZSTD_inBuffer in = {dictionary.start + offset, 128, 0};
ZSTD_EndDirective flush = offset > 0 ? ZSTD_e_continue : ZSTD_e_end;
CHECK_Z(ZSTD_compress_generic(zc, &out, &in, flush));
CHECK(in.pos != in.size, "input not fully consumed");
}
/* Ensure decompression works */
/* Decompresses the out.pos bytes accumulated in out.dst, passing the same
 * dictionary bytes that were used to build cdict. Only the status of the call
 * is checked via CHECK_Z; the decoded content is not compared here. */
CHECK_Z(ZSTD_decompress_usingDict(zd, decodedBuffer, CNBufferSize, out.dst, out.pos, dictionary.start, dictionary.filled));
ZSTD_freeCDict(cdict);
}
DISPLAYLEVEL(3, "OK \n");
_end:
FUZ_freeDictionary(dictionary);
ZSTD_freeCStream(zc);