Fix external sequence corner cases

* Clear external sequences when we reset the `ZSTD_CCtx`.
* Skip external sequences when a block is too small to compress.
This commit is contained in:
Nick Terrell 2018-03-20 14:31:43 -07:00
parent d19f803a3b
commit 136b9e2392
3 changed files with 52 additions and 37 deletions

View File

@ -922,6 +922,7 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl
cctx->dictID = 0;
if (params.ldmParams.enableLdm)
ZSTD_window_clear(&cctx->ldmState.window);
ZSTD_referenceExternalSequences(cctx, NULL, 0);
ZSTD_invalidateMatchState(&cctx->blockState.matchState);
ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock);
XXH64_reset(&cctx->xxhState, 0);
@ -1108,6 +1109,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
ptr = zc->ldmState.bucketOffsets + ldmBucketSize;
ZSTD_window_clear(&zc->ldmState.window);
}
ZSTD_referenceExternalSequences(zc, NULL, 0);
/* buffers */
zc->inBuffSize = buffInSize;
@ -1818,8 +1820,10 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
ZSTD_matchState_t* const ms = &zc->blockState.matchState;
DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
(U32)dstCapacity, ms->window.dictLimit, ms->nextToUpdate);
if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1)
if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.searchLength);
return 0; /* don't even attempt compression below a certain srcSize */
}
ZSTD_resetSeqStore(&(zc->seqStore));
/* limited update after a very long match */

View File

@ -536,6 +536,34 @@ size_t ZSTD_ldm_generateSequences(
return 0;
}
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
if (srcSize <= seq->litLength) {
/* Skip past srcSize literals */
seq->litLength -= srcSize;
return;
}
srcSize -= seq->litLength;
seq->litLength = 0;
if (srcSize < seq->matchLength) {
/* Skip past the first srcSize of the match */
seq->matchLength -= srcSize;
if (seq->matchLength < minMatch) {
/* The match is too short, omit it */
if (rawSeqStore->pos + 1 < rawSeqStore->size) {
seq[1].litLength += seq[0].matchLength;
}
rawSeqStore->pos++;
}
return;
}
srcSize -= seq->matchLength;
seq->matchLength = 0;
rawSeqStore->pos++;
}
}
/**
* If the sequence length is longer than remaining then the sequence is split
* between this block and the next.
@ -546,51 +574,24 @@ size_t ZSTD_ldm_generateSequences(
static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
U32 const remaining, U32 const minMatch)
{
size_t const pos = rawSeqStore->pos;
rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
assert(sequence.offset > 0);
/* Handle partial sequences */
/* Likely: No partial sequence */
if (remaining >= sequence.litLength + sequence.matchLength) {
rawSeqStore->pos++;
return sequence;
}
/* Cut the sequence short (offset == 0 ==> rest is literals). */
if (remaining <= sequence.litLength) {
/* Split the literals that we have out of the sequence.
* They will become the last literals of this block.
* The next block starts off with the remaining literals.
*/
rawSeqStore->seq[pos].litLength -= remaining;
sequence.offset = 0;
} else if (remaining < sequence.litLength + sequence.matchLength) {
/* Split the match up into two sequences. One in this block, and one
* in the next with no literals. If either match would be shorter
* than searchLength we omit it.
*/
U32 const matchPrefix = remaining - sequence.litLength;
U32 const matchSuffix = sequence.matchLength - matchPrefix;
assert(remaining > sequence.litLength);
assert(matchPrefix < sequence.matchLength);
assert(matchPrefix + matchSuffix == sequence.matchLength);
/* Update the first sequence */
sequence.matchLength = matchPrefix;
/* Update the second sequence */
if (matchSuffix >= minMatch) {
/* Update the second sequence, since the suffix is long enough */
rawSeqStore->seq[pos].litLength = 0;
rawSeqStore->seq[pos].matchLength = matchSuffix;
} else {
/* Omit the second sequence since the match suffix is too short.
* Add to the next sequences literals (if any).
*/
if (pos + 1 < rawSeqStore->size)
rawSeqStore->seq[pos + 1].litLength += matchSuffix;
rawSeqStore->pos++; /* Consume the sequence */
}
sequence.matchLength = remaining - sequence.litLength;
if (sequence.matchLength < minMatch) {
/* Skip the current sequence if it is too short */
sequence.offset = 0;
}
} else {
/* No partial sequence */
rawSeqStore->pos++; /* Consume the sequence */
}
/* Skip past `remaining` bytes for the future sequences. */
ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
return sequence;
}

View File

@ -65,6 +65,16 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
int const extDict);
/**
* ZSTD_ldm_skipSequences():
*
* Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
* Avoids emitting matches less than `minMatch` bytes.
* Must be called for data with is not passed to ZSTD_ldm_blockCompress().
*/
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
U32 const minMatch);
/** ZSTD_ldm_initializeParameters() :
* Initialize the long distance matching parameters to their default values. */