Fix external sequence corner cases
* Clear external sequences when we reset the `ZSTD_CCtx`. * Skip external sequences when a block is too small to compress.
This commit is contained in:
parent
d19f803a3b
commit
136b9e2392
@ -922,6 +922,7 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl
|
||||
cctx->dictID = 0;
|
||||
if (params.ldmParams.enableLdm)
|
||||
ZSTD_window_clear(&cctx->ldmState.window);
|
||||
ZSTD_referenceExternalSequences(cctx, NULL, 0);
|
||||
ZSTD_invalidateMatchState(&cctx->blockState.matchState);
|
||||
ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock);
|
||||
XXH64_reset(&cctx->xxhState, 0);
|
||||
@ -1108,6 +1109,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
||||
ptr = zc->ldmState.bucketOffsets + ldmBucketSize;
|
||||
ZSTD_window_clear(&zc->ldmState.window);
|
||||
}
|
||||
ZSTD_referenceExternalSequences(zc, NULL, 0);
|
||||
|
||||
/* buffers */
|
||||
zc->inBuffSize = buffInSize;
|
||||
@ -1818,8 +1820,10 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
||||
ZSTD_matchState_t* const ms = &zc->blockState.matchState;
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
|
||||
(U32)dstCapacity, ms->window.dictLimit, ms->nextToUpdate);
|
||||
if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1)
|
||||
if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
|
||||
ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.searchLength);
|
||||
return 0; /* don't even attempt compression below a certain srcSize */
|
||||
}
|
||||
ZSTD_resetSeqStore(&(zc->seqStore));
|
||||
|
||||
/* limited update after a very long match */
|
||||
|
@ -536,6 +536,34 @@ size_t ZSTD_ldm_generateSequences(
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * ZSTD_ldm_skipSequences():
 *
 * Advance `rawSeqStore` past `srcSize` bytes of input that will not be
 * compressed using the stored sequences.
 *
 * Starting at `rawSeqStore->pos`, literals and then match bytes are consumed
 * from each sequence until `srcSize` bytes have been accounted for or the
 * store is exhausted. A sequence that is only partially consumed is shortened
 * in place. If the match that is left over is shorter than `minMatch`, the
 * sequence is dropped and its leftover length is added to the literals of the
 * following sequence (when there is one).
 *
 * @param rawSeqStore  sequence store to advance; entries are modified in place
 * @param srcSize      number of input bytes to skip
 * @param minMatch     shortest match length worth keeping
 */
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
    while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
        if (srcSize <= seq->litLength) {
            /* Skip past srcSize literals.
             * srcSize <= litLength here, so the explicit narrowing cast
             * cannot drop any bits (assumes the length fields are U32). */
            seq->litLength -= (U32)srcSize;
            return;
        }
        /* All literals of this sequence are skipped */
        srcSize -= seq->litLength;
        seq->litLength = 0;
        if (srcSize < seq->matchLength) {
            /* Skip past the first srcSize of the match.
             * srcSize < matchLength here, so the cast is lossless. */
            seq->matchLength -= (U32)srcSize;
            if (seq->matchLength < minMatch) {
                /* The match is too short, omit it:
                 * its bytes become literals of the next sequence (if any) */
                if (rawSeqStore->pos + 1 < rawSeqStore->size) {
                    seq[1].litLength += seq[0].matchLength;
                }
                rawSeqStore->pos++;
            }
            return;
        }
        /* The whole sequence is skipped, move on to the next one */
        srcSize -= seq->matchLength;
        seq->matchLength = 0;
        rawSeqStore->pos++;
    }
}
|
||||
|
||||
/**
|
||||
* If the sequence length is longer than remaining then the sequence is split
|
||||
* between this block and the next.
|
||||
@ -546,51 +574,24 @@ size_t ZSTD_ldm_generateSequences(
|
||||
/* NOTE(review): this hunk is rendered without added/removed markers, and the
 * hunk header reports 51 old lines against 24 new ones, so statements from
 * both the pre-change and the post-change version of this function appear
 * to be interleaved below. Do not read it as a single compilable body.
 *
 * What the visible lines do:
 *  - copy the sequence at rawSeqStore->pos and assert its offset is non-zero;
 *  - if `remaining` covers litLength + matchLength, consume the sequence
 *    (pos++) and return the copy unchanged;
 *  - if `remaining` <= litLength, return the copy with offset set to 0;
 *  - otherwise trim the copy's matchLength to `remaining - litLength`, and
 *    set offset to 0 when that is shorter than `minMatch`;
 *  - finally call ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch)
 *    and return the copy.
 */
static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
|
||||
U32 const remaining, U32 const minMatch)
|
||||
{
|
||||
size_t const pos = rawSeqStore->pos;
|
||||
rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
|
||||
assert(sequence.offset > 0);
|
||||
/* Handle partial sequences */
|
||||
/* Likely: No partial sequence */
|
||||
if (remaining >= sequence.litLength + sequence.matchLength) {
|
||||
rawSeqStore->pos++;
|
||||
return sequence;
|
||||
}
|
||||
/* Cut the sequence short (offset == 0 ==> rest is literals). */
|
||||
if (remaining <= sequence.litLength) {
|
||||
/* Split the literals that we have out of the sequence.
|
||||
* They will become the last literals of this block.
|
||||
* The next block starts off with the remaining literals.
|
||||
*/
|
||||
rawSeqStore->seq[pos].litLength -= remaining;
|
||||
sequence.offset = 0;
|
||||
} else if (remaining < sequence.litLength + sequence.matchLength) {
|
||||
/* Split the match up into two sequences. One in this block, and one
|
||||
* in the next with no literals. If either match would be shorter
|
||||
* than searchLength we omit it.
|
||||
*/
|
||||
U32 const matchPrefix = remaining - sequence.litLength;
|
||||
U32 const matchSuffix = sequence.matchLength - matchPrefix;
|
||||
|
||||
assert(remaining > sequence.litLength);
|
||||
assert(matchPrefix < sequence.matchLength);
|
||||
assert(matchPrefix + matchSuffix == sequence.matchLength);
|
||||
/* Update the first sequence */
|
||||
sequence.matchLength = matchPrefix;
|
||||
/* Update the second sequence */
|
||||
if (matchSuffix >= minMatch) {
|
||||
/* Update the second sequence, since the suffix is long enough */
|
||||
rawSeqStore->seq[pos].litLength = 0;
|
||||
rawSeqStore->seq[pos].matchLength = matchSuffix;
|
||||
} else {
|
||||
/* Omit the second sequence since the match suffix is too short.
|
||||
* Add to the next sequences literals (if any).
|
||||
*/
|
||||
if (pos + 1 < rawSeqStore->size)
|
||||
rawSeqStore->seq[pos + 1].litLength += matchSuffix;
|
||||
rawSeqStore->pos++; /* Consume the sequence */
|
||||
}
|
||||
sequence.matchLength = remaining - sequence.litLength;
|
||||
if (sequence.matchLength < minMatch) {
|
||||
/* Skip the current sequence if it is too short */
|
||||
sequence.offset = 0;
|
||||
}
|
||||
} else {
|
||||
/* No partial sequence */
|
||||
rawSeqStore->pos++; /* Consume the sequence */
|
||||
}
|
||||
/* Skip past `remaining` bytes for the future sequences. */
|
||||
ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
|
||||
return sequence;
|
||||
}
|
||||
|
||||
|
@ -65,6 +65,16 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
||||
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
|
||||
int const extDict);
|
||||
|
||||
/**
|
||||
* ZSTD_ldm_skipSequences():
|
||||
*
|
||||
* Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
|
||||
* Avoids emitting matches less than `minMatch` bytes.
|
||||
* Must be called for data that is not passed to ZSTD_ldm_blockCompress().
|
||||
*/
|
||||
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
|
||||
U32 const minMatch);
|
||||
|
||||
|
||||
/** ZSTD_ldm_initializeParameters() :
|
||||
* Initialize the long distance matching parameters to their default values. */
|
||||
|
Loading…
Reference in New Issue
Block a user