diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index db8d7a8c..642296fb 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -922,6 +922,7 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl cctx->dictID = 0; if (params.ldmParams.enableLdm) ZSTD_window_clear(&cctx->ldmState.window); + ZSTD_referenceExternalSequences(cctx, NULL, 0); ZSTD_invalidateMatchState(&cctx->blockState.matchState); ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock); XXH64_reset(&cctx->xxhState, 0); @@ -1108,6 +1109,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ptr = zc->ldmState.bucketOffsets + ldmBucketSize; ZSTD_window_clear(&zc->ldmState.window); } + ZSTD_referenceExternalSequences(zc, NULL, 0); /* buffers */ zc->inBuffSize = buffInSize; @@ -1818,8 +1820,10 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, ZSTD_matchState_t* const ms = &zc->blockState.matchState; DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", (U32)dstCapacity, ms->window.dictLimit, ms->nextToUpdate); - if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { + ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.searchLength); return 0; /* don't even attempt compression below a certain srcSize */ + } ZSTD_resetSeqStore(&(zc->seqStore)); /* limited update after a very long match */ diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index 5c9c0d2b..aff9dd2b 100644 --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@ -536,6 +536,34 @@ size_t ZSTD_ldm_generateSequences( return 0; } +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) { + while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { + rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; + if (srcSize <= seq->litLength) { + /* srcSize ends inside the literals of this sequence: skip past srcSize literals and stop */ + 
seq->litLength -= srcSize; + return; + } + srcSize -= seq->litLength; + seq->litLength = 0; + if (srcSize < seq->matchLength) { + /* srcSize ends inside the match: skip past the first srcSize bytes of the match */ + seq->matchLength -= srcSize; + if (seq->matchLength < minMatch) { + /* The remaining match is shorter than minMatch: omit it, adding its length to the literals of the next sequence (if any) */ + if (rawSeqStore->pos + 1 < rawSeqStore->size) { + seq[1].litLength += seq[0].matchLength; + } + rawSeqStore->pos++; + } + return; + } + srcSize -= seq->matchLength; + seq->matchLength = 0; + rawSeqStore->pos++; + } +} + /** * If the sequence length is longer than remaining then the sequence is split * between this block and the next. @@ -546,51 +574,24 @@ size_t ZSTD_ldm_generateSequences( static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, U32 const remaining, U32 const minMatch) { - size_t const pos = rawSeqStore->pos; rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos]; assert(sequence.offset > 0); - /* Handle partial sequences */ + /* Likely: no partial sequence, the whole sequence fits within `remaining` */ + if (remaining >= sequence.litLength + sequence.matchLength) { + rawSeqStore->pos++; + return sequence; + } + /* Cut the sequence short (offset == 0 ==> rest is literals). */ if (remaining <= sequence.litLength) { - /* Split the literals that we have out of the sequence. - * They will become the last literals of this block. - * The next block starts off with the remaining literals. - */ - rawSeqStore->seq[pos].litLength -= remaining; sequence.offset = 0; } else if (remaining < sequence.litLength + sequence.matchLength) { - /* Split the match up into two sequences. One in this block, and one - * in the next with no literals. If either match would be shorter - * than searchLength we omit it. 
- */ - U32 const matchPrefix = remaining - sequence.litLength; - U32 const matchSuffix = sequence.matchLength - matchPrefix; - - assert(remaining > sequence.litLength); - assert(matchPrefix < sequence.matchLength); - assert(matchPrefix + matchSuffix == sequence.matchLength); - /* Update the first sequence */ - sequence.matchLength = matchPrefix; - /* Update the second sequence */ - if (matchSuffix >= minMatch) { - /* Update the second sequence, since the suffix is long enough */ - rawSeqStore->seq[pos].litLength = 0; - rawSeqStore->seq[pos].matchLength = matchSuffix; - } else { - /* Omit the second sequence since the match suffix is too short. - * Add to the next sequences literals (if any). - */ - if (pos + 1 < rawSeqStore->size) - rawSeqStore->seq[pos + 1].litLength += matchSuffix; - rawSeqStore->pos++; /* Consume the sequence */ - } + sequence.matchLength = remaining - sequence.litLength; if (sequence.matchLength < minMatch) { - /* Skip the current sequence if it is too short */ sequence.offset = 0; } - } else { - /* No partial sequence */ - rawSeqStore->pos++; /* Consume the sequence */ } + /* Skip past `remaining` bytes for the future sequences. */ + ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch); return sequence; } diff --git a/lib/compress/zstd_ldm.h b/lib/compress/zstd_ldm.h index 9d2f7c39..84d3723c 100644 --- a/lib/compress/zstd_ldm.h +++ b/lib/compress/zstd_ldm.h @@ -65,6 +65,16 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize, int const extDict); +/** + * ZSTD_ldm_skipSequences(): + * + * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`. + * Avoids emitting matches less than `minMatch` bytes. + * Must be called for data which is not passed to ZSTD_ldm_blockCompress(). 
+ */ +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, + U32 const minMatch); + /** ZSTD_ldm_initializeParameters() : * Initialize the long distance matching parameters to their default values. */