diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 46751537..66fb4827 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -983,6 +983,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, if (params.ldmParams.enableLdm) { /* Adjust long distance matching parameters */ + params.ldmParams.windowLog = params.cParams.windowLog; ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams); assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); assert(params.ldmParams.hashEveryLog < 32); @@ -1067,6 +1068,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ptr = zc->ldmState.hashTable + ldmHSize; zc->ldmSequences = (rawSeq*)ptr; ptr = zc->ldmSequences + maxNbLdmSeq; + + memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window)); } assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ @@ -1277,19 +1280,6 @@ static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const ZSTD_reduceTable_internal(table, size, reducerValue, 1); } - -/*! ZSTD_ldm_reduceTable() : - * reduce table indexes by `reducerValue` */ -static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, - U32 const reducerValue) -{ - U32 u; - for (u = 0; u < size; u++) { - if (table[u].offset < reducerValue) table[u].offset = 0; - else table[u].offset -= reducerValue; - } -} - /*! 
ZSTD_reduceIndex() : * rescale all indexes to avoid future overflow (indexes are U32) */ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) @@ -1311,11 +1301,6 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) U32 const h3Size = (U32)1 << ms->hashLog3; ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue); } - - if (zc->appliedParams.ldmParams.enableLdm) { - U32 const ldmHSize = (U32)1 << zc->appliedParams.ldmParams.hashLog; - ZSTD_ldm_reduceTable(zc->ldmState.hashTable, ldmHSize, reducerValue); - } } @@ -1832,7 +1817,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, if (zc->appliedParams.ldmParams.enableLdm) { size_t const nbSeq = ZSTD_ldm_generateSequences(&zc->ldmState, zc->ldmSequences, - ms, &zc->appliedParams.ldmParams, + &zc->appliedParams.ldmParams, src, srcSize, extDict); lastLLSize = ZSTD_ldm_blockCompress(zc->ldmSequences, nbSeq, @@ -2023,6 +2008,8 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, if (!ZSTD_window_update(&ms->window, src, srcSize)) { ms->nextToUpdate = ms->window.dictLimit; } + if (cctx->appliedParams.ldmParams.enableLdm) + ZSTD_window_update(&cctx->ldmState.window, src, srcSize); DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (U32)cctx->blockSize); { size_t const cSize = frame ? diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 98f866cd..92bd0c70 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -136,6 +136,7 @@ typedef struct { } ldmEntry_t; typedef struct { + ZSTD_window_t window; /* State for the window round buffer management */ ldmEntry_t* hashTable; BYTE* bucketOffsets; /* Next position in bucket to insert entry */ U64 hashPower; /* Used to compute the rolling hash. 
@@ -148,6 +149,7 @@ typedef struct { U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ U32 minMatchLength; /* Minimum match length */ U32 hashEveryLog; /* Log number of entries to skip */ + U32 windowLog; /* Window log for the LDM */ } ldmParams_t; struct ZSTD_CCtx_params_s { diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index a043556b..22f9b4a8 100644 --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@ -294,8 +294,8 @@ static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) } } -size_t ZSTD_ldm_generateSequences( - ldmState_t* ldmState, rawSeq* sequences, ZSTD_matchState_t const* ms, +static size_t ZSTD_ldm_generateSequences_internal( + ldmState_t* ldmState, rawSeq* sequences, ldmParams_t const* params, void const* src, size_t srcSize, int const extDict) { @@ -308,10 +308,10 @@ size_t ZSTD_ldm_generateSequences( U32 const hashEveryLog = params->hashEveryLog; U32 const ldmTagMask = (1U << params->hashEveryLog) - 1; /* Prefix and extDict parameters */ - U32 const dictLimit = ms->window.dictLimit; - U32 const lowestIndex = extDict ? ms->window.lowLimit : dictLimit; - BYTE const* const base = ms->window.base; - BYTE const* const dictBase = extDict ? ms->window.dictBase : NULL; + U32 const dictLimit = ldmState->window.dictLimit; + U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; + BYTE const* const base = ldmState->window.base; + BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL; BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL; BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL; BYTE const* const lowPrefixPtr = base + dictLimit; @@ -453,6 +453,62 @@ size_t ZSTD_ldm_generateSequences( return sequences - sequencesStart; } +/*! 
ZSTD_ldm_reduceTable() : + * reduce table indexes by `reducerValue` */ +static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, + U32 const reducerValue) +{ + U32 u; + for (u = 0; u < size; u++) { + if (table[u].offset < reducerValue) table[u].offset = 0; + else table[u].offset -= reducerValue; + } +} + +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldmState, rawSeq* sequences, + ldmParams_t const* params, void const* src, size_t srcSize, + int const extDict) +{ + U32 const maxDist = 1U << params->windowLog; + BYTE const* const istart = (BYTE const*)src; + size_t const kMaxChunkSize = 1 << 20; + size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0); + size_t nbSeq = 0; + size_t chunk; + + assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize); + /* Check that ZSTD_window_update() has been called for this chunk prior + * to passing it to this function. + */ + assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); + for (chunk = 0; chunk < nbChunks; ++chunk) { + size_t const chunkStart = chunk * kMaxChunkSize; + size_t const chunkEnd = MIN(chunkStart + kMaxChunkSize, srcSize); + size_t const chunkSize = chunkEnd - chunkStart; + + assert(chunkStart < srcSize); + if (ZSTD_window_needOverflowCorrection(ldmState->window)) { + U32 const ldmHSize = 1U << params->hashLog; + U32 const correction = ZSTD_window_correctOverflow( + &ldmState->window, /* cycleLog */ 0, maxDist, src); + ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); + } + /* kMaxChunkSize should be small enough that we don't lose too much of + * the window through early invalidation. + * TODO: * Test the chunk size. + * * Try invalidation after the sequence generation and test the + * offset against maxDist directly.
+ */ + ZSTD_window_enforceMaxDist(&ldmState->window, istart + chunkEnd, + maxDist); + nbSeq += ZSTD_ldm_generateSequences_internal( + ldmState, sequences + nbSeq, params, istart + chunkStart, chunkSize, + extDict); + } + return nbSeq; +} + #if 0 /** * If the sequence length is longer than remaining then the sequence is split diff --git a/lib/compress/zstd_ldm.h b/lib/compress/zstd_ldm.h index a836033a..d9219a18 100644 --- a/lib/compress/zstd_ldm.h +++ b/lib/compress/zstd_ldm.h @@ -29,13 +29,15 @@ extern "C" { * * Generates the sequences using the long distance match finder. * The sequences completely parse a prefix of the source, but leave off the last - * literals. Returns the number of sequences generated into `sequences`. + * literals. Returns the number of sequences generated into `sequences`. The + * user must have called ZSTD_window_update() for all of the input they have, + * even if they pass it to ZSTD_ldm_generateSequences() in chunks. * * NOTE: The source may be any size, assuming it doesn't overflow the hash table * indices, and the output sequences table is large enough.. */ size_t ZSTD_ldm_generateSequences( - ldmState_t* ldms, rawSeq* sequences, ZSTD_matchState_t const* ms, + ldmState_t* ldms, rawSeq* sequences, ldmParams_t const* params, void const* src, size_t srcSize, int const extDict);