Expose reference external sequence API
Summary: * Expose the reference external sequences API for zstdmt. Allows external sequences of any length, which get split when necessary. * Reset the LDM window when the context is reset. * Store the maximum number of LDM sequences. * Sequence generation now returns the number of last literals. * Fix sequence generation to not throw out the last literals when blocks of more than 1 MB are encountered. Expose reference external sequence API * Expose the reference external sequences API for zstdmt. * Allows external sequences of any length, which get split when necessary. * Reset the LDM window when the context is reset. * Store the maximum number of LDM sequences. * Sequence generation now returns the number of last literals. * Fix sequence generation to not throw out the last literals when blocks of more than 1 MB are encountered. Test Plan: * CI * Test the zstdmt ldm integration stacked on top of this diff Reviewers: cyan Differential Revision: https://phabricator.intern.facebook.com/D7283968 Tasks: T25664120
This commit is contained in:
commit
a271399c97
@ -1922,7 +1922,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
|
||||
return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */
|
||||
if (remaining < blockSize) blockSize = remaining;
|
||||
|
||||
if (ZSTD_window_needOverflowCorrection(ms->window)) {
|
||||
if (ZSTD_window_needOverflowCorrection(ms->window, ip + blockSize)) {
|
||||
U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy);
|
||||
U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
|
||||
ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
|
||||
|
@ -468,13 +468,12 @@ MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
|
||||
/*-*************************************
|
||||
* Round buffer management
|
||||
***************************************/
|
||||
|
||||
#define ZSTD_LOWLIMIT_MAX (3U << 29) /* Max lowLimit allowed */
|
||||
/* Max current allowed */
|
||||
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
|
||||
/* Maximum chunk size before overflow correction needs to be called again */
|
||||
#define ZSTD_CHUNKSIZE_MAX \
|
||||
( ((U32)-1) /* Maximum ending current index */ \
|
||||
- (1U << ZSTD_WINDOWLOG_MAX) /* Max distance from lowLimit to current */ \
|
||||
- ZSTD_LOWLIMIT_MAX) /* Maximum beginning lowLimit */
|
||||
#define ZSTD_CHUNKSIZE_MAX \
|
||||
( ((U32)-1) /* Maximum ending current index */ \
|
||||
- ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */
|
||||
|
||||
/**
|
||||
* ZSTD_window_clear():
|
||||
@ -503,9 +502,11 @@ MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
|
||||
* Returns non-zero if the indices are getting too large and need overflow
|
||||
* protection.
|
||||
*/
|
||||
MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window)
|
||||
MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
|
||||
void const* srcEnd)
|
||||
{
|
||||
return window.lowLimit > ZSTD_LOWLIMIT_MAX;
|
||||
U32 const current = (U32)((BYTE const*)srcEnd - window.base);
|
||||
return current > ZSTD_CURRENT_MAX;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -473,6 +473,7 @@ size_t ZSTD_ldm_generateSequences(
|
||||
{
|
||||
U32 const maxDist = 1U << params->windowLog;
|
||||
BYTE const* const istart = (BYTE const*)src;
|
||||
BYTE const* const iend = istart + srcSize;
|
||||
size_t const kMaxChunkSize = 1 << 20;
|
||||
size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
|
||||
size_t chunk;
|
||||
@ -489,15 +490,17 @@ size_t ZSTD_ldm_generateSequences(
|
||||
assert(sequences->pos <= sequences->size);
|
||||
assert(sequences->size <= sequences->capacity);
|
||||
for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
|
||||
size_t const chunkStart = chunk * kMaxChunkSize;
|
||||
size_t const chunkEnd = MIN(chunkStart + kMaxChunkSize, srcSize);
|
||||
BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
|
||||
size_t const remaining = (size_t)(iend - chunkStart);
|
||||
BYTE const *const chunkEnd =
|
||||
(remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
|
||||
size_t const chunkSize = chunkEnd - chunkStart;
|
||||
size_t newLeftoverSize;
|
||||
size_t const prevSize = sequences->size;
|
||||
|
||||
assert(chunkStart < srcSize);
|
||||
assert(chunkStart < iend);
|
||||
/* 1. Perform overflow correction if necessary. */
|
||||
if (ZSTD_window_needOverflowCorrection(ldmState->window)) {
|
||||
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
|
||||
U32 const ldmHSize = 1U << params->hashLog;
|
||||
U32 const correction = ZSTD_window_correctOverflow(
|
||||
&ldmState->window, /* cycleLog */ 0, maxDist, src);
|
||||
@ -511,12 +514,10 @@ size_t ZSTD_ldm_generateSequences(
|
||||
* * Try invalidation after the sequence generation and test the
|
||||
* the offset against maxDist directly.
|
||||
*/
|
||||
ZSTD_window_enforceMaxDist(&ldmState->window, istart + chunkEnd,
|
||||
maxDist);
|
||||
ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist);
|
||||
/* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
|
||||
newLeftoverSize = ZSTD_ldm_generateSequences_internal(
|
||||
ldmState, sequences, params, istart + chunkStart,
|
||||
chunkSize);
|
||||
ldmState, sequences, params, chunkStart, chunkSize);
|
||||
if (ZSTD_isError(newLeftoverSize))
|
||||
return newLeftoverSize;
|
||||
/* 4. We add the leftover literals from previous iterations to the first
|
||||
|
@ -394,6 +394,21 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3d : large window log smaller data : ", testNb++);
|
||||
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
ZSTD_parameters params = ZSTD_getParams(1, ZSTD_CONTENTSIZE_UNKNOWN, 0);
|
||||
size_t const nbCompressions = (1U << 31) / CNBuffSize + 1;
|
||||
size_t i;
|
||||
params.fParams.contentSizeFlag = 0;
|
||||
params.cParams.windowLog = ZSTD_WINDOWLOG_MAX;
|
||||
for (i = 0; i < nbCompressions; ++i) {
|
||||
CHECK_Z( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, ZSTD_CONTENTSIZE_UNKNOWN) ); /* re-use same parameters */
|
||||
CHECK_Z( ZSTD_compressEnd(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize) );
|
||||
}
|
||||
ZSTD_freeCCtx(cctx);
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
/* Static CCtx tests */
|
||||
#define STATIC_CCTX_LEVEL 3
|
||||
DISPLAYLEVEL(3, "test%3i : create static CCtx for level %u :", testNb++, STATIC_CCTX_LEVEL);
|
||||
|
Loading…
Reference in New Issue
Block a user