[zstd] Fix data corruption in niche use case
* Extract the overflow correction into a helper function.
* Load the dictionary in chunks of at most `ZSTD_CHUNKSIZE_MAX` (512 MB) and perform overflow correction before each chunk.

Data corruption could happen when all of these conditions are true:
* You are using multithreading mode
* Your overlap size is >= 512 MB (implies window size >= 512 MB)
* You are using a strategy >= ZSTD_btlazy2
* You are compressing more than 4 GB

The problem is that we don't do overflow correction when loading a large dictionary. We can only load 512 MB at a time, and may need to do overflow correction before each chunk.
This commit is contained in:
parent
4156060ca4
commit
674534a700
@ -1826,16 +1826,15 @@ static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const
|
||||
|
||||
/*! ZSTD_reduceIndex() :
|
||||
* rescale all indexes to avoid future overflow (indexes are U32) */
|
||||
static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
|
||||
static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
|
||||
{
|
||||
ZSTD_matchState_t* const ms = &zc->blockState.matchState;
|
||||
{ U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog;
|
||||
{ U32 const hSize = (U32)1 << params->cParams.hashLog;
|
||||
ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
|
||||
}
|
||||
|
||||
if (zc->appliedParams.cParams.strategy != ZSTD_fast) {
|
||||
U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog;
|
||||
if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2)
|
||||
if (params->cParams.strategy != ZSTD_fast) {
|
||||
U32 const chainSize = (U32)1 << params->cParams.chainLog;
|
||||
if (params->cParams.strategy == ZSTD_btlazy2)
|
||||
ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
|
||||
else
|
||||
ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
|
||||
@ -2821,6 +2820,25 @@ out:
|
||||
}
|
||||
|
||||
|
||||
static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, void const* ip, void const* iend)
|
||||
{
|
||||
if (ZSTD_window_needOverflowCorrection(ms->window, iend)) {
|
||||
U32 const maxDist = (U32)1 << params->cParams.windowLog;
|
||||
U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
|
||||
U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
|
||||
ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
|
||||
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
|
||||
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
|
||||
ZSTD_reduceIndex(ms, params, correction);
|
||||
if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
|
||||
else ms->nextToUpdate -= correction;
|
||||
/* invalidate dictionaries on overflow correction */
|
||||
ms->loadedDictEnd = 0;
|
||||
ms->dictMatchState = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*! ZSTD_compress_frameChunk() :
|
||||
* Compress a chunk of data into one or multiple blocks.
|
||||
* All blocks will be terminated, all input will be consumed.
|
||||
@ -2854,20 +2872,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
|
||||
"not enough space to store compressed block");
|
||||
if (remaining < blockSize) blockSize = remaining;
|
||||
|
||||
if (ZSTD_window_needOverflowCorrection(ms->window, ip + blockSize)) {
|
||||
U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy);
|
||||
U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
|
||||
ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
|
||||
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
|
||||
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
|
||||
ZSTD_reduceIndex(cctx, correction);
|
||||
if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
|
||||
else ms->nextToUpdate -= correction;
|
||||
/* invalidate dictionaries on overflow correction */
|
||||
ms->loadedDictEnd = 0;
|
||||
ms->dictMatchState = NULL;
|
||||
}
|
||||
|
||||
ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, ip, ip + blockSize);
|
||||
ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
|
||||
|
||||
/* Ensure hash/chain table insertion resumes no sooner than lowlimit */
|
||||
@ -3007,18 +3012,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
|
||||
|
||||
if (!frame) {
|
||||
/* overflow check and correction for block mode */
|
||||
if (ZSTD_window_needOverflowCorrection(ms->window, (const char*)src + srcSize)) {
|
||||
U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy);
|
||||
U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, 1 << cctx->appliedParams.cParams.windowLog, src);
|
||||
ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
|
||||
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
|
||||
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
|
||||
ZSTD_reduceIndex(cctx, correction);
|
||||
if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
|
||||
else ms->nextToUpdate -= correction;
|
||||
ms->loadedDictEnd = 0;
|
||||
ms->dictMatchState = NULL;
|
||||
}
|
||||
ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, src, (BYTE const*)src + srcSize);
|
||||
}
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
|
||||
@ -3074,7 +3068,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
|
||||
const void* src, size_t srcSize,
|
||||
ZSTD_dictTableLoadMethod_e dtlm)
|
||||
{
|
||||
const BYTE* const ip = (const BYTE*) src;
|
||||
const BYTE* ip = (const BYTE*) src;
|
||||
const BYTE* const iend = ip + srcSize;
|
||||
|
||||
ZSTD_window_update(&ms->window, src, srcSize);
|
||||
@ -3085,32 +3079,42 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
|
||||
|
||||
if (srcSize <= HASH_READ_SIZE) return 0;
|
||||
|
||||
switch(params->cParams.strategy)
|
||||
{
|
||||
case ZSTD_fast:
|
||||
ZSTD_fillHashTable(ms, iend, dtlm);
|
||||
break;
|
||||
case ZSTD_dfast:
|
||||
ZSTD_fillDoubleHashTable(ms, iend, dtlm);
|
||||
break;
|
||||
while (iend - ip > HASH_READ_SIZE) {
|
||||
size_t const remaining = iend - ip;
|
||||
size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
|
||||
const BYTE* const ichunk = ip + chunk;
|
||||
|
||||
case ZSTD_greedy:
|
||||
case ZSTD_lazy:
|
||||
case ZSTD_lazy2:
|
||||
if (srcSize >= HASH_READ_SIZE)
|
||||
ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
|
||||
break;
|
||||
ZSTD_overflowCorrectIfNeeded(ms, params, ip, ichunk);
|
||||
|
||||
case ZSTD_btlazy2: /* we want the dictionary table fully sorted */
|
||||
case ZSTD_btopt:
|
||||
case ZSTD_btultra:
|
||||
case ZSTD_btultra2:
|
||||
if (srcSize >= HASH_READ_SIZE)
|
||||
ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
|
||||
break;
|
||||
switch(params->cParams.strategy)
|
||||
{
|
||||
case ZSTD_fast:
|
||||
ZSTD_fillHashTable(ms, ichunk, dtlm);
|
||||
break;
|
||||
case ZSTD_dfast:
|
||||
ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(0); /* not possible : not a valid strategy id */
|
||||
case ZSTD_greedy:
|
||||
case ZSTD_lazy:
|
||||
case ZSTD_lazy2:
|
||||
if (chunk >= HASH_READ_SIZE)
|
||||
ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
|
||||
break;
|
||||
|
||||
case ZSTD_btlazy2: /* we want the dictionary table fully sorted */
|
||||
case ZSTD_btopt:
|
||||
case ZSTD_btultra:
|
||||
case ZSTD_btultra2:
|
||||
if (chunk >= HASH_READ_SIZE)
|
||||
ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(0); /* not possible : not a valid strategy id */
|
||||
}
|
||||
|
||||
ip = ichunk;
|
||||
}
|
||||
|
||||
ms->nextToUpdate = (U32)(iend - ms->window.base);
|
||||
|
Loading…
Reference in New Issue
Block a user