btlazy2: fixed interaction between unsortedMark and reduceTable

This commit is contained in:
Yann Collet 2017-12-29 19:08:51 +01:00
parent 4c7f137bd2
commit 02f64ef955
4 changed files with 47 additions and 10 deletions

View File

@ -1125,8 +1125,15 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
ZSTD_reduceTable(zc->hashTable, hSize, reducerValue);
}
if (zc->appliedParams.cParams.strategy != ZSTD_btlazy2) {
U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog;
ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue);
}
if (zc->appliedParams.cParams.strategy != ZSTD_fast) {
    /* The chain table is rescaled for every strategy except ZSTD_fast. */
    U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog;
    /* Only btlazy2 stores "unsorted" marks in this table (written by ZSTD_updateDUBT()).
     * They must be bumped by reducerValue *before* the reduction so they come out
     * of ZSTD_reduceTable() still equal to the mark.
     * For any other strategy the same cell value is a genuine index and must be left alone. */
    if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2)
        ZSTD_preserveUnsortedMark(zc->chainTable, chainSize, reducerValue);
    ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue);
}
@ -1749,7 +1756,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
U32 const newLowLimit = (U32)(ip+blockSize - cctx->base) - maxDist;
if (cctx->lowLimit < newLowLimit) cctx->lowLimit = newLowLimit;
if (cctx->dictLimit < cctx->lowLimit)
DEBUGLOG(2, "ZSTD_compress_frameChunk : update dictLimit from %u to %u ",
DEBUGLOG(5, "ZSTD_compress_frameChunk : update dictLimit from %u to %u ",
cctx->dictLimit, cctx->lowLimit);
if (cctx->dictLimit < cctx->lowLimit) cctx->dictLimit = cctx->lowLimit;
if (cctx->nextToUpdate < cctx->lowLimit) cctx->nextToUpdate = cctx->lowLimit;
@ -2210,7 +2217,6 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
return op-ostart;
}
size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize)

View File

@ -15,7 +15,36 @@
/*-*************************************
* Binary Tree search
***************************************/
#define ZSTD_DUBT_UNSORTED ((U32)(-1))
#define ZSTD_DUBT_UNSORTED_MARK 1 /* note : index 1 will now be confused with "unsorted" if it is sorted as larger than its predecessor.
It's not a big deal though : the candidate will just be considered unsorted, and be sorted again.
Additionally, candidate position 1 will be lost.
But candidate 1 cannot hide a large tree of candidates, so it's a moderate loss.
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled when a table is re-used with a different strategy */
#define ZSTD_ROWSIZE 16
/*! ZSTD_preserveUnsortedMark_internal() :
 *  Walks `nbRows` rows of ZSTD_ROWSIZE consecutive cells of `table`, and rewrites
 *  every cell equal to ZSTD_DUBT_UNSORTED_MARK as ZSTD_DUBT_UNSORTED_MARK + reducerValue.
 *  All other cells are left untouched.
 *  The fixed-width inner loop helps auto-vectorization. */
static void ZSTD_preserveUnsortedMark_internal (U32* const table, int const nbRows, U32 const reducerValue)
{
    int cellNb = 0;
    int rowNb;
    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
        int column;
        for (column=0; column<ZSTD_ROWSIZE; column++) {
            if (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK)
                table[cellNb] = ZSTD_DUBT_UNSORTED_MARK + reducerValue;
            cellNb++;   /* move on to the next cell : without this, only table[0] would ever be examined */
    }   }
}
/*! ZSTD_preserveUnsortedMark() :
 *  To be called on a table right before ZSTD_reduceTable() shifts it down :
 *  cells holding ZSTD_DUBT_UNSORTED_MARK are raised in advance,
 *  so that the mark is still recognizable once the table has been offset.
 *  `size` is a number of cells ; it must be a multiple of ZSTD_ROWSIZE and fit in an int. */
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue)
{
    U32 const rowCount = size / ZSTD_ROWSIZE;
    assert(size % ZSTD_ROWSIZE == 0);   /* whole number of rows */
    assert(size < (1U<<31));            /* row count is handed over as an int */
    ZSTD_preserveUnsortedMark_internal(table, (int)rowCount, reducerValue);
}
void ZSTD_updateDUBT(ZSTD_CCtx* zc,
const BYTE* ip, const BYTE* iend,
@ -33,7 +62,7 @@ void ZSTD_updateDUBT(ZSTD_CCtx* zc,
U32 idx = zc->nextToUpdate;
if (idx != target)
DEBUGLOG(2, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
idx, target, zc->dictLimit);
assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */
(void)iend;
@ -48,7 +77,7 @@ void ZSTD_updateDUBT(ZSTD_CCtx* zc,
hashTable[h] = idx; /* Update Hash Table */
*nextCandidatePtr = matchIndex; /* update BT like a chain */
*sortMarkPtr = ZSTD_DUBT_UNSORTED;
*sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
}
zc->nextToUpdate = target;
}
@ -166,7 +195,7 @@ static size_t ZSTD_insertBtAndFindBestMatch (
/* reach end of unsorted candidates list */
while ( (matchIndex > unsortLimit)
&& (*unsortedMark == ZSTD_DUBT_UNSORTED)
&& (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
&& (nbCandidates > 1) ) {
DEBUGLOG(8, "ZSTD_insertBtAndFindBestMatch: candidate %u is unsorted",
matchIndex);
@ -179,7 +208,7 @@ static size_t ZSTD_insertBtAndFindBestMatch (
}
if ( (matchIndex > unsortLimit)
&& (*unsortedMark==ZSTD_DUBT_UNSORTED) ) {
&& (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
DEBUGLOG(8, "ZSTD_insertBtAndFindBestMatch: nullify last unsorted candidate %u",
matchIndex);
*nextCandidate = *unsortedMark = 0; /* nullify next candidate if it's still unsorted (note : simplification, detrimental to compression ratio, beneficial for speed) */

View File

@ -20,6 +20,7 @@ extern "C" {
U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls); /* used in ZSTD_loadDictionaryContent() */
void ZSTD_updateDUBT(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iend, U32 mls); /* used in ZSTD_loadDictionaryContent() */
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK */
size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize);

View File

@ -63,7 +63,7 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
if (g_displayLevel>=4) fflush(stderr); } }
if (g_displayLevel>=4) fflush(stdout); } }
/*-*******************************************************
* Fuzzer functions
@ -1337,10 +1337,11 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
crcOrig = XXH64(sampleBuffer, sampleSize, 0);
/* compression tests */
{ unsigned const cLevel =
{ int const cLevel =
( FUZ_rand(&lseed) %
(ZSTD_maxCLevel() - (FUZ_highbit32((U32)sampleSize) / cLevelLimiter)) )
+ 1;
+ 1;
DISPLAYLEVEL(5, "fuzzer t%u: Simple compression test (level %i) \n", testNb, cLevel);
cSize = ZSTD_compressCCtx(ctx, cBuffer, cBufferSize, sampleBuffer, sampleSize, cLevel);
CHECK(ZSTD_isError(cSize), "ZSTD_compressCCtx failed : %s", ZSTD_getErrorName(cSize));