fixed compression ratio regression when dictionary-compressing medium-size inputs at levels 1-3

This commit is contained in:
Yann Collet 2019-08-01 15:58:17 +02:00
parent c25df8ba3f
commit 98692c2838
6 changed files with 76 additions and 48 deletions

View File

@ -2270,7 +2270,8 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
{
size_t cSize;
DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate);
(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
(unsigned)zc->blockState.matchState.nextToUpdate);
{ const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
FORWARD_IF_ERROR(bss);

View File

@ -134,9 +134,13 @@ typedef struct {
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
struct ZSTD_matchState_t {
ZSTD_window_t window; /* State for window round buffer management */
U32 loadedDictEnd; /* index of end of dictionary, within context's referential. When dict referential is copied into active context (i.e. not attached), effectively same value as dictSize, since referential starts from zero */
U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
* When loadedDictEnd != 0, a dictionary is in use, and still valid.
* When dict referential is copied into active context (i.e. not attached),
* loadedDictEnd == dictSize, since referential starts from zero.
*/
U32 nextToUpdate; /* index from which to continue table update */
U32 hashLog3; /* dispatch table : larger == faster, more memory */
U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
U32* hashTable;
U32* hashTable3;
U32* chainTable;
@ -763,24 +767,37 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
/* Similar to ZSTD_window_enforceMaxDist(),
* but only invalidates dictionary
* when input progresses beyond window size. */
* when input progresses beyond window size.
* assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
* loadedDictEnd uses same referential as window->base
* maxDist is the window size */
MEM_STATIC void
ZSTD_checkDictValidity(ZSTD_window_t* window,
ZSTD_checkDictValidity(const ZSTD_window_t* window,
const void* blockEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const ZSTD_matchState_t** dictMatchStatePtr)
{
U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
assert(loadedDictEndPtr != NULL);
assert(dictMatchStatePtr != NULL);
{ U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
U32 const loadedDictEnd = *loadedDictEndPtr;
DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
assert(blockEndIdx >= loadedDictEnd);
if (loadedDictEnd && (blockEndIdx > maxDist + loadedDictEnd)) {
/* On reaching window size, dictionaries are invalidated */
if (loadedDictEndPtr) *loadedDictEndPtr = 0;
if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
}
if (blockEndIdx > loadedDictEnd + maxDist) {
/* On reaching window size, dictionaries are invalidated.
* For simplification, if window size is reached anywhere within next block,
* the dictionary is invalidated for the full block.
*/
DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
*loadedDictEndPtr = 0;
*dictMatchStatePtr = NULL;
} else {
if (*loadedDictEndPtr != 0) {
DEBUGLOG(6, "dictionary considered valid for current block");
} } }
}
/**

View File

@ -370,8 +370,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* const base = ms->window.base;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 lowestValid = ms->window.lowLimit;
const U32 lowLimit = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
const U32 validLowest = ms->window.lowLimit;
const int isDictionary = (ms->loadedDictEnd != 0);
const U32 withinWindow = (endIndex - validLowest > maxDistance) ? endIndex - maxDistance : validLowest;
const U32 lowLimit = isDictionary ? validLowest : withinWindow;
const U32 dictStartIndex = lowLimit;
const U32 dictLimit = ms->window.dictLimit;
const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;

View File

@ -71,6 +71,7 @@ size_t ZSTD_compressBlock_fast_generic(
U32 offsetSaved = 0;
/* init */
DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
ip0 += (ip0 == prefixStart);
ip1 = ip0 + 1;
{
@ -239,6 +240,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
/* init */
DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
ip += (dictAndPrefixLength == 0);
/* dictMatchState repCode checks don't currently handle repCode == 0
* disabling. */
@ -380,8 +382,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* anchor = istart;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 validLow = ms->window.lowLimit;
const U32 lowLimit = (endIndex - validLow > maxDistance) ? endIndex - maxDistance : validLow;
const U32 validLowest = ms->window.lowLimit;
const int isDictionary = (ms->loadedDictEnd != 0);
const U32 withinWindow = (endIndex - validLowest > maxDistance) ? endIndex - maxDistance : validLowest;
const U32 lowLimit = isDictionary ? validLowest : withinWindow;
const U32 dictStartIndex = lowLimit;
const BYTE* const dictStart = dictBase + dictStartIndex;
const U32 dictLimit = ms->window.dictLimit;
@ -392,6 +396,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* const ilimit = iend - 8;
U32 offset_1=rep[0], offset_2=rep[1];
DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic");
/* switch to "regular" variant if extDict is invalidated due to maxDistance */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
@ -412,8 +418,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
} else {
@ -423,8 +429,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
ip += ((ip-anchor) >> kSearchStrength) + stepSize;
continue;
}
{ const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
{ const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
U32 offset;
mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
@ -451,7 +457,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
ip += repLength2;

View File

@ -619,12 +619,14 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
/* *******************************
* Common parser - lazy strategy
*********************************/
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_lazy_generic(
typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_lazy_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize,
const U32 searchMethod, const U32 depth,
const searchMethod_e searchMethod, const U32 depth,
ZSTD_dictMode_e const dictMode)
{
const BYTE* const istart = (const BYTE*)src;
@ -640,8 +642,8 @@ size_t ZSTD_compressBlock_lazy_generic(
ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
(searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
(searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS);
(searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
(searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS);
U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
const ZSTD_matchState_t* const dms = ms->dictMatchState;
@ -858,56 +860,56 @@ size_t ZSTD_compressBlock_btlazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_noDict);
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
}
size_t ZSTD_compressBlock_lazy2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_noDict);
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
}
size_t ZSTD_compressBlock_lazy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_noDict);
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
}
size_t ZSTD_compressBlock_greedy(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_noDict);
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
}
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_dictMatchState);
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_lazy2_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_dictMatchState);
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_lazy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_dictMatchState);
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
}
size_t ZSTD_compressBlock_greedy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_dictMatchState);
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
}
@ -916,7 +918,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize,
const U32 searchMethod, const U32 depth)
const searchMethod_e searchMethod, const U32 depth)
{
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
@ -934,7 +936,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
typedef size_t (*searchMax_f)(
ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
U32 offset_1 = rep[0], offset_2 = rep[1];
@ -1075,7 +1077,7 @@ _storeSequence:
rep[1] = offset_2;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
@ -1083,7 +1085,7 @@ size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 0);
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
}
size_t ZSTD_compressBlock_lazy_extDict(
@ -1091,7 +1093,7 @@ size_t ZSTD_compressBlock_lazy_extDict(
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 1);
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
}
size_t ZSTD_compressBlock_lazy2_extDict(
@ -1099,7 +1101,7 @@ size_t ZSTD_compressBlock_lazy2_extDict(
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 2);
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
}
size_t ZSTD_compressBlock_btlazy2_extDict(
@ -1107,5 +1109,5 @@ size_t ZSTD_compressBlock_btlazy2_extDict(
void const* src, size_t srcSize)
{
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 1, 2);
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
}

View File

@ -231,7 +231,7 @@ static int FUZ_mallocTests_internal(unsigned seed, double compressibility, unsig
/* advanced MT API test */
if (part <= 3)
{ unsigned nbThreads;
{ int nbThreads;
for (nbThreads=1; nbThreads<=4; nbThreads++) {
int compressionLevel;
for (compressionLevel=1; compressionLevel<=6; compressionLevel++) {
@ -242,7 +242,7 @@ static int FUZ_mallocTests_internal(unsigned seed, double compressibility, unsig
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbThreads) );
CHECK_Z( ZSTD_compress2(cctx, outBuffer, outSize, inBuffer, inSize) );
ZSTD_freeCCtx(cctx);
DISPLAYLEVEL(3, "compress_generic,-T%u,end level %i : ",
DISPLAYLEVEL(3, "compress_generic,-T%i,end level %i : ",
nbThreads, compressionLevel);
FUZ_displayMallocStats(malcount);
} } }
@ -450,7 +450,7 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "test%3i : ZSTD_decompressBound test with content size missing : ", testNb++);
{ /* create compressed buffer with content size missing */
ZSTD_CCtx* cctx = ZSTD_createCCtx();
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0) );
CHECKPLUS(r, ZSTD_compress2(cctx,
compressedBuffer, compressedBufferSize,
@ -777,7 +777,7 @@ static int basicUnitTests(U32 seed, double compressibility)
CHECK( ZSTD_initCStream(staticCCtx, 1) );
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : init CStream with dictionary (should fail) : ", testNb++);
DISPLAYLEVEL(3, "test%3i : init static CStream with dictionary (should fail) : ", testNb++);
{ size_t const r = ZSTD_initCStream_usingDict(staticCCtx, CNBuffer, 64 KB, 1);
if (!ZSTD_isError(r)) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
@ -1963,7 +1963,7 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(3, "test%3i : compress lots 3-bytes sequences : ", testNb++);
{ CHECK_V(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(_3BYTESTESTLENGTH),
CNBuffer, _3BYTESTESTLENGTH, 19) );
CNBuffer, _3BYTESTESTLENGTH, 19) );
cSize = r; }
DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (unsigned)cSize, (double)cSize/_3BYTESTESTLENGTH*100);