Merge pull request #1624 from facebook/smallwlog

Improves compression ratio for small windowLog
This commit is contained in:
Yann Collet 2019-06-14 17:28:21 -07:00 committed by GitHub
commit 9af909bf35
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 115 additions and 50 deletions

View File

@ -1387,7 +1387,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
note : `params` are assumed fully validated at this stage */
static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
ZSTD_CCtx_params params,
U64 pledgedSrcSize,
U64 const pledgedSrcSize,
ZSTD_compResetPolicy_e const crp,
ZSTD_buffered_policy_e const zbuff)
{
@ -2868,7 +2868,8 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
ms->dictMatchState = NULL;
}
ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
/* Ensure hash/chain table insertion resumes no sooner than lowlimit */
if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
@ -3296,12 +3297,11 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
ZSTDcrp_continue, zbuff) );
{
size_t const dictID = ZSTD_compress_insertDictionary(
{ size_t const dictID = ZSTD_compress_insertDictionary(
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
&params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace);
FORWARD_IF_ERROR(dictID);
assert(dictID <= (size_t)(U32)-1);
assert(dictID <= UINT_MAX);
cctx->dictID = (U32)dictID;
}
return 0;

View File

@ -141,7 +141,7 @@ struct ZSTD_matchState_t {
U32* hashTable3;
U32* chainTable;
optState_t opt; /* optimal parser state */
const ZSTD_matchState_t * dictMatchState;
const ZSTD_matchState_t* dictMatchState;
ZSTD_compressionParameters cParams;
};
@ -731,6 +731,28 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
}
}
/* ZSTD_checkDictValidity():
 * Lighter counterpart of ZSTD_window_enforceMaxDist() :
 * it never touches the window itself, it only detaches the dictionary
 * once the end of the current block lies more than `maxDist` past
 * the end of the loaded dictionary.
 *
 * window            : only `window->base` is read, to convert `blockEnd` into an index.
 * blockEnd          : pointer to the end of the block about to be processed.
 * maxDist           : maximum allowed distance, compared against index values.
 * loadedDictEndPtr  : optional (may be NULL). When NULL, or when it points to 0,
 *                     no dictionary is considered loaded and nothing is modified.
 *                     Reset to 0 when the dictionary gets invalidated.
 * dictMatchStatePtr : optional (may be NULL). Set to NULL when the dictionary gets invalidated.
 *
 * note : `maxDist + loadedDictEnd` is evaluated in U32 arithmetic. */
MEM_STATIC void
ZSTD_checkDictValidity(ZSTD_window_t* window,
                       const void* blockEnd,
                       U32 maxDist,
                       U32* loadedDictEndPtr,
                       const ZSTD_matchState_t** dictMatchStatePtr)
{
    const BYTE* const blockEndPtr = (const BYTE*)blockEnd;
    U32 const blockEndIdx = (U32)(blockEndPtr - window->base);
    U32 dictEndIdx = 0;

    if (loadedDictEndPtr != NULL) dictEndIdx = *loadedDictEndPtr;

    DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)dictEndIdx);

    /* no dictionary currently loaded : nothing to invalidate */
    if (dictEndIdx == 0) return;

    /* block end still within reach of the dictionary : keep it attached */
    if (blockEndIdx <= maxDist + dictEndIdx) return;

    /* input has progressed beyond window size : drop every dictionary reference */
    if (loadedDictEndPtr != NULL) *loadedDictEndPtr = 0;
    if (dictMatchStatePtr != NULL) *dictMatchStatePtr = NULL;
}
/**
* ZSTD_window_update():
* Updates the window by appending [src, src + srcSize) to the window.

View File

@ -43,8 +43,7 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
/* Only load extra positions for ZSTD_dtlm_full */
if (dtlm == ZSTD_dtlm_fast)
break;
}
}
} }
}
@ -63,7 +62,10 @@ size_t ZSTD_compressBlock_doubleFast_generic(
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const U32 prefixLowestIndex = ms->window.dictLimit;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 lowestValid = ms->window.dictLimit;
const U32 maxDistance = 1U << cParams->windowLog;
const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
const BYTE* const prefixLowest = base + prefixLowestIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
@ -95,8 +97,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(
dictCParams->chainLog : hBitsS;
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
/* if a dictionary is attached, it must be within window range */
if (dictMode == ZSTD_dictMatchState) {
assert(lowestValid + maxDistance >= endIndex);
}
/* init */
ip += (dictAndPrefixLength == 0);
if (dictMode == ZSTD_noDict) {
@ -138,7 +147,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
goto _match_stored;
}
@ -147,7 +156,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
goto _match_stored;
}
@ -170,8 +179,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
goto _match_found;
}
}
} }
if (matchIndexS > prefixLowestIndex) {
/* check prefix short match */
@ -186,16 +194,14 @@ size_t ZSTD_compressBlock_doubleFast_generic(
if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
goto _search_next_long;
}
}
} }
ip += ((ip-anchor) >> kSearchStrength) + 1;
continue;
_search_next_long:
{
size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
{ size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
U32 const matchIndexL3 = hashLong[hl3];
const BYTE* matchL3 = base + matchIndexL3;
@ -221,9 +227,7 @@ _search_next_long:
offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
goto _match_found;
}
}
}
} } }
/* if no long +1 match, explore the short match we found */
if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
@ -242,7 +246,7 @@ _match_found:
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
_match_stored:
/* match found */
@ -278,8 +282,7 @@ _match_stored:
continue;
}
break;
}
}
} }
if (dictMode == ZSTD_noDict) {
while ( (ip <= ilimit)
@ -294,14 +297,15 @@ _match_stored:
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
} } } }
} } }
} /* while (ip < ilimit) */
/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved;
rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
@ -360,10 +364,15 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
const U32 prefixStartIndex = ms->window.dictLimit;
const BYTE* const base = ms->window.base;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 lowestValid = ms->window.lowLimit;
const U32 lowLimit = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
const U32 dictStartIndex = lowLimit;
const U32 dictLimit = ms->window.dictLimit;
const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
const BYTE* const prefixStart = base + prefixStartIndex;
const U32 dictStartIndex = ms->window.lowLimit;
const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const dictStart = dictBase + dictStartIndex;
const BYTE* const dictEnd = dictBase + prefixStartIndex;
@ -371,6 +380,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
/* if extDict is invalidated due to maxDistance, switch to "regular" variant */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
@ -396,7 +409,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
} else {
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@ -407,7 +420,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@ -432,7 +445,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
}
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} else {
ip += ((ip-anchor) >> kSearchStrength) + 1;
@ -475,7 +488,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
rep[1] = offset_2;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}

View File

@ -13,7 +13,8 @@
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashTable = ms->hashTable;
@ -41,6 +42,7 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
} } } }
}
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_fast_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@ -58,7 +60,10 @@ size_t ZSTD_compressBlock_fast_generic(
const BYTE* ip0 = istart;
const BYTE* ip1;
const BYTE* anchor = istart;
const U32 prefixStartIndex = ms->window.dictLimit;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 validStartIndex = ms->window.dictLimit;
const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
@ -165,7 +170,7 @@ _match: /* Requires: ip0, match0, offcode */
rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
@ -222,8 +227,15 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
const U32 dictHLog = dictCParams->hashLog;
/* otherwise, we would get index underflow when translating a dict index
* into a local index */
/* if a dictionary is still attached, it necessarily means that
* it is within window size. So we just check it. */
const U32 maxDistance = 1U << cParams->windowLog;
const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
assert(endIndex - prefixStartIndex <= maxDistance);
(void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
/* ensure there will be no underflow
* when translating a dict index into a local index */
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
/* init */
@ -251,7 +263,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
} else if ( (matchIndex <= prefixStartIndex) ) {
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
U32 const dictMatchIndex = dictHashTable[dictHash];
@ -271,7 +283,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
} /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
} else if (MEM_read32(match) != MEM_read32(ip)) {
/* it's not a match, and we're not going to check the dictionary */
@ -286,7 +298,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
&& (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
/* match found */
@ -327,7 +339,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
size_t ZSTD_compressBlock_fast_dictMatchState(
@ -366,15 +378,24 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const U32 dictStartIndex = ms->window.lowLimit;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 validLow = ms->window.lowLimit;
const U32 lowLimit = (endIndex - validLow > maxDistance) ? endIndex - maxDistance : validLow;
const U32 dictStartIndex = lowLimit;
const BYTE* const dictStart = dictBase + dictStartIndex;
const U32 prefixStartIndex = ms->window.dictLimit;
const U32 dictLimit = ms->window.dictLimit;
const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const dictEnd = dictBase + prefixStartIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
U32 offset_1=rep[0], offset_2=rep[1];
/* switch to "regular" variant if extDict is invalidated due to maxDistance */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
const size_t h = ZSTD_hashPtr(ip, hlog, mls);
@ -394,7 +415,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
} else {
if ( (matchIndex < dictStartIndex) ||
(MEM_read32(match) != MEM_read32(ip)) ) {
@ -410,7 +431,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
offset = current - matchIndex;
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} }
/* found a match : store it */
@ -445,7 +466,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
rep[1] = offset_2;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}

View File

@ -83,7 +83,10 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
U32* largerPtr = smallerPtr + 1;
U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
U32 dummy32; /* to be nullified at the end */
U32 const windowLow = ms->window.lowLimit;
U32 const windowValid = ms->window.lowLimit;
U32 const maxDistance = 1U << cParams->windowLog;
U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
current, dictLimit, windowLow);
@ -239,7 +242,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base);
U32 const windowLow = ms->window.lowLimit;
U32 const maxDistance = 1U << cParams->windowLog;
U32 const windowValid = ms->window.lowLimit;
U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
U32* const bt = ms->chainTable;
U32 const btLog = cParams->chainLog - 1;
@ -490,8 +495,10 @@ size_t ZSTD_HcFindBestMatch_generic (
const U32 dictLimit = ms->window.dictLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const U32 lowLimit = ms->window.lowLimit;
const U32 current = (U32)(ip-base);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 lowValid = ms->window.lowLimit;
const U32 lowLimit = (current - lowValid > maxDistance) ? current - maxDistance : lowValid;
const U32 minChain = current > chainSize ? current - chainSize : 0;
U32 nbAttempts = 1U << cParams->searchLog;
size_t ml=4-1;

View File

@ -547,6 +547,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
U32 const maxDistance = 1U << cParams->windowLog;
const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base);
U32 const hashLog = cParams->hashLog;
@ -562,8 +563,9 @@ U32 ZSTD_insertBtAndGetAllMatches (
U32 const dictLimit = ms->window.dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
U32 const btLow = btMask >= current ? 0 : current - btMask;
U32 const windowLow = ms->window.lowLimit;
U32 const btLow = (btMask >= current) ? 0 : current - btMask;
U32 const windowValid = ms->window.lowLimit;
U32 const windowLow = ((current - windowValid) > maxDistance) ? current - maxDistance : windowValid;
U32 const matchLow = windowLow ? windowLow : 1;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = bt + 2*(current&btMask) + 1;