Add a ZSTD_dedicatedDictSearch value to ZSTD_dictMode_e to allow const propagation

Speed +1.5%.
This commit is contained in:
W. Felix Handte 2020-08-11 18:48:22 -04:00
parent beefdb0d3d
commit 34b545acb0
4 changed files with 85 additions and 20 deletions

View File

@ -2212,7 +2212,7 @@ ZSTD_compressSequences(seqStore_t* seqStorePtr,
* assumption : strat is a valid strategy */
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
{
static const ZSTD_blockCompressor blockCompressor[3][ZSTD_STRATEGY_MAX+1] = {
static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
{ ZSTD_compressBlock_fast /* default for 0 */,
ZSTD_compressBlock_fast,
ZSTD_compressBlock_doubleFast,
@ -2242,7 +2242,17 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMo
ZSTD_compressBlock_btlazy2_dictMatchState,
ZSTD_compressBlock_btopt_dictMatchState,
ZSTD_compressBlock_btultra_dictMatchState,
ZSTD_compressBlock_btultra_dictMatchState }
ZSTD_compressBlock_btultra_dictMatchState },
{ NULL /* default for 0 */,
NULL,
NULL,
ZSTD_compressBlock_greedy_dedicatedDictSearch,
ZSTD_compressBlock_lazy_dedicatedDictSearch,
ZSTD_compressBlock_lazy2_dedicatedDictSearch,
NULL,
NULL,
NULL,
NULL }
};
ZSTD_blockCompressor selectedCompressor;
ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);

View File

@ -299,7 +299,12 @@ struct ZSTD_CCtx_s {
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e;
/* Dictionary mode of a match state.
 * ZSTD_matchState_dictMode() derives the value from the match state, and
 * ZSTD_selectBlockCompressor() takes it as a parameter; its compressor table
 * has four rows (NOTE(review): presumably one per value here, indexed by this
 * enum -- keep the explicit values in sync with that table). */
typedef enum {
ZSTD_noDict = 0,                /* no ext-dict window and ms->dictMatchState == NULL */
ZSTD_extDict = 1,               /* ZSTD_window_hasExtDict(ms->window) is true */
ZSTD_dictMatchState = 2,        /* ms->dictMatchState != NULL, enableDedicatedDictSearch not set */
ZSTD_dedicatedDictSearch = 3    /* ms->dictMatchState != NULL and its enableDedicatedDictSearch is set */
} ZSTD_dictMode_e;
typedef size_t (*ZSTD_blockCompressor) (
@ -763,7 +768,7 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
return ZSTD_window_hasExtDict(ms->window) ?
ZSTD_extDict :
ms->dictMatchState != NULL ?
ZSTD_dictMatchState :
(ms->dictMatchState->enableDedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
ZSTD_noDict;
}

View File

@ -258,6 +258,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
assert(ip <= iend-8); /* required for h calculation */
assert(dictMode != ZSTD_dedicatedDictSearch);
/* reach end of unsorted candidates list */
while ( (matchIndex > unsortLimit)
@ -525,7 +526,7 @@ size_t ZSTD_HcFindBestMatch_generic (
/* HC4 match finder */
U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
if (dictMode == ZSTD_dictMatchState && ms->dictMatchState->enableDedicatedDictSearch)
if (dictMode == ZSTD_dedicatedDictSearch)
PREFETCH_L1(ms->dictMatchState->hashTable +
(ZSTD_hashPtr(ip, ms->dictMatchState->cParams.hashLog - DD_BLOG,
ms->dictMatchState->cParams.minMatch) << DD_BLOG));
@ -555,7 +556,7 @@ size_t ZSTD_HcFindBestMatch_generic (
matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
}
if (dictMode == ZSTD_dictMatchState && ms->dictMatchState->enableDedicatedDictSearch) {
if (dictMode == ZSTD_dedicatedDictSearch) {
const ZSTD_matchState_t* const dms = ms->dictMatchState;
const U32 dmsChainSize = (1 << dms->cParams.chainLog);
const U32 dmsChainMask = dmsChainSize - 1;
@ -665,6 +666,22 @@ static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
}
/* Hash-chain best-match search for the ZSTD_dedicatedDictSearch mode.
 * Dispatches on ms->cParams.minMatch and forwards to
 * ZSTD_HcFindBestMatch_generic with a literal match-length argument:
 *   minMatch == 5       -> 5
 *   minMatch == 6 or 7  -> 6
 *   anything else       -> 4 (this includes 3 and 4)
 * Every call site passes literal constants for the last two arguments
 * (NOTE(review): presumably so the generic routine can be specialized per
 * combination -- confirm it is force-inlined). */
static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS (
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr)
{
    switch (ms->cParams.minMatch)
    {
    case 5 :
        return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch);
    case 6 :
    case 7 :
        return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch);
    case 4 :
    default : /* includes case 3 */
        return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch);
    }
}
FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
ZSTD_matchState_t* ms,
const BYTE* ip, const BYTE* const iLimit,
@ -709,20 +726,21 @@ ZSTD_compressBlock_lazy_generic(
searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
(searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
: ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
(dictMode == ZSTD_dedicatedDictSearch ? ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS :
(searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
: ZSTD_HcFindBestMatch_selectMLS);
: ZSTD_HcFindBestMatch_selectMLS));
U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
const ZSTD_matchState_t* const dms = ms->dictMatchState;
const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ?
const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch ?
dms->window.dictLimit : 0;
const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
const BYTE* const dictBase = dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch ?
dms->window.base : NULL;
const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ?
const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch ?
dictBase + dictLowestIndex : NULL;
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch ?
dms->window.nextSrc : NULL;
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch ?
prefixLowestIndex - (U32)(dictEnd - dictBase) :
0;
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
@ -738,7 +756,7 @@ ZSTD_compressBlock_lazy_generic(
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
}
if (dictMode == ZSTD_dictMatchState) {
if (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) {
/* dictMatchState repCode checks don't currently handle repCode == 0
* disabling. */
assert(offset_1 <= dictAndPrefixLength);
@ -758,9 +776,9 @@ ZSTD_compressBlock_lazy_generic(
const BYTE* start=ip+1;
/* check repCode */
if (dictMode == ZSTD_dictMatchState) {
if (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) {
const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
&& repIndex < prefixLowestIndex) ?
dictBase + (repIndex - dictIndexDelta) :
base + repIndex;
@ -801,7 +819,7 @@ ZSTD_compressBlock_lazy_generic(
if ((mlRep >= 4) && (gain2 > gain1))
matchLength = mlRep, offset = 0, start = ip;
}
if (dictMode == ZSTD_dictMatchState) {
if (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) {
const U32 repIndex = (U32)(ip - base) - offset_1;
const BYTE* repMatch = repIndex < prefixLowestIndex ?
dictBase + (repIndex - dictIndexDelta) :
@ -836,7 +854,7 @@ ZSTD_compressBlock_lazy_generic(
if ((mlRep >= 4) && (gain2 > gain1))
matchLength = mlRep, offset = 0, start = ip;
}
if (dictMode == ZSTD_dictMatchState) {
if (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) {
const U32 repIndex = (U32)(ip - base) - offset_1;
const BYTE* repMatch = repIndex < prefixLowestIndex ?
dictBase + (repIndex - dictIndexDelta) :
@ -874,7 +892,7 @@ ZSTD_compressBlock_lazy_generic(
&& (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
{ start--; matchLength++; }
}
if (dictMode == ZSTD_dictMatchState) {
if (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) {
U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
@ -890,11 +908,11 @@ _storeSequence:
}
/* check immediate repcode */
if (dictMode == ZSTD_dictMatchState) {
if (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) {
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
U32 const repIndex = current2 - offset_2;
const BYTE* repMatch = dictMode == ZSTD_dictMatchState
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
&& repIndex < prefixLowestIndex ?
dictBase - dictIndexDelta + repIndex :
base + repIndex;
@ -990,6 +1008,28 @@ size_t ZSTD_compressBlock_greedy_dictMatchState(
}
/* lazy2 block compressor for the ZSTD_dedicatedDictSearch dictionary mode.
 * Forwards all arguments to ZSTD_compressBlock_lazy_generic, selecting the
 * hash-chain search method and passing the literal 2
 * (NOTE(review): presumably the lazy search depth -- confirm against the
 * generic's parameter list). */
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(
            ms, seqStore, rep,
            src, srcSize,
            search_hashChain, 2, ZSTD_dedicatedDictSearch);
}
/* lazy block compressor for the ZSTD_dedicatedDictSearch dictionary mode.
 * Forwards all arguments to ZSTD_compressBlock_lazy_generic, selecting the
 * hash-chain search method and passing the literal 1
 * (NOTE(review): presumably the lazy search depth -- confirm against the
 * generic's parameter list). */
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(
            ms, seqStore, rep,
            src, srcSize,
            search_hashChain, 1, ZSTD_dedicatedDictSearch);
}
/* greedy block compressor for the ZSTD_dedicatedDictSearch dictionary mode.
 * Forwards all arguments to ZSTD_compressBlock_lazy_generic, selecting the
 * hash-chain search method and passing the literal 0
 * (NOTE(review): presumably the lazy search depth, 0 meaning greedy --
 * confirm against the generic's parameter list). */
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_lazy_generic(
            ms, seqStore, rep,
            src, srcSize,
            search_hashChain, 0, ZSTD_dedicatedDictSearch);
}
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_lazy_extDict_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore,

View File

@ -49,6 +49,16 @@ size_t ZSTD_compressBlock_greedy_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
/* Block compressors for the ZSTD_dedicatedDictSearch dictionary mode
 * (lazy2, lazy and greedy variants). They share the parameter list of the
 * neighbouring ZSTD_compressBlock_* declarations. */
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
size_t ZSTD_compressBlock_greedy_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);