Merge pull request #2276 from felixhandte/dedicated-dict-search-structure
DDSS For Lazy: Implement a Dedicated Dictionary Hash Table
This commit is contained in:
commit
005ceaa052
@ -156,6 +156,18 @@ createDictionaryBuffer(const char* dictionaryName,
|
||||
}
|
||||
}
|
||||
|
||||
/*! createCDictForDedicatedDictSearch() :
 *  Creates a CDict from `dict` (loaded by copy, content type auto-detected)
 *  with the ZSTD_c_enableDedicatedDictSearch parameter set to 1 and the
 *  requested compression level.
 * @return : the new CDict, or NULL if either the temporary parameter object
 *           or the CDict itself could not be created.
 *           The caller owns the returned CDict. */
static ZSTD_CDict* createCDictForDedicatedDictSearch(const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_CDict* cdict;
    ZSTD_CCtx_params* const params = ZSTD_createCCtxParams();

    /* The parameter object is dereferenced by the calls below:
     * bail out early instead of passing NULL down. */
    if (params == NULL) return NULL;

    ZSTD_CCtxParams_init(params, compressionLevel);
    ZSTD_CCtxParams_setParameter(params, ZSTD_c_enableDedicatedDictSearch, 1);
    ZSTD_CCtxParams_setParameter(params, ZSTD_c_compressionLevel, compressionLevel);

    cdict = ZSTD_createCDict_advanced2(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, params, ZSTD_defaultCMem);

    /* params is only needed while the CDict is being built */
    ZSTD_freeCCtxParams(params);
    return cdict;
}
|
||||
|
||||
/*! BMK_loadFiles() :
|
||||
* Loads `buffer`, with content from files listed within `fileNamesTable`.
|
||||
@ -449,12 +461,14 @@ static void freeCDictCollection(cdict_collection_t cdictc)
|
||||
}
|
||||
|
||||
/* returns .buffers=NULL if operation fails */
|
||||
static cdict_collection_t createCDictCollection(const void* dictBuffer, size_t dictSize, size_t nbCDict, int cLevel)
|
||||
static cdict_collection_t createCDictCollection(const void* dictBuffer, size_t dictSize, size_t nbCDict, int cLevel, int dedicatedDictSearch)
|
||||
{
|
||||
ZSTD_CDict** const cdicts = malloc(nbCDict * sizeof(ZSTD_CDict*));
|
||||
if (cdicts==NULL) return kNullCDictCollection;
|
||||
for (size_t dictNb=0; dictNb < nbCDict; dictNb++) {
|
||||
cdicts[dictNb] = ZSTD_createCDict(dictBuffer, dictSize, cLevel);
|
||||
cdicts[dictNb] = dedicatedDictSearch ?
|
||||
createCDictForDedicatedDictSearch(dictBuffer, dictSize, cLevel) :
|
||||
ZSTD_createCDict(dictBuffer, dictSize, cLevel);
|
||||
CONTROL(cdicts[dictNb] != NULL);
|
||||
}
|
||||
cdict_collection_t cdictc;
|
||||
@ -720,7 +734,8 @@ int bench(const char** fileNameTable, unsigned nbFiles,
|
||||
const char* dictionary,
|
||||
size_t blockSize, int clevel,
|
||||
unsigned nbDictMax, unsigned nbBlocks,
|
||||
unsigned nbRounds, int benchCompression)
|
||||
unsigned nbRounds, int benchCompression,
|
||||
int dedicatedDictSearch)
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
@ -775,7 +790,9 @@ int bench(const char** fileNameTable, unsigned nbFiles,
|
||||
DICTSIZE);
|
||||
CONTROL(dictBuffer.ptr != NULL);
|
||||
|
||||
ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer.ptr, dictBuffer.size, clevel);
|
||||
ZSTD_CDict* const cdict = dedicatedDictSearch ?
|
||||
createCDictForDedicatedDictSearch(dictBuffer.ptr, dictBuffer.size, clevel) :
|
||||
ZSTD_createCDict(dictBuffer.ptr, dictBuffer.size, clevel);
|
||||
CONTROL(cdict != NULL);
|
||||
|
||||
size_t const cTotalSizeNoDict = compressBlocks(NULL, dstSlices, srcSlices, NULL, clevel);
|
||||
@ -798,7 +815,7 @@ int bench(const char** fileNameTable, unsigned nbFiles,
|
||||
|
||||
unsigned const nbDicts = nbDictMax ? nbDictMax : nbBlocks;
|
||||
|
||||
cdict_collection_t const cdictionaries = createCDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts, clevel);
|
||||
cdict_collection_t const cdictionaries = createCDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts, clevel, dedicatedDictSearch);
|
||||
CONTROL(cdictionaries.cdicts != NULL);
|
||||
|
||||
ddict_collection_t const ddictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts);
|
||||
@ -924,6 +941,7 @@ int main (int argc, const char** argv)
|
||||
{
|
||||
int recursiveMode = 0;
|
||||
int benchCompression = 1;
|
||||
int dedicatedDictSearch = 0;
|
||||
unsigned nbRounds = BENCH_TIME_DEFAULT_S;
|
||||
const char* const exeName = argv[0];
|
||||
|
||||
@ -953,6 +971,7 @@ int main (int argc, const char** argv)
|
||||
if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--clevel=")) { cLevel = (int)readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--dedicated-dict-search")) { dedicatedDictSearch = 1; continue; }
|
||||
if (longCommandWArg(&argument, "-")) { cLevel = (int)readU32FromChar(&argument); continue; }
|
||||
/* anything that's not a command is a filename */
|
||||
nameTable[nameIdx++] = argument;
|
||||
@ -970,7 +989,7 @@ int main (int argc, const char** argv)
|
||||
nameTable = NULL; /* UTIL_createFileNamesTable() takes ownership of nameTable */
|
||||
}
|
||||
|
||||
int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression);
|
||||
int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression, dedicatedDictSearch);
|
||||
|
||||
UTIL_freeFileNamesTable(filenameTable);
|
||||
free(nameTable);
|
||||
|
@ -353,6 +353,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
||||
#endif
|
||||
return bounds;
|
||||
|
||||
case ZSTD_c_enableDedicatedDictSearch:
|
||||
bounds.lowerBound = 0;
|
||||
bounds.upperBound = 1;
|
||||
return bounds;
|
||||
|
||||
case ZSTD_c_enableLongDistanceMatching:
|
||||
bounds.lowerBound = 0;
|
||||
bounds.upperBound = 1;
|
||||
@ -396,7 +401,7 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
||||
return bounds;
|
||||
|
||||
case ZSTD_c_forceAttachDict:
|
||||
ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy);
|
||||
ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad);
|
||||
bounds.lowerBound = ZSTD_dictDefaultAttach;
|
||||
bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */
|
||||
return bounds;
|
||||
@ -464,6 +469,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
|
||||
case ZSTD_c_jobSize:
|
||||
case ZSTD_c_overlapLog:
|
||||
case ZSTD_c_rsyncable:
|
||||
case ZSTD_c_enableDedicatedDictSearch:
|
||||
case ZSTD_c_enableLongDistanceMatching:
|
||||
case ZSTD_c_ldmHashLog:
|
||||
case ZSTD_c_ldmMinMatch:
|
||||
@ -514,6 +520,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
|
||||
case ZSTD_c_jobSize:
|
||||
case ZSTD_c_overlapLog:
|
||||
case ZSTD_c_rsyncable:
|
||||
case ZSTD_c_enableDedicatedDictSearch:
|
||||
case ZSTD_c_enableLongDistanceMatching:
|
||||
case ZSTD_c_ldmHashLog:
|
||||
case ZSTD_c_ldmMinMatch:
|
||||
@ -667,6 +674,10 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
||||
return CCtxParams->rsyncable;
|
||||
#endif
|
||||
|
||||
case ZSTD_c_enableDedicatedDictSearch :
|
||||
CCtxParams->enableDedicatedDictSearch = (value!=0);
|
||||
return CCtxParams->enableDedicatedDictSearch;
|
||||
|
||||
case ZSTD_c_enableLongDistanceMatching :
|
||||
CCtxParams->ldmParams.enableLdm = (value!=0);
|
||||
return CCtxParams->ldmParams.enableLdm;
|
||||
@ -794,6 +805,9 @@ size_t ZSTD_CCtxParams_getParameter(
|
||||
*value = CCtxParams->rsyncable;
|
||||
break;
|
||||
#endif
|
||||
case ZSTD_c_enableDedicatedDictSearch :
|
||||
*value = CCtxParams->enableDedicatedDictSearch;
|
||||
break;
|
||||
case ZSTD_c_enableLongDistanceMatching :
|
||||
*value = CCtxParams->ldmParams.enableLdm;
|
||||
break;
|
||||
@ -850,6 +864,17 @@ ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long lo
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(
|
||||
int const compressionLevel,
|
||||
unsigned long long srcSizeHint,
|
||||
size_t const dictSize);
|
||||
static int ZSTD_dedicatedDictSearch_isSupported(
|
||||
int const compressionLevel,
|
||||
unsigned long long srcSizeHint,
|
||||
size_t const dictSize);
|
||||
static void ZSTD_dedicatedDictSearch_revertCParams(
|
||||
ZSTD_compressionParameters* cParams);
|
||||
|
||||
/**
|
||||
* Initializes the local dict using the requested parameters.
|
||||
* NOTE: This does not use the pledged src size, because it may be used for more
|
||||
@ -858,8 +883,6 @@ ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long lo
|
||||
static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
|
||||
{
|
||||
ZSTD_localDict* const dl = &cctx->localDict;
|
||||
ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams(
|
||||
&cctx->requestedParams, ZSTD_CONTENTSIZE_UNKNOWN, dl->dictSize);
|
||||
if (dl->dict == NULL) {
|
||||
/* No local dictionary. */
|
||||
assert(dl->dictBuffer == NULL);
|
||||
@ -876,12 +899,12 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
|
||||
assert(cctx->cdict == NULL);
|
||||
assert(cctx->prefixDict.dict == NULL);
|
||||
|
||||
dl->cdict = ZSTD_createCDict_advanced(
|
||||
dl->cdict = ZSTD_createCDict_advanced2(
|
||||
dl->dict,
|
||||
dl->dictSize,
|
||||
ZSTD_dlm_byRef,
|
||||
dl->dictContentType,
|
||||
cParams,
|
||||
&cctx->requestedParams,
|
||||
cctx->customMem);
|
||||
RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed");
|
||||
cctx->cdict = dl->cdict;
|
||||
@ -1623,7 +1646,9 @@ static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
|
||||
U64 pledgedSrcSize)
|
||||
{
|
||||
size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
|
||||
return ( pledgedSrcSize <= cutoff
|
||||
int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch;
|
||||
return ( dedicatedDictSearch
|
||||
|| pledgedSrcSize <= cutoff
|
||||
|| pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
|
||||
|| params->attachDictPref == ZSTD_dictForceAttach )
|
||||
&& params->attachDictPref != ZSTD_dictForceCopy
|
||||
@ -1638,17 +1663,23 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
|
||||
U64 pledgedSrcSize,
|
||||
ZSTD_buffered_policy_e zbuff)
|
||||
{
|
||||
{ const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams;
|
||||
{
|
||||
ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams;
|
||||
unsigned const windowLog = params.cParams.windowLog;
|
||||
assert(windowLog != 0);
|
||||
/* Resize working context table params for input only, since the dict
|
||||
* has its own tables. */
|
||||
/* pledgedSrcSize == 0 means 0! */
|
||||
params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0);
|
||||
|
||||
if (cdict->matchState.dedicatedDictSearch) {
|
||||
ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams);
|
||||
}
|
||||
|
||||
params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, 0);
|
||||
params.cParams.windowLog = windowLog;
|
||||
FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
|
||||
ZSTDcrp_makeClean, zbuff), "");
|
||||
assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
|
||||
assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy);
|
||||
}
|
||||
|
||||
{ const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
|
||||
@ -1688,6 +1719,8 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
|
||||
{
|
||||
const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
|
||||
|
||||
assert(!cdict->matchState.dedicatedDictSearch);
|
||||
|
||||
DEBUGLOG(4, "copying dictionary into context");
|
||||
|
||||
{ unsigned const windowLog = params.cParams.windowLog;
|
||||
@ -2191,7 +2224,7 @@ ZSTD_compressSequences(seqStore_t* seqStorePtr,
|
||||
* assumption : strat is a valid strategy */
|
||||
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
|
||||
{
|
||||
static const ZSTD_blockCompressor blockCompressor[3][ZSTD_STRATEGY_MAX+1] = {
|
||||
static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
|
||||
{ ZSTD_compressBlock_fast /* default for 0 */,
|
||||
ZSTD_compressBlock_fast,
|
||||
ZSTD_compressBlock_doubleFast,
|
||||
@ -2221,7 +2254,17 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMo
|
||||
ZSTD_compressBlock_btlazy2_dictMatchState,
|
||||
ZSTD_compressBlock_btopt_dictMatchState,
|
||||
ZSTD_compressBlock_btultra_dictMatchState,
|
||||
ZSTD_compressBlock_btultra_dictMatchState }
|
||||
ZSTD_compressBlock_btultra_dictMatchState },
|
||||
{ NULL /* default for 0 */,
|
||||
NULL,
|
||||
NULL,
|
||||
ZSTD_compressBlock_greedy_dedicatedDictSearch,
|
||||
ZSTD_compressBlock_lazy_dedicatedDictSearch,
|
||||
ZSTD_compressBlock_lazy2_dedicatedDictSearch,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL }
|
||||
};
|
||||
ZSTD_blockCompressor selectedCompressor;
|
||||
ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
|
||||
@ -2871,7 +2914,9 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
|
||||
case ZSTD_greedy:
|
||||
case ZSTD_lazy:
|
||||
case ZSTD_lazy2:
|
||||
if (chunk >= HASH_READ_SIZE)
|
||||
if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch)
|
||||
ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE);
|
||||
else if (chunk >= HASH_READ_SIZE)
|
||||
ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
|
||||
break;
|
||||
|
||||
@ -3364,11 +3409,13 @@ static size_t ZSTD_initCDict_internal(
|
||||
const void* dictBuffer, size_t dictSize,
|
||||
ZSTD_dictLoadMethod_e dictLoadMethod,
|
||||
ZSTD_dictContentType_e dictContentType,
|
||||
ZSTD_compressionParameters cParams)
|
||||
ZSTD_compressionParameters cParams,
|
||||
ZSTD_CCtx_params params)
|
||||
{
|
||||
DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
|
||||
assert(!ZSTD_checkCParams(cParams));
|
||||
cdict->matchState.cParams = cParams;
|
||||
cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch;
|
||||
if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
|
||||
cdict->dictContent = dictBuffer;
|
||||
} else {
|
||||
@ -3394,9 +3441,7 @@ static size_t ZSTD_initCDict_internal(
|
||||
/* (Maybe) load the dictionary
|
||||
* Skips loading the dictionary if it is < 8 bytes.
|
||||
*/
|
||||
{ ZSTD_CCtx_params params;
|
||||
ZSTD_memset(&params, 0, sizeof(params));
|
||||
params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
|
||||
{ params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
|
||||
params.fParams.contentSizeFlag = 1;
|
||||
params.cParams = cParams;
|
||||
{ size_t const dictID = ZSTD_compress_insertDictionary(
|
||||
@ -3412,12 +3457,10 @@ static size_t ZSTD_initCDict_internal(
|
||||
return 0;
|
||||
}
|
||||
|
||||
ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
|
||||
static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
|
||||
ZSTD_dictLoadMethod_e dictLoadMethod,
|
||||
ZSTD_dictContentType_e dictContentType,
|
||||
ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
|
||||
{
|
||||
DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType);
|
||||
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
|
||||
|
||||
{ size_t const workspaceSize =
|
||||
@ -3443,10 +3486,64 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
|
||||
cdict->customMem = customMem;
|
||||
cdict->compressionLevel = 0; /* signals advanced API usage */
|
||||
|
||||
return cdict;
|
||||
}
|
||||
}
|
||||
|
||||
ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
|
||||
ZSTD_dictLoadMethod_e dictLoadMethod,
|
||||
ZSTD_dictContentType_e dictContentType,
|
||||
ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
|
||||
{
|
||||
DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType);
|
||||
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
|
||||
|
||||
{ ZSTD_CDict* cdict = ZSTD_createCDict_advanced_internal(dictSize,
|
||||
dictLoadMethod, cParams,
|
||||
customMem);
|
||||
|
||||
ZSTD_CCtx_params params;
|
||||
ZSTD_memset(¶ms, 0, sizeof(params));
|
||||
|
||||
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
|
||||
dictBuffer, dictSize,
|
||||
dictLoadMethod, dictContentType,
|
||||
cParams) )) {
|
||||
cParams, params) )) {
|
||||
ZSTD_freeCDict(cdict);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return cdict;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(const void* dict, size_t dictSize,
|
||||
ZSTD_dictLoadMethod_e dictLoadMethod,
|
||||
ZSTD_dictContentType_e dictContentType,
|
||||
ZSTD_CCtx_params* cctxParams,
|
||||
ZSTD_customMem customMem)
|
||||
{
|
||||
int const dedicatedDictSearch = cctxParams->enableDedicatedDictSearch &&
|
||||
ZSTD_dedicatedDictSearch_isSupported(
|
||||
cctxParams->compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
|
||||
if (!dedicatedDictSearch) {
|
||||
ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(
|
||||
cctxParams->compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
|
||||
return ZSTD_createCDict_advanced(dict, dictSize,
|
||||
dictLoadMethod, dictContentType, cParams,
|
||||
customMem);
|
||||
}
|
||||
{ ZSTD_compressionParameters const cParams = ZSTD_dedicatedDictSearch_getCParams(
|
||||
cctxParams->compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
|
||||
ZSTD_CDict* cdict = ZSTD_createCDict_advanced_internal(dictSize,
|
||||
dictLoadMethod, cParams,
|
||||
customMem);
|
||||
|
||||
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
|
||||
dict, dictSize,
|
||||
dictLoadMethod, dictContentType,
|
||||
cParams, *cctxParams) )) {
|
||||
ZSTD_freeCDict(cdict);
|
||||
return NULL;
|
||||
}
|
||||
@ -3514,6 +3611,7 @@ const ZSTD_CDict* ZSTD_initStaticCDict(
|
||||
+ ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
|
||||
+ matchStateSize;
|
||||
ZSTD_CDict* cdict;
|
||||
ZSTD_CCtx_params params;
|
||||
|
||||
if ((size_t)workspace & 7) return NULL; /* 8-aligned */
|
||||
|
||||
@ -3529,10 +3627,12 @@ const ZSTD_CDict* ZSTD_initStaticCDict(
|
||||
(unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
|
||||
if (workspaceSize < neededSize) return NULL;
|
||||
|
||||
ZSTD_memset(&params, 0, sizeof(params));
|
||||
|
||||
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
|
||||
dict, dictSize,
|
||||
dictLoadMethod, dictContentType,
|
||||
cParams) ))
|
||||
cParams, params) ))
|
||||
return NULL;
|
||||
|
||||
return cdict;
|
||||
@ -4234,6 +4334,57 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
|
||||
},
|
||||
};
|
||||
|
||||
/* ZSTD_dedicatedDictSearch_getCParams() :
 * Fetches the parameters ZSTD_getCParams_internal() selects for
 * (compressionLevel, srcSizeHint, dictSize), then, for the greedy, lazy and
 * lazy2 strategies only, increases hashLog by ZSTD_LAZY_DDSS_BUCKET_LOG.
 * Parameters of every other strategy are returned unmodified. */
static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, unsigned long long srcSizeHint, size_t const dictSize)
{
    ZSTD_compressionParameters ddsParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize);
    int const growsHashTable = (ddsParams.strategy == ZSTD_greedy)
                            || (ddsParams.strategy == ZSTD_lazy)
                            || (ddsParams.strategy == ZSTD_lazy2);

    if (growsHashTable) {
        ddsParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG;
    }
    return ddsParams;
}
|
||||
|
||||
/* ZSTD_dedicatedDictSearch_isSupported() :
 * @return : 1 when the strategy selected for (compressionLevel, srcSizeHint,
 *           dictSize) lies in the range [ZSTD_greedy, ZSTD_lazy2],
 *           0 otherwise. */
static int ZSTD_dedicatedDictSearch_isSupported(int const compressionLevel, unsigned long long srcSizeHint, size_t const dictSize)
{
    ZSTD_compressionParameters const ddsParams = ZSTD_dedicatedDictSearch_getCParams(compressionLevel, srcSizeHint, dictSize);
    ZSTD_strategy const strat = ddsParams.strategy;

    if (strat < ZSTD_greedy) return 0;
    if (strat > ZSTD_lazy2) return 0;
    return 1;
}
|
||||
|
||||
/**
|
||||
* Reverses the adjustment applied to cparams when enabling dedicated dict
|
||||
* search. This is used to recover the params set to be used in the working
|
||||
* context. (Otherwise, those tables would also grow.)
|
||||
*/
|
||||
static void ZSTD_dedicatedDictSearch_revertCParams(
|
||||
ZSTD_compressionParameters* cParams) {
|
||||
switch (cParams->strategy) {
|
||||
case ZSTD_fast:
|
||||
case ZSTD_dfast:
|
||||
break;
|
||||
case ZSTD_greedy:
|
||||
case ZSTD_lazy:
|
||||
case ZSTD_lazy2:
|
||||
cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG;
|
||||
break;
|
||||
case ZSTD_btlazy2:
|
||||
case ZSTD_btopt:
|
||||
case ZSTD_btultra:
|
||||
case ZSTD_btultra2:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*! ZSTD_getCParams_internal() :
|
||||
* @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
|
||||
* Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown.
|
||||
|
@ -28,7 +28,6 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
@ -147,6 +146,9 @@ struct ZSTD_matchState_t {
|
||||
U32* hashTable;
|
||||
U32* hashTable3;
|
||||
U32* chainTable;
|
||||
int dedicatedDictSearch; /* Indicates whether this matchState is using the
|
||||
* dedicated dictionary search structure.
|
||||
*/
|
||||
optState_t opt; /* optimal parser state */
|
||||
const ZSTD_matchState_t* dictMatchState;
|
||||
ZSTD_compressionParameters cParams;
|
||||
@ -228,6 +230,9 @@ struct ZSTD_CCtx_params_s {
|
||||
/* Long distance matching parameters */
|
||||
ldmParams_t ldmParams;
|
||||
|
||||
/* Dedicated dict search algorithm trigger */
|
||||
int enableDedicatedDictSearch;
|
||||
|
||||
/* Internal use, for createCCtxParams() and freeCCtxParams() only */
|
||||
ZSTD_customMem customMem;
|
||||
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
|
||||
@ -289,7 +294,12 @@ struct ZSTD_CCtx_s {
|
||||
|
||||
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
|
||||
|
||||
typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e;
|
||||
typedef enum {
|
||||
ZSTD_noDict = 0,
|
||||
ZSTD_extDict = 1,
|
||||
ZSTD_dictMatchState = 2,
|
||||
ZSTD_dedicatedDictSearch = 3
|
||||
} ZSTD_dictMode_e;
|
||||
|
||||
|
||||
typedef size_t (*ZSTD_blockCompressor) (
|
||||
@ -753,7 +763,7 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
|
||||
return ZSTD_window_hasExtDict(ms->window) ?
|
||||
ZSTD_extDict :
|
||||
ms->dictMatchState != NULL ?
|
||||
ZSTD_dictMatchState :
|
||||
(ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
|
||||
ZSTD_noDict;
|
||||
}
|
||||
|
||||
|
@ -258,6 +258,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
|
||||
|
||||
DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
|
||||
assert(ip <= iend-8); /* required for h calculation */
|
||||
assert(dictMode != ZSTD_dedicatedDictSearch);
|
||||
|
||||
/* reach end of unsorted candidates list */
|
||||
while ( (matchIndex > unsortLimit)
|
||||
@ -446,7 +447,7 @@ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
|
||||
|
||||
/* Update chains up to ip (excluded)
|
||||
Assumption : always within prefix (i.e. not within extDict) */
|
||||
static U32 ZSTD_insertAndFindFirstIndex_internal(
|
||||
FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
|
||||
ZSTD_matchState_t* ms,
|
||||
const ZSTD_compressionParameters* const cParams,
|
||||
const BYTE* ip, U32 const mls)
|
||||
@ -475,6 +476,30 @@ U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
|
||||
return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
|
||||
}
|
||||
|
||||
/* ZSTD_dedicatedDictSearch_lazy_loadDictionary() :
 * Inserts every position from ms->nextToUpdate up to (but excluding) `ip`
 * into the bucketed hash table and the chain table of `ms`.
 * The hash table is addressed in buckets of (1 << ZSTD_LAZY_DDSS_BUCKET_LOG)
 * entries: the position is hashed on (hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG)
 * bits and the hash is shifted left to get the index of the bucket's first
 * slot. On return, ms->nextToUpdate equals the index of `ip`. */
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
{
    const BYTE* const base = ms->window.base;
    U32* const hashTable = ms->hashTable;
    U32* const chainTable = ms->chainTable;
    U32 const chainMask = (1 << ms->cParams.chainLog) - 1;
    U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG;
    U32 const bucketHashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
    U32 const mls = ms->cParams.minMatch;
    U32 const target = (U32)(ip - base);
    U32 pos;

    for (pos = ms->nextToUpdate; pos < target; pos++) {
        U32* const bucket = hashTable
                          + (ZSTD_hashPtr(base + pos, bucketHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG);
        U32 slot = bucketSize - 1;

        /* Move every entry one slot further down; the last one falls off.
         * Slot 0 keeps its previous value until it is overwritten below. */
        while (slot > 0) {
            bucket[slot] = bucket[slot - 1];
            slot--;
        }

        /* Link the previous head into the chain, then install the new head. */
        chainTable[pos & chainMask] = bucket[0];
        bucket[0] = pos;
    }

    ms->nextToUpdate = target;
}
|
||||
|
||||
|
||||
/* inlining is important to hardwire a hot branch (template emulation) */
|
||||
FORCE_INLINE_TEMPLATE
|
||||
@ -503,10 +528,23 @@ size_t ZSTD_HcFindBestMatch_generic (
|
||||
U32 nbAttempts = 1U << cParams->searchLog;
|
||||
size_t ml=4-1;
|
||||
|
||||
/* HC4 match finder */
|
||||
U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
|
||||
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
||||
const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
|
||||
? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
|
||||
const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
|
||||
? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
|
||||
|
||||
for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
|
||||
U32 matchIndex;
|
||||
|
||||
if (dictMode == ZSTD_dedicatedDictSearch) {
|
||||
const U32* entry = &dms->hashTable[ddsIdx];
|
||||
PREFETCH_L1(entry);
|
||||
}
|
||||
|
||||
/* HC4 match finder */
|
||||
matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
|
||||
|
||||
for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
|
||||
size_t currentMl=0;
|
||||
if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
|
||||
const BYTE* const match = base + matchIndex;
|
||||
@ -531,8 +569,74 @@ size_t ZSTD_HcFindBestMatch_generic (
|
||||
matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
|
||||
}
|
||||
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
||||
if (dictMode == ZSTD_dedicatedDictSearch) {
|
||||
const U32 ddsChainSize = (1 << dms->cParams.chainLog);
|
||||
const U32 ddsChainMask = ddsChainSize - 1;
|
||||
const U32 ddsLowestIndex = dms->window.dictLimit;
|
||||
const BYTE* const ddsBase = dms->window.base;
|
||||
const BYTE* const ddsEnd = dms->window.nextSrc;
|
||||
const U32 ddsSize = (U32)(ddsEnd - ddsBase);
|
||||
const U32 ddsIndexDelta = dictLimit - ddsSize;
|
||||
const U32 ddsMinChain = ddsSize > ddsChainSize ? ddsSize - ddsChainSize : 0;
|
||||
const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
|
||||
const U32 bucketLimit = nbAttempts < bucketSize ? nbAttempts : bucketSize;
|
||||
U32 ddsAttempt;
|
||||
|
||||
for (ddsAttempt = 0; ddsAttempt < bucketSize; ddsAttempt++) {
|
||||
PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
|
||||
}
|
||||
|
||||
for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
|
||||
size_t currentMl=0;
|
||||
const BYTE* match;
|
||||
matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
|
||||
match = ddsBase + matchIndex;
|
||||
|
||||
if (matchIndex < ddsLowestIndex) {
|
||||
return ml;
|
||||
}
|
||||
|
||||
assert(match+4 <= ddsEnd);
|
||||
if (MEM_read32(match) == MEM_read32(ip)) {
|
||||
/* assumption : matchIndex <= dictLimit-4 (by table construction) */
|
||||
currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
|
||||
}
|
||||
|
||||
/* save best solution */
|
||||
if (currentMl > ml) {
|
||||
ml = currentMl;
|
||||
*offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
|
||||
if (ip+currentMl == iLimit) {
|
||||
/* best possible, avoids read overflow on next attempt */
|
||||
return ml;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for ( ; (ddsAttempt < nbAttempts) & (matchIndex >= ddsMinChain); ddsAttempt++) {
|
||||
size_t currentMl=0;
|
||||
const BYTE* match;
|
||||
matchIndex = dms->chainTable[matchIndex & ddsChainMask];
|
||||
match = ddsBase + matchIndex;
|
||||
|
||||
if (matchIndex < ddsLowestIndex) {
|
||||
break;
|
||||
}
|
||||
|
||||
assert(match+4 <= ddsEnd);
|
||||
if (MEM_read32(match) == MEM_read32(ip)) {
|
||||
/* assumption : matchIndex <= dictLimit-4 (by table construction) */
|
||||
currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
|
||||
}
|
||||
|
||||
/* save best solution */
|
||||
if (currentMl > ml) {
|
||||
ml = currentMl;
|
||||
*offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
|
||||
if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
|
||||
}
|
||||
}
|
||||
} else if (dictMode == ZSTD_dictMatchState) {
|
||||
const U32* const dmsChainTable = dms->chainTable;
|
||||
const U32 dmsChainSize = (1 << dms->cParams.chainLog);
|
||||
const U32 dmsChainMask = dmsChainSize - 1;
|
||||
@ -545,7 +649,7 @@ size_t ZSTD_HcFindBestMatch_generic (
|
||||
|
||||
matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
|
||||
|
||||
for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
|
||||
for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
|
||||
size_t currentMl=0;
|
||||
const BYTE* const match = dmsBase + matchIndex;
|
||||
assert(match+4 <= dmsEnd);
|
||||
@ -560,6 +664,7 @@ size_t ZSTD_HcFindBestMatch_generic (
|
||||
}
|
||||
|
||||
if (matchIndex <= dmsMinChain) break;
|
||||
|
||||
matchIndex = dmsChainTable[matchIndex & dmsChainMask];
|
||||
}
|
||||
}
|
||||
@ -600,6 +705,22 @@ static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
|
||||
}
|
||||
|
||||
|
||||
static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS (
|
||||
ZSTD_matchState_t* ms,
|
||||
const BYTE* ip, const BYTE* const iLimit,
|
||||
size_t* offsetPtr)
|
||||
{
|
||||
switch(ms->cParams.minMatch)
|
||||
{
|
||||
default : /* includes case 3 */
|
||||
case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch);
|
||||
case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch);
|
||||
case 7 :
|
||||
case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
|
||||
ZSTD_matchState_t* ms,
|
||||
const BYTE* ip, const BYTE* const iLimit,
|
||||
@ -641,27 +762,47 @@ ZSTD_compressBlock_lazy_generic(
|
||||
typedef size_t (*searchMax_f)(
|
||||
ZSTD_matchState_t* ms,
|
||||
const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
|
||||
searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
|
||||
(searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
|
||||
: ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
|
||||
(searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
|
||||
: ZSTD_HcFindBestMatch_selectMLS);
|
||||
|
||||
const searchMax_f searchFuncs[4][2] = {
|
||||
{
|
||||
ZSTD_HcFindBestMatch_selectMLS,
|
||||
ZSTD_BtFindBestMatch_selectMLS
|
||||
},
|
||||
{
|
||||
NULL,
|
||||
NULL
|
||||
},
|
||||
{
|
||||
ZSTD_HcFindBestMatch_dictMatchState_selectMLS,
|
||||
ZSTD_BtFindBestMatch_dictMatchState_selectMLS
|
||||
},
|
||||
{
|
||||
ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS,
|
||||
NULL
|
||||
}
|
||||
};
|
||||
|
||||
searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree];
|
||||
U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
|
||||
|
||||
const int isDMS = dictMode == ZSTD_dictMatchState;
|
||||
const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
|
||||
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
||||
const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ?
|
||||
const U32 dictLowestIndex = isDMS || isDDS ?
|
||||
dms->window.dictLimit : 0;
|
||||
const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
|
||||
const BYTE* const dictBase = isDMS || isDDS ?
|
||||
dms->window.base : NULL;
|
||||
const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ?
|
||||
const BYTE* const dictLowest = isDMS || isDDS ?
|
||||
dictBase + dictLowestIndex : NULL;
|
||||
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
|
||||
const BYTE* const dictEnd = isDMS || isDDS ?
|
||||
dms->window.nextSrc : NULL;
|
||||
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
|
||||
const U32 dictIndexDelta = isDMS || isDDS ?
|
||||
prefixLowestIndex - (U32)(dictEnd - dictBase) :
|
||||
0;
|
||||
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
|
||||
|
||||
assert(searchMax != NULL);
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
|
||||
|
||||
/* init */
|
||||
@ -673,7 +814,7 @@ ZSTD_compressBlock_lazy_generic(
|
||||
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
|
||||
if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
|
||||
}
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
if (isDMS || isDDS) {
|
||||
/* dictMatchState repCode checks don't currently handle repCode == 0
|
||||
* disabling. */
|
||||
assert(offset_1 <= dictAndPrefixLength);
|
||||
@ -693,9 +834,9 @@ ZSTD_compressBlock_lazy_generic(
|
||||
const BYTE* start=ip+1;
|
||||
|
||||
/* check repCode */
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
if (isDMS || isDDS) {
|
||||
const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
|
||||
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
|
||||
const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
|
||||
&& repIndex < prefixLowestIndex) ?
|
||||
dictBase + (repIndex - dictIndexDelta) :
|
||||
base + repIndex;
|
||||
@ -736,7 +877,7 @@ ZSTD_compressBlock_lazy_generic(
|
||||
if ((mlRep >= 4) && (gain2 > gain1))
|
||||
matchLength = mlRep, offset = 0, start = ip;
|
||||
}
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
if (isDMS || isDDS) {
|
||||
const U32 repIndex = (U32)(ip - base) - offset_1;
|
||||
const BYTE* repMatch = repIndex < prefixLowestIndex ?
|
||||
dictBase + (repIndex - dictIndexDelta) :
|
||||
@ -771,7 +912,7 @@ ZSTD_compressBlock_lazy_generic(
|
||||
if ((mlRep >= 4) && (gain2 > gain1))
|
||||
matchLength = mlRep, offset = 0, start = ip;
|
||||
}
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
if (isDMS || isDDS) {
|
||||
const U32 repIndex = (U32)(ip - base) - offset_1;
|
||||
const BYTE* repMatch = repIndex < prefixLowestIndex ?
|
||||
dictBase + (repIndex - dictIndexDelta) :
|
||||
@ -809,7 +950,7 @@ ZSTD_compressBlock_lazy_generic(
|
||||
&& (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
|
||||
{ start--; matchLength++; }
|
||||
}
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
if (isDMS || isDDS) {
|
||||
U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
|
||||
const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
|
||||
const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
|
||||
@ -825,12 +966,11 @@ _storeSequence:
|
||||
}
|
||||
|
||||
/* check immediate repcode */
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
if (isDMS || isDDS) {
|
||||
while (ip <= ilimit) {
|
||||
U32 const current2 = (U32)(ip-base);
|
||||
U32 const repIndex = current2 - offset_2;
|
||||
const BYTE* repMatch = dictMode == ZSTD_dictMatchState
|
||||
&& repIndex < prefixLowestIndex ?
|
||||
const BYTE* repMatch = repIndex < prefixLowestIndex ?
|
||||
dictBase - dictIndexDelta + repIndex :
|
||||
base + repIndex;
|
||||
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
|
||||
@ -925,6 +1065,28 @@ size_t ZSTD_compressBlock_greedy_dictMatchState(
|
||||
}
|
||||
|
||||
|
||||
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize)
|
||||
{
|
||||
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
|
||||
}
|
||||
|
||||
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize)
|
||||
{
|
||||
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
|
||||
}
|
||||
|
||||
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize)
|
||||
{
|
||||
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
|
||||
}
|
||||
|
||||
|
||||
FORCE_INLINE_TEMPLATE
|
||||
size_t ZSTD_compressBlock_lazy_extDict_generic(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore,
|
||||
|
@ -17,8 +17,18 @@ extern "C" {
|
||||
|
||||
#include "zstd_compress_internal.h"
|
||||
|
||||
/**
|
||||
* Dedicated Dictionary Search Structure bucket log. In the
|
||||
* ZSTD_dedicatedDictSearch mode, the hashTable has
|
||||
* 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just
|
||||
* one.
|
||||
*/
|
||||
#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
|
||||
|
||||
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
|
||||
|
||||
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
|
||||
|
||||
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
|
||||
|
||||
size_t ZSTD_compressBlock_btlazy2(
|
||||
@ -47,6 +57,16 @@ size_t ZSTD_compressBlock_greedy_dictMatchState(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
size_t ZSTD_compressBlock_greedy_extDict(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
62
lib/zstd.h
62
lib/zstd.h
@ -412,6 +412,7 @@ typedef enum {
|
||||
* ZSTD_c_literalCompressionMode
|
||||
* ZSTD_c_targetCBlockSize
|
||||
* ZSTD_c_srcSizeHint
|
||||
* ZSTD_c_enableDedicatedDictSearch
|
||||
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
|
||||
* note : never ever use experimentalParam? names directly;
|
||||
* also, the enums values themselves are unstable and can still change.
|
||||
@ -422,7 +423,8 @@ typedef enum {
|
||||
ZSTD_c_experimentalParam4=1001,
|
||||
ZSTD_c_experimentalParam5=1002,
|
||||
ZSTD_c_experimentalParam6=1003,
|
||||
ZSTD_c_experimentalParam7=1004
|
||||
ZSTD_c_experimentalParam7=1004,
|
||||
ZSTD_c_experimentalParam8=1005
|
||||
} ZSTD_cParameter;
|
||||
|
||||
typedef struct {
|
||||
@ -1402,6 +1404,16 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictS
|
||||
ZSTD_compressionParameters cParams,
|
||||
ZSTD_customMem customMem);
|
||||
|
||||
/**
|
||||
* This API is temporary and is expected to change or disappear in the future!
|
||||
*/
|
||||
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(
|
||||
const void* dict, size_t dictSize,
|
||||
ZSTD_dictLoadMethod_e dictLoadMethod,
|
||||
ZSTD_dictContentType_e dictContentType,
|
||||
ZSTD_CCtx_params* cctxParams,
|
||||
ZSTD_customMem customMem);
|
||||
|
||||
ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
|
||||
ZSTD_dictLoadMethod_e dictLoadMethod,
|
||||
ZSTD_dictContentType_e dictContentType,
|
||||
@ -1535,6 +1547,54 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
|
||||
* but compression ratio may regress significantly if guess considerably underestimates */
|
||||
#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
|
||||
|
||||
/* Controls whether the new and experimental "dedicated dictionary search
|
||||
* structure" can be used.
|
||||
*
|
||||
* How to use it:
|
||||
*
|
||||
* When using a CDict, whether to use this feature or not is controlled at
|
||||
* CDict creation, and it must be set in a CCtxParams set passed into that
|
||||
* construction. A compression will then use the feature or not based on how
|
||||
* the CDict was constructed; the value of this param, set in the CCtx, will
|
||||
* have no effect.
|
||||
*
|
||||
* However, when a dictionary buffer is passed into a CCtx, such as via
|
||||
* ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control
|
||||
* whether the CDict that is created internally can use the feature or not.
|
||||
*
|
||||
* What it does:
|
||||
*
|
||||
* Normally, the internal data structures of the CDict are analogous to what
|
||||
* would be stored in a CCtx after compressing the contents of a dictionary.
|
||||
* To an approximation, a compression using a dictionary can then use those
|
||||
* data structures to simply continue what is effectively a streaming
|
||||
* compression where the simulated compression of the dictionary left off.
|
||||
* Which is to say, the search structures in the CDict are normally the same
|
||||
* format as in the CCtx.
|
||||
*
|
||||
* It is possible to do better, since the CDict is not like a CCtx: the search
|
||||
* structures are written once during CDict creation, and then are only read
|
||||
* after that, while the search structures in the CCtx are both read and
|
||||
* written as the compression goes along. This means we can choose a search
|
||||
* structure for the dictionary that is read-optimized.
|
||||
*
|
||||
* This feature enables the use of that different structure. Note that this
|
||||
* means that the CDict tables can no longer be copied into the CCtx, so
|
||||
* the dict attachment mode ZSTD_dictForceCopy will no longer be usable. The
|
||||
* dictionary can only be attached or reloaded.
|
||||
*
|
||||
* Effects:
|
||||
*
|
||||
* This will only have any effect when the selected ZSTD_strategy
|
||||
* implementation supports this feature. Currently, that's limited to
|
||||
* ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2.
|
||||
*
|
||||
* In general, you should expect compression to be faster, and CDict creation
|
||||
* to be slightly slower. Eventually, we will probably make this mode the
|
||||
* default.
|
||||
*/
|
||||
#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8
|
||||
|
||||
/*! ZSTD_CCtx_getParameter() :
|
||||
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
|
||||
* and store it into int* value.
|
||||
|
@ -931,6 +931,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) );
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, comprParams.strategy) );
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) );
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableDedicatedDictSearch, 1) );
|
||||
/* multi-threading */
|
||||
#ifdef ZSTD_MULTITHREAD
|
||||
DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers);
|
||||
|
@ -2922,6 +2922,60 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : testing cdict compression with different attachment strategies : ", testNb++);
|
||||
{
|
||||
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
|
||||
size_t dictSize = CNBuffSize > 110 KB ? 110 KB : CNBuffSize;
|
||||
void* dict = (void*)malloc(dictSize);
|
||||
ZSTD_CCtx_params* cctx_params = ZSTD_createCCtxParams();
|
||||
ZSTD_dictAttachPref_e const attachPrefs[] = {
|
||||
ZSTD_dictDefaultAttach,
|
||||
ZSTD_dictForceAttach,
|
||||
ZSTD_dictForceCopy,
|
||||
ZSTD_dictForceLoad,
|
||||
ZSTD_dictForceAttach
|
||||
};
|
||||
int const enableDedicatedDictSearch[] = {0, 0, 0, 0, 1};
|
||||
int const cLevel = 6;
|
||||
int i;
|
||||
|
||||
RDG_genBuffer(dict, dictSize, 0.5, 0.5, seed);
|
||||
RDG_genBuffer(CNBuffer, CNBuffSize, 0.6, 0.6, seed);
|
||||
|
||||
CHECK(cctx_params != NULL);
|
||||
|
||||
for (i = 0; i < 5; ++i) {
|
||||
ZSTD_dictAttachPref_e const attachPref = attachPrefs[i];
|
||||
int const enableDDS = enableDedicatedDictSearch[i];
|
||||
ZSTD_CDict* cdict;
|
||||
|
||||
DISPLAYLEVEL(5, "\n iter %d ", i);
|
||||
|
||||
ZSTD_CCtxParams_init(cctx_params, cLevel);
|
||||
CHECK_Z(ZSTD_CCtxParams_setParameter(cctx_params, ZSTD_c_enableDedicatedDictSearch, enableDDS));
|
||||
|
||||
cdict = ZSTD_createCDict_advanced2(dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cctx_params, ZSTD_defaultCMem);
|
||||
CHECK(cdict != NULL);
|
||||
|
||||
CHECK_Z(ZSTD_CCtx_refCDict(cctx, cdict));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, attachPref));
|
||||
|
||||
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
|
||||
CHECK_Z(cSize);
|
||||
CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dict, dictSize));
|
||||
|
||||
CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters));
|
||||
ZSTD_freeCDict(cdict);
|
||||
}
|
||||
|
||||
ZSTD_freeCCtx(cctx);
|
||||
ZSTD_freeDCtx(dctx);
|
||||
ZSTD_freeCCtxParams(cctx_params);
|
||||
free(dict);
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
_end:
|
||||
free(CNBuffer);
|
||||
free(compressedBuffer);
|
||||
|
Loading…
Reference in New Issue
Block a user