Merge pull request #1824 from senhuang42/new_path_for_cdict
Avoid using CDict params when input is large.
This commit is contained in:
commit
a9a216a846
@ -50,6 +50,7 @@ struct ZSTD_CDict_s {
|
||||
ZSTD_compressedBlockState_t cBlockState;
|
||||
ZSTD_customMem customMem;
|
||||
U32 dictID;
|
||||
int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
|
||||
}; /* typedef'd to ZSTD_CDict within "zstd.h" */
|
||||
|
||||
ZSTD_CCtx* ZSTD_createCCtx(void)
|
||||
@ -387,7 +388,7 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
||||
case ZSTD_c_forceAttachDict:
|
||||
ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy);
|
||||
bounds.lowerBound = ZSTD_dictDefaultAttach;
|
||||
bounds.upperBound = ZSTD_dictForceCopy; /* note : how to ensure at compile time that this is the highest value enum ? */
|
||||
bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */
|
||||
return bounds;
|
||||
|
||||
case ZSTD_c_literalCompressionMode:
|
||||
@ -2912,6 +2913,9 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
|
||||
bs, ms, ws, params, dict, dictSize, dtlm, workspace);
|
||||
}
|
||||
|
||||
#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
|
||||
#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6)
|
||||
|
||||
/*! ZSTD_compressBegin_internal() :
|
||||
* @return : 0, or an error code */
|
||||
static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
|
||||
@ -2926,17 +2930,26 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
|
||||
/* params are supposed to be fully validated at this point */
|
||||
assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
|
||||
assert(!((dict) && (cdict))); /* either dict or cdict, not both */
|
||||
|
||||
if (cdict && cdict->dictContentSize>0) {
|
||||
if ( (cdict)
|
||||
&& (cdict->dictContentSize > 0)
|
||||
&& ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
|
||||
|| pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
|
||||
|| cdict->compressionLevel == 0)
|
||||
&& (params->attachDictPref != ZSTD_dictForceLoad) ) {
|
||||
return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
|
||||
}
|
||||
|
||||
FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize,
|
||||
ZSTDcrp_makeClean, zbuff) );
|
||||
{ size_t const dictID = ZSTD_compress_insertDictionary(
|
||||
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
|
||||
&cctx->workspace, params, dict, dictSize, dictContentType, dtlm,
|
||||
cctx->entropyWorkspace);
|
||||
{ size_t const dictID = cdict ?
|
||||
ZSTD_compress_insertDictionary(
|
||||
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
|
||||
&cctx->workspace, params, cdict->dictContent, cdict->dictContentSize,
|
||||
dictContentType, dtlm, cctx->entropyWorkspace)
|
||||
: ZSTD_compress_insertDictionary(
|
||||
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
|
||||
&cctx->workspace, params, dict, dictSize,
|
||||
dictContentType, dtlm, cctx->entropyWorkspace);
|
||||
FORWARD_IF_ERROR(dictID);
|
||||
assert(dictID <= UINT_MAX);
|
||||
cctx->dictID = (U32)dictID;
|
||||
@ -3254,6 +3267,8 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
|
||||
assert(cdict != NULL);
|
||||
ZSTD_cwksp_move(&cdict->workspace, &ws);
|
||||
cdict->customMem = customMem;
|
||||
cdict->compressionLevel = 0; /* signals advanced API usage */
|
||||
|
||||
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
|
||||
dictBuffer, dictSize,
|
||||
dictLoadMethod, dictContentType,
|
||||
@ -3269,9 +3284,12 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
|
||||
ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
|
||||
{
|
||||
ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
|
||||
return ZSTD_createCDict_advanced(dict, dictSize,
|
||||
ZSTD_dlm_byCopy, ZSTD_dct_auto,
|
||||
cParams, ZSTD_defaultCMem);
|
||||
ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize,
|
||||
ZSTD_dlm_byCopy, ZSTD_dct_auto,
|
||||
cParams, ZSTD_defaultCMem);
|
||||
if (cdict)
|
||||
cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
|
||||
return cdict;
|
||||
}
|
||||
|
||||
ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
|
||||
@ -3361,7 +3379,14 @@ size_t ZSTD_compressBegin_usingCDict_advanced(
|
||||
DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
|
||||
RETURN_ERROR_IF(cdict==NULL, dictionary_wrong);
|
||||
{ ZSTD_CCtx_params params = cctx->requestedParams;
|
||||
params.cParams = ZSTD_getCParamsFromCDict(cdict);
|
||||
params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
|
||||
|| pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
|
||||
|| cdict->compressionLevel == 0 )
|
||||
&& (params.attachDictPref != ZSTD_dictForceLoad) ?
|
||||
ZSTD_getCParamsFromCDict(cdict)
|
||||
: ZSTD_getCParams(cdict->compressionLevel,
|
||||
pledgedSrcSize,
|
||||
cdict->dictContentSize);
|
||||
/* Increase window log to fit the entire dictionary and source if the
|
||||
* source size is known. Limit the increase to 19, which is the
|
||||
* window log for compression level 1 with the largest source size.
|
||||
|
11
lib/zstd.h
11
lib/zstd.h
@ -1152,7 +1152,7 @@ typedef enum {
|
||||
* to evolve and should be considered only in the context of extremely
|
||||
* advanced performance tuning.
|
||||
*
|
||||
* Zstd currently supports the use of a CDict in two ways:
|
||||
* Zstd currently supports the use of a CDict in three ways:
|
||||
*
|
||||
* - The contents of the CDict can be copied into the working context. This
|
||||
* means that the compression can search both the dictionary and input
|
||||
@ -1167,6 +1167,12 @@ typedef enum {
|
||||
* tables. However, this model incurs no start-up cost (as long as the
|
||||
* working context's tables can be reused). For small inputs, this can be
|
||||
* faster than copying the CDict's tables.
|
||||
*
|
||||
* - The CDict's tables are not used at all, and instead we use the working
|
||||
* context alone to reload the dictionary and use params based on the source
|
||||
* size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
|
||||
* This method is effective when the dictionary is very small relative
|
||||
* to the input size, and the input size is fairly large to begin with.
|
||||
*
|
||||
* Zstd has a simple internal heuristic that selects which strategy to use
|
||||
* at the beginning of a compression. However, if experimentation shows that
|
||||
@ -1175,7 +1181,8 @@ typedef enum {
|
||||
*/
|
||||
ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
|
||||
ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */
|
||||
ZSTD_dictForceCopy = 2 /* Always copy the dictionary. */
|
||||
ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */
|
||||
ZSTD_dictForceLoad = 3 /* Always reload the dictionary */
|
||||
} ZSTD_dictAttachPref_e;
|
||||
|
||||
typedef enum {
|
||||
|
Loading…
Reference in New Issue
Block a user