This commit is contained in:
Sen Huang 2019-10-21 18:46:17 -04:00
commit b6c3459d50
2 changed files with 18 additions and 12 deletions

View File

@ -387,7 +387,7 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
case ZSTD_c_forceAttachDict: case ZSTD_c_forceAttachDict:
ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy); ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy);
bounds.lowerBound = ZSTD_dictDefaultAttach; bounds.lowerBound = ZSTD_dictDefaultAttach;
bounds.upperBound = ZSTD_dictForceSource; /* note : how to ensure at compile time that this is the highest value enum ? */ bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */
return bounds; return bounds;
case ZSTD_c_literalCompressionMode: case ZSTD_c_literalCompressionMode:
@ -2890,7 +2890,8 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
bs, ms, ws, params, dict, dictSize, dtlm, workspace); bs, ms, ws, params, dict, dictSize, dtlm, workspace);
} }
#define ZSTD_USE_CDICT_PARAMS_CUTOFF (1 MB) #define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6)
/*! ZSTD_compressBegin_internal() : /*! ZSTD_compressBegin_internal() :
* @return : 0, or an error code */ * @return : 0, or an error code */
@ -2908,13 +2909,15 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
assert(!((dict) && (cdict))); /* either dict or cdict, not both */ assert(!((dict) && (cdict))); /* either dict or cdict, not both */
if ( (cdict) if ( (cdict)
&& (cdict->dictContentSize > 0) && (cdict->dictContentSize > 0)
&& (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF || cdict->compressionLevel == 0) && ( (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
&& (params->attachDictPref != ZSTD_dictForceSource) ) { || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER)
|| cdict->compressionLevel == 0)
&& (params->attachDictPref != ZSTD_dictForceLoad) ) {
return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
} }
FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize,
ZSTDcrp_makeClean, zbuff) ); ZSTDcrp_continue, zbuff) );
{ size_t const dictID = cdict ? { size_t const dictID = cdict ?
ZSTD_compress_insertDictionary( ZSTD_compress_insertDictionary(
cctx->blockState.prevCBlock, &cctx->blockState.matchState, cctx->blockState.prevCBlock, &cctx->blockState.matchState,
@ -2922,8 +2925,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
dictContentType, dtlm, cctx->entropyWorkspace) dictContentType, dtlm, cctx->entropyWorkspace)
: ZSTD_compress_insertDictionary( : ZSTD_compress_insertDictionary(
cctx->blockState.prevCBlock, &cctx->blockState.matchState, cctx->blockState.prevCBlock, &cctx->blockState.matchState,
&cctx->workspace, params, dict, dictSize, dictContentType, dtlm, params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace);
cctx->entropyWorkspace);
FORWARD_IF_ERROR(dictID); FORWARD_IF_ERROR(dictID);
assert(dictID <= UINT_MAX); assert(dictID <= UINT_MAX);
cctx->dictID = (U32)dictID; cctx->dictID = (U32)dictID;
@ -3348,8 +3350,10 @@ size_t ZSTD_compressBegin_usingCDict_advanced(
DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
RETURN_ERROR_IF(cdict==NULL, dictionary_wrong); RETURN_ERROR_IF(cdict==NULL, dictionary_wrong);
{ ZSTD_CCtx_params params = cctx->requestedParams; { ZSTD_CCtx_params params = cctx->requestedParams;
params.cParams = ( (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF) || (cdict->compressionLevel == 0) ) params.cParams = ( (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
&& (params.attachDictPref != ZSTD_dictForceSource) ? || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER)
|| (cdict->compressionLevel == 0) )
&& (params.attachDictPref != ZSTD_dictForceLoad) ?
ZSTD_getCParamsFromCDict(cdict) ZSTD_getCParamsFromCDict(cdict)
: ZSTD_getCParams(cdict->compressionLevel, : ZSTD_getCParams(cdict->compressionLevel,
pledgedSrcSize, pledgedSrcSize,

View File

@ -1166,8 +1166,10 @@ typedef enum {
* faster than copying the CDict's tables. * faster than copying the CDict's tables.
* *
* - The CDict's tables are not used at all, and instead we use the working * - The CDict's tables are not used at all, and instead we use the working
* context alone to determine how our tables are initialized. This method * context alone to reload the dictionary and use params based on the source
* should be used when using a small dictionary to compress a large input. * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
* This method is effective when the dictionary sizes are very small relative
* to the input size, and the input size is fairly large to begin with.
* *
* Zstd has a simple internal heuristic that selects which strategy to use * Zstd has a simple internal heuristic that selects which strategy to use
* at the beginning of a compression. However, if experimentation shows that * at the beginning of a compression. However, if experimentation shows that
@ -1177,7 +1179,7 @@ typedef enum {
ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */
ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */
ZSTD_dictForceSource = 3, /* Always use src input to determine tables */ ZSTD_dictForceLoad = 3, /* Always reload the dictionary */
} ZSTD_dictAttachPref_e; } ZSTD_dictAttachPref_e;
typedef enum { typedef enum {