diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 98d06439..c0ec593f 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -387,7 +387,7 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) case ZSTD_c_forceAttachDict: ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy); bounds.lowerBound = ZSTD_dictDefaultAttach; - bounds.upperBound = ZSTD_dictForceSource; /* note : how to ensure at compile time that this is the highest value enum ? */ + bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */ return bounds; case ZSTD_c_literalCompressionMode: @@ -2890,7 +2890,8 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, bs, ms, ws, params, dict, dictSize, dtlm, workspace); } -#define ZSTD_USE_CDICT_PARAMS_CUTOFF (1 MB) +#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB) +#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6) /*! ZSTD_compressBegin_internal() : * @return : 0, or an error code */ @@ -2908,13 +2909,15 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, assert(!((dict) && (cdict))); /* either dict or cdict, not both */ if ( (cdict) && (cdict->dictContentSize > 0) - && (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF || cdict->compressionLevel == 0) - && (params->attachDictPref != ZSTD_dictForceSource) ) { + && ( (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER) + || cdict->compressionLevel == 0) + && (params->attachDictPref != ZSTD_dictForceLoad) ) { return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); } FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, - ZSTDcrp_makeClean, zbuff) ); + ZSTDcrp_continue, zbuff) ); { size_t const dictID = cdict ? ZSTD_compress_insertDictionary( cctx->blockState.prevCBlock, &cctx->blockState.matchState, @@ -2922,8 +2925,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, dictContentType, dtlm, cctx->entropyWorkspace) : ZSTD_compress_insertDictionary( cctx->blockState.prevCBlock, &cctx->blockState.matchState, - &cctx->workspace, params, dict, dictSize, dictContentType, dtlm, - cctx->entropyWorkspace); + params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace); FORWARD_IF_ERROR(dictID); assert(dictID <= UINT_MAX); cctx->dictID = (U32)dictID; @@ -3348,8 +3350,10 @@ size_t ZSTD_compressBegin_usingCDict_advanced( DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); RETURN_ERROR_IF(cdict==NULL, dictionary_wrong); { ZSTD_CCtx_params params = cctx->requestedParams; - params.cParams = ( (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_CUTOFF) || (cdict->compressionLevel == 0) ) - && (params.attachDictPref != ZSTD_dictForceSource) ? + params.cParams = ( (pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER) + || (cdict->compressionLevel == 0) ) + && (params.attachDictPref != ZSTD_dictForceLoad) ? ZSTD_getCParamsFromCDict(cdict) : ZSTD_getCParams(cdict->compressionLevel, pledgedSrcSize, diff --git a/lib/zstd.h b/lib/zstd.h index 5468c34c..3ba476e8 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1166,8 +1166,10 @@ typedef enum { * faster than copying the CDict's tables. * * - The CDict's tables are not used at all, and instead we use the working - * context alone to determine how our tables are initialized. This method - * should be used when using a small dictionary to compress a large input. + * context alone to reload the dictionary and use params based on the source + * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). + * This method is effective when the dictionary sizes are very small relative + * to the input size, and the input size is fairly large to begin with. * * Zstd has a simple internal heuristic that selects which strategy to use * at the beginning of a compression. However, if experimentation shows that @@ -1177,7 +1179,7 @@ typedef enum { ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ - ZSTD_dictForceSource = 3, /* Always use src input to determine tables */ + ZSTD_dictForceLoad = 3, /* Always reload the dictionary */ } ZSTD_dictAttachPref_e; typedef enum {