From c497cb67166a380bddd0504cdaced7dcf74c7d8a Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 11 Jun 2020 13:21:28 -0700 Subject: [PATCH 01/48] Add ZSTD_c_enableDedicatedDictSearch Param --- lib/compress/zstd_compress.c | 14 ++++++++++++++ lib/compress/zstd_compress_internal.h | 3 +++ lib/zstd.h | 15 +++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 61369687..0e3eb9e2 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -353,6 +353,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) #endif return bounds; + case ZSTD_c_enableDedicatedDictSearch: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + case ZSTD_c_enableLongDistanceMatching: bounds.lowerBound = 0; bounds.upperBound = 1; @@ -464,6 +469,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_jobSize: case ZSTD_c_overlapLog: case ZSTD_c_rsyncable: + case ZSTD_c_enableDedicatedDictSearch: case ZSTD_c_enableLongDistanceMatching: case ZSTD_c_ldmHashLog: case ZSTD_c_ldmMinMatch: @@ -514,6 +520,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_jobSize: case ZSTD_c_overlapLog: case ZSTD_c_rsyncable: + case ZSTD_c_enableDedicatedDictSearch: case ZSTD_c_enableLongDistanceMatching: case ZSTD_c_ldmHashLog: case ZSTD_c_ldmMinMatch: @@ -667,6 +674,10 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, return CCtxParams->rsyncable; #endif + case ZSTD_c_enableDedicatedDictSearch : + CCtxParams->enableDedicatedDictSearch = (value!=0); + return CCtxParams->enableDedicatedDictSearch; + case ZSTD_c_enableLongDistanceMatching : CCtxParams->ldmParams.enableLdm = (value!=0); return CCtxParams->ldmParams.enableLdm; @@ -794,6 +805,9 @@ size_t ZSTD_CCtxParams_getParameter( *value = CCtxParams->rsyncable; break; #endif + case ZSTD_c_enableDedicatedDictSearch : + *value = CCtxParams->enableDedicatedDictSearch; 
+ break; case ZSTD_c_enableLongDistanceMatching : *value = CCtxParams->ldmParams.enableLdm; break; diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 4760f6b0..b8e7496c 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -228,6 +228,9 @@ struct ZSTD_CCtx_params_s { /* Long distance matching parameters */ ldmParams_t ldmParams; + /* Dedicated dict search algorithm trigger */ + int enableDedicatedDictSearch; + /* Internal use, for createCCtxParams() and freeCCtxParams() only */ ZSTD_customMem customMem; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ diff --git a/lib/zstd.h b/lib/zstd.h index 4e9fd93b..ea3e4653 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -361,6 +361,21 @@ typedef enum { * Deviating far from default value will likely result in a compression ratio decrease. * Special: value 0 means "automatically determine hashRateLog". */ + ZSTD_c_enableDedicatedDictSearch=170, /* Enable the use of the match finder specifically for + * dictionaries. This has several implications: + * 1) We may override cDict params supplied using + * ZSTD_refCDict because the dedicated match finder + * needs to enforce some unique invariants on the + * hashLog and chainLog. + * 2) We will force the dict to be attached + * 3) We will pick cParams based on ZSTD_c_compressionLevel + * and the size of the dictionary which will increase + * the cDict memory usage. + * 4) We will only do this for certain supported levels. + * The exact levels which are supported are determined + * by ZSTD_c_compressionLevel and dictionary size. + * (only ZSTD_greedy, ZSTD_lazy and ZSTD_lazy2) */ + /* frame parameters */ ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) * Content size must be known at the beginning of compression. 
From f10d4e313cf72c22ef03cd63f71000a0381ef78a Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 11 Jun 2020 13:33:52 -0700 Subject: [PATCH 02/48] adding ZSTD_dedicatedDictSearch_defaultCParameters variable --- lib/compress/zstd_compress.c | 108 ++++++++++++++++++++++++++ lib/compress/zstd_compress_internal.h | 6 ++ 2 files changed, 114 insertions(+) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 0e3eb9e2..dac2bf5d 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -4248,6 +4248,114 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV }, }; +static const ZSTD_compressionParameters +ZSTD_dedicatedDictSearch_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { +{ /* "default" - for any dictSize > 256 KB */ + /* W, C, H, S, L, TL, strat */ + { 0, 0, 0, 0, 0, 0, 0 }, /* base (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 1 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 2 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 3 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 4 (not adjusted) */ + { 21, 18, 19 + DD_BLOG, 2, 5, 2, ZSTD_greedy }, /* level 5 */ + { 21, 19, 19 + DD_BLOG, 3, 5, 4, ZSTD_greedy }, /* level 6 */ + { 21, 19, 19 + DD_BLOG, 3, 5, 8, ZSTD_lazy }, /* level 7 */ + { 21, 19, 19 + DD_BLOG, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ + { 21, 19, 20 + DD_BLOG, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ + { 22, 20, 21 + DD_BLOG, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ + { 22, 21, 22 + DD_BLOG, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ + { 22, 21, 22 + DD_BLOG, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 13 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 14 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 15 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 16 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 17 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 18 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* 
level 19 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 20 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 21 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 } /* level 22 (not adjusted) */ +}, +{ /* for dictSize <= 256 KB */ + /* W, C, H, S, L, T, strat */ + { 0, 0, 0, 0, 0, 0, 0 }, /* base (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 1 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 2 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 3 (not adjusted) */ + { 18, 16, 17 + DD_BLOG, 2, 5, 2, ZSTD_greedy }, /* level 4 */ + { 18, 18, 18 + DD_BLOG, 3, 5, 2, ZSTD_greedy }, /* level 5 */ + { 18, 18, 19 + DD_BLOG, 3, 5, 4, ZSTD_lazy }, /* level 6 */ + { 18, 18, 19 + DD_BLOG, 4, 4, 4, ZSTD_lazy }, /* level 7 */ + { 18, 18, 19 + DD_BLOG, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 18, 18, 19 + DD_BLOG, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 18, 18, 19 + DD_BLOG, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 11 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 12 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 13 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 14 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 15 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 16 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 17 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 18 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 19 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 20 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 21 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 } /* level 22 (not adjusted) */ +}, +{ /* for dictSize <= 128 KB */ + /* W, C, H, S, L, T, strat */ + { 0, 0, 0, 0, 0, 0, 0 }, /* base (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 1 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 2 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 3 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 4 (not adjusted) */ + { 17, 16, 17 + DD_BLOG, 3, 4, 2, 
ZSTD_greedy }, /* level 5 */ + { 17, 17, 17 + DD_BLOG, 3, 4, 4, ZSTD_lazy }, /* level 6 */ + { 17, 17, 17 + DD_BLOG, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 17, 17, 17 + DD_BLOG, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 17, 17, 17 + DD_BLOG, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 17, 17, 17 + DD_BLOG, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 11 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 12 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 13 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 14 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 15 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 16 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 17 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 18 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 19 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 20 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 21 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 } /* level 22 (not adjusted) */ +}, +{ /* for dictSize <= 16 KB */ + /* W, C, H, S, L, T, strat */ + { 0, 0, 0, 0, 0, 0, 0 }, /* base (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 1 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 2 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 3 (not adjusted) */ + { 14, 14, 14 + DD_BLOG, 4, 4, 2, ZSTD_greedy }, /* level 4 */ + { 14, 14, 14 + DD_BLOG, 3, 4, 4, ZSTD_lazy }, /* level 5 */ + { 14, 14, 14 + DD_BLOG, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ + { 14, 14, 14 + DD_BLOG, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 14, 14, 14 + DD_BLOG, 8, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 9 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 10 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 11 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 12 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 13 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 14 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, 
/* level 15 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 16 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 17 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 18 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 19 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 20 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 21 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, 0 } /* level 22 (not adjusted) */ +}, +}; + /*! ZSTD_getCParams_internal() : * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index b8e7496c..fe5b0f2d 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -29,6 +29,12 @@ extern "C" { #endif +/* Dedicated dict search bucket log: + * --------------------------------- + * This determines the additional space we need for the hash table. + * We will have 2^DD_BLOG slots in our bucket. 
*/ +#define DD_BLOG 2 + /*-************************************* * Constants ***************************************/ From e36a373df43dbb7fd9868003a8b442f3e4fb8af0 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 11 Jun 2020 13:48:57 -0700 Subject: [PATCH 03/48] adding dedicatedDictSearch cParams helper methods --- lib/compress/zstd_compress.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index dac2bf5d..b7d7bcd0 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -864,6 +864,9 @@ ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long lo return 0; } +static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize); +static int ZSTD_dedicatedDictSearch_isSupported(int const compressionLevel, size_t const dictSize); + /** * Initializes the local dict using the requested parameters. * NOTE: This does not use the pledged src size, because it may be used for more @@ -4356,6 +4359,19 @@ ZSTD_dedicatedDictSearch_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { }, }; +static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize) +{ + size_t const tableID = (dictSize <= 256 KB) + (dictSize <= 128 KB) + (dictSize <= 16 KB); + size_t const row = compressionLevel; + return ZSTD_dedicatedDictSearch_defaultCParameters[tableID][row]; +} + +static int ZSTD_dedicatedDictSearch_isSupported(int const compressionLevel, size_t const dictSize) +{ + ZSTD_compressionParameters const cParams = ZSTD_dedicatedDictSearch_getCParams(compressionLevel, dictSize); + return (cParams.strategy >= ZSTD_greedy) && (cParams.strategy <= ZSTD_lazy2); +} + /*! ZSTD_getCParams_internal() : * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. 
* Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. From b7dddbe89b19d97e806552e3a79bff17aef3eb09 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 11 Jun 2020 14:19:12 -0700 Subject: [PATCH 04/48] always attach dict when using dedicatedDictSearch --- lib/compress/zstd_compress.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index b7d7bcd0..3671a662 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1640,7 +1640,11 @@ static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, U64 pledgedSrcSize) { size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; - return ( pledgedSrcSize <= cutoff + int const useDedicatedDictSearch = + params->enableDedicatedDictSearch && + ZSTD_dedicatedDictSearch_isSupported(params->compressionLevel, cdict->dictContentSize); + return ( useDedicatedDictSearch + || pledgedSrcSize <= cutoff || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN || params->attachDictPref == ZSTD_dictForceAttach ) && params->attachDictPref != ZSTD_dictForceCopy From 75b63600361b6ddfa34b6c33f91f8035a8b3ad38 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 11 Jun 2020 18:12:48 -0700 Subject: [PATCH 05/48] adding ZSTD_createCDict_advanced2 to zstd.h --- lib/compress/zstd_compress.c | 20 ++++++++++++++++++++ lib/zstd.h | 6 ++++++ 2 files changed, 26 insertions(+) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 3671a662..00a31e85 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3476,6 +3476,26 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, } } +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_CCtx_params cctxParams, + ZSTD_customMem customMem) +{ + int const enableDedicatedDictSearch = 
cctxParams.enableDedicatedDictSearch && + ZSTD_dedicatedDictSearch_isSupported(cctxParams.compressionLevel, dictSize); + if (!enableDedicatedDictSearch) + return ZSTD_createCDict_advanced(dict, dictSize, + dictLoadMethod, dictContentType, cctxParams.cParams, + customMem); + { + ZSTD_compressionParameters const cParams = ZSTD_dedicatedDictSearch_getCParams( + cctxParams.compressionLevel, dictSize); + return ZSTD_createCDict_advanced(dict, dictSize, + dictLoadMethod, dictContentType, cParams, customMem); + } +} + ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) { ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); diff --git a/lib/zstd.h b/lib/zstd.h index ea3e4653..fe5a865d 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1417,6 +1417,12 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictS ZSTD_compressionParameters cParams, ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_CCtx_params cctxParams, + ZSTD_customMem customMem); + ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType, From 50550a14addea833aef2fe2a4137cb3195ee2b2e Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 11 Jun 2020 18:27:07 -0700 Subject: [PATCH 06/48] adding dedicated dict load method to lazy --- lib/compress/zstd_lazy.c | 19 +++++++++++++++++++ lib/compress/zstd_lazy.h | 2 ++ 2 files changed, 21 insertions(+) diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 6371863f..b1171f14 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -475,6 +475,25 @@ U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { return 
ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); } +void ZSTD_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip) +{ + U32 const target = (U32)(ip - ms->window.base); + U32* const chainTable = ms->chainTable; + U32 const chainMask = (1 << ms->cParams.chainLog) - 1; + for (U32 idx = ms->nextToUpdate; idx < target; idx++) { + U32 const h = ZSTD_hashPtr( + ms->window.base + idx, + ms->cParams.hashLog - DD_BLOG, + ms->cParams.minMatch) << DD_BLOG; + chainTable[idx & chainMask] = ms->hashTable[h]; + ms->hashTable[h] = idx; + /* Same logic as before. But now, just copy the into bucket */ + for (U32 i = 0; i < (1 << DD_BLOG); i++) + ms->hashTable[h + i] = chainTable[ms->hashTable[h + i] & chainMask]; + } + ms->nextToUpdate = target; +} + /* inlining is important to hardwire a hot branch (template emulation) */ FORCE_INLINE_TEMPLATE diff --git a/lib/compress/zstd_lazy.h b/lib/compress/zstd_lazy.h index 581936f0..74e9368e 100644 --- a/lib/compress/zstd_lazy.h +++ b/lib/compress/zstd_lazy.h @@ -19,6 +19,8 @@ extern "C" { U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); +void ZSTD_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); + void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). 
preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ size_t ZSTD_compressBlock_btlazy2( From 31e581bf653516089f79b0fdbe268fc8392fd3fe Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 11 Jun 2020 18:34:14 -0700 Subject: [PATCH 07/48] adding enableDedicatedDictSearch to matchState_t --- lib/compress/zstd_compress.c | 4 +++- lib/compress/zstd_compress_internal.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 00a31e85..31c49a9c 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3491,8 +3491,10 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(const void* dict, size_t dict { ZSTD_compressionParameters const cParams = ZSTD_dedicatedDictSearch_getCParams( cctxParams.compressionLevel, dictSize); - return ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, cParams, customMem); + cdict->matchState.enableDedicatedDictSearch = enableDedicatedDictSearch; + return cdict; } } diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index fe5b0f2d..812dd7ac 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -153,6 +153,7 @@ struct ZSTD_matchState_t { U32* hashTable; U32* hashTable3; U32* chainTable; + int enableDedicatedDictSearch; optState_t opt; /* optimal parser state */ const ZSTD_matchState_t* dictMatchState; ZSTD_compressionParameters cParams; From 22705f0c93ab44be6b61730d4fb3786cb19bf54e Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 11 Jun 2020 18:54:44 -0700 Subject: [PATCH 08/48] adding dedicatedDictSearch algorithm --- lib/compress/zstd_lazy.c | 48 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index b1171f14..f58e6a27 100644 --- 
a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -487,9 +487,9 @@ void ZSTD_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip) ms->cParams.minMatch) << DD_BLOG; chainTable[idx & chainMask] = ms->hashTable[h]; ms->hashTable[h] = idx; - /* Same logic as before. But now, just copy the into bucket */ - for (U32 i = 0; i < (1 << DD_BLOG); i++) - ms->hashTable[h + i] = chainTable[ms->hashTable[h + i] & chainMask]; + /* Same logic as before. But now, just copy the chain into the bucket */ + for (U32 i = 0; i < (1 << DD_BLOG) - 1; i++) + ms->hashTable[h + i + 1] = chainTable[ms->hashTable[h + i] & chainMask]; } ms->nextToUpdate = target; } @@ -550,7 +550,46 @@ size_t ZSTD_HcFindBestMatch_generic ( matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); } - if (dictMode == ZSTD_dictMatchState) { + if (dictMode == ZSTD_dictMatchState && ms->dictMatchState->enableDedicatedDictSearch) { + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32 dmsChainSize = (1 << dms->cParams.chainLog); + const U32 dmsChainMask = dmsChainSize - 1; + const U32 dmsLowestIndex = dms->window.dictLimit; + const BYTE* const dmsBase = dms->window.base; + const BYTE* const dmsEnd = dms->window.nextSrc; + const U32 dmsSize = (U32)(dmsEnd - dmsBase); + const U32 dmsIndexDelta = dictLimit - dmsSize; + const U32 dmsMinChain = dmsSize > dmsChainSize ? 
dmsSize - dmsChainSize : 0; + const U32 bucketSize = (1 << DD_BLOG); + + U32 hash = ZSTD_hashPtr(ip, dms->cParams.hashLog - DD_BLOG, mls) << DD_BLOG; + U32 attemptNb = 0; + matchIndex = dms->hashTable[hash]; + + /* Empty chain */ + if (!matchIndex) + return ml; + + for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--, attemptNb++) { + size_t currentMl=0; + const BYTE* const match = dmsBase + matchIndex; + assert(match+4 <= dmsEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= dmsMinChain) break; + + if (attemptNb < bucketSize - 1) matchIndex = dms->hashTable[++hash]; + else matchIndex = dms->chainTable[matchIndex & dmsChainMask]; + } + } else if (dictMode == ZSTD_dictMatchState) { const ZSTD_matchState_t* const dms = ms->dictMatchState; const U32* const dmsChainTable = dms->chainTable; const U32 dmsChainSize = (1 << dms->cParams.chainLog); @@ -579,6 +618,7 @@ size_t ZSTD_HcFindBestMatch_generic ( } if (matchIndex <= dmsMinChain) break; + matchIndex = dmsChainTable[matchIndex & dmsChainMask]; } } From 628559d0e4c8ace6c29c187ac59a897b7ee4b7de Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 11 Jun 2020 19:41:03 -0700 Subject: [PATCH 09/48] loading dict using new algorithm --- lib/compress/zstd_compress.c | 4 +++- lib/compress/zstd_lazy.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 31c49a9c..4ad60400 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2892,7 +2892,9 @@ static size_t 
ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, case ZSTD_greedy: case ZSTD_lazy: case ZSTD_lazy2: - if (chunk >= HASH_READ_SIZE) + if (chunk >= HASH_READ_SIZE && params->enableDedicatedDictSearch) + ZSTD_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE); + else if (chunk >= HASH_READ_SIZE) ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE); break; diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index f58e6a27..e717ef74 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -580,7 +580,7 @@ size_t ZSTD_HcFindBestMatch_generic ( /* save best solution */ if (currentMl > ml) { ml = currentMl; - *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; + *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ } From 71fda0362ff9dc594401bfbc3a793aa884b46e51 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 11 Jun 2020 22:42:11 -0700 Subject: [PATCH 10/48] making cctxParams a pointer --- lib/compress/zstd_compress.c | 10 +++++----- lib/zstd.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 4ad60400..6f1df86a 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3481,18 +3481,18 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType, - ZSTD_CCtx_params cctxParams, + ZSTD_CCtx_params* cctxParams, ZSTD_customMem customMem) { - int const enableDedicatedDictSearch = cctxParams.enableDedicatedDictSearch && - ZSTD_dedicatedDictSearch_isSupported(cctxParams.compressionLevel, dictSize); + int const enableDedicatedDictSearch = cctxParams->enableDedicatedDictSearch && + 
ZSTD_dedicatedDictSearch_isSupported(cctxParams->compressionLevel, dictSize); if (!enableDedicatedDictSearch) return ZSTD_createCDict_advanced(dict, dictSize, - dictLoadMethod, dictContentType, cctxParams.cParams, + dictLoadMethod, dictContentType, cctxParams->cParams, customMem); { ZSTD_compressionParameters const cParams = ZSTD_dedicatedDictSearch_getCParams( - cctxParams.compressionLevel, dictSize); + cctxParams->compressionLevel, dictSize); ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, cParams, customMem); cdict->matchState.enableDedicatedDictSearch = enableDedicatedDictSearch; diff --git a/lib/zstd.h b/lib/zstd.h index fe5a865d..c876f970 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1420,7 +1420,7 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictS ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType, - ZSTD_CCtx_params cctxParams, + ZSTD_CCtx_params* cctxParams, ZSTD_customMem customMem); ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, From a3f6e4026ef60170e7584a16e031bc1750cd805e Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 11 Jun 2020 22:42:57 -0700 Subject: [PATCH 11/48] removing wrong comment --- lib/zstd.h | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/lib/zstd.h b/lib/zstd.h index c876f970..ea7b9ba6 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -361,20 +361,7 @@ typedef enum { * Deviating far from default value will likely result in a compression ratio decrease. * Special: value 0 means "automatically determine hashRateLog". */ - ZSTD_c_enableDedicatedDictSearch=170, /* Enable the use of the match finder specifically for - * dictionaries. 
This has several implications: - * 1) We may override cDict params supplied using - * ZSTD_refCDict because the dedicated match finder - * needs to enforce some unique invariants on the - * hashLog and chainLog. - * 2) We will force the dict to be attached - * 3) We will pick cParams based on ZSTD_c_compressionLevel - * and the size of the dictionary which will increase - * the cDict memory usage. - * 4) We will only do this for certain supported levels. - * The exact levels which are supported are determined - * by ZSTD_c_compressionLevel and dictionary size. - * (only ZSTD_greedy, ZSTD_lazy and ZSTD_lazy2) */ + ZSTD_c_enableDedicatedDictSearch=170, /* frame parameters */ ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) From b30f71becf55adddd9f4e14791748007870721ac Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 11 Jun 2020 22:46:16 -0700 Subject: [PATCH 12/48] pass correct cparams --- lib/compress/zstd_compress.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 6f1df86a..6768c5a1 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3486,10 +3486,13 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(const void* dict, size_t dict { int const enableDedicatedDictSearch = cctxParams->enableDedicatedDictSearch && ZSTD_dedicatedDictSearch_isSupported(cctxParams->compressionLevel, dictSize); - if (!enableDedicatedDictSearch) + if (!enableDedicatedDictSearch) { + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal( + cctxParams->compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); return ZSTD_createCDict_advanced(dict, dictSize, - dictLoadMethod, dictContentType, cctxParams->cParams, + dictLoadMethod, dictContentType, cParams, customMem); + } { ZSTD_compressionParameters const cParams = ZSTD_dedicatedDictSearch_getCParams( cctxParams->compressionLevel, dictSize); From 
5d5507788dc4628db525cba1223f6fa59934bfb2 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 11 Jun 2020 22:48:48 -0700 Subject: [PATCH 13/48] change method name for consistency --- lib/compress/zstd_compress.c | 2 +- lib/compress/zstd_lazy.c | 2 +- lib/compress/zstd_lazy.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 6768c5a1..7855e362 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2893,7 +2893,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, case ZSTD_lazy: case ZSTD_lazy2: if (chunk >= HASH_READ_SIZE && params->enableDedicatedDictSearch) - ZSTD_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE); + ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE); else if (chunk >= HASH_READ_SIZE) ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE); break; diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index e717ef74..096980db 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -475,7 +475,7 @@ U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); } -void ZSTD_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip) +void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip) { U32 const target = (U32)(ip - ms->window.base); U32* const chainTable = ms->chainTable; diff --git a/lib/compress/zstd_lazy.h b/lib/compress/zstd_lazy.h index 74e9368e..5e5eb514 100644 --- a/lib/compress/zstd_lazy.h +++ b/lib/compress/zstd_lazy.h @@ -19,7 +19,7 @@ extern "C" { U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); -void ZSTD_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); +void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); void ZSTD_preserveUnsortedMark (U32* 
const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ From 145c2d12f96d14c41896c91ef517404fbeaf350d Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 11 Jun 2020 22:54:36 -0700 Subject: [PATCH 14/48] add hashtable head prefetching --- lib/compress/zstd_lazy.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 096980db..fd76a55b 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -525,6 +525,11 @@ size_t ZSTD_HcFindBestMatch_generic ( /* HC4 match finder */ U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); + if (dictMode == ZSTD_dictMatchState && ms->dictMatchState->enableDedicatedDictSearch) + PREFETCH_L1(ms->dictMatchState->hashTable + + (ZSTD_hashPtr(ip, ms->dictMatchState->cParams.hashLog - DD_BLOG, + ms->dictMatchState->cParams.minMatch) << DD_BLOG)); + for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { size_t currentMl=0; if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { From e29bc3a0097d8aa5a6a85ac6100277667e025014 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 11 Jun 2020 22:55:32 -0700 Subject: [PATCH 15/48] using dict mls instead of src mls --- lib/compress/zstd_lazy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index fd76a55b..c6bcff87 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -567,7 +567,8 @@ size_t ZSTD_HcFindBestMatch_generic ( const U32 dmsMinChain = dmsSize > dmsChainSize ? 
dmsSize - dmsChainSize : 0; const U32 bucketSize = (1 << DD_BLOG); - U32 hash = ZSTD_hashPtr(ip, dms->cParams.hashLog - DD_BLOG, mls) << DD_BLOG; + U32 hash = ZSTD_hashPtr(ip, dms->cParams.hashLog - DD_BLOG, + dms->cParams.minMatch) << DD_BLOG; U32 attemptNb = 0; matchIndex = dms->hashTable[hash]; From 0a9787c3e19255ae3c018c7906450887c5a832a9 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 11 Jun 2020 22:57:53 -0700 Subject: [PATCH 16/48] changing to int for consistency --- lib/compress/zstd_compress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 7855e362..65b69aad 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -4392,8 +4392,8 @@ ZSTD_dedicatedDictSearch_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize) { - size_t const tableID = (dictSize <= 256 KB) + (dictSize <= 128 KB) + (dictSize <= 16 KB); - size_t const row = compressionLevel; + int const tableID = (dictSize <= 256 KB) + (dictSize <= 128 KB) + (dictSize <= 16 KB); + int const row = compressionLevel; return ZSTD_dedicatedDictSearch_defaultCParameters[tableID][row]; } From 9c628238d366143532a2246ff421a4a6c067aef6 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Fri, 12 Jun 2020 08:01:18 -0700 Subject: [PATCH 17/48] creating ZSTD_createCDict_advanced_internal --- lib/compress/zstd_compress.c | 55 +++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 13 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 65b69aad..ad4087fb 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3387,7 +3387,8 @@ static size_t ZSTD_initCDict_internal( const void* dictBuffer, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters 
cParams) + ZSTD_compressionParameters cParams, + ZSTD_CCtx_params params) { DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType); assert(!ZSTD_checkCParams(cParams)); @@ -3417,9 +3418,7 @@ static size_t ZSTD_initCDict_internal( /* (Maybe) load the dictionary * Skips loading the dictionary if it is < 8 bytes. */ - { ZSTD_CCtx_params params; - ZSTD_memset(&params, 0, sizeof(params)); - params.compressionLevel = ZSTD_CLEVEL_DEFAULT; + { params.compressionLevel = ZSTD_CLEVEL_DEFAULT; params.fParams.contentSizeFlag = 1; params.cParams = cParams; { size_t const dictID = ZSTD_compress_insertDictionary( @@ -3435,12 +3434,10 @@ static size_t ZSTD_initCDict_internal( return 0; } -ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, +static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, ZSTD_compressionParameters cParams, ZSTD_customMem customMem) { - DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType); if (!customMem.customAlloc ^ !customMem.customFree) return NULL; { size_t const workspaceSize = @@ -3466,16 +3463,36 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, cdict->customMem = customMem; cdict->compressionLevel = 0; /* signals advanced API usage */ + return cdict; + } +} + +ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, ZSTD_customMem customMem) +{ + DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType); + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + + { ZSTD_CDict* cdict = ZSTD_createCDict_advanced_internal(dictSize, + dictLoadMethod, cParams, + customMem); + + ZSTD_CCtx_params params; + ZSTD_memset(&params, 0, sizeof(params)); + if (ZSTD_isError( 
ZSTD_initCDict_internal(cdict, dictBuffer, dictSize, dictLoadMethod, dictContentType, - cParams) )) { + cParams, params) )) { ZSTD_freeCDict(cdict); return NULL; } return cdict; } + } ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(const void* dict, size_t dictSize, @@ -3493,12 +3510,21 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(const void* dict, size_t dict dictLoadMethod, dictContentType, cParams, customMem); } - { - ZSTD_compressionParameters const cParams = ZSTD_dedicatedDictSearch_getCParams( + { ZSTD_compressionParameters const cParams = ZSTD_dedicatedDictSearch_getCParams( cctxParams->compressionLevel, dictSize); - ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, - dictLoadMethod, dictContentType, cParams, customMem); + ZSTD_CDict* cdict = ZSTD_createCDict_advanced_internal(dictSize, + dictLoadMethod, cParams, + customMem); cdict->matchState.enableDedicatedDictSearch = enableDedicatedDictSearch; + + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dict, dictSize, + dictLoadMethod, dictContentType, + cParams, *cctxParams) )) { + ZSTD_freeCDict(cdict); + return NULL; + } + return cdict; } } @@ -3577,10 +3603,13 @@ const ZSTD_CDict* ZSTD_initStaticCDict( (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); if (workspaceSize < neededSize) return NULL; + ZSTD_CCtx_params params; + ZSTD_memset(&params, 0, sizeof(params)); + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, dict, dictSize, dictLoadMethod, dictContentType, - cParams) )) + cParams, params) )) return NULL; return cdict; From 80053bdae3f950a1949222d57b8e6976ec98113e Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Fri, 12 Jun 2020 08:53:58 -0700 Subject: [PATCH 18/48] updating cold benchmark --- contrib/largeNbDicts/largeNbDicts.c | 31 +++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/contrib/largeNbDicts/largeNbDicts.c b/contrib/largeNbDicts/largeNbDicts.c index c84d48b2..ddbb3e08 100644 --- 
a/contrib/largeNbDicts/largeNbDicts.c +++ b/contrib/largeNbDicts/largeNbDicts.c @@ -156,6 +156,18 @@ createDictionaryBuffer(const char* dictionaryName, } } +static ZSTD_CDict* createCDictForDedicatedDictSearch(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_CCtx_params* params = ZSTD_createCCtxParams(); + ZSTD_CCtxParams_init(params, compressionLevel); + ZSTD_CCtxParams_setParameter(params, ZSTD_c_enableDedicatedDictSearch, 1); + ZSTD_CCtxParams_setParameter(params, ZSTD_c_compressionLevel, compressionLevel); + + ZSTD_CDict* cdict = ZSTD_createCDict_advanced2(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, params, ZSTD_defaultCMem); + + ZSTD_freeCCtxParams(params); + return cdict; +} /*! BMK_loadFiles() : * Loads `buffer`, with content from files listed within `fileNamesTable`. @@ -449,12 +461,14 @@ static void freeCDictCollection(cdict_collection_t cdictc) } /* returns .buffers=NULL if operation fails */ -static cdict_collection_t createCDictCollection(const void* dictBuffer, size_t dictSize, size_t nbCDict, int cLevel) +static cdict_collection_t createCDictCollection(const void* dictBuffer, size_t dictSize, size_t nbCDict, int cLevel, int dedicatedDictSearch) { ZSTD_CDict** const cdicts = malloc(nbCDict * sizeof(ZSTD_CDict*)); if (cdicts==NULL) return kNullCDictCollection; for (size_t dictNb=0; dictNb < nbCDict; dictNb++) { - cdicts[dictNb] = ZSTD_createCDict(dictBuffer, dictSize, cLevel); + cdicts[dictNb] = dedicatedDictSearch ? 
+ createCDictForDedicatedDictSearch(dictBuffer, dictSize, cLevel) : + ZSTD_createCDict(dictBuffer, dictSize, cLevel); CONTROL(cdicts[dictNb] != NULL); } cdict_collection_t cdictc; @@ -720,7 +734,8 @@ int bench(const char** fileNameTable, unsigned nbFiles, const char* dictionary, size_t blockSize, int clevel, unsigned nbDictMax, unsigned nbBlocks, - unsigned nbRounds, int benchCompression) + unsigned nbRounds, int benchCompression, + int dedicatedDictSearch) { int result = 0; @@ -775,7 +790,9 @@ int bench(const char** fileNameTable, unsigned nbFiles, DICTSIZE); CONTROL(dictBuffer.ptr != NULL); - ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer.ptr, dictBuffer.size, clevel); + ZSTD_CDict* const cdict = dedicatedDictSearch ? + createCDictForDedicatedDictSearch(dictBuffer.ptr, dictBuffer.size, clevel) : + ZSTD_createCDict(dictBuffer.ptr, dictBuffer.size, clevel); CONTROL(cdict != NULL); size_t const cTotalSizeNoDict = compressBlocks(NULL, dstSlices, srcSlices, NULL, clevel); @@ -798,7 +815,7 @@ int bench(const char** fileNameTable, unsigned nbFiles, unsigned const nbDicts = nbDictMax ? 
nbDictMax : nbBlocks; - cdict_collection_t const cdictionaries = createCDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts, clevel); + cdict_collection_t const cdictionaries = createCDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts, clevel, dedicatedDictSearch); CONTROL(cdictionaries.cdicts != NULL); ddict_collection_t const ddictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts); @@ -924,6 +941,7 @@ int main (int argc, const char** argv) { int recursiveMode = 0; int benchCompression = 1; + int dedicatedDictSearch = 0; unsigned nbRounds = BENCH_TIME_DEFAULT_S; const char* const exeName = argv[0]; @@ -953,6 +971,7 @@ int main (int argc, const char** argv) if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--clevel=")) { cLevel = (int)readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--dedicated-dict-search")) { dedicatedDictSearch = 1; continue; } if (longCommandWArg(&argument, "-")) { cLevel = (int)readU32FromChar(&argument); continue; } /* anything that's not a command is a filename */ nameTable[nameIdx++] = argument; @@ -970,7 +989,7 @@ int main (int argc, const char** argv) nameTable = NULL; /* UTIL_createFileNamesTable() takes ownership of nameTable */ } - int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression); + int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression, dedicatedDictSearch); UTIL_freeFileNamesTable(filenameTable); free(nameTable); From def62e2d3e3dd585cd33d3f277a067b4dd0f12ee Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Fri, 7 Aug 2020 12:02:18 -0400 Subject: [PATCH 19/48] Fix Compilation Warnings --- lib/compress/zstd_compress.c | 186 +++++++++++++++++------------------ 1 file changed, 93 insertions(+), 93 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index ad4087fb..b24ab55a 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3588,6 +3588,7 @@ const ZSTD_CDict* ZSTD_initStaticCDict( + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + matchStateSize; ZSTD_CDict* cdict; + ZSTD_CCtx_params params; if ((size_t)workspace & 7) return NULL; /* 8-aligned */ @@ -3603,7 +3604,6 @@ const ZSTD_CDict* ZSTD_initStaticCDict( (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); if (workspaceSize < neededSize) return NULL; - ZSTD_CCtx_params params; ZSTD_memset(&params, 0, sizeof(params)); if (ZSTD_isError( ZSTD_initCDict_internal(cdict, @@ -4315,107 +4315,107 @@ static const ZSTD_compressionParameters ZSTD_dedicatedDictSearch_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { { /* "default" - for any dictSize > 256 KB */ /* W, C, H, S, L, TL, strat */ - { 0, 0, 0, 0, 0, 0, 0 }, /* base (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 1 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 2 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 3 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 4 (not adjusted) */ - { 21, 18, 19 + DD_BLOG, 2, 5, 2, ZSTD_greedy }, /* level 5 */ - { 21, 19, 19 + DD_BLOG, 3, 5, 4, ZSTD_greedy }, /* level 6 */ - { 21, 19, 19 + DD_BLOG, 3, 5, 8, ZSTD_lazy }, /* level 7 */ - { 21, 19, 19 + DD_BLOG, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ - { 21, 19, 20 + DD_BLOG, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ - { 22, 20, 21 + DD_BLOG, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ - { 22, 21, 22 + DD_BLOG, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ - { 22, 21, 22 + DD_BLOG, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 13 (not adjusted) */ - { 0, 
0, 0, 0, 0, 0, 0 }, /* level 14 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 15 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 16 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 17 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 18 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 19 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 20 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 21 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 } /* level 22 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* base (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 1 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 2 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 3 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 4 (not adjusted) */ + { 21, 18, 19 + DD_BLOG, 2, 5, 2, ZSTD_greedy }, /* level 5 */ + { 21, 19, 19 + DD_BLOG, 3, 5, 4, ZSTD_greedy }, /* level 6 */ + { 21, 19, 19 + DD_BLOG, 3, 5, 8, ZSTD_lazy }, /* level 7 */ + { 21, 19, 19 + DD_BLOG, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ + { 21, 19, 20 + DD_BLOG, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ + { 22, 20, 21 + DD_BLOG, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ + { 22, 21, 22 + DD_BLOG, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ + { 22, 21, 22 + DD_BLOG, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 13 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 14 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 15 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 16 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 17 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 18 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 19 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 20 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 21 
(not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 } /* level 22 (not adjusted) */ }, { /* for dictSize <= 256 KB */ /* W, C, H, S, L, T, strat */ - { 0, 0, 0, 0, 0, 0, 0 }, /* base (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 1 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 2 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 3 (not adjusted) */ - { 18, 16, 17 + DD_BLOG, 2, 5, 2, ZSTD_greedy }, /* level 4 */ - { 18, 18, 18 + DD_BLOG, 3, 5, 2, ZSTD_greedy }, /* level 5 */ - { 18, 18, 19 + DD_BLOG, 3, 5, 4, ZSTD_lazy }, /* level 6 */ - { 18, 18, 19 + DD_BLOG, 4, 4, 4, ZSTD_lazy }, /* level 7 */ - { 18, 18, 19 + DD_BLOG, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 18, 18, 19 + DD_BLOG, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ - { 18, 18, 19 + DD_BLOG, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 11 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 12 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 13 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 14 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 15 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 16 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 17 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 18 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 19 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 20 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 21 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 } /* level 22 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* base (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 1 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 2 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 3 (not adjusted) */ + { 18, 16, 17 + DD_BLOG, 2, 5, 2, ZSTD_greedy }, /* level 4 */ + { 18, 18, 18 + DD_BLOG, 3, 5, 2, ZSTD_greedy }, /* level 5 */ + { 18, 18, 19 + DD_BLOG, 3, 5, 4, ZSTD_lazy }, /* level 6 */ + { 18, 18, 19 + DD_BLOG, 4, 
4, 4, ZSTD_lazy }, /* level 7 */ + { 18, 18, 19 + DD_BLOG, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 18, 18, 19 + DD_BLOG, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 18, 18, 19 + DD_BLOG, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 11 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 12 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 13 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 14 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 15 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 16 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 17 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 18 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 19 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 20 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 21 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 } /* level 22 (not adjusted) */ }, { /* for dictSize <= 128 KB */ /* W, C, H, S, L, T, strat */ - { 0, 0, 0, 0, 0, 0, 0 }, /* base (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 1 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 2 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 3 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 4 (not adjusted) */ - { 17, 16, 17 + DD_BLOG, 3, 4, 2, ZSTD_greedy }, /* level 5 */ - { 17, 17, 17 + DD_BLOG, 3, 4, 4, ZSTD_lazy }, /* level 6 */ - { 17, 17, 17 + DD_BLOG, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ - { 17, 17, 17 + DD_BLOG, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 17, 17, 17 + DD_BLOG, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ - { 17, 17, 17 + DD_BLOG, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 11 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 12 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 13 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 
0 }, /* level 14 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 15 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 16 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 17 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 18 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 19 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 20 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 21 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 } /* level 22 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* base (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 1 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 2 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 3 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 4 (not adjusted) */ + { 17, 16, 17 + DD_BLOG, 3, 4, 2, ZSTD_greedy }, /* level 5 */ + { 17, 17, 17 + DD_BLOG, 3, 4, 4, ZSTD_lazy }, /* level 6 */ + { 17, 17, 17 + DD_BLOG, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 17, 17, 17 + DD_BLOG, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 17, 17, 17 + DD_BLOG, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 17, 17, 17 + DD_BLOG, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 11 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 12 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 13 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 14 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 15 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 16 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 17 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 18 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 19 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 20 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 21 (not 
adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 } /* level 22 (not adjusted) */ }, { /* for dictSize <= 16 KB */ /* W, C, H, S, L, T, strat */ - { 0, 0, 0, 0, 0, 0, 0 }, /* base (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 1 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 2 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 3 (not adjusted) */ - { 14, 14, 14 + DD_BLOG, 4, 4, 2, ZSTD_greedy }, /* level 4 */ - { 14, 14, 14 + DD_BLOG, 3, 4, 4, ZSTD_lazy }, /* level 5 */ - { 14, 14, 14 + DD_BLOG, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ - { 14, 14, 14 + DD_BLOG, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ - { 14, 14, 14 + DD_BLOG, 8, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 9 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 10 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 11 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 12 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 13 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 14 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 15 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 16 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 17 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 18 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 19 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 20 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 21 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 0 } /* level 22 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* base (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 1 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 2 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 3 (not adjusted) */ + { 14, 14, 14 + DD_BLOG, 4, 4, 2, ZSTD_greedy }, /* level 4 */ + { 14, 14, 14 + DD_BLOG, 3, 4, 4, ZSTD_lazy }, /* level 5 */ + { 14, 14, 14 + DD_BLOG, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ + { 14, 14, 14 + DD_BLOG, 6, 4, 8, ZSTD_lazy2 }, 
/* level 7 */ + { 14, 14, 14 + DD_BLOG, 8, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 9 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 10 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 11 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 12 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 13 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 14 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 15 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 16 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 17 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 18 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 19 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 20 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 21 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 } /* level 22 (not adjusted) */ }, }; From ae4ebf6b8ccb8e4ae238360afdde5c71a381b258 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Fri, 7 Aug 2020 16:41:16 -0400 Subject: [PATCH 20/48] TODO: Comment --- lib/zstd.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/zstd.h b/lib/zstd.h index ea7b9ba6..f34e4002 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1404,6 +1404,9 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictS ZSTD_compressionParameters cParams, ZSTD_customMem customMem); +/** + * TODO: document! + */ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType, From 2cf6cfc55f59c20a22c6935da18bb34afb83b37b Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Tue, 11 Aug 2020 15:29:12 -0400 Subject: [PATCH 21/48] Add Fuzzer Test for the Various Dict Attachment Strategies --- tests/fuzzer.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tests/fuzzer.c b/tests/fuzzer.c index b9b6f307..66ecd724 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -2922,6 +2922,60 @@ static int basicUnitTests(U32 const seed, double compressibility) } DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : testing cdict compression with different attachment strategies : ", testNb++); + { + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + size_t dictSize = CNBuffSize > 110 KB ? 110 KB : CNBuffSize; + void* dict = (void*)malloc(dictSize); + ZSTD_CCtx_params* cctx_params = ZSTD_createCCtxParams(); + ZSTD_dictAttachPref_e const attachPrefs[] = { + ZSTD_dictDefaultAttach, + ZSTD_dictForceAttach, + ZSTD_dictForceCopy, + ZSTD_dictForceLoad, + ZSTD_dictForceAttach + }; + int const enableDedicatedDictSearch[] = {0, 0, 0, 0, 1}; + int const cLevel = 6; + int i; + + RDG_genBuffer(dict, dictSize, 0.5, 0.5, seed); + RDG_genBuffer(CNBuffer, CNBuffSize, 0.6, 0.6, seed); + + CHECK(cctx_params != NULL); + + for (i = 0; i < 5; ++i) { + ZSTD_dictAttachPref_e const attachPref = attachPrefs[i]; + int const enableDDS = enableDedicatedDictSearch[i]; + ZSTD_CDict* cdict; + + DISPLAYLEVEL(5, "\n iter %d ", i); + + ZSTD_CCtxParams_init(cctx_params, cLevel); + CHECK_Z(ZSTD_CCtxParams_setParameter(cctx_params, ZSTD_c_enableDedicatedDictSearch, enableDDS)); + + cdict = ZSTD_createCDict_advanced2(dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cctx_params, ZSTD_defaultCMem); + CHECK(cdict != NULL); + + CHECK_Z(ZSTD_CCtx_refCDict(cctx, cdict)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, attachPref)); + + cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize); + CHECK_Z(cSize); + 
CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dict, CNBuffSize)); + + CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters)); + ZSTD_freeCDict(cdict); + } + + ZSTD_freeCCtx(cctx); + ZSTD_freeDCtx(dctx); + ZSTD_freeCCtxParams(cctx_params); + free(dict); + } + DISPLAYLEVEL(3, "OK \n"); + _end: free(CNBuffer); free(compressedBuffer); From c204110effd899a5c6d68a852995dd3f1b635ecc Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Tue, 11 Aug 2020 18:03:38 -0400 Subject: [PATCH 22/48] Make ZSTD_c_enableDedicatedDictSearch an Experimental Param --- lib/zstd.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/zstd.h b/lib/zstd.h index f34e4002..c79eb70e 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -361,8 +361,6 @@ typedef enum { * Deviating far from default value will likely result in a compression ratio decrease. * Special: value 0 means "automatically determine hashRateLog". */ - ZSTD_c_enableDedicatedDictSearch=170, - /* frame parameters */ ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) * Content size must be known at the beginning of compression. @@ -414,6 +412,7 @@ typedef enum { * ZSTD_c_literalCompressionMode * ZSTD_c_targetCBlockSize * ZSTD_c_srcSizeHint + * ZSTD_c_enableDedicatedDictSearch * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. 
@@ -424,7 +423,8 @@ typedef enum { ZSTD_c_experimentalParam4=1001, ZSTD_c_experimentalParam5=1002, ZSTD_c_experimentalParam6=1003, - ZSTD_c_experimentalParam7=1004 + ZSTD_c_experimentalParam7=1004, + ZSTD_c_experimentalParam8=1005 } ZSTD_cParameter; typedef struct { @@ -1546,6 +1546,10 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre * but compression ratio may regress significantly if guess considerably underestimates */ #define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 +/* TODO: document. + */ +#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8 + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. From beefdb0d3d904ac0914b1df482799d8be07183ab Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Tue, 11 Aug 2020 18:24:11 -0400 Subject: [PATCH 23/48] Fix ZSTD_c_forceAttachDict Bounds --- lib/compress/zstd_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index b24ab55a..8aa754db 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -401,7 +401,7 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) return bounds; case ZSTD_c_forceAttachDict: - ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy); + ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad); bounds.lowerBound = ZSTD_dictDefaultAttach; bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */ return bounds; From 34b545acb07e927fd4c251f5620fb7d41f6d48d1 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Tue, 11 Aug 2020 18:48:22 -0400 Subject: [PATCH 24/48] Add a ZSTD_dedicatedDictSearch ZSTD_dictMode_e to Allow Const Propagation Speed +1.5%. 
--- lib/compress/zstd_compress.c | 14 +++++- lib/compress/zstd_compress_internal.h | 9 +++- lib/compress/zstd_lazy.c | 72 +++++++++++++++++++++------ lib/compress/zstd_lazy.h | 10 ++++ 4 files changed, 85 insertions(+), 20 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 8aa754db..f8019b0e 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2212,7 +2212,7 @@ ZSTD_compressSequences(seqStore_t* seqStorePtr, * assumption : strat is a valid strategy */ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode) { - static const ZSTD_blockCompressor blockCompressor[3][ZSTD_STRATEGY_MAX+1] = { + static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { { ZSTD_compressBlock_fast /* default for 0 */, ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, @@ -2242,7 +2242,17 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMo ZSTD_compressBlock_btlazy2_dictMatchState, ZSTD_compressBlock_btopt_dictMatchState, ZSTD_compressBlock_btultra_dictMatchState, - ZSTD_compressBlock_btultra_dictMatchState } + ZSTD_compressBlock_btultra_dictMatchState }, + { NULL /* default for 0 */, + NULL, + NULL, + ZSTD_compressBlock_greedy_dedicatedDictSearch, + ZSTD_compressBlock_lazy_dedicatedDictSearch, + ZSTD_compressBlock_lazy2_dedicatedDictSearch, + NULL, + NULL, + NULL, + NULL } }; ZSTD_blockCompressor selectedCompressor; ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 812dd7ac..8f4d2c55 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -299,7 +299,12 @@ struct ZSTD_CCtx_s { typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; -typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e; +typedef enum { + ZSTD_noDict = 0, + ZSTD_extDict = 1, 
+ ZSTD_dictMatchState = 2, + ZSTD_dedicatedDictSearch = 3 +} ZSTD_dictMode_e; typedef size_t (*ZSTD_blockCompressor) ( @@ -763,7 +768,7 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) return ZSTD_window_hasExtDict(ms->window) ? ZSTD_extDict : ms->dictMatchState != NULL ? - ZSTD_dictMatchState : + (ms->dictMatchState->enableDedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) : ZSTD_noDict; } diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index c6bcff87..b3143070 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -258,6 +258,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr); assert(ip <= iend-8); /* required for h calculation */ + assert(dictMode != ZSTD_dedicatedDictSearch); /* reach end of unsorted candidates list */ while ( (matchIndex > unsortLimit) @@ -525,7 +526,7 @@ size_t ZSTD_HcFindBestMatch_generic ( /* HC4 match finder */ U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); - if (dictMode == ZSTD_dictMatchState && ms->dictMatchState->enableDedicatedDictSearch) + if (dictMode == ZSTD_dedicatedDictSearch) PREFETCH_L1(ms->dictMatchState->hashTable + (ZSTD_hashPtr(ip, ms->dictMatchState->cParams.hashLog - DD_BLOG, ms->dictMatchState->cParams.minMatch) << DD_BLOG)); @@ -555,7 +556,7 @@ size_t ZSTD_HcFindBestMatch_generic ( matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); } - if (dictMode == ZSTD_dictMatchState && ms->dictMatchState->enableDedicatedDictSearch) { + if (dictMode == ZSTD_dedicatedDictSearch) { const ZSTD_matchState_t* const dms = ms->dictMatchState; const U32 dmsChainSize = (1 << dms->cParams.chainLog); const U32 dmsChainMask = dmsChainSize - 1; @@ -665,6 +666,22 @@ static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS ( } +static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* 
offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch); + } +} + + FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* const iLimit, @@ -709,20 +726,21 @@ ZSTD_compressBlock_lazy_generic( searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ? (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) : + (dictMode == ZSTD_dedicatedDictSearch ? ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS : (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS - : ZSTD_HcFindBestMatch_selectMLS); + : ZSTD_HcFindBestMatch_selectMLS)); U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; const ZSTD_matchState_t* const dms = ms->dictMatchState; - const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ? + const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch ? dms->window.dictLimit : 0; - const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? + const BYTE* const dictBase = dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch ? dms->window.base : NULL; - const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ? + const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch ? dictBase + dictLowestIndex : NULL; - const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? + const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch ? 
dms->window.nextSrc : NULL; - const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? + const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch ? prefixLowestIndex - (U32)(dictEnd - dictBase) : 0; const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); @@ -738,7 +756,7 @@ ZSTD_compressBlock_lazy_generic( if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; } - if (dictMode == ZSTD_dictMatchState) { + if (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) { /* dictMatchState repCode checks don't currently handle repCode == 0 * disabling. */ assert(offset_1 <= dictAndPrefixLength); @@ -758,9 +776,9 @@ ZSTD_compressBlock_lazy_generic( const BYTE* start=ip+1; /* check repCode */ - if (dictMode == ZSTD_dictMatchState) { + if (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) { const U32 repIndex = (U32)(ip - base) + 1 - offset_1; - const BYTE* repMatch = (dictMode == ZSTD_dictMatchState + const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) && repIndex < prefixLowestIndex) ? dictBase + (repIndex - dictIndexDelta) : base + repIndex; @@ -801,7 +819,7 @@ ZSTD_compressBlock_lazy_generic( if ((mlRep >= 4) && (gain2 > gain1)) matchLength = mlRep, offset = 0, start = ip; } - if (dictMode == ZSTD_dictMatchState) { + if (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) { const U32 repIndex = (U32)(ip - base) - offset_1; const BYTE* repMatch = repIndex < prefixLowestIndex ? 
dictBase + (repIndex - dictIndexDelta) : @@ -836,7 +854,7 @@ ZSTD_compressBlock_lazy_generic( if ((mlRep >= 4) && (gain2 > gain1)) matchLength = mlRep, offset = 0, start = ip; } - if (dictMode == ZSTD_dictMatchState) { + if (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) { const U32 repIndex = (U32)(ip - base) - offset_1; const BYTE* repMatch = repIndex < prefixLowestIndex ? dictBase + (repIndex - dictIndexDelta) : @@ -874,7 +892,7 @@ ZSTD_compressBlock_lazy_generic( && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ { start--; matchLength++; } } - if (dictMode == ZSTD_dictMatchState) { + if (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) { U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest; @@ -890,11 +908,11 @@ _storeSequence: } /* check immediate repcode */ - if (dictMode == ZSTD_dictMatchState) { + if (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) { while (ip <= ilimit) { U32 const current2 = (U32)(ip-base); U32 const repIndex = current2 - offset_2; - const BYTE* repMatch = dictMode == ZSTD_dictMatchState + const BYTE* repMatch = (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) && repIndex < prefixLowestIndex ? 
dictBase - dictIndexDelta + repIndex : base + repIndex; @@ -990,6 +1008,28 @@ size_t ZSTD_compressBlock_greedy_dictMatchState( } +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); +} + + FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_lazy_extDict_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, diff --git a/lib/compress/zstd_lazy.h b/lib/compress/zstd_lazy.h index 5e5eb514..9898e90c 100644 --- a/lib/compress/zstd_lazy.h +++ b/lib/compress/zstd_lazy.h @@ -49,6 +49,16 @@ size_t ZSTD_compressBlock_greedy_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + size_t ZSTD_compressBlock_greedy_extDict( ZSTD_matchState_t* ms, 
seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); From 41012193ad2f85050163bf26f87a37023bc46bde Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Tue, 11 Aug 2020 18:57:12 -0400 Subject: [PATCH 25/48] Always Init CDict's enableDedicatedDictSearch Field --- lib/compress/zstd_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index f8019b0e..8fd10a68 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3403,6 +3403,7 @@ static size_t ZSTD_initCDict_internal( DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType); assert(!ZSTD_checkCParams(cParams)); cdict->matchState.cParams = cParams; + cdict->matchState.enableDedicatedDictSearch = params.enableDedicatedDictSearch; if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { cdict->dictContent = dictBuffer; } else { @@ -3525,7 +3526,6 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(const void* dict, size_t dict ZSTD_CDict* cdict = ZSTD_createCDict_advanced_internal(dictSize, dictLoadMethod, cParams, customMem); - cdict->matchState.enableDedicatedDictSearch = enableDedicatedDictSearch; if (ZSTD_isError( ZSTD_initCDict_internal(cdict, dict, dictSize, From f1b428fdac89480ffeb865337b1444b4348e13be Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 12 Aug 2020 16:50:44 -0400 Subject: [PATCH 26/48] Rename enableDedicatedDictSearch to dedicatedDictSearch in MatchState This makes it clear that not only is the feature allowed here, we're actually using it, as opposed to the CCtxParam field, in which it's enabled, but we may or may not be using it. 
--- lib/compress/zstd_compress.c | 6 +++--- lib/compress/zstd_compress_internal.h | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 8fd10a68..c3d317d2 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3403,7 +3403,7 @@ static size_t ZSTD_initCDict_internal( DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType); assert(!ZSTD_checkCParams(cParams)); cdict->matchState.cParams = cParams; - cdict->matchState.enableDedicatedDictSearch = params.enableDedicatedDictSearch; + cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch; if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { cdict->dictContent = dictBuffer; } else { @@ -3512,9 +3512,9 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(const void* dict, size_t dict ZSTD_CCtx_params* cctxParams, ZSTD_customMem customMem) { - int const enableDedicatedDictSearch = cctxParams->enableDedicatedDictSearch && + int const dedicatedDictSearch = cctxParams->enableDedicatedDictSearch && ZSTD_dedicatedDictSearch_isSupported(cctxParams->compressionLevel, dictSize); - if (!enableDedicatedDictSearch) { + if (!dedicatedDictSearch) { ZSTD_compressionParameters cParams = ZSTD_getCParams_internal( cctxParams->compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); return ZSTD_createCDict_advanced(dict, dictSize, diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 8f4d2c55..7b827275 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -153,7 +153,9 @@ struct ZSTD_matchState_t { U32* hashTable; U32* hashTable3; U32* chainTable; - int enableDedicatedDictSearch; + int dedicatedDictSearch; /* Indicates whether this matchState is using the + * dedicated dictionary search structure. 
+ */ optState_t opt; /* optimal parser state */ const ZSTD_matchState_t* dictMatchState; ZSTD_compressionParameters cParams; @@ -768,7 +770,7 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) return ZSTD_window_hasExtDict(ms->window) ? ZSTD_extDict : ms->dictMatchState != NULL ? - (ms->dictMatchState->enableDedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) : + (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) : ZSTD_noDict; } From eede46a47e91162ecf38ced2d61bd684c2c485e3 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Thu, 13 Aug 2020 11:57:31 -0400 Subject: [PATCH 27/48] Misc Refactor of DDS Search Code --- lib/compress/zstd_lazy.c | 68 +++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index b3143070..45b24847 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -523,13 +523,21 @@ size_t ZSTD_HcFindBestMatch_generic ( U32 nbAttempts = 1U << cParams->searchLog; size_t ml=4-1; - /* HC4 match finder */ - U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch + ? dms->cParams.hashLog - DD_BLOG : 0; + const U32 ddsIdx = dictMode == ZSTD_dedicatedDictSearch + ? 
ZSTD_hashPtr(ip, ddsHashLog, mls) << DD_BLOG : 0; - if (dictMode == ZSTD_dedicatedDictSearch) - PREFETCH_L1(ms->dictMatchState->hashTable + - (ZSTD_hashPtr(ip, ms->dictMatchState->cParams.hashLog - DD_BLOG, - ms->dictMatchState->cParams.minMatch) << DD_BLOG)); + U32 matchIndex; + + /* HC4 match finder */ + matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); + + if (dictMode == ZSTD_dedicatedDictSearch) { + const U32* entry = &dms->hashTable[ddsIdx]; + PREFETCH_L1(entry); + } for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { size_t currentMl=0; @@ -557,47 +565,51 @@ size_t ZSTD_HcFindBestMatch_generic ( } if (dictMode == ZSTD_dedicatedDictSearch) { - const ZSTD_matchState_t* const dms = ms->dictMatchState; - const U32 dmsChainSize = (1 << dms->cParams.chainLog); - const U32 dmsChainMask = dmsChainSize - 1; - const U32 dmsLowestIndex = dms->window.dictLimit; - const BYTE* const dmsBase = dms->window.base; - const BYTE* const dmsEnd = dms->window.nextSrc; - const U32 dmsSize = (U32)(dmsEnd - dmsBase); - const U32 dmsIndexDelta = dictLimit - dmsSize; - const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0; + const U32 ddsChainSize = (1 << dms->cParams.chainLog); + const U32 ddsChainMask = ddsChainSize - 1; + const U32 ddsLowestIndex = dms->window.dictLimit; + const BYTE* const ddsBase = dms->window.base; + const BYTE* const ddsEnd = dms->window.nextSrc; + const U32 ddsSize = (U32)(ddsEnd - ddsBase); + const U32 ddsIndexDelta = dictLimit - ddsSize; + const U32 ddsMinChain = ddsSize > ddsChainSize ? 
ddsSize - ddsChainSize : 0; const U32 bucketSize = (1 << DD_BLOG); - U32 hash = ZSTD_hashPtr(ip, dms->cParams.hashLog - DD_BLOG, - dms->cParams.minMatch) << DD_BLOG; U32 attemptNb = 0; - matchIndex = dms->hashTable[hash]; + + matchIndex = dms->hashTable[ddsIdx]; /* Empty chain */ if (!matchIndex) return ml; - for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--, attemptNb++) { + for ( ; (matchIndex>ddsLowestIndex) & (nbAttempts>0) ; nbAttempts--, attemptNb++) { size_t currentMl=0; - const BYTE* const match = dmsBase + matchIndex; - assert(match+4 <= dmsEnd); - if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; + const BYTE* const match = ddsBase + matchIndex; + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } /* save best solution */ if (currentMl > ml) { ml = currentMl; - *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ } - if (matchIndex <= dmsMinChain) break; + if (matchIndex <= ddsMinChain) { + break; + } - if (attemptNb < bucketSize - 1) matchIndex = dms->hashTable[++hash]; - else matchIndex = dms->chainTable[matchIndex & dmsChainMask]; + if (attemptNb < bucketSize - 1) { + matchIndex = dms->hashTable[ddsIdx + attemptNb]; + } else { + matchIndex = dms->chainTable[matchIndex & ddsChainMask]; + } } } else if (dictMode == ZSTD_dictMatchState) { - const ZSTD_matchState_t* const dms = ms->dictMatchState; const U32* const dmsChainTable = dms->chainTable; const U32 dmsChainSize = (1 << dms->cParams.chainLog); const U32 dmsChainMask = dmsChainSize - 1; From 
a494111385bd7d61dbf8f4e3d817e51d2ba6a81c Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Thu, 13 Aug 2020 14:54:10 -0400 Subject: [PATCH 28/48] Move Prefetch Before Insertion; Speed Up ~6% --- lib/compress/zstd_lazy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 45b24847..0ee9f650 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -531,14 +531,14 @@ size_t ZSTD_HcFindBestMatch_generic ( U32 matchIndex; - /* HC4 match finder */ - matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); - if (dictMode == ZSTD_dedicatedDictSearch) { const U32* entry = &dms->hashTable[ddsIdx]; PREFETCH_L1(entry); } + /* HC4 match finder */ + matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); + for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { size_t currentMl=0; if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { From db2aa252523325dfba15ab8870cc3be29585dd10 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Thu, 13 Aug 2020 16:52:07 -0400 Subject: [PATCH 29/48] Decision for Whether to Attach Should be Based on CDict Config, not CCtx --- lib/compress/zstd_compress.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index c3d317d2..fc1b4213 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1640,10 +1640,8 @@ static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, U64 pledgedSrcSize) { size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; - int const useDedicatedDictSearch = - params->enableDedicatedDictSearch && - ZSTD_dedicatedDictSearch_isSupported(params->compressionLevel, cdict->dictContentSize); - return ( useDedicatedDictSearch + int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch; + return ( dedicatedDictSearch || pledgedSrcSize <= cutoff || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN || params->attachDictPref == ZSTD_dictForceAttach ) @@ -1709,6 +1707,8 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, { const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; + assert(!cdict->matchState.dedicatedDictSearch); + DEBUGLOG(4, "copying dictionary into context"); { unsigned const windowLog = params.cParams.windowLog; From 914bfe7ee4fc03ceca51f61620e33353cbabcca6 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Mon, 17 Aug 2020 12:35:50 -0400 Subject: [PATCH 30/48] Init CCtx's Local Dict with CCtxParams --- lib/compress/zstd_compress.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index fc1b4213..461ebc4b 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -875,8 +875,6 @@ static int ZSTD_dedicatedDictSearch_isSupported(int const compressionLevel, size static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) { ZSTD_localDict* const dl = &cctx->localDict; - ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams( - &cctx->requestedParams, ZSTD_CONTENTSIZE_UNKNOWN, dl->dictSize); if (dl->dict == NULL) { /* No local dictionary. */ assert(dl->dictBuffer == NULL); @@ -893,12 +891,12 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) assert(cctx->cdict == NULL); assert(cctx->prefixDict.dict == NULL); - dl->cdict = ZSTD_createCDict_advanced( + dl->cdict = ZSTD_createCDict_advanced2( dl->dict, dl->dictSize, ZSTD_dlm_byRef, dl->dictContentType, - cParams, + &cctx->requestedParams, cctx->customMem); RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed"); cctx->cdict = dl->cdict; From d46306087b4ce37c1d31309d8db9f843b9cb05fc Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Mon, 17 Aug 2020 12:37:58 -0400 Subject: [PATCH 31/48] Enable Dedicated Dict Search in the CLI --- programs/fileio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/programs/fileio.c b/programs/fileio.c index 241afd7c..d3285113 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -931,6 +931,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, comprParams.strategy) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableDedicatedDictSearch, 1) ); /* multi-threading */ #ifdef ZSTD_MULTITHREAD DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers); From df386b3d8d50cef611d7f9693417d63620170bd1 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Mon, 17 Aug 2020 17:43:01 -0400 Subject: [PATCH 32/48] Fix Off-By-One Error in Counting DDS Search Attempts This caused us to double-search the first position and fail to search the last position in the chain, slowing down search and making it less effective. --- lib/compress/zstd_lazy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 0ee9f650..5e38e49c 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -575,7 +575,7 @@ size_t ZSTD_HcFindBestMatch_generic ( const U32 ddsMinChain = ddsSize > ddsChainSize ? 
ddsSize - ddsChainSize : 0; const U32 bucketSize = (1 << DD_BLOG); - U32 attemptNb = 0; + U32 attemptNb = 1; matchIndex = dms->hashTable[ddsIdx]; @@ -603,7 +603,7 @@ size_t ZSTD_HcFindBestMatch_generic ( break; } - if (attemptNb < bucketSize - 1) { + if (attemptNb < bucketSize) { matchIndex = dms->hashTable[ddsIdx + attemptNb]; } else { matchIndex = dms->chainTable[matchIndex & ddsChainMask]; From 5e91ae27ebdbdde0abb8ef7dcb50cd88e37897f5 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Tue, 18 Aug 2020 13:30:41 -0400 Subject: [PATCH 33/48] Prefetch First Batch of Match Positions; +11% Speed in Level 5 w/ 1 Dict --- lib/compress/zstd_lazy.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 5e38e49c..30330a12 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -574,8 +574,7 @@ size_t ZSTD_HcFindBestMatch_generic ( const U32 ddsIndexDelta = dictLimit - ddsSize; const U32 ddsMinChain = ddsSize > ddsChainSize ? ddsSize - ddsChainSize : 0; const U32 bucketSize = (1 << DD_BLOG); - - U32 attemptNb = 1; + U32 attemptNb; matchIndex = dms->hashTable[ddsIdx]; @@ -583,7 +582,11 @@ size_t ZSTD_HcFindBestMatch_generic ( if (!matchIndex) return ml; - for ( ; (matchIndex>ddsLowestIndex) & (nbAttempts>0) ; nbAttempts--, attemptNb++) { + for (attemptNb = 0; attemptNb < bucketSize; attemptNb++) { + PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + attemptNb]); + } + + for (attemptNb = 1; (matchIndex>ddsLowestIndex) & (nbAttempts>0) ; nbAttempts--, attemptNb++) { size_t currentMl=0; const BYTE* const match = ddsBase + matchIndex; assert(match+4 <= ddsEnd); From 5390fee4f717ddaf7fad316fe2f28ca92ed65ca3 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Tue, 18 Aug 2020 15:20:12 -0400 Subject: [PATCH 34/48] Rename and Move DD_BLOG Constant to ZSTD_LAZY_DDSS_BUCKET_LOG --- lib/compress/zstd_compress.c | 184 +++++++++++++------------- lib/compress/zstd_compress_internal.h | 7 - lib/compress/zstd_lazy.c | 12 +- lib/compress/zstd_lazy.h | 8 ++ 4 files changed, 106 insertions(+), 105 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 461ebc4b..8ffc19ed 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -4323,107 +4323,107 @@ static const ZSTD_compressionParameters ZSTD_dedicatedDictSearch_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { { /* "default" - for any dictSize > 256 KB */ /* W, C, H, S, L, TL, strat */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* base (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 1 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 2 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 3 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 4 (not adjusted) */ - { 21, 18, 19 + DD_BLOG, 2, 5, 2, ZSTD_greedy }, /* level 5 */ - { 21, 19, 19 + DD_BLOG, 3, 5, 4, ZSTD_greedy }, /* level 6 */ - { 21, 19, 19 + DD_BLOG, 3, 5, 8, ZSTD_lazy }, /* level 7 */ - { 21, 19, 19 + DD_BLOG, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ - { 21, 19, 20 + DD_BLOG, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ - { 22, 20, 21 + DD_BLOG, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ - { 22, 21, 22 + DD_BLOG, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ - { 22, 21, 22 + DD_BLOG, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 13 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 14 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 15 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 16 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 17 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 
(ZSTD_strategy)0 }, /* level 18 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 19 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 20 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 21 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 } /* level 22 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* base (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 1 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 2 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 3 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 4 (not adjusted) */ + { 21, 18, 19 + ZSTD_LAZY_DDSS_BUCKET_LOG, 2, 5, 2, ZSTD_greedy }, /* level 5 */ + { 21, 19, 19 + ZSTD_LAZY_DDSS_BUCKET_LOG, 3, 5, 4, ZSTD_greedy }, /* level 6 */ + { 21, 19, 19 + ZSTD_LAZY_DDSS_BUCKET_LOG, 3, 5, 8, ZSTD_lazy }, /* level 7 */ + { 21, 19, 19 + ZSTD_LAZY_DDSS_BUCKET_LOG, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ + { 21, 19, 20 + ZSTD_LAZY_DDSS_BUCKET_LOG, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ + { 22, 20, 21 + ZSTD_LAZY_DDSS_BUCKET_LOG, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ + { 22, 21, 22 + ZSTD_LAZY_DDSS_BUCKET_LOG, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ + { 22, 21, 22 + ZSTD_LAZY_DDSS_BUCKET_LOG, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 13 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 14 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 15 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 16 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 17 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 18 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 19 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 20 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 21 (not adjusted) */ + { 
0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 } /* level 22 (not adjusted) */ }, { /* for dictSize <= 256 KB */ /* W, C, H, S, L, T, strat */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* base (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 1 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 2 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 3 (not adjusted) */ - { 18, 16, 17 + DD_BLOG, 2, 5, 2, ZSTD_greedy }, /* level 4 */ - { 18, 18, 18 + DD_BLOG, 3, 5, 2, ZSTD_greedy }, /* level 5 */ - { 18, 18, 19 + DD_BLOG, 3, 5, 4, ZSTD_lazy }, /* level 6 */ - { 18, 18, 19 + DD_BLOG, 4, 4, 4, ZSTD_lazy }, /* level 7 */ - { 18, 18, 19 + DD_BLOG, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 18, 18, 19 + DD_BLOG, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ - { 18, 18, 19 + DD_BLOG, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 11 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 12 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 13 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 14 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 15 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 16 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 17 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 18 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 19 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 20 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 21 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 } /* level 22 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* base (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 1 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 2 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 3 (not adjusted) */ + 
{ 18, 16, 17 + ZSTD_LAZY_DDSS_BUCKET_LOG, 2, 5, 2, ZSTD_greedy }, /* level 4 */ + { 18, 18, 18 + ZSTD_LAZY_DDSS_BUCKET_LOG, 3, 5, 2, ZSTD_greedy }, /* level 5 */ + { 18, 18, 19 + ZSTD_LAZY_DDSS_BUCKET_LOG, 3, 5, 4, ZSTD_lazy }, /* level 6 */ + { 18, 18, 19 + ZSTD_LAZY_DDSS_BUCKET_LOG, 4, 4, 4, ZSTD_lazy }, /* level 7 */ + { 18, 18, 19 + ZSTD_LAZY_DDSS_BUCKET_LOG, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 18, 18, 19 + ZSTD_LAZY_DDSS_BUCKET_LOG, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 18, 18, 19 + ZSTD_LAZY_DDSS_BUCKET_LOG, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 11 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 12 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 13 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 14 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 15 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 16 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 17 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 18 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 19 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 20 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 21 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 } /* level 22 (not adjusted) */ }, { /* for dictSize <= 128 KB */ /* W, C, H, S, L, T, strat */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* base (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 1 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 2 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 3 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 4 (not adjusted) */ - { 17, 16, 17 + DD_BLOG, 3, 4, 2, ZSTD_greedy }, /* level 5 */ - { 17, 17, 17 + DD_BLOG, 3, 4, 4, ZSTD_lazy }, /* level 6 */ - { 17, 17, 17 + DD_BLOG, 3, 
4, 8, ZSTD_lazy2 }, /* level 7 */ - { 17, 17, 17 + DD_BLOG, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 17, 17, 17 + DD_BLOG, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ - { 17, 17, 17 + DD_BLOG, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 11 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 12 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 13 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 14 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 15 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 16 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 17 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 18 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 19 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 20 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 21 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 } /* level 22 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* base (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 1 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 2 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 3 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 4 (not adjusted) */ + { 17, 16, 17 + ZSTD_LAZY_DDSS_BUCKET_LOG, 3, 4, 2, ZSTD_greedy }, /* level 5 */ + { 17, 17, 17 + ZSTD_LAZY_DDSS_BUCKET_LOG, 3, 4, 4, ZSTD_lazy }, /* level 6 */ + { 17, 17, 17 + ZSTD_LAZY_DDSS_BUCKET_LOG, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 17, 17, 17 + ZSTD_LAZY_DDSS_BUCKET_LOG, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 17, 17, 17 + ZSTD_LAZY_DDSS_BUCKET_LOG, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 17, 17, 17 + ZSTD_LAZY_DDSS_BUCKET_LOG, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 11 (not adjusted) */ + 
{ 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 12 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 13 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 14 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 15 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 16 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 17 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 18 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 19 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 20 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 21 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 } /* level 22 (not adjusted) */ }, { /* for dictSize <= 16 KB */ /* W, C, H, S, L, T, strat */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* base (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 1 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 2 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 3 (not adjusted) */ - { 14, 14, 14 + DD_BLOG, 4, 4, 2, ZSTD_greedy }, /* level 4 */ - { 14, 14, 14 + DD_BLOG, 3, 4, 4, ZSTD_lazy }, /* level 5 */ - { 14, 14, 14 + DD_BLOG, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ - { 14, 14, 14 + DD_BLOG, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ - { 14, 14, 14 + DD_BLOG, 8, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 9 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 10 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 11 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 12 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 13 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 14 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 15 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* 
level 16 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 17 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 18 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 19 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 20 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 21 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 } /* level 22 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* base (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 1 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 2 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 3 (not adjusted) */ + { 14, 14, 14 + ZSTD_LAZY_DDSS_BUCKET_LOG, 4, 4, 2, ZSTD_greedy }, /* level 4 */ + { 14, 14, 14 + ZSTD_LAZY_DDSS_BUCKET_LOG, 3, 4, 4, ZSTD_lazy }, /* level 5 */ + { 14, 14, 14 + ZSTD_LAZY_DDSS_BUCKET_LOG, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ + { 14, 14, 14 + ZSTD_LAZY_DDSS_BUCKET_LOG, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 14, 14, 14 + ZSTD_LAZY_DDSS_BUCKET_LOG, 8, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 9 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 10 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 11 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 12 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 13 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 14 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 15 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 16 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 17 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 18 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 19 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 20 (not 
adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 21 (not adjusted) */ + { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 } /* level 22 (not adjusted) */ }, }; diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 7b827275..b161a208 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -28,13 +28,6 @@ extern "C" { #endif - -/* Dedicated dict search bucket log: - * --------------------------------- - * This determines the additional space we need for the hash table. - * We will have 2^DD_BLOG slots in our bucket. */ -#define DD_BLOG 2 - /*-************************************* * Constants ***************************************/ diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 30330a12..add10744 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -484,12 +484,12 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B for (U32 idx = ms->nextToUpdate; idx < target; idx++) { U32 const h = ZSTD_hashPtr( ms->window.base + idx, - ms->cParams.hashLog - DD_BLOG, - ms->cParams.minMatch) << DD_BLOG; + ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG, + ms->cParams.minMatch) << ZSTD_LAZY_DDSS_BUCKET_LOG; chainTable[idx & chainMask] = ms->hashTable[h]; ms->hashTable[h] = idx; /* Same logic as before. But now, just copy the chain into the bucket */ - for (U32 i = 0; i < (1 << DD_BLOG) - 1; i++) + for (U32 i = 0; i < (1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1; i++) ms->hashTable[h + i + 1] = chainTable[ms->hashTable[h + i] & chainMask]; } ms->nextToUpdate = target; @@ -525,9 +525,9 @@ size_t ZSTD_HcFindBestMatch_generic ( const ZSTD_matchState_t* const dms = ms->dictMatchState; const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch - ? dms->cParams.hashLog - DD_BLOG : 0; + ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0; const U32 ddsIdx = dictMode == ZSTD_dedicatedDictSearch - ? 
ZSTD_hashPtr(ip, ddsHashLog, mls) << DD_BLOG : 0; + ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0; U32 matchIndex; @@ -573,7 +573,7 @@ size_t ZSTD_HcFindBestMatch_generic ( const U32 ddsSize = (U32)(ddsEnd - ddsBase); const U32 ddsIndexDelta = dictLimit - ddsSize; const U32 ddsMinChain = ddsSize > ddsChainSize ? ddsSize - ddsChainSize : 0; - const U32 bucketSize = (1 << DD_BLOG); + const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); U32 attemptNb; matchIndex = dms->hashTable[ddsIdx]; diff --git a/lib/compress/zstd_lazy.h b/lib/compress/zstd_lazy.h index 9898e90c..d0214d5e 100644 --- a/lib/compress/zstd_lazy.h +++ b/lib/compress/zstd_lazy.h @@ -17,6 +17,14 @@ extern "C" { #include "zstd_compress_internal.h" +/** + * Dedicated Dictionary Search Structure bucket log. In the + * ZSTD_dedicatedDictSearch mode, the hashTable has + * 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just + * one. + */ +#define ZSTD_LAZY_DDSS_BUCKET_LOG 2 + U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); From ad9f98ac3f74b59c09ddf76a281af6e4b5ea32a6 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Tue, 18 Aug 2020 16:58:11 -0400 Subject: [PATCH 35/48] Document the ZSTD_c_enableDedicatedDictSearch Parameter --- lib/zstd.h | 46 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/lib/zstd.h b/lib/zstd.h index c79eb70e..bd4e79f7 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1546,7 +1546,51 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre * but compression ratio may regress significantly if guess considerably underestimates */ #define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 -/* TODO: document. +/* Controls whether the new and experimental "dedicated dictionary search + * structure" can be used. 
+ * + * How to use it: + * + * When using a CDict, whether to use this feature or not is controlled at + * CDict creation, and it must be set in a CCtxParams set passed into that + * construction. A compression will then use the feature or not based on how + * the CDict was constructed; the value of this param, set in the CCtx, will + * have no effect. + * + * However, when a dictionary buffer is passed into a CCtx, such as via + * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control + * whether the CDict that is created internally can use the feature or not. + * + * What it does: + * + * Normally, the internal data structures of the CDict are analogous to what + * would be stored in a CCtx after compressing the contents of a dictionary. + * To an approximation, a compression using a dictionary can then use those + * data structures to simply continue what is effectively a streaming + * compression where the simulated compression of the dictionary left off. + * Which is to say, the search structures in the CDict are normally the same + * format as in the CCtx. + * + * It is possible to do better, since the CDict is not like a CCtx: the search + * structures are written once during CDict creation, and then are only read + * after that, while the search structures in the CCtx are both read and + * written as the compression goes along. This means we can choose a search + * structure for the dictionary that is read-optimized. + * + * This feature enables the use of that different structure. Note that this + * means that the CDict tables can no longer be copied into the CCtx, so + * the dict attachment mode ZSTD_dictForceCopy will no longer be useable. The + * dictionary can only be attached or reloaded. + * + * Effects: + * + * This will only have any effect when the selected ZSTD_strategy + * implementation supports this feature. Currently, that's limited to + * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2. 
+ * + * In general, you should expect compression to be faster, and CDict creation + * to be slightly slower. Eventually, we will probably make this mode the + * default. */ #define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8 From 9e83c782f828f5a2f2ba6c48af74caf12be6c4f9 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Thu, 20 Aug 2020 11:40:47 -0400 Subject: [PATCH 36/48] Simplify DDS Hash Table Construction No need to walk the chainTable; we can just keep shifting the entries in the hashTable. --- lib/compress/zstd_lazy.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index add10744..d8ed30a9 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -481,17 +481,22 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B U32 const target = (U32)(ip - ms->window.base); U32* const chainTable = ms->chainTable; U32 const chainMask = (1 << ms->cParams.chainLog) - 1; - for (U32 idx = ms->nextToUpdate; idx < target; idx++) { + U32 idx = ms->nextToUpdate; + U32 bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG; + for ( ; idx < target; idx++) { + U32 i; U32 const h = ZSTD_hashPtr( ms->window.base + idx, ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG, ms->cParams.minMatch) << ZSTD_LAZY_DDSS_BUCKET_LOG; + /* Shift hash cache down 1. */ + for (i = bucketSize - 1; i; i--) + ms->hashTable[h + i] = ms->hashTable[h + i - 1]; + /* Insert new position. */ chainTable[idx & chainMask] = ms->hashTable[h]; ms->hashTable[h] = idx; - /* Same logic as before. But now, just copy the chain into the bucket */ - for (U32 i = 0; i < (1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1; i++) - ms->hashTable[h + i + 1] = chainTable[ms->hashTable[h + i] & chainMask]; } + ms->nextToUpdate = target; } From e8b4011b52c1e4ea4db46f52b77842530025841d Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Thu, 20 Aug 2020 12:31:25 -0400 Subject: [PATCH 37/48] Split Lookups in Hash Cache and Chain Table into Two Loops Sliiiight speedup. --- lib/compress/zstd_lazy.c | 83 +++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 30 deletions(-) diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index d8ed30a9..d3939840 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -570,30 +570,63 @@ size_t ZSTD_HcFindBestMatch_generic ( } if (dictMode == ZSTD_dedicatedDictSearch) { - const U32 ddsChainSize = (1 << dms->cParams.chainLog); - const U32 ddsChainMask = ddsChainSize - 1; - const U32 ddsLowestIndex = dms->window.dictLimit; - const BYTE* const ddsBase = dms->window.base; - const BYTE* const ddsEnd = dms->window.nextSrc; - const U32 ddsSize = (U32)(ddsEnd - ddsBase); - const U32 ddsIndexDelta = dictLimit - ddsSize; - const U32 ddsMinChain = ddsSize > ddsChainSize ? ddsSize - ddsChainSize : 0; - const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); - U32 attemptNb; + const U32 ddsChainSize = (1 << dms->cParams.chainLog); + const U32 ddsChainMask = ddsChainSize - 1; + const U32 ddsLowestIndex = dms->window.dictLimit; + const BYTE* const ddsBase = dms->window.base; + const BYTE* const ddsEnd = dms->window.nextSrc; + const U32 ddsSize = (U32)(ddsEnd - ddsBase); + const U32 ddsIndexDelta = dictLimit - ddsSize; + const U32 ddsMinChain = ddsSize > ddsChainSize ? ddsSize - ddsChainSize : 0; + const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); + const U32 bucketLimit = nbAttempts < bucketSize ? 
nbAttempts : bucketSize; + U32 ddsAttempt; - matchIndex = dms->hashTable[ddsIdx]; - - /* Empty chain */ - if (!matchIndex) - return ml; - - for (attemptNb = 0; attemptNb < bucketSize; attemptNb++) { - PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + attemptNb]); + for (ddsAttempt = 0; ddsAttempt < bucketSize; ddsAttempt++) { + PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]); } - for (attemptNb = 1; (matchIndex>ddsLowestIndex) & (nbAttempts>0) ; nbAttempts--, attemptNb++) { + for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) { size_t currentMl=0; - const BYTE* const match = ddsBase + matchIndex; + const BYTE* match; + matchIndex = dms->hashTable[ddsIdx + ddsAttempt]; + match = ddsBase + matchIndex; + + if (matchIndex < ddsLowestIndex) { + return ml; + } + + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) { + /* best possible, avoids read overflow on next attempt */ + return ml; + } + } + + if (matchIndex <= ddsMinChain) { + return ml; + } + } + + for ( ; (ddsAttempt < nbAttempts) & (matchIndex >= ddsMinChain); ddsAttempt++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->chainTable[matchIndex & ddsChainMask]; + match = ddsBase + matchIndex; + + if (matchIndex < ddsLowestIndex) { + break; + } + assert(match+4 <= ddsEnd); if (MEM_read32(match) == MEM_read32(ip)) { /* assumption : matchIndex <= dictLimit-4 (by table construction) */ @@ -606,16 +639,6 @@ size_t ZSTD_HcFindBestMatch_generic ( *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ } - - if (matchIndex <= ddsMinChain) { - 
break; - } - - if (attemptNb < bucketSize) { - matchIndex = dms->hashTable[ddsIdx + attemptNb]; - } else { - matchIndex = dms->chainTable[matchIndex & ddsChainMask]; - } } } else if (dictMode == ZSTD_dictMatchState) { const U32* const dmsChainTable = dms->chainTable; From 7b9a755ac9a10d211c306f4ee26d7da1a9a07c96 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Thu, 20 Aug 2020 17:17:10 -0400 Subject: [PATCH 38/48] Remove Chain Limit on Hash Cache Entries; Slightly Improve Compression Entries in the hashTable chain cache aren't subject to the same aliasing that the circular chain table is subject to. As such, we don't need to stop when we cross the chain limit. We can delve deeper. :) --- lib/compress/zstd_lazy.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index d3939840..684e7e56 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -611,10 +611,6 @@ size_t ZSTD_HcFindBestMatch_generic ( return ml; } } - - if (matchIndex <= ddsMinChain) { - return ml; - } } for ( ; (ddsAttempt < nbAttempts) & (matchIndex >= ddsMinChain); ddsAttempt++) { From b81f3a37f9a21c8ac9f8ccbe67f1d5d089a8a2a1 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Fri, 21 Aug 2020 17:00:26 -0400 Subject: [PATCH 39/48] Easy: Fix Test --- tests/fuzzer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 66ecd724..c354c421 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -2963,7 +2963,7 @@ static int basicUnitTests(U32 const seed, double compressibility) cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize); CHECK_Z(cSize); - CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dict, CNBuffSize)); + CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dict, dictSize)); CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters)); ZSTD_freeCDict(cdict); From a3659fe1ef5c0609c40149b836449fc321b6a7a3 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 26 Aug 2020 16:33:13 -0400 Subject: [PATCH 40/48] Make ZSTD_dedicatedDictSearch_getCParams Wrap ZSTD_getCParams Fixes up bounds-checking, and lets us clean up what is at the moment an unnecessary duplication of the default cparams tables. 
--- lib/compress/zstd_compress.c | 149 ++++++++--------------------------- 1 file changed, 31 insertions(+), 118 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 8ffc19ed..3c1e54b3 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -864,8 +864,14 @@ ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long lo return 0; } -static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize); -static int ZSTD_dedicatedDictSearch_isSupported(int const compressionLevel, size_t const dictSize); +static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams( + int const compressionLevel, + unsigned long long srcSizeHint, + size_t const dictSize); +static int ZSTD_dedicatedDictSearch_isSupported( + int const compressionLevel, + unsigned long long srcSizeHint, + size_t const dictSize); /** * Initializes the local dict using the requested parameters. 
@@ -3511,7 +3517,8 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(const void* dict, size_t dict ZSTD_customMem customMem) { int const dedicatedDictSearch = cctxParams->enableDedicatedDictSearch && - ZSTD_dedicatedDictSearch_isSupported(cctxParams->compressionLevel, dictSize); + ZSTD_dedicatedDictSearch_isSupported( + cctxParams->compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); if (!dedicatedDictSearch) { ZSTD_compressionParameters cParams = ZSTD_getCParams_internal( cctxParams->compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); @@ -3520,7 +3527,7 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(const void* dict, size_t dict customMem); } { ZSTD_compressionParameters const cParams = ZSTD_dedicatedDictSearch_getCParams( - cctxParams->compressionLevel, dictSize); + cctxParams->compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); ZSTD_CDict* cdict = ZSTD_createCDict_advanced_internal(dictSize, dictLoadMethod, cParams, customMem); @@ -4319,124 +4326,30 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV }, }; -static const ZSTD_compressionParameters -ZSTD_dedicatedDictSearch_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { -{ /* "default" - for any dictSize > 256 KB */ - /* W, C, H, S, L, TL, strat */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* base (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 1 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 2 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 3 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 4 (not adjusted) */ - { 21, 18, 19 + ZSTD_LAZY_DDSS_BUCKET_LOG, 2, 5, 2, ZSTD_greedy }, /* level 5 */ - { 21, 19, 19 + ZSTD_LAZY_DDSS_BUCKET_LOG, 3, 5, 4, ZSTD_greedy }, /* level 6 */ - { 21, 19, 19 + ZSTD_LAZY_DDSS_BUCKET_LOG, 3, 5, 8, ZSTD_lazy }, /* level 7 */ - { 21, 19, 19 + ZSTD_LAZY_DDSS_BUCKET_LOG, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ - { 21, 19, 20 + ZSTD_LAZY_DDSS_BUCKET_LOG, 4, 5, 16, 
ZSTD_lazy2 }, /* level 9 */ - { 22, 20, 21 + ZSTD_LAZY_DDSS_BUCKET_LOG, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ - { 22, 21, 22 + ZSTD_LAZY_DDSS_BUCKET_LOG, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ - { 22, 21, 22 + ZSTD_LAZY_DDSS_BUCKET_LOG, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 13 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 14 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 15 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 16 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 17 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 18 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 19 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 20 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 21 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 } /* level 22 (not adjusted) */ -}, -{ /* for dictSize <= 256 KB */ - /* W, C, H, S, L, T, strat */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* base (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 1 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 2 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 3 (not adjusted) */ - { 18, 16, 17 + ZSTD_LAZY_DDSS_BUCKET_LOG, 2, 5, 2, ZSTD_greedy }, /* level 4 */ - { 18, 18, 18 + ZSTD_LAZY_DDSS_BUCKET_LOG, 3, 5, 2, ZSTD_greedy }, /* level 5 */ - { 18, 18, 19 + ZSTD_LAZY_DDSS_BUCKET_LOG, 3, 5, 4, ZSTD_lazy }, /* level 6 */ - { 18, 18, 19 + ZSTD_LAZY_DDSS_BUCKET_LOG, 4, 4, 4, ZSTD_lazy }, /* level 7 */ - { 18, 18, 19 + ZSTD_LAZY_DDSS_BUCKET_LOG, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 18, 18, 19 + ZSTD_LAZY_DDSS_BUCKET_LOG, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ - { 18, 18, 19 + ZSTD_LAZY_DDSS_BUCKET_LOG, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 11 (not adjusted) */ - { 0, 0, 0, 
0, 0, 0, (ZSTD_strategy)0 }, /* level 12 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 13 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 14 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 15 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 16 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 17 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 18 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 19 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 20 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 21 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 } /* level 22 (not adjusted) */ -}, -{ /* for dictSize <= 128 KB */ - /* W, C, H, S, L, T, strat */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* base (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 1 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 2 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 3 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 4 (not adjusted) */ - { 17, 16, 17 + ZSTD_LAZY_DDSS_BUCKET_LOG, 3, 4, 2, ZSTD_greedy }, /* level 5 */ - { 17, 17, 17 + ZSTD_LAZY_DDSS_BUCKET_LOG, 3, 4, 4, ZSTD_lazy }, /* level 6 */ - { 17, 17, 17 + ZSTD_LAZY_DDSS_BUCKET_LOG, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ - { 17, 17, 17 + ZSTD_LAZY_DDSS_BUCKET_LOG, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 17, 17, 17 + ZSTD_LAZY_DDSS_BUCKET_LOG, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ - { 17, 17, 17 + ZSTD_LAZY_DDSS_BUCKET_LOG, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 11 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 12 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 13 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 14 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, 
(ZSTD_strategy)0 }, /* level 15 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 16 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 17 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 18 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 19 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 20 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 21 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 } /* level 22 (not adjusted) */ -}, -{ /* for dictSize <= 16 KB */ - /* W, C, H, S, L, T, strat */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* base (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 1 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 2 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 3 (not adjusted) */ - { 14, 14, 14 + ZSTD_LAZY_DDSS_BUCKET_LOG, 4, 4, 2, ZSTD_greedy }, /* level 4 */ - { 14, 14, 14 + ZSTD_LAZY_DDSS_BUCKET_LOG, 3, 4, 4, ZSTD_lazy }, /* level 5 */ - { 14, 14, 14 + ZSTD_LAZY_DDSS_BUCKET_LOG, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ - { 14, 14, 14 + ZSTD_LAZY_DDSS_BUCKET_LOG, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ - { 14, 14, 14 + ZSTD_LAZY_DDSS_BUCKET_LOG, 8, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 9 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 10 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 11 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 12 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 13 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 14 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 15 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 16 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 17 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* 
level 18 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 19 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 20 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 }, /* level 21 (not adjusted) */ - { 0, 0, 0, 0, 0, 0, (ZSTD_strategy)0 } /* level 22 (not adjusted) */ -}, -}; - -static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize) +static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, unsigned long long srcSizeHint, size_t const dictSize) { - int const tableID = (dictSize <= 256 KB) + (dictSize <= 128 KB) + (dictSize <= 16 KB); - int const row = compressionLevel; - return ZSTD_dedicatedDictSearch_defaultCParameters[tableID][row]; + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize); + switch (cParams.strategy) { + case ZSTD_fast: + case ZSTD_dfast: + break; + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG; + break; + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + } + return cParams; } -static int ZSTD_dedicatedDictSearch_isSupported(int const compressionLevel, size_t const dictSize) +static int ZSTD_dedicatedDictSearch_isSupported(int const compressionLevel, unsigned long long srcSizeHint, size_t const dictSize) { - ZSTD_compressionParameters const cParams = ZSTD_dedicatedDictSearch_getCParams(compressionLevel, dictSize); + ZSTD_compressionParameters const cParams = ZSTD_dedicatedDictSearch_getCParams(compressionLevel, srcSizeHint, dictSize); return (cParams.strategy >= ZSTD_greedy) && (cParams.strategy <= ZSTD_lazy2); } From d332f57897394426afb4270de85aecac2a2fce3e Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Wed, 26 Aug 2020 18:33:44 -0400 Subject: [PATCH 41/48] Permit Matching Against Lowest Valid Position This comparison was previously faulty: the lowest valid position is itself valid, and we should therefore be allowed to match against it. --- lib/compress/zstd_lazy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 684e7e56..15df6125 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -544,7 +544,7 @@ size_t ZSTD_HcFindBestMatch_generic ( /* HC4 match finder */ matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); - for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { + for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) { size_t currentMl=0; if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { const BYTE* const match = base + matchIndex; @@ -649,7 +649,7 @@ size_t ZSTD_HcFindBestMatch_generic ( matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)]; - for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { + for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { size_t currentMl=0; const BYTE* const match = dmsBase + matchIndex; assert(match+4 <= dmsEnd); From c09454e28f667d6daba8ea30032a59a21c6243a8 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Fri, 28 Aug 2020 12:31:01 -0400 Subject: [PATCH 42/48] Add Warning Comment to ZSTD_createCDict_advanced2() Declaration --- lib/zstd.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/zstd.h b/lib/zstd.h index bd4e79f7..e42a4bd4 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1405,13 +1405,14 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictS ZSTD_customMem customMem); /** - * TODO: document! + * This API is temporary and is expected to change or disappear in the future! 
*/ -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_CCtx_params* cctxParams, - ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_CCtx_params* cctxParams, + ZSTD_customMem customMem); ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, From 7b5d2f72ea1762660ddab2d8215aa25414e5facc Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Fri, 28 Aug 2020 12:38:50 -0400 Subject: [PATCH 43/48] Adjust Working Context Table Sizes Back Down --- lib/compress/zstd_compress.c | 38 +++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 3c1e54b3..bec9afd7 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -872,6 +872,8 @@ static int ZSTD_dedicatedDictSearch_isSupported( int const compressionLevel, unsigned long long srcSizeHint, size_t const dictSize); +static void ZSTD_dedicatedDictSearch_revertCParams( + ZSTD_compressionParameters* cParams); /** * Initializes the local dict using the requested parameters. @@ -1661,17 +1663,23 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { - { const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams; + { + ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams; unsigned const windowLog = params.cParams.windowLog; assert(windowLog != 0); /* Resize working context table params for input only, since the dict * has its own tables. */ /* pledgedSrcSize == 0 means 0! 
*/ - params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0); + + if (cdict->matchState.dedicatedDictSearch) { + ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams); + } + + params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, 0); params.cParams.windowLog = windowLog; FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, ZSTDcrp_makeClean, zbuff), ""); - assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); + assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy); } { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc @@ -4353,6 +4361,30 @@ static int ZSTD_dedicatedDictSearch_isSupported(int const compressionLevel, unsi return (cParams.strategy >= ZSTD_greedy) && (cParams.strategy <= ZSTD_lazy2); } +/** + * Reverses the adjustment applied to cparams when enabling dedicated dict + * search. This is used to recover the params set to be used in the working + * context. (Otherwise, those tables would also grow.) + */ +static void ZSTD_dedicatedDictSearch_revertCParams( + ZSTD_compressionParameters* cParams) { + switch (cParams->strategy) { + case ZSTD_fast: + case ZSTD_dfast: + break; + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG; + break; + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + } +} + /*! ZSTD_getCParams_internal() : * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. From 2ffbde0d9565392438c840c7b34b4a76acb288c6 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Fri, 28 Aug 2020 14:14:29 -0400 Subject: [PATCH 44/48] Fix `-Wshorten-64-to-32` Error --- lib/compress/zstd_lazy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 15df6125..aa863b8d 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -485,7 +485,7 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B U32 bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG; for ( ; idx < target; idx++) { U32 i; - U32 const h = ZSTD_hashPtr( + size_t const h = ZSTD_hashPtr( ms->window.base + idx, ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG, ms->cParams.minMatch) << ZSTD_LAZY_DDSS_BUCKET_LOG; @@ -531,8 +531,8 @@ size_t ZSTD_HcFindBestMatch_generic ( const ZSTD_matchState_t* const dms = ms->dictMatchState; const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0; - const U32 ddsIdx = dictMode == ZSTD_dedicatedDictSearch - ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0; + const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch + ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0; U32 matchIndex; From cab86b074f267299c690ef9a3534941a82c9b125 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 2 Sep 2020 12:40:25 -0400 Subject: [PATCH 45/48] Clean Up Search Function Selection --- lib/compress/zstd_lazy.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index aa863b8d..5d32c78b 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -762,12 +762,27 @@ ZSTD_compressBlock_lazy_generic( typedef size_t (*searchMax_f)( ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); - searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ? - (searchMethod==search_binaryTree ? 
ZSTD_BtFindBestMatch_dictMatchState_selectMLS - : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) : - (dictMode == ZSTD_dedicatedDictSearch ? ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS : - (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS - : ZSTD_HcFindBestMatch_selectMLS)); + + const searchMax_f searchFuncs[4][2] = { + { + ZSTD_HcFindBestMatch_selectMLS, + ZSTD_BtFindBestMatch_selectMLS + }, + { + NULL, + NULL + }, + { + ZSTD_HcFindBestMatch_dictMatchState_selectMLS, + ZSTD_BtFindBestMatch_dictMatchState_selectMLS + }, + { + ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS, + NULL + } + }; + + searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree]; U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; const ZSTD_matchState_t* const dms = ms->dictMatchState; @@ -784,6 +799,8 @@ ZSTD_compressBlock_lazy_generic( 0; const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); + assert(searchMax != NULL); + DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode); /* init */ From f49c1563ff2405b0db4b3ef77dd9bbb8ee64deb1 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 2 Sep 2020 12:40:42 -0400 Subject: [PATCH 46/48] Force-Inline ZSTD_insertAndFindFirstIndex_internal() Without this, gcc was declining to inline the function in `ZSTD_noDict` mode, resulting in a ~10% slowdown. --- lib/compress/zstd_lazy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 5d32c78b..33acf687 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -447,7 +447,7 @@ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( /* Update chains up to ip (excluded) Assumption : always within prefix (i.e. 
not within extDict) */ -static U32 ZSTD_insertAndFindFirstIndex_internal( +FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( ZSTD_matchState_t* ms, const ZSTD_compressionParameters* const cParams, const BYTE* ip, U32 const mls) From d214d8c859051ea5d760006bc3e4e239359d8655 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 2 Sep 2020 13:27:11 -0400 Subject: [PATCH 47/48] Shorten Dict Mode Conditionals in Order to Improve Readability --- lib/compress/zstd_lazy.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 33acf687..5ce80532 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -785,16 +785,18 @@ ZSTD_compressBlock_lazy_generic( searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree]; U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; + const int isDMS = dictMode == ZSTD_dictMatchState; + const int isDDS = dictMode == ZSTD_dedicatedDictSearch; const ZSTD_matchState_t* const dms = ms->dictMatchState; - const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch ? + const U32 dictLowestIndex = isDMS || isDDS ? dms->window.dictLimit : 0; - const BYTE* const dictBase = dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch ? + const BYTE* const dictBase = isDMS || isDDS ? dms->window.base : NULL; - const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch ? + const BYTE* const dictLowest = isDMS || isDDS ? dictBase + dictLowestIndex : NULL; - const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch ? + const BYTE* const dictEnd = isDMS || isDDS ? dms->window.nextSrc : NULL; - const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch ? + const U32 dictIndexDelta = isDMS || isDDS ? 
prefixLowestIndex - (U32)(dictEnd - dictBase) : 0; const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); @@ -812,7 +814,7 @@ ZSTD_compressBlock_lazy_generic( if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; } - if (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) { + if (isDMS || isDDS) { /* dictMatchState repCode checks don't currently handle repCode == 0 * disabling. */ assert(offset_1 <= dictAndPrefixLength); @@ -832,7 +834,7 @@ ZSTD_compressBlock_lazy_generic( const BYTE* start=ip+1; /* check repCode */ - if (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) { + if (isDMS || isDDS) { const U32 repIndex = (U32)(ip - base) + 1 - offset_1; const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) && repIndex < prefixLowestIndex) ? @@ -875,7 +877,7 @@ ZSTD_compressBlock_lazy_generic( if ((mlRep >= 4) && (gain2 > gain1)) matchLength = mlRep, offset = 0, start = ip; } - if (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) { + if (isDMS || isDDS) { const U32 repIndex = (U32)(ip - base) - offset_1; const BYTE* repMatch = repIndex < prefixLowestIndex ? dictBase + (repIndex - dictIndexDelta) : @@ -910,7 +912,7 @@ ZSTD_compressBlock_lazy_generic( if ((mlRep >= 4) && (gain2 > gain1)) matchLength = mlRep, offset = 0, start = ip; } - if (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) { + if (isDMS || isDDS) { const U32 repIndex = (U32)(ip - base) - offset_1; const BYTE* repMatch = repIndex < prefixLowestIndex ? 
dictBase + (repIndex - dictIndexDelta) : @@ -948,7 +950,7 @@ ZSTD_compressBlock_lazy_generic( && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ { start--; matchLength++; } } - if (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) { + if (isDMS || isDDS) { U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest; @@ -964,12 +966,11 @@ _storeSequence: } /* check immediate repcode */ - if (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) { + if (isDMS || isDDS) { while (ip <= ilimit) { U32 const current2 = (U32)(ip-base); U32 const repIndex = current2 - offset_2; - const BYTE* repMatch = (dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) - && repIndex < prefixLowestIndex ? + const BYTE* repMatch = repIndex < prefixLowestIndex ? dictBase - dictIndexDelta + repIndex : base + repIndex; if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) From 07793547e649e75452cbb426fb3df1c9b6e0235e Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Fri, 4 Sep 2020 12:16:35 -0400 Subject: [PATCH 48/48] Fix Bug: Only Use DDSS Insertion on CDict MatchStates Previously, if DDSS was enabled on a CCtx and a dictionary was inserted into the CCtx, the CCtx MatchState would be filled as a DDSS struct, causing segfaults etc. This changes the check to use whether the MatchState is marked as using the DDSS (which is only ever set for CDict MatchStates), rather than looking at the CCtxParams. 
--- lib/compress/zstd_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index bec9afd7..3e253006 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2914,7 +2914,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, case ZSTD_greedy: case ZSTD_lazy: case ZSTD_lazy2: - if (chunk >= HASH_READ_SIZE && params->enableDedicatedDictSearch) + if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE); else if (chunk >= HASH_READ_SIZE) ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);