From e717a5b0ddb478108f826076f3efef3a17735735 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 18 Nov 2017 16:24:02 -0800 Subject: [PATCH] zstd_opt: minor speed optimization Calculate reference log2sums only once per series of sequences (as opposed to once per sequence) Also: improved code comments --- lib/compress/zstd_compress_internal.h | 19 +++++++++--------- lib/compress/zstd_opt.c | 28 +++++++++++++-------------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index efcbc450..862558b5 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -72,26 +72,27 @@ typedef struct { } ZSTD_optimal_t; typedef struct { - U32* litFreq; - U32* litLengthFreq; - U32* matchLengthFreq; - U32* offCodeFreq; - ZSTD_match_t* matchTable; - ZSTD_optimal_t* priceTable; + /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ + U32* litFreq; /* table of literals statistics, of size 256 */ + U32* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ + U32* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ + U32* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ + ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ + ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ U32 litSum; /* nb of literals */ U32 litLengthSum; /* nb of litLength codes */ U32 matchLengthSum; /* nb of matchLength codes */ - U32 matchSum; /* one argument to calculate `factor` */ + U32 matchSum; /* a strange argument used in calculating `factor` */ U32 offCodeSum; /* nb of offset codes */ /* begin updated by ZSTD_setLog2Prices */ U32 log2litSum; /* pow2 to compare log2(litfreq) to */ U32 log2litLengthSum; /* pow2 to compare log2(llfreq) to */ U32 log2matchLengthSum; /* pow2 to compare log2(mlfreq) to */ U32 log2offCodeSum; /* 
pow2 to compare log2(offreq) to */ - U32 factor; /* added to calculate ZSTD_getPrice() (but why?) */ + U32 factor; /* fixed cost added when calculating ZSTD_getPrice() (but why ? seems to favor less sequences) */ /* end : updated by ZSTD_setLog2Prices */ - U32 staticPrices; /* prices follow a static cost structure, statistics are irrelevant */ + U32 staticPrices; /* prices follow a pre-defined cost structure, statistics are irrelevant */ U32 cachedPrice; U32 cachedLitLength; const BYTE* cachedLiterals; diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 04f03ebc..a626a3d1 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -13,7 +13,7 @@ #include "zstd_lazy.h" /* ZSTD_updateTree, ZSTD_updateTree_extDict */ -#define ZSTD_LITFREQ_ADD 2 /* sort of scaling factor for litSum and litFreq (why the need ?), but also used for matchSum ? */ +#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats. Also used for matchSum (?) 
*/ #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ #define ZSTD_MAX_PRICE (1<<30) @@ -161,25 +161,26 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_getPrice(optState_t* optPtr, U32 litLength, const } -static void ZSTD_updatePrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) +static void ZSTD_updateStats(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offsetCode, U32 matchLength) { - U32 u; - /* literals */ - optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; - for (u=0; u < litLength; u++) - optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; + { U32 u; + optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; + for (u=0; u < litLength; u++) + optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; + } /* literal Length */ - { const U32 llCode = ZSTD_LLcode(litLength); + { U32 const llCode = ZSTD_LLcode(litLength); optPtr->litLengthFreq[llCode]++; optPtr->litLengthSum++; } - /* match offset */ - { BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1); - optPtr->offCodeSum++; + /* match offset code (0-2=>repCode; 3+=>offset+2) */ + { U32 const offCode = ZSTD_highbit32(offsetCode+1); + assert(offCode <= MaxOff); optPtr->offCodeFreq[offCode]++; + optPtr->offCodeSum++; } /* match Length */ @@ -188,8 +189,6 @@ static void ZSTD_updatePrice(optState_t* optPtr, U32 litLength, const BYTE* lite optPtr->matchLengthFreq[mlCode]++; optPtr->matchLengthSum++; } - - ZSTD_setLog2Prices(optPtr); } @@ -646,10 +645,11 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ } } - ZSTD_updatePrice(optStatePtr, llen, anchor, offset, mlen); + ZSTD_updateStats(optStatePtr, llen, anchor, offset, mlen); ZSTD_storeSeq(seqStorePtr, llen, anchor, offset, mlen-MINMATCH); anchor = ip; } } + ZSTD_setLog2Prices(optStatePtr); } /* for (cur=0; cur < last_pos; ) */ /* Save reps for next block */