From a155061328c06587fc3951403b3603cb58e2f297 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 8 May 2018 12:32:16 -0700 Subject: [PATCH 01/19] minor code refactor for readability removed some useless operations from optimal parser (should not change performance, too small a difference) --- lib/compress/zstd_compress.c | 33 ++++++++---- lib/compress/zstd_compress_internal.h | 14 ++--- lib/compress/zstd_opt.c | 76 ++++++++++++++------------- 3 files changed, 68 insertions(+), 55 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index f3a49e67..76b471c5 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -995,7 +995,11 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e; -static void* ZSTD_reset_matchState(ZSTD_matchState_t* ms, void* ptr, ZSTD_compressionParameters const* cParams, ZSTD_compResetPolicy_e const crp, U32 const forCCtx) +static void* +ZSTD_reset_matchState(ZSTD_matchState_t* ms, + void* ptr, + const ZSTD_compressionParameters* cParams, + ZSTD_compResetPolicy_e const crp, U32 const forCCtx) { size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 
0 : ((size_t)1 << cParams->chainLog); size_t const hSize = ((size_t)1) << cParams->hashLog; @@ -1285,7 +1289,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, /* copy dictionary offsets */ { - ZSTD_matchState_t const* srcMatchState = &srcCCtx->blockState.matchState; + const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; dstMatchState->window = srcMatchState->window; dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; @@ -1985,8 +1989,9 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, const void* src, size_t srcSize) { ZSTD_matchState_t* const ms = &zc->blockState.matchState; - DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", - (U32)dstCapacity, ms->window.dictLimit, ms->nextToUpdate); + DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%zu, dictLimit=%u, nextToUpdate=%u)", + dstCapacity, ms->window.dictLimit, ms->nextToUpdate); + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.searchLength); return 0; /* don't even attempt compression below a certain srcSize */ @@ -1997,6 +2002,8 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, { const BYTE* const base = ms->window.base; const BYTE* const istart = (const BYTE*)src; const U32 current = (U32)(istart-base); + assert(istart >= base); + if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */ if (current > ms->nextToUpdate + 384) ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384)); } @@ -2369,6 +2376,8 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, size_t dictID; ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1< 8); + assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); dictPtr += 4; /* skip magic number */ dictID = params->fParams.noDictIDFlag ? 
0 : MEM_readLE32(dictPtr); @@ -2447,12 +2456,14 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, /** ZSTD_compress_insertDictionary() : * @return : dictID, or an error code */ -static size_t ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms, - ZSTD_CCtx_params const* params, - const void* dict, size_t dictSize, - ZSTD_dictContentType_e dictContentType, - ZSTD_dictTableLoadMethod_e dtlm, - void* workspace) +static size_t +ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, + ZSTD_matchState_t* ms, + const ZSTD_CCtx_params* params, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + void* workspace) { DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); if ((dict==NULL) || (dictSize<=8)) return 0; @@ -2726,7 +2737,7 @@ static size_t ZSTD_initCDict_internal( ZSTD_dictContentType_e dictContentType, ZSTD_compressionParameters cParams) { - DEBUGLOG(3, "ZSTD_initCDict_internal, dictContentType %u", (U32)dictContentType); + DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (U32)dictContentType); assert(!ZSTD_checkCParams(cParams)); cdict->cParams = cParams; if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 0a19b3ec..bbb66a8e 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -95,7 +95,7 @@ typedef struct { U32 log2matchLengthSum; /* pow2 to compare log2(mlfreq) to */ U32 log2offCodeSum; /* pow2 to compare log2(offreq) to */ /* end : updated by ZSTD_setLog2Prices */ - U32 staticPrices; /* prices follow a pre-defined cost structure, statistics are irrelevant */ + U32 predefPrices; /* prices follow a pre-defined cost structure, statistics are irrelevant */ } optState_t; typedef struct { @@ -112,11 +112,11 @@ typedef struct { } ZSTD_window_t; 
typedef struct { - ZSTD_window_t window; /* State for window round buffer management */ - U32 loadedDictEnd; /* index of end of dictionary */ - U32 nextToUpdate; /* index from which to continue table update */ - U32 nextToUpdate3; /* index from which to continue table update */ - U32 hashLog3; /* dispatch table : larger == faster, more memory */ + ZSTD_window_t window; /* State for window round buffer management */ + U32 loadedDictEnd; /* index of end of dictionary */ + U32 nextToUpdate; /* index from which to continue table update */ + U32 nextToUpdate3; /* index from which to continue table update */ + U32 hashLog3; /* dispatch table : larger == faster, more memory */ U32* hashTable; U32* hashTable3; U32* chainTable; @@ -161,7 +161,7 @@ typedef struct { rawSeq* seq; /* The start of the sequences */ size_t pos; /* The position where reading stopped. <= size. */ size_t size; /* The number of sequences. <= capacity. */ - size_t capacity; /* The capacity of the `seq` pointer */ + size_t capacity; /* The capacity starting from `seq` pointer */ } rawSeqStore_t; struct ZSTD_CCtx_params_s { diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index f63f0c58..80b9e5e0 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -32,11 +32,11 @@ static void ZSTD_setLog2Prices(optState_t* optPtr) static void ZSTD_rescaleFreqs(optState_t* const optPtr, const BYTE* const src, size_t const srcSize) { - optPtr->staticPrices = 0; + optPtr->predefPrices = 0; if (optPtr->litLengthSum == 0) { /* first init */ unsigned u; - if (srcSize <= 1024) optPtr->staticPrices = 1; + if (srcSize <= 1024) optPtr->predefPrices = 1; assert(optPtr->litFreq!=NULL); for (u=0; u<=MaxLit; u++) @@ -89,12 +89,12 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, /* ZSTD_rawLiteralsCost() : - * cost of literals (only) in given segment (which length can be null) + * cost of literals (only) in specified segment (which length can be 0). 
* does not include cost of literalLength symbol */ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, const optState_t* const optPtr) { - if (optPtr->staticPrices) return (litLength*6); /* 6 bit per literal - no statistic used */ + if (optPtr->predefPrices) return (litLength*6); /* 6 bit per literal - no statistic used */ if (litLength == 0) return 0; /* literals */ @@ -110,7 +110,7 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, * cost of literalLength symbol */ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr) { - if (optPtr->staticPrices) return ZSTD_highbit32((U32)litLength+1); + if (optPtr->predefPrices) return ZSTD_highbit32((U32)litLength+1); /* literal Length */ { U32 const llCode = ZSTD_LLcode(litLength); @@ -135,7 +135,7 @@ static U32 ZSTD_fullLiteralsCost(const BYTE* const literals, U32 const litLength * to provide a cost which is directly comparable to a match ending at same position */ static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr) { - if (optPtr->staticPrices) return ZSTD_highbit32(litLength+1); + if (optPtr->predefPrices) return ZSTD_highbit32(litLength+1); /* literal Length */ { U32 const llCode = ZSTD_LLcode(litLength); @@ -166,18 +166,18 @@ static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLe * Provides the cost of the match part (offset + matchLength) of a sequence * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. 
* optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */ -FORCE_INLINE_TEMPLATE U32 ZSTD_getMatchPrice( - U32 const offset, U32 const matchLength, - const optState_t* const optPtr, - int const optLevel) +FORCE_INLINE_TEMPLATE U32 +ZSTD_getMatchPrice(U32 const offset, U32 const matchLength, + const optState_t* const optPtr, + int const optLevel) { U32 price; U32 const offCode = ZSTD_highbit32(offset+1); U32 const mlBase = matchLength - MINMATCH; assert(matchLength >= MINMATCH); - if (optPtr->staticPrices) /* fixed scheme, do not use statistics */ - return ZSTD_highbit32((U32)mlBase+1) + 16 + offCode; + if (optPtr->predefPrices) /* fixed scheme, do not use statistics */ + return ZSTD_highbit32(mlBase+1) + 16 + offCode; price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1); if ((optLevel<2) /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */ @@ -662,12 +662,13 @@ static int ZSTD_literalsContribution_cached( return contribution; } -FORCE_INLINE_TEMPLATE -size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore, - U32 rep[ZSTD_REP_NUM], - ZSTD_compressionParameters const* cParams, - const void* src, size_t srcSize, - const int optLevel, const int extDict) +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const ZSTD_compressionParameters* cParams, + const void* src, size_t srcSize, + const int optLevel, const int extDict) { optState_t* const optStatePtr = &ms->opt; const BYTE* const istart = (const BYTE*)src; @@ -705,17 +706,18 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore /* initialize opt[0] */ { U32 i ; for (i=0; i immediate encoding */ { U32 const maxML = matches[nbMatches-1].len; - DEBUGLOG(7, "found %u matches of maxLength=%u and offset=%u at cPos=%u => start new serie", - 
nbMatches, maxML, matches[nbMatches-1].off, (U32)(ip-prefixStart)); + U32 const maxOffset = matches[nbMatches-1].off; + DEBUGLOG(7, "found %u matches of maxLength=%u and maxOffset=%u at cPos=%u => start new serie", + nbMatches, maxML, maxOffset, (U32)(ip-prefixStart)); if (maxML > sufficient_len) { best_mlen = maxML; - best_off = matches[nbMatches-1].off; + best_off = maxOffset; DEBUGLOG(7, "large match (%u>%u), immediate encoding", best_mlen, sufficient_len); cur = 0; @@ -727,22 +729,23 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore { U32 const literalsPrice = ZSTD_fullLiteralsCost_cached(&cachedLitPrice, anchor, litlen, optStatePtr); U32 pos; U32 matchNb; - for (pos = 0; pos < minMatch; pos++) { - opt[pos].mlen = 1; - opt[pos].price = ZSTD_MAX_PRICE; + for (pos = 1; pos < minMatch; pos++) { + opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */ } for (matchNb = 0; matchNb < nbMatches; matchNb++) { U32 const offset = matches[matchNb].off; U32 const end = matches[matchNb].len; repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0); for ( ; pos <= end ; pos++ ) { - U32 const matchPrice = literalsPrice + ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel); + U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel); + U32 const sequencePrice = literalsPrice + matchPrice; DEBUGLOG(7, "rPos:%u => set initial price : %u", - pos, matchPrice); + pos, sequencePrice); opt[pos].mlen = pos; opt[pos].off = offset; opt[pos].litlen = litlen; - opt[pos].price = matchPrice; + opt[pos].price = sequencePrice; + ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory)); memcpy(opt[pos].rep, &repHistory, sizeof(repHistory)); } } last_pos = pos-1; @@ -778,7 +781,7 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore if (cur == last_pos) break; - if ( (optLevel==0) /*static*/ + if ( (optLevel==0) /*static_test*/ && (opt[cur+1].price <= 
opt[cur].price) ) continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ @@ -795,13 +798,12 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore cur, nbMatches, maxML); if ( (maxML > sufficient_len) - | (cur + maxML >= ZSTD_OPT_NUM) ) { + || (cur + maxML >= ZSTD_OPT_NUM) ) { best_mlen = maxML; best_off = matches[nbMatches-1].off; last_pos = cur + 1; goto _shortestPath; - } - } + } } /* set prices using matches found at position == cur */ for (matchNb = 0; matchNb < nbMatches; matchNb++) { @@ -814,21 +816,22 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore DEBUGLOG(7, "testing match %u => offCode=%u, mlen=%u, llen=%u", matchNb, matches[matchNb].off, lastML, litlen); - for (mlen = lastML; mlen >= startML; mlen--) { + for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ U32 const pos = cur + mlen; int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); if ((pos > last_pos) || (price < opt[pos].price)) { DEBUGLOG(7, "rPos:%u => new better price (%u<%u)", pos, price, opt[pos].price); - while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } + while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */ opt[pos].mlen = mlen; opt[pos].off = offset; opt[pos].litlen = litlen; opt[pos].price = price; + ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory)); memcpy(opt[pos].rep, &repHistory, sizeof(repHistory)); } else { - if (optLevel==0) break; /* gets ~+10% speed for about -0.01 ratio loss */ + if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ } } } } } /* for (cur = 1; cur <= last_pos; cur++) */ @@ -878,8 +881,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ if (repCode >= 2) rep[2] = rep[1]; rep[1] = rep[0]; rep[0] = currentOffset; - } - } + } } ZSTD_updateStats(optStatePtr, llen, anchor, offset, 
mlen); ZSTD_storeSeq(seqStore, llen, anchor, offset, mlen-MINMATCH); From 338f738c242d857acf3828c27cd475df7657178e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 8 May 2018 15:37:06 -0700 Subject: [PATCH 02/19] pass entropy tables to optimal parser for proper estimation of symbol's weights when using dictionary compression. Note : using only huffman costs is not good enough, presumably because sequence symbol costs are incorrect. --- lib/common/huf.h | 11 +++++++++-- lib/compress/huf_compress.c | 7 +++++++ lib/compress/zstd_compress.c | 1 + lib/compress/zstd_compress_internal.h | 5 ++++- lib/compress/zstd_opt.c | 24 ++++++++++++++++++------ 5 files changed, 39 insertions(+), 9 deletions(-) diff --git a/lib/common/huf.h b/lib/common/huf.h index b4645b4e..1f46fda2 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -208,7 +208,7 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si typedef enum { HUF_repeat_none, /**< Cannot use the previous table */ HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ - HUF_repeat_valid /**< Can use the previous table and it is asumed to be valid */ + HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ } HUF_repeat; /** HUF_compress4X_repeat() : * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. @@ -227,7 +227,9 @@ size_t HUF_compress4X_repeat(void* dst, size_t dstSize, */ #define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) #define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) -size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize); +size_t HUF_buildCTable_wksp (HUF_CElt* tree, + const U32* count, U32 maxSymbolValue, U32 maxNbBits, + void* workSpace, size_t wkspSize); /*! 
HUF_readStats() : * Read compact Huffman tree, saved by HUF_writeCTable(). @@ -242,6 +244,11 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, * Loading a CTable saved with HUF_writeCTable() */ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); +/** HUF_getNbBits() : + * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX + * Note 1 : is not inlined, as HUF_CElt definition is private + * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ +U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue); /* * HUF_decompress() does the following: diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index 83230b41..c01a2381 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -216,6 +216,13 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32* maxSymbolValuePtr, const void* src return readSize; } +U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue) +{ + const HUF_CElt* table = (const HUF_CElt*)symbolTable; + assert(symbolValue <= HUF_SYMBOLVALUE_MAX); + return table[symbolValue].nbBits; +} + typedef struct nodeElt_s { U32 count; diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 76b471c5..d3e52c62 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1997,6 +1997,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, return 0; /* don't even attempt compression below a certain srcSize */ } ZSTD_resetSeqStore(&(zc->seqStore)); + ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; /* required for optimal parser to read stats from dictionary */ /* limited update after a very long match */ { const BYTE* const base = ms->window.base; diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index bbb66a8e..eeb7b230 100644 --- a/lib/compress/zstd_compress_internal.h +++ 
b/lib/compress/zstd_compress_internal.h @@ -76,6 +76,8 @@ typedef struct { U32 rep[ZSTD_REP_NUM]; } ZSTD_optimal_t; +typedef enum { zop_none=0, zop_predef, zop_static } ZSTD_OptPrice_e; + typedef struct { /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ U32* litFreq; /* table of literals statistics, of size 256 */ @@ -95,7 +97,8 @@ typedef struct { U32 log2matchLengthSum; /* pow2 to compare log2(mlfreq) to */ U32 log2offCodeSum; /* pow2 to compare log2(offreq) to */ /* end : updated by ZSTD_setLog2Prices */ - U32 predefPrices; /* prices follow a pre-defined cost structure, statistics are irrelevant */ + ZSTD_OptPrice_e priceType; /* prices follow a pre-defined cost structure, statistics are irrelevant */ + const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated symbol costs, from dictionary */ } optState_t; typedef struct { diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 80b9e5e0..ecd676a6 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -32,11 +32,15 @@ static void ZSTD_setLog2Prices(optState_t* optPtr) static void ZSTD_rescaleFreqs(optState_t* const optPtr, const BYTE* const src, size_t const srcSize) { - optPtr->predefPrices = 0; + optPtr->priceType = zop_none; if (optPtr->litLengthSum == 0) { /* first init */ unsigned u; - if (srcSize <= 1024) optPtr->predefPrices = 1; + if (srcSize <= 1024) optPtr->priceType = zop_predef; + assert(optPtr->symbolCosts != NULL); + if (0 && optPtr->symbolCosts->hufCTable_repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */ + optPtr->priceType = zop_static; + } assert(optPtr->litFreq!=NULL); for (u=0; u<=MaxLit; u++) @@ -94,7 +98,15 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, const optState_t* const optPtr) { - if (optPtr->predefPrices) return (litLength*6); /* 6 bit per literal - no statistic used */ + if (optPtr->priceType 
== zop_static) { + U32 u, cost; + assert(optPtr->symbolCosts != NULL); + assert(optPtr->symbolCosts->hufCTable_repeatMode == HUF_repeat_valid); + for (u=0, cost=0; u < litLength; u++) + cost += HUF_getNbBits(optPtr->symbolCosts->hufCTable, literals[u]); + return cost; + } + if (optPtr->priceType == zop_predef) return (litLength*6); /* 6 bit per literal - no statistic used */ if (litLength == 0) return 0; /* literals */ @@ -110,7 +122,7 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, * cost of literalLength symbol */ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr) { - if (optPtr->predefPrices) return ZSTD_highbit32((U32)litLength+1); + if (optPtr->priceType == zop_predef) return ZSTD_highbit32((U32)litLength+1); /* literal Length */ { U32 const llCode = ZSTD_LLcode(litLength); @@ -135,7 +147,7 @@ static U32 ZSTD_fullLiteralsCost(const BYTE* const literals, U32 const litLength * to provide a cost which is directly comparable to a match ending at same position */ static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr) { - if (optPtr->predefPrices) return ZSTD_highbit32(litLength+1); + if (optPtr->priceType == zop_predef) return ZSTD_highbit32(litLength+1); /* literal Length */ { U32 const llCode = ZSTD_LLcode(litLength); @@ -176,7 +188,7 @@ ZSTD_getMatchPrice(U32 const offset, U32 const matchLength, U32 const mlBase = matchLength - MINMATCH; assert(matchLength >= MINMATCH); - if (optPtr->predefPrices) /* fixed scheme, do not use statistics */ + if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ return ZSTD_highbit32(mlBase+1) + 16 + offCode; price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1); From 6a3c34aa58813d55cab5e5c884c95d8ae70b3205 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 8 May 2018 16:11:21 -0700 Subject: [PATCH 03/19] opt: estimate cost of both Hufman and FSE symbols For FSE 
symbols : provide an upper bound, in nb of bits, since cost function is not able to store fractional bit costs. --- lib/common/fse.h | 6 ++++++ lib/compress/zstd_opt.c | 28 ++++++++++++++++++++++++---- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/lib/common/fse.h b/lib/common/fse.h index 6a1d272b..556e6c52 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -575,6 +575,12 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt BIT_flushBits(bitC); } +MEM_STATIC U32 FSE_getMaxNbBits(const FSE_symbolCompressionTransform* symbolTT, U32 symbolValue) +{ + assert(symbolValue <= FSE_MAX_SYMBOL_VALUE); + return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; +} + /* ====== Decompression ====== */ diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index ecd676a6..a107fccc 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -38,7 +38,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, unsigned u; if (srcSize <= 1024) optPtr->priceType = zop_predef; assert(optPtr->symbolCosts != NULL); - if (0 && optPtr->symbolCosts->hufCTable_repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */ + if (optPtr->symbolCosts->hufCTable_repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */ optPtr->priceType = zop_static; } @@ -122,12 +122,17 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, * cost of literalLength symbol */ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr) { + if (optPtr->priceType == zop_static) { + U32 const llCode = ZSTD_LLcode(litLength); + FSE_CState_t cstate; + FSE_initCState(&cstate, optPtr->symbolCosts->litlengthCTable); + return LL_bits[llCode] + FSE_getMaxNbBits(cstate.symbolTT, llCode); + } if (optPtr->priceType == zop_predef) return ZSTD_highbit32((U32)litLength+1); /* literal Length */ { U32 const llCode = ZSTD_LLcode(litLength); - U32 
const price = LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1); - return price; + return LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1); } } @@ -147,7 +152,13 @@ static U32 ZSTD_fullLiteralsCost(const BYTE* const literals, U32 const litLength * to provide a cost which is directly comparable to a match ending at same position */ static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr) { - if (optPtr->priceType == zop_predef) return ZSTD_highbit32(litLength+1); + if (optPtr->priceType == zop_static) { + U32 const llCode = ZSTD_LLcode(litLength); + FSE_CState_t cstate; + FSE_initCState(&cstate, optPtr->symbolCosts->litlengthCTable); + return (int)(LL_bits[llCode] + FSE_getMaxNbBits(cstate.symbolTT, llCode)) - FSE_getMaxNbBits(cstate.symbolTT, 0); + } + if (optPtr->priceType >= zop_predef) return ZSTD_highbit32(litLength+1); /* literal Length */ { U32 const llCode = ZSTD_LLcode(litLength); @@ -188,6 +199,15 @@ ZSTD_getMatchPrice(U32 const offset, U32 const matchLength, U32 const mlBase = matchLength - MINMATCH; assert(matchLength >= MINMATCH); + if (optPtr->priceType == zop_static) { + U32 const mlCode = ZSTD_MLcode(mlBase); + FSE_CState_t mlstate, offstate; + FSE_initCState(&mlstate, optPtr->symbolCosts->matchlengthCTable); + FSE_initCState(&offstate, optPtr->symbolCosts->offcodeCTable); + return FSE_getMaxNbBits(offstate.symbolTT, offCode) + offCode + + FSE_getMaxNbBits(mlstate.symbolTT, mlCode) + ML_bits[mlCode]; + } + if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ return ZSTD_highbit32(mlBase+1) + 16 + offCode; From 1aff63b114d0ca329d6066d9d9d5ce002dbf9f59 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 8 May 2018 16:19:04 -0700 Subject: [PATCH 04/19] opt: shift all costs by 8 bits (* 256) making it possible to represent fractional bit costs. 
--- lib/compress/zstd_opt.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index a107fccc..d800254c 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -104,7 +104,7 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, assert(optPtr->symbolCosts->hufCTable_repeatMode == HUF_repeat_valid); for (u=0, cost=0; u < litLength; u++) cost += HUF_getNbBits(optPtr->symbolCosts->hufCTable, literals[u]); - return cost; + return cost << 8; } if (optPtr->priceType == zop_predef) return (litLength*6); /* 6 bit per literal - no statistic used */ if (litLength == 0) return 0; @@ -114,7 +114,7 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, U32 cost = litLength * optPtr->log2litSum; for (u=0; u < litLength; u++) cost -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1); - return cost; + return cost << 8; } } @@ -126,13 +126,13 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP U32 const llCode = ZSTD_LLcode(litLength); FSE_CState_t cstate; FSE_initCState(&cstate, optPtr->symbolCosts->litlengthCTable); - return LL_bits[llCode] + FSE_getMaxNbBits(cstate.symbolTT, llCode); + return (LL_bits[llCode] + FSE_getMaxNbBits(cstate.symbolTT, llCode)) << 8; } if (optPtr->priceType == zop_predef) return ZSTD_highbit32((U32)litLength+1); /* literal Length */ { U32 const llCode = ZSTD_LLcode(litLength); - return LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1); + return (LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1)) << 8; } } @@ -156,15 +156,16 @@ static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* con U32 const llCode = ZSTD_LLcode(litLength); FSE_CState_t cstate; FSE_initCState(&cstate, optPtr->symbolCosts->litlengthCTable); - return (int)(LL_bits[llCode] + 
FSE_getMaxNbBits(cstate.symbolTT, llCode)) - FSE_getMaxNbBits(cstate.symbolTT, 0); + return ((int)(LL_bits[llCode] + FSE_getMaxNbBits(cstate.symbolTT, llCode)) - FSE_getMaxNbBits(cstate.symbolTT, 0)) * 256; } if (optPtr->priceType >= zop_predef) return ZSTD_highbit32(litLength+1); /* literal Length */ { U32 const llCode = ZSTD_LLcode(litLength); - int const contribution = LL_bits[llCode] + int const contribution = (LL_bits[llCode] + ZSTD_highbit32(optPtr->litLengthFreq[0]+1) - - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1); + - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1)) + * 256; #if 1 return contribution; #else @@ -204,8 +205,9 @@ ZSTD_getMatchPrice(U32 const offset, U32 const matchLength, FSE_CState_t mlstate, offstate; FSE_initCState(&mlstate, optPtr->symbolCosts->matchlengthCTable); FSE_initCState(&offstate, optPtr->symbolCosts->offcodeCTable); - return FSE_getMaxNbBits(offstate.symbolTT, offCode) + offCode - + FSE_getMaxNbBits(mlstate.symbolTT, mlCode) + ML_bits[mlCode]; + return (FSE_getMaxNbBits(offstate.symbolTT, offCode) + offCode + + FSE_getMaxNbBits(mlstate.symbolTT, mlCode) + ML_bits[mlCode]) + * 256; } if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ @@ -220,7 +222,7 @@ ZSTD_getMatchPrice(U32 const offset, U32 const matchLength, } DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price); - return price; + return price << 8; } static void ZSTD_updateStats(optState_t* const optPtr, From ba2ad9b6b993ace72b153e579be0823a61dc8769 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 8 May 2018 17:43:13 -0700 Subject: [PATCH 05/19] implemented fractional bit cost evaluation for FSE symbols. While it seems to work, the gains are negligible compared to rough maxNbBits evaluation. There are even a few losses sometimes, that still need to be explained. Furthermode, there are still cases where btlazy2 does a better job than btopt, which seems rather strange too. 
--- lib/common/fse.h | 15 ++++++++++++++- lib/compress/zstd_opt.c | 15 +++++++++------ lib/decompress/huf_decompress.c | 2 +- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/lib/common/fse.h b/lib/common/fse.h index 556e6c52..67707855 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -577,10 +577,23 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt MEM_STATIC U32 FSE_getMaxNbBits(const FSE_symbolCompressionTransform* symbolTT, U32 symbolValue) { - assert(symbolValue <= FSE_MAX_SYMBOL_VALUE); return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; } +/* FSE_bitCost_b256() : + * Approximate symbol cost, + * provide fractional value, using fixed-point format (8 bit) */ +MEM_STATIC U32 FSE_bitCost_b256(const FSE_symbolCompressionTransform* symbolTT, U32 tableLog, U32 symbolValue) +{ + U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; + U32 const threshold = (minNbBits+1) << 16; + assert(symbolTT[symbolValue].deltaNbBits + (1<> tableLog; /* linear interpolation (very approximate) */ + assert(normalizedDeltaFromThreshold <= 256); + return (minNbBits+1)*256 - normalizedDeltaFromThreshold; +} + /* ====== Decompression ====== */ diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index d800254c..80edb1a7 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -126,11 +126,13 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP U32 const llCode = ZSTD_LLcode(litLength); FSE_CState_t cstate; FSE_initCState(&cstate, optPtr->symbolCosts->litlengthCTable); - return (LL_bits[llCode] + FSE_getMaxNbBits(cstate.symbolTT, llCode)) << 8; + U32 const price = LL_bits[llCode]*256 + FSE_bitCost_b256(cstate.symbolTT, cstate.stateLog, llCode); + DEBUGLOG(8, "ZSTD_litLengthPrice: ll=%u, bitCost=%.2f", litLength, (double)price / 256); + return price; } if (optPtr->priceType == zop_predef) return ZSTD_highbit32((U32)litLength+1); - /* literal Length */ + /* 
dynamic statistics */ { U32 const llCode = ZSTD_LLcode(litLength); return (LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1)) << 8; } @@ -156,7 +158,9 @@ static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* con U32 const llCode = ZSTD_LLcode(litLength); FSE_CState_t cstate; FSE_initCState(&cstate, optPtr->symbolCosts->litlengthCTable); - return ((int)(LL_bits[llCode] + FSE_getMaxNbBits(cstate.symbolTT, llCode)) - FSE_getMaxNbBits(cstate.symbolTT, 0)) * 256; + return (int)(LL_bits[llCode] * 256) + + FSE_bitCost_b256(cstate.symbolTT, cstate.stateLog, llCode) + - FSE_bitCost_b256(cstate.symbolTT, cstate.stateLog, 0); } if (optPtr->priceType >= zop_predef) return ZSTD_highbit32(litLength+1); @@ -205,9 +209,8 @@ ZSTD_getMatchPrice(U32 const offset, U32 const matchLength, FSE_CState_t mlstate, offstate; FSE_initCState(&mlstate, optPtr->symbolCosts->matchlengthCTable); FSE_initCState(&offstate, optPtr->symbolCosts->offcodeCTable); - return (FSE_getMaxNbBits(offstate.symbolTT, offCode) + offCode - + FSE_getMaxNbBits(mlstate.symbolTT, mlCode) + ML_bits[mlCode]) - * 256; + return FSE_bitCost_b256(offstate.symbolTT, offstate.stateLog, offCode) + offCode*256 + + FSE_bitCost_b256(mlstate.symbolTT, mlstate.stateLog, mlCode) + ML_bits[mlCode]*256; } if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index 73f5c46c..2c2d1380 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -965,7 +965,7 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) { assert(dstSize > 0); - assert(dstSize <= 128 KB); + assert(dstSize <= 128*1024); /* decoder timing evaluation */ { U32 const Q = (cSrcSize >= dstSize) ? 
15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ U32 const D256 = (U32)(dstSize >> 8); From c0da0f5e9e038d1b362561c73f821d34bb817f70 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 9 May 2018 10:48:09 -0700 Subject: [PATCH 06/19] switchable bit-approximation / fractional-bit accuracy modes also : makes it possible to select nb of fractional bits. --- lib/common/fse.h | 18 +++++++++++------- lib/compress/zstd_opt.c | 42 +++++++++++++++++++++++++---------------- 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/lib/common/fse.h b/lib/common/fse.h index 67707855..8e44c1a4 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -582,16 +582,20 @@ MEM_STATIC U32 FSE_getMaxNbBits(const FSE_symbolCompressionTransform* symbolTT, /* FSE_bitCost_b256() : * Approximate symbol cost, - * provide fractional value, using fixed-point format (8 bit) */ -MEM_STATIC U32 FSE_bitCost_b256(const FSE_symbolCompressionTransform* symbolTT, U32 tableLog, U32 symbolValue) + * provide fractional value, using fixed-point format (accuracyLog fractional bits) */ +MEM_STATIC U32 FSE_bitCost(const FSE_symbolCompressionTransform* symbolTT, U32 tableLog, U32 symbolValue, U32 accuracyLog) { U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; U32 const threshold = (minNbBits+1) << 16; - assert(symbolTT[symbolValue].deltaNbBits + (1<> tableLog; /* linear interpolation (very approximate) */ - assert(normalizedDeltaFromThreshold <= 256); - return (minNbBits+1)*256 - normalizedDeltaFromThreshold; + assert(tableLog < 16); + U32 const tableSize = 1 << tableLog; + assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); + U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); + assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */ + U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */ + U32 const bitMultiplier 
= 1 << accuracyLog; + assert(normalizedDeltaFromThreshold <= bitMultiplier); + return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold; } diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 80edb1a7..67db85eb 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -91,6 +91,15 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, ZSTD_setLog2Prices(optPtr); } +#if 1 /* approximation at bit level */ +# define BITCOST_ACCURACY 0 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define BITCOST_SYMBOL(t,l,s) ((void)l, FSE_getMaxNbBits(t,s)*BITCOST_MULTIPLIER) +#else /* fractional bit accuracy */ +# define BITCOST_ACCURACY 8 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define BITCOST_SYMBOL(t,l,s) FSE_bitCost(t,l,s,BITCOST_ACCURACY) +#endif /* ZSTD_rawLiteralsCost() : * cost of literals (only) in specified segment (which length can be 0). @@ -98,23 +107,23 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, const optState_t* const optPtr) { + if (litLength == 0) return 0; + if (optPtr->priceType == zop_predef) return (litLength*6); /* 6 bit per literal - no statistic used */ if (optPtr->priceType == zop_static) { U32 u, cost; assert(optPtr->symbolCosts != NULL); assert(optPtr->symbolCosts->hufCTable_repeatMode == HUF_repeat_valid); for (u=0, cost=0; u < litLength; u++) cost += HUF_getNbBits(optPtr->symbolCosts->hufCTable, literals[u]); - return cost << 8; + return cost * BITCOST_MULTIPLIER; } - if (optPtr->priceType == zop_predef) return (litLength*6); /* 6 bit per literal - no statistic used */ - if (litLength == 0) return 0; - /* literals */ + /* dynamic statistics */ { U32 u; U32 cost = litLength * optPtr->log2litSum; for (u=0; u < litLength; u++) cost -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1); - return cost << 8; + return cost * BITCOST_MULTIPLIER; } } @@ -126,15 +135,15 @@ static U32 ZSTD_litLengthPrice(U32 
const litLength, const optState_t* const optP U32 const llCode = ZSTD_LLcode(litLength); FSE_CState_t cstate; FSE_initCState(&cstate, optPtr->symbolCosts->litlengthCTable); - U32 const price = LL_bits[llCode]*256 + FSE_bitCost_b256(cstate.symbolTT, cstate.stateLog, llCode); - DEBUGLOG(8, "ZSTD_litLengthPrice: ll=%u, bitCost=%.2f", litLength, (double)price / 256); + U32 const price = LL_bits[llCode]*BITCOST_MULTIPLIER + BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, llCode); + DEBUGLOG(8, "ZSTD_litLengthPrice: ll=%u, bitCost=%.2f", litLength, (double)price / BITCOST_MULTIPLIER); return price; } if (optPtr->priceType == zop_predef) return ZSTD_highbit32((U32)litLength+1); /* dynamic statistics */ { U32 const llCode = ZSTD_LLcode(litLength); - return (LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1)) << 8; + return (LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1)) * BITCOST_MULTIPLIER; } } @@ -158,18 +167,18 @@ static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* con U32 const llCode = ZSTD_LLcode(litLength); FSE_CState_t cstate; FSE_initCState(&cstate, optPtr->symbolCosts->litlengthCTable); - return (int)(LL_bits[llCode] * 256) - + FSE_bitCost_b256(cstate.symbolTT, cstate.stateLog, llCode) - - FSE_bitCost_b256(cstate.symbolTT, cstate.stateLog, 0); + return (int)(LL_bits[llCode] * BITCOST_MULTIPLIER) + + BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, llCode) + - BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, 0); } if (optPtr->priceType >= zop_predef) return ZSTD_highbit32(litLength+1); - /* literal Length */ + /* dynamic statistics */ { U32 const llCode = ZSTD_LLcode(litLength); int const contribution = (LL_bits[llCode] + ZSTD_highbit32(optPtr->litLengthFreq[0]+1) - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1)) - * 256; + * BITCOST_MULTIPLIER; #if 1 return contribution; #else @@ -209,13 +218,14 @@ ZSTD_getMatchPrice(U32 const offset, U32 const 
matchLength, FSE_CState_t mlstate, offstate; FSE_initCState(&mlstate, optPtr->symbolCosts->matchlengthCTable); FSE_initCState(&offstate, optPtr->symbolCosts->offcodeCTable); - return FSE_bitCost_b256(offstate.symbolTT, offstate.stateLog, offCode) + offCode*256 - + FSE_bitCost_b256(mlstate.symbolTT, mlstate.stateLog, mlCode) + ML_bits[mlCode]*256; + return BITCOST_SYMBOL(offstate.symbolTT, offstate.stateLog, offCode) + offCode*BITCOST_MULTIPLIER + + BITCOST_SYMBOL(mlstate.symbolTT, mlstate.stateLog, mlCode) + ML_bits[mlCode]*BITCOST_MULTIPLIER; } if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ return ZSTD_highbit32(mlBase+1) + 16 + offCode; + /* dynamic statistics */ price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1); if ((optLevel<2) /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */ @@ -225,7 +235,7 @@ ZSTD_getMatchPrice(U32 const offset, U32 const matchLength, } DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price); - return price << 8; + return price * BITCOST_MULTIPLIER; } static void ZSTD_updateStats(optState_t* const optPtr, From 4d5bd32a001a7093fe7091e8605736369ad94aa9 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 9 May 2018 12:00:12 -0700 Subject: [PATCH 07/19] added traces to look at symbol costs evaluation looks correct. 
--- lib/common/bitstream.h | 24 ++++++++++++++++++++++++ lib/common/zstd_internal.h | 2 ++ lib/compress/fse_compress.c | 12 ++++++++++++ lib/compress/zstd_compress.c | 8 ++++---- 4 files changed, 42 insertions(+), 4 deletions(-) diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h index f7f389fe..04440fde 100644 --- a/lib/common/bitstream.h +++ b/lib/common/bitstream.h @@ -63,6 +63,30 @@ extern "C" { # endif #endif +#if defined(BIT_DEBUG) && (BIT_DEBUG>=2) +# include +extern int g_debuglog_enable; +/* recommended values for BIT_DEBUG display levels : + * 1 : no display, enables assert() only + * 2 : reserved for currently active debug path + * 3 : events once per object lifetime (CCtx, CDict, etc.) + * 4 : events once per frame + * 5 : events once per block + * 6 : events once per sequence (*very* verbose) */ +# define RAWLOG(l, ...) { \ + if ((g_debuglog_enable) & (l<=BIT_DEBUG)) { \ + fprintf(stderr, __VA_ARGS__); \ + } } +# define DEBUGLOG(l, ...) { \ + if ((g_debuglog_enable) & (l<=BIT_DEBUG)) { \ + fprintf(stderr, __FILE__ ": " __VA_ARGS__); \ + fprintf(stderr, " \n"); \ + } } +#else +# define RAWLOG(l, ...) {} /* disabled */ +# define DEBUGLOG(l, ...) 
{} /* disabled */ +#endif + /*========================================= * Target specific diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 65c08a82..981ce7f5 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -52,6 +52,8 @@ extern "C" { #define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; } +#undef RAWLOG +#undef DEBUGLOG #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2) # include extern int g_debuglog_enable; diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c index cb8f1fa3..80044b9c 100644 --- a/lib/compress/fse_compress.c +++ b/lib/compress/fse_compress.c @@ -160,6 +160,18 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi total += normalizedCounter[s]; } } } } +#if 0 /* debug : symbol costs */ + DEBUGLOG(2, "\n --- table statistics : "); + { U32 symbol; + for (symbol=0; symbol<=maxSymbolValue; symbol++) { + DEBUGLOG(2, "%3u: w=%3i, maxBits=%u, fracBits=%.2f", + symbol, normalizedCounter[symbol], + FSE_getMaxNbBits(symbolTT, symbol), + (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256); + } + } +#endif + return 0; } diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index d3e52c62..d69b0177 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3450,7 +3450,7 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV { 22, 21, 22, 4, 5, 48, ZSTD_btopt }, /* level 16 */ { 23, 22, 22, 4, 4, 48, ZSTD_btopt }, /* level 17 */ { 23, 22, 22, 5, 3, 64, ZSTD_btopt }, /* level 18 */ - { 23, 23, 22, 7, 3,128, ZSTD_btopt }, /* level 19 */ + { 23, 23, 22, 7, 3,128, ZSTD_btultra }, /* level 19 */ { 25, 25, 23, 7, 3,128, ZSTD_btultra }, /* level 20 */ { 26, 26, 24, 7, 3,256, ZSTD_btultra }, /* level 21 */ { 27, 27, 25, 9, 3,512, ZSTD_btultra }, /* level 22 */ @@ -3476,7 +3476,7 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV { 18, 19, 18, 6, 
3, 32, ZSTD_btopt }, /* level 16.*/ { 18, 19, 18, 8, 3, 64, ZSTD_btopt }, /* level 17.*/ { 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/ - { 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/ + { 18, 19, 18, 9, 3,256, ZSTD_btultra }, /* level 19.*/ { 18, 19, 18, 11, 3,512, ZSTD_btultra }, /* level 20.*/ { 18, 19, 18, 12, 3,512, ZSTD_btultra }, /* level 21.*/ { 18, 19, 18, 13, 3,512, ZSTD_btultra }, /* level 22.*/ @@ -3502,7 +3502,7 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV { 17, 18, 17, 7, 3, 32, ZSTD_btopt }, /* level 16.*/ { 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/ { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 18.*/ - { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/ + { 17, 18, 17, 7, 3,256, ZSTD_btultra }, /* level 19.*/ { 17, 18, 17, 9, 3,256, ZSTD_btultra }, /* level 20.*/ { 17, 18, 17, 10, 3,256, ZSTD_btultra }, /* level 21.*/ { 17, 18, 17, 11, 3,512, ZSTD_btultra }, /* level 22.*/ @@ -3528,7 +3528,7 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/ { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/ { 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/ - { 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/ + { 14, 15, 15, 6, 3,256, ZSTD_btultra }, /* level 19.*/ { 14, 15, 15, 8, 3,256, ZSTD_btultra }, /* level 20.*/ { 14, 15, 15, 9, 3,256, ZSTD_btultra }, /* level 21.*/ { 14, 15, 15, 10, 3,256, ZSTD_btultra }, /* level 22.*/ From c39061cb7bef5074f0026e71afdbb89ea0f368c4 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 9 May 2018 12:07:25 -0700 Subject: [PATCH 08/19] fixed declaration-after-statement warning --- lib/common/fse.h | 18 ++++++++++-------- lib/compress/zstd_opt.c | 8 ++++---- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/lib/common/fse.h b/lib/common/fse.h index 8e44c1a4..86421274 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -582,20 +582,22 @@ MEM_STATIC U32 
FSE_getMaxNbBits(const FSE_symbolCompressionTransform* symbolTT, /* FSE_bitCost_b256() : * Approximate symbol cost, - * provide fractional value, using fixed-point format (accuracyLog fractional bits) */ + * provide fractional value, using fixed-point format (accuracyLog fractional bits) + * note: assume symbolValue is valid */ MEM_STATIC U32 FSE_bitCost(const FSE_symbolCompressionTransform* symbolTT, U32 tableLog, U32 symbolValue, U32 accuracyLog) { U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; U32 const threshold = (minNbBits+1) << 16; assert(tableLog < 16); - U32 const tableSize = 1 << tableLog; - assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); - U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */ - U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */ - U32 const bitMultiplier = 1 << accuracyLog; - assert(normalizedDeltaFromThreshold <= bitMultiplier); - return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold; + { U32 const tableSize = 1 << tableLog; + assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); + { U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); + U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */ + U32 const bitMultiplier = 1 << accuracyLog; + assert(normalizedDeltaFromThreshold <= bitMultiplier); + return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold; + } } } diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 67db85eb..73ecda72 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -135,10 +135,10 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP U32 const llCode = 
ZSTD_LLcode(litLength); FSE_CState_t cstate; FSE_initCState(&cstate, optPtr->symbolCosts->litlengthCTable); - U32 const price = LL_bits[llCode]*BITCOST_MULTIPLIER + BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, llCode); - DEBUGLOG(8, "ZSTD_litLengthPrice: ll=%u, bitCost=%.2f", litLength, (double)price / BITCOST_MULTIPLIER); - return price; - } + { U32 const price = LL_bits[llCode]*BITCOST_MULTIPLIER + BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, llCode); + DEBUGLOG(8, "ZSTD_litLengthPrice: ll=%u, bitCost=%.2f", litLength, (double)price / BITCOST_MULTIPLIER); + return price; + } } if (optPtr->priceType == zop_predef) return ZSTD_highbit32((U32)litLength+1); /* dynamic statistics */ From ac6105463a69d48652e28e1919ea326d4151dd53 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 9 May 2018 15:46:11 -0700 Subject: [PATCH 09/19] opt: minor improvements to log traces slight improvement when using fractional-bit evaluation (opt:dictionary) --- lib/compress/fse_compress.c | 4 ++-- lib/compress/zstd_compress.c | 10 ++++----- lib/compress/zstd_opt.c | 39 +++++++++++++++++++++++++----------- 3 files changed, 34 insertions(+), 19 deletions(-) diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c index 80044b9c..8e170150 100644 --- a/lib/compress/fse_compress.c +++ b/lib/compress/fse_compress.c @@ -161,10 +161,10 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi } } } } #if 0 /* debug : symbol costs */ - DEBUGLOG(2, "\n --- table statistics : "); + DEBUGLOG(5, "\n --- table statistics : "); { U32 symbol; for (symbol=0; symbol<=maxSymbolValue; symbol++) { - DEBUGLOG(2, "%3u: w=%3i, maxBits=%u, fracBits=%.2f", + DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f", symbol, normalizedCounter[symbol], FSE_getMaxNbBits(symbolTT, symbol), (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256); diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index d69b0177..d6e3e6b0 --- 
a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3450,7 +3450,7 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV { 22, 21, 22, 4, 5, 48, ZSTD_btopt }, /* level 16 */ { 23, 22, 22, 4, 4, 48, ZSTD_btopt }, /* level 17 */ { 23, 22, 22, 5, 3, 64, ZSTD_btopt }, /* level 18 */ - { 23, 23, 22, 7, 3,128, ZSTD_btultra }, /* level 19 */ + { 23, 23, 22, 7, 3,128, ZSTD_btopt }, /* level 19 */ { 25, 25, 23, 7, 3,128, ZSTD_btultra }, /* level 20 */ { 26, 26, 24, 7, 3,256, ZSTD_btultra }, /* level 21 */ { 27, 27, 25, 9, 3,512, ZSTD_btultra }, /* level 22 */ @@ -3476,7 +3476,7 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV { 18, 19, 18, 6, 3, 32, ZSTD_btopt }, /* level 16.*/ { 18, 19, 18, 8, 3, 64, ZSTD_btopt }, /* level 17.*/ { 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/ - { 18, 19, 18, 9, 3,256, ZSTD_btultra }, /* level 19.*/ + { 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/ { 18, 19, 18, 11, 3,512, ZSTD_btultra }, /* level 20.*/ { 18, 19, 18, 12, 3,512, ZSTD_btultra }, /* level 21.*/ { 18, 19, 18, 13, 3,512, ZSTD_btultra }, /* level 22.*/ @@ -3502,7 +3502,7 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV { 17, 18, 17, 7, 3, 32, ZSTD_btopt }, /* level 16.*/ { 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/ { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 18.*/ - { 17, 18, 17, 7, 3,256, ZSTD_btultra }, /* level 19.*/ + { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/ { 17, 18, 17, 9, 3,256, ZSTD_btultra }, /* level 20.*/ { 17, 18, 17, 10, 3,256, ZSTD_btultra }, /* level 21.*/ { 17, 18, 17, 11, 3,512, ZSTD_btultra }, /* level 22.*/ @@ -3528,10 +3528,10 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/ { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/ { 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/ - { 14, 15, 15, 6, 3,256, ZSTD_btultra }, /* 
level 19.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/ { 14, 15, 15, 8, 3,256, ZSTD_btultra }, /* level 20.*/ { 14, 15, 15, 9, 3,256, ZSTD_btultra }, /* level 21.*/ - { 14, 15, 15, 10, 3,256, ZSTD_btultra }, /* level 22.*/ + { 14, 15, 15, 10, 3,512, ZSTD_btultra }, /* level 22.*/ }, }; diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 73ecda72..486c8693 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -101,6 +101,12 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, # define BITCOST_SYMBOL(t,l,s) FSE_bitCost(t,l,s,BITCOST_ACCURACY) #endif +MEM_STATIC double +ZSTD_fCost(U32 price) +{ + return (double)price / (BITCOST_MULTIPLIER*8); +} + /* ZSTD_rawLiteralsCost() : * cost of literals (only) in specified segment (which length can be 0). * does not include cost of literalLength symbol */ @@ -433,7 +439,7 @@ void ZSTD_updateTree_internal( const BYTE* const base = ms->window.base; U32 const target = (U32)(ip - base); U32 idx = ms->nextToUpdate; - DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (extDict:%u)", + DEBUGLOG(8, "ZSTD_updateTree_internal, from %u to %u (extDict:%u)", idx, target, extDict); while(idx < target) @@ -481,7 +487,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( U32 nbCompares = 1U << cParams->searchLog; size_t bestLength = lengthToBeat-1; - DEBUGLOG(7, "ZSTD_insertBtAndGetAllMatches"); + DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches"); /* check repCode */ { U32 const lastR = ZSTD_REP_NUM + ll0; @@ -612,7 +618,7 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( ZSTD_match_t* matches, U32 const lengthToBeat) { U32 const matchLengthSearch = cParams->searchLength; - DEBUGLOG(7, "ZSTD_BtGetAllMatches"); + DEBUGLOG(8, "ZSTD_BtGetAllMatches"); if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ ZSTD_updateTree_internal(ms, cParams, ip, iHighLimit, matchLengthSearch, extDict); switch(matchLengthSearch) @@ -786,8 +792,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, for ( 
; pos <= end ; pos++ ) { U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel); U32 const sequencePrice = literalsPrice + matchPrice; - DEBUGLOG(7, "rPos:%u => set initial price : %u", - pos, sequencePrice); + DEBUGLOG(7, "rPos:%u => set initial price : %.2f", + pos, ZSTD_fCost(sequencePrice)); opt[pos].mlen = pos; opt[pos].off = offset; opt[pos].litlen = litlen; @@ -814,14 +820,18 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, } assert(price < 1000000000); /* overflow check */ if (price <= opt[cur].price) { - DEBUGLOG(7, "rPos:%u : better price (%u<%u) using literal", - cur, price, opt[cur].price); + DEBUGLOG(7, "rPos:%u : better price (%.2f<=%.2f) using literal", + cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price)); opt[cur].mlen = 1; opt[cur].off = 0; opt[cur].litlen = litlen; opt[cur].price = price; memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep)); - } } + } else { + DEBUGLOG(7, "rPos:%u : literal would cost more (%.2f>%.2f)", + cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price)); + } + } /* last match must start at a minimum distance of 8 from oend */ if (inr > ilimit) continue; @@ -829,8 +839,10 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, if (cur == last_pos) break; if ( (optLevel==0) /*static_test*/ - && (opt[cur+1].price <= opt[cur].price) ) + && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { + DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1); continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ + } { U32 const ll0 = (opt[cur].mlen != 1); U32 const litlen = (opt[cur].mlen == 1) ? 
opt[cur].litlen : 0; @@ -838,7 +850,10 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, U32 const basePrice = previousPrice + ZSTD_fullLiteralsCost(inr-litlen, litlen, optStatePtr); U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, inr, iend, extDict, opt[cur].rep, ll0, matches, minMatch); U32 matchNb; - if (!nbMatches) continue; + if (!nbMatches) { + DEBUGLOG(7, "rPos:%u : no match found", cur); + continue; + } { U32 const maxML = matches[nbMatches-1].len; DEBUGLOG(7, "rPos:%u, found %u matches, of maxLength=%u", @@ -868,8 +883,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); if ((pos > last_pos) || (price < opt[pos].price)) { - DEBUGLOG(7, "rPos:%u => new better price (%u<%u)", - pos, price, opt[pos].price); + DEBUGLOG(7, "rPos:%u => new better price (%.2f<%.2f)", + pos, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */ opt[pos].mlen = mlen; opt[pos].off = offset; From 74b1c75d64cb531526fd7c60f897f9bc0323986c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 10 May 2018 16:32:36 -0700 Subject: [PATCH 10/19] btopt : minor adjustment of update frequencies --- lib/compress/zstd_compress_internal.h | 2 +- lib/compress/zstd_opt.c | 37 ++++++++++++++++----------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index eeb7b230..25d3137a 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -76,7 +76,7 @@ typedef struct { U32 rep[ZSTD_REP_NUM]; } ZSTD_optimal_t; -typedef enum { zop_none=0, zop_predef, zop_static } ZSTD_OptPrice_e; +typedef enum { zop_dynamic=0, zop_predef, zop_static } ZSTD_OptPrice_e; typedef struct { /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ diff --git 
a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 486c8693..7e7a6935 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -12,7 +12,7 @@ #include "zstd_opt.h" -#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats. Also used for matchSum (?) */ +#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ #define ZSTD_MAX_PRICE (1<<30) @@ -32,24 +32,31 @@ static void ZSTD_setLog2Prices(optState_t* optPtr) static void ZSTD_rescaleFreqs(optState_t* const optPtr, const BYTE* const src, size_t const srcSize) { - optPtr->priceType = zop_none; + optPtr->priceType = zop_dynamic; - if (optPtr->litLengthSum == 0) { /* first init */ + if (optPtr->litLengthSum == 0) { /* first block : init */ unsigned u; - if (srcSize <= 1024) optPtr->priceType = zop_predef; + if (srcSize <= 1024) /* heuristic */ + optPtr->priceType = zop_predef; + assert(optPtr->symbolCosts != NULL); if (optPtr->symbolCosts->hufCTable_repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */ - optPtr->priceType = zop_static; + if (srcSize <= 8192) /* heuristic */ + optPtr->priceType = zop_static; + else { + assert(optPtr->priceType == zop_dynamic); + } + + } - assert(optPtr->litFreq!=NULL); - for (u=0; u<=MaxLit; u++) - optPtr->litFreq[u] = 0; - for (u=0; ulitFreq[src[u]]++; + assert(optPtr->litFreq != NULL); + { unsigned max = MaxLit; + FSE_count(optPtr->litFreq, &max, src, srcSize); /* use raw first block to init statistics */ + } optPtr->litSum = 0; for (u=0; u<=MaxLit; u++) { - optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> ZSTD_FREQ_DIV); + optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> (ZSTD_FREQ_DIV+1)); optPtr->litSum += optPtr->litFreq[u]; } @@ -63,7 +70,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, optPtr->offCodeFreq[u] = 1; optPtr->offCodeSum = 
(MaxOff+1); - } else { + } else { /* new block : re-use previous statistics, scaled down */ unsigned u; optPtr->litSum = 0; @@ -73,17 +80,17 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, } optPtr->litLengthSum = 0; for (u=0; u<=MaxLL; u++) { - optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1)); + optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u] >> ZSTD_FREQ_DIV); optPtr->litLengthSum += optPtr->litLengthFreq[u]; } optPtr->matchLengthSum = 0; for (u=0; u<=MaxML; u++) { - optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); + optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u] >> ZSTD_FREQ_DIV); optPtr->matchLengthSum += optPtr->matchLengthFreq[u]; } optPtr->offCodeSum = 0; for (u=0; u<=MaxOff; u++) { - optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); + optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u] >> ZSTD_FREQ_DIV); optPtr->offCodeSum += optPtr->offCodeFreq[u]; } } From 1a26ec6e8d6575e671e2304f1fd43eeb880e015d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 10 May 2018 17:59:12 -0700 Subject: [PATCH 11/19] opt: init statistics from dictionary instead of starting from fake "default" statistics. 
--- lib/common/entropy_common.c | 5 ++ lib/compress/fse_compress.c | 5 +- lib/compress/zstd_compress.c | 3 +- lib/compress/zstd_opt.c | 97 ++++++++++++++++++++++++++++-------- 4 files changed, 87 insertions(+), 23 deletions(-) diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c index b37a082f..344c3236 100644 --- a/lib/common/entropy_common.c +++ b/lib/common/entropy_common.c @@ -143,6 +143,11 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ if (remaining != 1) return ERROR(corruption_detected); if (bitCount > 32) return ERROR(corruption_detected); + /* zeroise the rest */ + { unsigned symbNb = charnum; + for (symbNb=charnum; symbNb <= *maxSVPtr; symbNb++) + normalizedCounter[symbNb] = 0; + } *maxSVPtr = charnum-1; ip += (bitCount+7)>>3; diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c index 8e170150..5df92db4 100644 --- a/lib/compress/fse_compress.c +++ b/lib/compress/fse_compress.c @@ -143,7 +143,10 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi for (s=0; s<=maxSymbolValue; s++) { switch (normalizedCounter[s]) { - case 0: break; + case 0: + /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */ + symbolTT[s].deltaNbBits = (tableLog+1) << 16; + break; case -1: case 1: diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index d6e3e6b0..58daf5d0 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2396,7 +2396,8 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted); /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ - CHECK_E( FSE_buildCTable_wksp(bs->entropy.offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, workspace, 
HUF_WORKSPACE_SIZE), + /* fill all offset symbols to avoid garbage at end of table */ + CHECK_E( FSE_buildCTable_wksp(bs->entropy.offcodeCTable, offcodeNCount, MaxOff, offcodeLog, workspace, HUF_WORKSPACE_SIZE), dictionary_corrupted); dictPtr += offcodeHeaderSize; } diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 7e7a6935..9233d5d6 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -35,7 +35,6 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, optPtr->priceType = zop_dynamic; if (optPtr->litLengthSum == 0) { /* first block : init */ - unsigned u; if (srcSize <= 1024) /* heuristic */ optPtr->priceType = zop_predef; @@ -47,29 +46,85 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, assert(optPtr->priceType == zop_dynamic); } + assert(optPtr->litFreq != NULL); + assert(optPtr->symbolCosts != NULL); + optPtr->litSum = 0; + { unsigned lit; + for (lit=0; lit<=MaxLit; lit++) { + U32 const scaleLog = 12; /* scale to 4K */ + U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->hufCTable, lit); + assert(bitCost < scaleLog); + optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->litSum += optPtr->litFreq[lit]; + } } + + { unsigned ll; + FSE_CState_t llstate; + FSE_initCState(&llstate, optPtr->symbolCosts->litlengthCTable); + optPtr->litLengthSum = 0; + for (ll=0; ll<=MaxLL; ll++) { + U32 const scaleLog = 11; /* scale to 2K */ + U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll); + assert(bitCost < scaleLog); + optPtr->litLengthFreq[ll] = bitCost ? 
1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->litLengthSum += optPtr->litLengthFreq[ll]; + } } + + { unsigned ml; + FSE_CState_t mlstate; + FSE_initCState(&mlstate, optPtr->symbolCosts->matchlengthCTable); + optPtr->matchLengthSum = 0; + for (ml=0; ml<=MaxML; ml++) { + U32 const scaleLog = 11; /* scale to 2K */ + U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml); + assert(bitCost < scaleLog); + optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->matchLengthSum += optPtr->matchLengthFreq[ml]; + } } + + { unsigned of; + FSE_CState_t ofstate; + FSE_initCState(&ofstate, optPtr->symbolCosts->offcodeCTable); + optPtr->offCodeSum = 0; + for (of=0; of<=MaxOff; of++) { + U32 const scaleLog = 11; /* scale to 2K */ + U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of); + assert(bitCost < scaleLog); + optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->offCodeSum += optPtr->offCodeFreq[of]; + } } + + } else { /* not a dictionary */ + + assert(optPtr->litFreq != NULL); + optPtr->litSum = 0; + { unsigned lit = MaxLit; + FSE_count(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ + for (lit=0; lit<=MaxLit; lit++) { + optPtr->litFreq[lit] = 1 + (optPtr->litFreq[lit] >> (ZSTD_FREQ_DIV+1)); + optPtr->litSum += optPtr->litFreq[lit]; + } } + + { unsigned ll; + for (ll=0; ll<=MaxLL; ll++) + optPtr->litLengthFreq[ll] = 1; + optPtr->litLengthSum = MaxLL+1; + } + + { unsigned ml; + for (ml=0; ml<=MaxML; ml++) + optPtr->matchLengthFreq[ml] = 1; + optPtr->matchLengthSum = MaxML+1; + } + + { unsigned of; + for (of=0; of<=MaxOff; of++) + optPtr->offCodeFreq[of] = 1; + optPtr->offCodeSum = MaxOff+1; + } } - assert(optPtr->litFreq != NULL); - { unsigned max = MaxLit; - FSE_count(optPtr->litFreq, &max, src, srcSize); /* use raw first block to init statistics */ - } - optPtr->litSum = 0; - for (u=0; u<=MaxLit; u++) { 
- optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> (ZSTD_FREQ_DIV+1)); - optPtr->litSum += optPtr->litFreq[u]; - } - - for (u=0; u<=MaxLL; u++) - optPtr->litLengthFreq[u] = 1; - optPtr->litLengthSum = MaxLL+1; - for (u=0; u<=MaxML; u++) - optPtr->matchLengthFreq[u] = 1; - optPtr->matchLengthSum = MaxML+1; - for (u=0; u<=MaxOff; u++) - optPtr->offCodeFreq[u] = 1; - optPtr->offCodeSum = (MaxOff+1); - } else { /* new block : re-use previous statistics, scaled down */ unsigned u; From 09d0fa29eefb40d665faa420847deeb24ae99e6a Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 10 May 2018 18:13:48 -0700 Subject: [PATCH 12/19] minor adjusting of weights --- lib/compress/zstd_opt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 9233d5d6..29c4c913 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -51,7 +51,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, optPtr->litSum = 0; { unsigned lit; for (lit=0; lit<=MaxLit; lit++) { - U32 const scaleLog = 12; /* scale to 4K */ + U32 const scaleLog = 11; /* scale to 2K */ U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->hufCTable, lit); assert(bitCost < scaleLog); optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; @@ -63,7 +63,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, FSE_initCState(&llstate, optPtr->symbolCosts->litlengthCTable); optPtr->litLengthSum = 0; for (ll=0; ll<=MaxLL; ll++) { - U32 const scaleLog = 11; /* scale to 2K */ + U32 const scaleLog = 10; /* scale to 1K */ U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll); assert(bitCost < scaleLog); optPtr->litLengthFreq[ll] = bitCost ? 
1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; @@ -75,7 +75,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, FSE_initCState(&mlstate, optPtr->symbolCosts->matchlengthCTable); optPtr->matchLengthSum = 0; for (ml=0; ml<=MaxML; ml++) { - U32 const scaleLog = 11; /* scale to 2K */ + U32 const scaleLog = 10; U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml); assert(bitCost < scaleLog); optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; @@ -87,7 +87,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, FSE_initCState(&ofstate, optPtr->symbolCosts->offcodeCTable); optPtr->offCodeSum = 0; for (of=0; of<=MaxOff; of++) { - U32 const scaleLog = 11; /* scale to 2K */ + U32 const scaleLog = 10; U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of); assert(bitCost < scaleLog); optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; From 0d7626672d19744330c4a2e8fcb022b47a40b618 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 10 May 2018 18:17:21 -0700 Subject: [PATCH 13/19] fixed c++ conversion warning --- lib/common/fse.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/common/fse.h b/lib/common/fse.h index 86421274..5a234444 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -575,8 +575,9 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt BIT_flushBits(bitC); } -MEM_STATIC U32 FSE_getMaxNbBits(const FSE_symbolCompressionTransform* symbolTT, U32 symbolValue) +MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) { + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; } From 99ddca43a6252369c3ad062e906c5d4f50b74f3c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 10 May 2018 19:48:09 -0700 Subject: [PATCH 14/19] fixed wrong assertion base can 
actually overflow --- lib/compress/zstd_compress.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 58daf5d0..3a45d58d 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2003,7 +2003,6 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, { const BYTE* const base = ms->window.base; const BYTE* const istart = (const BYTE*)src; const U32 current = (U32)(istart-base); - assert(istart >= base); if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */ if (current > ms->nextToUpdate + 384) ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384)); From 761758982e8546f1a44230d2a47b3974c6162311 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 11 May 2018 15:54:06 -0700 Subject: [PATCH 15/19] replaced FSE_count by FSE_count_simple to reduce usage of stack memory. Also : tweaked a few comments, as suggested by @terrelln --- lib/compress/zstd_compress_internal.h | 4 ++-- lib/compress/zstd_opt.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 25d3137a..0f1830a5 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -97,8 +97,8 @@ typedef struct { U32 log2matchLengthSum; /* pow2 to compare log2(mlfreq) to */ U32 log2offCodeSum; /* pow2 to compare log2(offreq) to */ /* end : updated by ZSTD_setLog2Prices */ - ZSTD_OptPrice_e priceType; /* prices follow a pre-defined cost structure, statistics are irrelevant */ - const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated symbol costs, from dictionary */ + ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow dictionary statistics, or a pre-defined cost structure */ + const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ } optState_t; typedef struct { diff --git 
a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 29c4c913..76144f73 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -97,9 +97,9 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, } else { /* not a dictionary */ assert(optPtr->litFreq != NULL); - optPtr->litSum = 0; { unsigned lit = MaxLit; - FSE_count(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ + FSE_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ + optPtr->litSum = 0; for (lit=0; lit<=MaxLit; lit++) { optPtr->litFreq[lit] = 1 + (optPtr->litFreq[lit] >> (ZSTD_FREQ_DIV+1)); optPtr->litSum += optPtr->litFreq[lit]; @@ -244,7 +244,7 @@ static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* con /* dynamic statistics */ { U32 const llCode = ZSTD_LLcode(litLength); int const contribution = (LL_bits[llCode] - + ZSTD_highbit32(optPtr->litLengthFreq[0]+1) + + ZSTD_highbit32(optPtr->litLengthFreq[0]+1) /* note: log2litLengthSum cancels out with following one */ - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1)) * BITCOST_MULTIPLIER; #if 1 From d59cf02df0938c3147f4290f48cf3e533129406e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 14 May 2018 15:32:28 -0700 Subject: [PATCH 16/19] decompress: changed error code when input is too large ZSTD_decompress() can decompress multiple frames sent as a single input. But the input size must be the exact sum of all compressed frames, no more. In the case of a mistake on srcSize, being larger than required, ZSTD_decompress() will try to decompress a new frame after current one, and fail. As a consequence, it will issue an error code, ERROR(prefix_unknown). While the error is technically correct (the decoder could not recognise the header of _next_ frame), it's confusing, as users will believe that the first header of the first frame is wrong, which is not the case (it's correct). 
It makes it more difficult to understand that the error is in the source size, which is too large. This patch changes the error code provided in such a scenario. If (at least) a first frame was successfully decoded, and then following bytes are garbage values, the decoder assumes the provided input size is wrong (too large), and issue the error code ERROR(srcSize_wrong). --- lib/decompress/zstd_decompress.c | 27 +++++++++++++++++++-------- tests/fuzzer.c | 6 ++++++ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 103dcc64..32a21308 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1882,6 +1882,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) { void* const dststart = dst; + int moreThan1Frame = 0; assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */ if (ddict) { @@ -1890,7 +1891,6 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, } while (srcSize >= ZSTD_frameHeaderSize_prefix) { - U32 magicNumber; #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) if (ZSTD_isLegacy(src, srcSize)) { @@ -1912,10 +1912,9 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, } #endif - magicNumber = MEM_readLE32(src); - DEBUGLOG(4, "reading magic number %08X (expecting %08X)", - (U32)magicNumber, (U32)ZSTD_MAGICNUMBER); - if (magicNumber != ZSTD_MAGICNUMBER) { + { U32 const magicNumber = MEM_readLE32(src); + DEBUGLOG(4, "reading magic number %08X (expecting %08X)", + (U32)magicNumber, (U32)ZSTD_MAGICNUMBER); if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { size_t skippableSize; if (srcSize < ZSTD_skippableHeaderSize) @@ -1927,9 +1926,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, src = (const BYTE *)src + skippableSize; srcSize -= skippableSize; continue; - } - return ERROR(prefix_unknown); - } + } } if (ddict) { /* we were called from 
ZSTD_decompress_usingDDict */ @@ -1943,11 +1940,25 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, &src, &srcSize); + if ( (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) + && (moreThan1Frame==1) ) { + /* at least one frame successfully completed, + * but following bytes are garbage : + * it's more likely to be a srcSize error, + * specifying more bytes than compressed size of frame(s). + * This error message replaces ERROR(prefix_unknown), + * which would be confusing, as the first header is actually correct. + * Note that one could be unlucky, it might be a corruption error instead, + * happening right at the place where we expect zstd magic bytes. + * But this is _much_ less likely than a srcSize field error. */ + return ERROR(srcSize_wrong); + } if (ZSTD_isError(res)) return res; /* no need to bound check, ZSTD_decompressFrame already has */ dst = (BYTE*)dst + res; dstCapacity -= res; } + moreThan1Frame = 1; } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ if (srcSize) return ERROR(srcSize_wrong); /* input not entirely consumed */ diff --git a/tests/fuzzer.c b/tests/fuzzer.c index f9ea209f..b0c425e1 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -375,6 +375,12 @@ static int basicUnitTests(U32 seed, double compressibility) if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; } DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : decompress too large input : ", testNb++); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, compressedBufferSize); + if (!ZSTD_isError(r)) goto _output_error; + if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3d : check CCtx size after compressing empty input : ", testNb++); { ZSTD_CCtx* cctx = ZSTD_createCCtx(); size_t const r = ZSTD_compressCCtx(cctx, compressedBuffer, compressedBufferSize, NULL, 0, 19); From 
30d9c84b1ab9a2b100cfe667558e975d70070736 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 15 May 2018 09:46:20 -0700 Subject: [PATCH 17/19] Fix failing Travis tests --- lib/compress/zstd_opt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 76144f73..3a48187c 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -53,7 +53,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, for (lit=0; lit<=MaxLit; lit++) { U32 const scaleLog = 11; /* scale to 2K */ U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->hufCTable, lit); - assert(bitCost < scaleLog); + assert(bitCost <= scaleLog); optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; optPtr->litSum += optPtr->litFreq[lit]; } } From 16bb8f1f9e8ec952458be9681a125d29627cdde9 Mon Sep 17 00:00:00 2001 From: fbrosson Date: Fri, 18 May 2018 17:05:36 +0000 Subject: [PATCH 18/19] Drop colon in asm snippet to make old versions of gcc happy. --- lib/common/cpu.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/common/cpu.h b/lib/common/cpu.h index 4eb48e39..88e0ebf4 100644 --- a/lib/common/cpu.h +++ b/lib/common/cpu.h @@ -72,8 +72,7 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { "cpuid\n\t" "popl %%ebx\n\t" : "=a"(f1a), "=c"(f1c), "=d"(f1d) - : "a"(1) - :); + : "a"(1)); } if (n >= 7) { __asm__( From 291824f49dbe50a4812c204994ae288c4c43979a Mon Sep 17 00:00:00 2001 From: fbrosson Date: Fri, 18 May 2018 18:40:11 +0000 Subject: [PATCH 19/19] __builtin_prefetch did probably not exist before gcc 3.1. 
--- lib/common/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/common/compiler.h b/lib/common/compiler.h index b588e110..366ed2b4 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -92,7 +92,7 @@ #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ # include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ # define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0) -#elif defined(__GNUC__) +#elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) # define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0) #else # define PREFETCH(ptr) /* disabled */