Merge branch 'dev' into tableLevels

This commit is contained in:
Yann Collet 2018-05-18 17:17:45 -07:00
commit 38c2c46823
15 changed files with 362 additions and 119 deletions

View File

@ -63,6 +63,30 @@ extern "C" {
# endif
#endif
#if defined(BIT_DEBUG) && (BIT_DEBUG>=2)
# include <stdio.h>
extern int g_debuglog_enable;
/* recommended values for BIT_DEBUG display levels :
* 1 : no display, enables assert() only
* 2 : reserved for currently active debug path
* 3 : events once per object lifetime (CCtx, CDict, etc.)
* 4 : events once per frame
* 5 : events once per block
* 6 : events once per sequence (*very* verbose) */
# define RAWLOG(l, ...) { \
if ((g_debuglog_enable) & (l<=BIT_DEBUG)) { \
fprintf(stderr, __VA_ARGS__); \
} }
# define DEBUGLOG(l, ...) { \
if ((g_debuglog_enable) & (l<=BIT_DEBUG)) { \
fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
fprintf(stderr, " \n"); \
} }
#else
# define RAWLOG(l, ...) {} /* disabled */
# define DEBUGLOG(l, ...) {} /* disabled */
#endif
/*=========================================
* Target specific

View File

@ -92,7 +92,7 @@
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0)
#elif defined(__GNUC__)
#elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
# define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0)
#else
# define PREFETCH(ptr) /* disabled */

View File

@ -72,8 +72,7 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
"cpuid\n\t"
"popl %%ebx\n\t"
: "=a"(f1a), "=c"(f1c), "=d"(f1d)
: "a"(1)
:);
: "a"(1));
}
if (n >= 7) {
__asm__(

View File

@ -143,6 +143,11 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
} } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
if (remaining != 1) return ERROR(corruption_detected);
if (bitCount > 32) return ERROR(corruption_detected);
/* zeroise the rest */
{ unsigned symbNb = charnum;
for (symbNb=charnum; symbNb <= *maxSVPtr; symbNb++)
normalizedCounter[symbNb] = 0;
}
*maxSVPtr = charnum-1;
ip += (bitCount+7)>>3;

View File

@ -575,6 +575,32 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt
BIT_flushBits(bitC);
}
MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
{
const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16;
}
/* FSE_bitCost_b256() :
* Approximate symbol cost,
* provide fractional value, using fixed-point format (accuracyLog fractional bits)
* note: assume symbolValue is valid */
MEM_STATIC U32 FSE_bitCost(const FSE_symbolCompressionTransform* symbolTT, U32 tableLog, U32 symbolValue, U32 accuracyLog)
{
U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16;
U32 const threshold = (minNbBits+1) << 16;
assert(tableLog < 16);
assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */
{ U32 const tableSize = 1 << tableLog;
assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold);
{ U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize);
U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */
U32 const bitMultiplier = 1 << accuracyLog;
assert(normalizedDeltaFromThreshold <= bitMultiplier);
return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold;
} }
}
/* ====== Decompression ====== */

View File

@ -208,7 +208,7 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
typedef enum {
HUF_repeat_none, /**< Cannot use the previous table */
HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
HUF_repeat_valid /**< Can use the previous table and it is asumed to be valid */
HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */
} HUF_repeat;
/** HUF_compress4X_repeat() :
* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
@ -227,7 +227,9 @@ size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
*/
#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize);
size_t HUF_buildCTable_wksp (HUF_CElt* tree,
const U32* count, U32 maxSymbolValue, U32 maxNbBits,
void* workSpace, size_t wkspSize);
/*! HUF_readStats() :
* Read compact Huffman tree, saved by HUF_writeCTable().
@ -242,6 +244,11 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
* Loading a CTable saved with HUF_writeCTable() */
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
/** HUF_getNbBits() :
* Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
* Note 1 : is not inlined, as HUF_CElt definition is private
* Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */
U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue);
/*
* HUF_decompress() does the following:

View File

@ -52,6 +52,8 @@ extern "C" {
#define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; }
#undef RAWLOG
#undef DEBUGLOG
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
# include <stdio.h>
extern int g_debuglog_enable;

View File

@ -143,7 +143,10 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
for (s=0; s<=maxSymbolValue; s++) {
switch (normalizedCounter[s])
{
case 0: break;
case 0:
/* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
symbolTT[s].deltaNbBits = (tableLog+1) << 16;
break;
case -1:
case 1:
@ -160,6 +163,18 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
total += normalizedCounter[s];
} } } }
#if 0 /* debug : symbol costs */
DEBUGLOG(5, "\n --- table statistics : ");
{ U32 symbol;
for (symbol=0; symbol<=maxSymbolValue; symbol++) {
DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f",
symbol, normalizedCounter[symbol],
FSE_getMaxNbBits(symbolTT, symbol),
(double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
}
}
#endif
return 0;
}

View File

@ -216,6 +216,13 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32* maxSymbolValuePtr, const void* src
return readSize;
}
U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
{
const HUF_CElt* table = (const HUF_CElt*)symbolTable;
assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
return table[symbolValue].nbBits;
}
typedef struct nodeElt_s {
U32 count;

View File

@ -995,7 +995,11 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl
typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e;
static void* ZSTD_reset_matchState(ZSTD_matchState_t* ms, void* ptr, ZSTD_compressionParameters const* cParams, ZSTD_compResetPolicy_e const crp, U32 const forCCtx)
static void*
ZSTD_reset_matchState(ZSTD_matchState_t* ms,
void* ptr,
const ZSTD_compressionParameters* cParams,
ZSTD_compResetPolicy_e const crp, U32 const forCCtx)
{
size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
size_t const hSize = ((size_t)1) << cParams->hashLog;
@ -1285,7 +1289,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
/* copy dictionary offsets */
{
ZSTD_matchState_t const* srcMatchState = &srcCCtx->blockState.matchState;
const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
dstMatchState->window = srcMatchState->window;
dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
@ -1985,18 +1989,21 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
const void* src, size_t srcSize)
{
ZSTD_matchState_t* const ms = &zc->blockState.matchState;
DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
(U32)dstCapacity, ms->window.dictLimit, ms->nextToUpdate);
DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%zu, dictLimit=%u, nextToUpdate=%u)",
dstCapacity, ms->window.dictLimit, ms->nextToUpdate);
if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.searchLength);
return 0; /* don't even attempt compression below a certain srcSize */
}
ZSTD_resetSeqStore(&(zc->seqStore));
ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; /* required for optimal parser to read stats from dictionary */
/* limited update after a very long match */
{ const BYTE* const base = ms->window.base;
const BYTE* const istart = (const BYTE*)src;
const U32 current = (U32)(istart-base);
if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */
if (current > ms->nextToUpdate + 384)
ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384));
}
@ -2369,6 +2376,8 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
size_t dictID;
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
assert(dictSize > 8);
assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
dictPtr += 4; /* skip magic number */
dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr);
@ -2386,7 +2395,8 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
CHECK_E( FSE_buildCTable_wksp(bs->entropy.offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, workspace, HUF_WORKSPACE_SIZE),
/* fill all offset symbols to avoid garbage at end of table */
CHECK_E( FSE_buildCTable_wksp(bs->entropy.offcodeCTable, offcodeNCount, MaxOff, offcodeLog, workspace, HUF_WORKSPACE_SIZE),
dictionary_corrupted);
dictPtr += offcodeHeaderSize;
}
@ -2447,8 +2457,10 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
/** ZSTD_compress_insertDictionary() :
* @return : dictID, or an error code */
static size_t ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms,
ZSTD_CCtx_params const* params,
static size_t
ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
ZSTD_matchState_t* ms,
const ZSTD_CCtx_params* params,
const void* dict, size_t dictSize,
ZSTD_dictContentType_e dictContentType,
ZSTD_dictTableLoadMethod_e dtlm,
@ -2726,7 +2738,7 @@ static size_t ZSTD_initCDict_internal(
ZSTD_dictContentType_e dictContentType,
ZSTD_compressionParameters cParams)
{
DEBUGLOG(3, "ZSTD_initCDict_internal, dictContentType %u", (U32)dictContentType);
DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (U32)dictContentType);
assert(!ZSTD_checkCParams(cParams));
cdict->cParams = cParams;
if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
@ -3519,7 +3531,7 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
{ 14, 15, 15, 6, 3,256, ZSTD_btultra }, /* level 19.*/
{ 14, 15, 15, 8, 3,256, ZSTD_btultra }, /* level 20.*/
{ 14, 15, 15, 9, 3,256, ZSTD_btultra }, /* level 21.*/
{ 14, 15, 15, 10, 3,256, ZSTD_btultra }, /* level 22.*/
{ 14, 15, 15, 10, 3,512, ZSTD_btultra }, /* level 22.*/
},
};

View File

@ -76,6 +76,8 @@ typedef struct {
U32 rep[ZSTD_REP_NUM];
} ZSTD_optimal_t;
typedef enum { zop_dynamic=0, zop_predef, zop_static } ZSTD_OptPrice_e;
typedef struct {
/* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
U32* litFreq; /* table of literals statistics, of size 256 */
@ -95,7 +97,8 @@ typedef struct {
U32 log2matchLengthSum; /* pow2 to compare log2(mlfreq) to */
U32 log2offCodeSum; /* pow2 to compare log2(offreq) to */
/* end : updated by ZSTD_setLog2Prices */
U32 staticPrices; /* prices follow a pre-defined cost structure, statistics are irrelevant */
ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow dictionary statistics, or a pre-defined cost structure */
const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
} optState_t;
typedef struct {
@ -161,7 +164,7 @@ typedef struct {
rawSeq* seq; /* The start of the sequences */
size_t pos; /* The position where reading stopped. <= size. */
size_t size; /* The number of sequences. <= capacity. */
size_t capacity; /* The capacity of the `seq` pointer */
size_t capacity; /* The capacity starting from `seq` pointer */
} rawSeqStore_t;
struct ZSTD_CCtx_params_s {

View File

@ -12,7 +12,7 @@
#include "zstd_opt.h"
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats. Also used for matchSum (?) */
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
#define ZSTD_MAX_PRICE (1<<30)
@ -32,34 +32,100 @@ static void ZSTD_setLog2Prices(optState_t* optPtr)
static void ZSTD_rescaleFreqs(optState_t* const optPtr,
const BYTE* const src, size_t const srcSize)
{
optPtr->staticPrices = 0;
optPtr->priceType = zop_dynamic;
if (optPtr->litLengthSum == 0) { /* first init */
unsigned u;
if (srcSize <= 1024) optPtr->staticPrices = 1;
if (optPtr->litLengthSum == 0) { /* first block : init */
if (srcSize <= 1024) /* heuristic */
optPtr->priceType = zop_predef;
assert(optPtr->litFreq!=NULL);
for (u=0; u<=MaxLit; u++)
optPtr->litFreq[u] = 0;
for (u=0; u<srcSize; u++)
optPtr->litFreq[src[u]]++;
optPtr->litSum = 0;
for (u=0; u<=MaxLit; u++) {
optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> ZSTD_FREQ_DIV);
optPtr->litSum += optPtr->litFreq[u];
assert(optPtr->symbolCosts != NULL);
if (optPtr->symbolCosts->hufCTable_repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */
if (srcSize <= 8192) /* heuristic */
optPtr->priceType = zop_static;
else {
assert(optPtr->priceType == zop_dynamic);
}
for (u=0; u<=MaxLL; u++)
optPtr->litLengthFreq[u] = 1;
optPtr->litLengthSum = MaxLL+1;
for (u=0; u<=MaxML; u++)
optPtr->matchLengthFreq[u] = 1;
optPtr->matchLengthSum = MaxML+1;
for (u=0; u<=MaxOff; u++)
optPtr->offCodeFreq[u] = 1;
optPtr->offCodeSum = (MaxOff+1);
assert(optPtr->litFreq != NULL);
assert(optPtr->symbolCosts != NULL);
optPtr->litSum = 0;
{ unsigned lit;
for (lit=0; lit<=MaxLit; lit++) {
U32 const scaleLog = 11; /* scale to 2K */
U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->hufCTable, lit);
assert(bitCost <= scaleLog);
optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
optPtr->litSum += optPtr->litFreq[lit];
} }
} else {
{ unsigned ll;
FSE_CState_t llstate;
FSE_initCState(&llstate, optPtr->symbolCosts->litlengthCTable);
optPtr->litLengthSum = 0;
for (ll=0; ll<=MaxLL; ll++) {
U32 const scaleLog = 10; /* scale to 1K */
U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
assert(bitCost < scaleLog);
optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
optPtr->litLengthSum += optPtr->litLengthFreq[ll];
} }
{ unsigned ml;
FSE_CState_t mlstate;
FSE_initCState(&mlstate, optPtr->symbolCosts->matchlengthCTable);
optPtr->matchLengthSum = 0;
for (ml=0; ml<=MaxML; ml++) {
U32 const scaleLog = 10;
U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
assert(bitCost < scaleLog);
optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
} }
{ unsigned of;
FSE_CState_t ofstate;
FSE_initCState(&ofstate, optPtr->symbolCosts->offcodeCTable);
optPtr->offCodeSum = 0;
for (of=0; of<=MaxOff; of++) {
U32 const scaleLog = 10;
U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
assert(bitCost < scaleLog);
optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
optPtr->offCodeSum += optPtr->offCodeFreq[of];
} }
} else { /* not a dictionary */
assert(optPtr->litFreq != NULL);
{ unsigned lit = MaxLit;
FSE_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
optPtr->litSum = 0;
for (lit=0; lit<=MaxLit; lit++) {
optPtr->litFreq[lit] = 1 + (optPtr->litFreq[lit] >> (ZSTD_FREQ_DIV+1));
optPtr->litSum += optPtr->litFreq[lit];
} }
{ unsigned ll;
for (ll=0; ll<=MaxLL; ll++)
optPtr->litLengthFreq[ll] = 1;
optPtr->litLengthSum = MaxLL+1;
}
{ unsigned ml;
for (ml=0; ml<=MaxML; ml++)
optPtr->matchLengthFreq[ml] = 1;
optPtr->matchLengthSum = MaxML+1;
}
{ unsigned of;
for (of=0; of<=MaxOff; of++)
optPtr->offCodeFreq[of] = 1;
optPtr->offCodeSum = MaxOff+1;
}
}
} else { /* new block : re-use previous statistics, scaled down */
unsigned u;
optPtr->litSum = 0;
@ -69,7 +135,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr,
}
optPtr->litLengthSum = 0;
for (u=0; u<=MaxLL; u++) {
optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u] >> ZSTD_FREQ_DIV);
optPtr->litLengthSum += optPtr->litLengthFreq[u];
}
optPtr->matchLengthSum = 0;
@ -87,22 +153,45 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr,
ZSTD_setLog2Prices(optPtr);
}
#if 1 /* approximation at bit level */
# define BITCOST_ACCURACY 0
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define BITCOST_SYMBOL(t,l,s) ((void)l, FSE_getMaxNbBits(t,s)*BITCOST_MULTIPLIER)
#else /* fractional bit accuracy */
# define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define BITCOST_SYMBOL(t,l,s) FSE_bitCost(t,l,s,BITCOST_ACCURACY)
#endif
MEM_STATIC double
ZSTD_fCost(U32 price)
{
return (double)price / (BITCOST_MULTIPLIER*8);
}
/* ZSTD_rawLiteralsCost() :
* cost of literals (only) in given segment (which length can be null)
* cost of literals (only) in specified segment (which length can be 0).
* does not include cost of literalLength symbol */
static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
const optState_t* const optPtr)
{
if (optPtr->staticPrices) return (litLength*6); /* 6 bit per literal - no statistic used */
if (litLength == 0) return 0;
if (optPtr->priceType == zop_predef) return (litLength*6); /* 6 bit per literal - no statistic used */
if (optPtr->priceType == zop_static) {
U32 u, cost;
assert(optPtr->symbolCosts != NULL);
assert(optPtr->symbolCosts->hufCTable_repeatMode == HUF_repeat_valid);
for (u=0, cost=0; u < litLength; u++)
cost += HUF_getNbBits(optPtr->symbolCosts->hufCTable, literals[u]);
return cost * BITCOST_MULTIPLIER;
}
/* literals */
/* dynamic statistics */
{ U32 u;
U32 cost = litLength * optPtr->log2litSum;
for (u=0; u < litLength; u++)
cost -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1);
return cost;
return cost * BITCOST_MULTIPLIER;
}
}
@ -110,12 +199,19 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
* cost of literalLength symbol */
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr)
{
if (optPtr->staticPrices) return ZSTD_highbit32((U32)litLength+1);
/* literal Length */
{ U32 const llCode = ZSTD_LLcode(litLength);
U32 const price = LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
if (optPtr->priceType == zop_static) {
U32 const llCode = ZSTD_LLcode(litLength);
FSE_CState_t cstate;
FSE_initCState(&cstate, optPtr->symbolCosts->litlengthCTable);
{ U32 const price = LL_bits[llCode]*BITCOST_MULTIPLIER + BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, llCode);
DEBUGLOG(8, "ZSTD_litLengthPrice: ll=%u, bitCost=%.2f", litLength, (double)price / BITCOST_MULTIPLIER);
return price;
} }
if (optPtr->priceType == zop_predef) return ZSTD_highbit32((U32)litLength+1);
/* dynamic statistics */
{ U32 const llCode = ZSTD_LLcode(litLength);
return (LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1)) * BITCOST_MULTIPLIER;
}
}
@ -135,13 +231,22 @@ static U32 ZSTD_fullLiteralsCost(const BYTE* const literals, U32 const litLength
* to provide a cost which is directly comparable to a match ending at same position */
static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr)
{
if (optPtr->staticPrices) return ZSTD_highbit32(litLength+1);
if (optPtr->priceType == zop_static) {
U32 const llCode = ZSTD_LLcode(litLength);
FSE_CState_t cstate;
FSE_initCState(&cstate, optPtr->symbolCosts->litlengthCTable);
return (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
+ BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, llCode)
- BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, 0);
}
if (optPtr->priceType >= zop_predef) return ZSTD_highbit32(litLength+1);
/* literal Length */
/* dynamic statistics */
{ U32 const llCode = ZSTD_LLcode(litLength);
int const contribution = LL_bits[llCode]
+ ZSTD_highbit32(optPtr->litLengthFreq[0]+1)
- ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
int const contribution = (LL_bits[llCode]
+ ZSTD_highbit32(optPtr->litLengthFreq[0]+1) /* note: log2litLengthSum cancels out with following one */
- ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1))
* BITCOST_MULTIPLIER;
#if 1
return contribution;
#else
@ -166,8 +271,8 @@ static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLe
* Provides the cost of the match part (offset + matchLength) of a sequence
* Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
* optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
FORCE_INLINE_TEMPLATE U32 ZSTD_getMatchPrice(
U32 const offset, U32 const matchLength,
FORCE_INLINE_TEMPLATE U32
ZSTD_getMatchPrice(U32 const offset, U32 const matchLength,
const optState_t* const optPtr,
int const optLevel)
{
@ -176,9 +281,19 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_getMatchPrice(
U32 const mlBase = matchLength - MINMATCH;
assert(matchLength >= MINMATCH);
if (optPtr->staticPrices) /* fixed scheme, do not use statistics */
return ZSTD_highbit32((U32)mlBase+1) + 16 + offCode;
if (optPtr->priceType == zop_static) {
U32 const mlCode = ZSTD_MLcode(mlBase);
FSE_CState_t mlstate, offstate;
FSE_initCState(&mlstate, optPtr->symbolCosts->matchlengthCTable);
FSE_initCState(&offstate, optPtr->symbolCosts->offcodeCTable);
return BITCOST_SYMBOL(offstate.symbolTT, offstate.stateLog, offCode) + offCode*BITCOST_MULTIPLIER
+ BITCOST_SYMBOL(mlstate.symbolTT, mlstate.stateLog, mlCode) + ML_bits[mlCode]*BITCOST_MULTIPLIER;
}
if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */
return ZSTD_highbit32(mlBase+1) + 16 + offCode;
/* dynamic statistics */
price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1);
if ((optLevel<2) /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */
@ -188,7 +303,7 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_getMatchPrice(
}
DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
return price;
return price * BITCOST_MULTIPLIER;
}
static void ZSTD_updateStats(optState_t* const optPtr,
@ -386,7 +501,7 @@ void ZSTD_updateTree_internal(
const BYTE* const base = ms->window.base;
U32 const target = (U32)(ip - base);
U32 idx = ms->nextToUpdate;
DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (extDict:%u)",
DEBUGLOG(8, "ZSTD_updateTree_internal, from %u to %u (extDict:%u)",
idx, target, extDict);
while(idx < target)
@ -434,7 +549,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
U32 nbCompares = 1U << cParams->searchLog;
size_t bestLength = lengthToBeat-1;
DEBUGLOG(7, "ZSTD_insertBtAndGetAllMatches");
DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches");
/* check repCode */
{ U32 const lastR = ZSTD_REP_NUM + ll0;
@ -565,7 +680,7 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
ZSTD_match_t* matches, U32 const lengthToBeat)
{
U32 const matchLengthSearch = cParams->searchLength;
DEBUGLOG(7, "ZSTD_BtGetAllMatches");
DEBUGLOG(8, "ZSTD_BtGetAllMatches");
if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
ZSTD_updateTree_internal(ms, cParams, ip, iHighLimit, matchLengthSearch, extDict);
switch(matchLengthSearch)
@ -662,10 +777,11 @@ static int ZSTD_literalsContribution_cached(
return contribution;
}
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore,
FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
seqStore_t* seqStore,
U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams,
const ZSTD_compressionParameters* cParams,
const void* src, size_t srcSize,
const int optLevel, const int extDict)
{
@ -705,17 +821,18 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore
/* initialize opt[0] */
{ U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
opt[0].mlen = 1;
opt[0].mlen = 1; /* means is_a_literal */
opt[0].litlen = litlen;
/* large match -> immediate encoding */
{ U32 const maxML = matches[nbMatches-1].len;
DEBUGLOG(7, "found %u matches of maxLength=%u and offset=%u at cPos=%u => start new serie",
nbMatches, maxML, matches[nbMatches-1].off, (U32)(ip-prefixStart));
U32 const maxOffset = matches[nbMatches-1].off;
DEBUGLOG(7, "found %u matches of maxLength=%u and maxOffset=%u at cPos=%u => start new serie",
nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
if (maxML > sufficient_len) {
best_mlen = maxML;
best_off = matches[nbMatches-1].off;
best_off = maxOffset;
DEBUGLOG(7, "large match (%u>%u), immediate encoding",
best_mlen, sufficient_len);
cur = 0;
@ -727,22 +844,23 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore
{ U32 const literalsPrice = ZSTD_fullLiteralsCost_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
U32 pos;
U32 matchNb;
for (pos = 0; pos < minMatch; pos++) {
opt[pos].mlen = 1;
opt[pos].price = ZSTD_MAX_PRICE;
for (pos = 1; pos < minMatch; pos++) {
opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
}
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
U32 const offset = matches[matchNb].off;
U32 const end = matches[matchNb].len;
repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
for ( ; pos <= end ; pos++ ) {
U32 const matchPrice = literalsPrice + ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
DEBUGLOG(7, "rPos:%u => set initial price : %u",
pos, matchPrice);
U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
U32 const sequencePrice = literalsPrice + matchPrice;
DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
pos, ZSTD_fCost(sequencePrice));
opt[pos].mlen = pos;
opt[pos].off = offset;
opt[pos].litlen = litlen;
opt[pos].price = matchPrice;
opt[pos].price = sequencePrice;
ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
} }
last_pos = pos-1;
@ -764,23 +882,29 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore
}
assert(price < 1000000000); /* overflow check */
if (price <= opt[cur].price) {
DEBUGLOG(7, "rPos:%u : better price (%u<%u) using literal",
cur, price, opt[cur].price);
DEBUGLOG(7, "rPos:%u : better price (%.2f<=%.2f) using literal",
cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
opt[cur].mlen = 1;
opt[cur].off = 0;
opt[cur].litlen = litlen;
opt[cur].price = price;
memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
} }
} else {
DEBUGLOG(7, "rPos:%u : literal would cost more (%.2f>%.2f)",
cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
}
}
/* last match must start at a minimum distance of 8 from oend */
if (inr > ilimit) continue;
if (cur == last_pos) break;
if ( (optLevel==0) /*static*/
&& (opt[cur+1].price <= opt[cur].price) )
if ( (optLevel==0) /*static_test*/
&& (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
}
{ U32 const ll0 = (opt[cur].mlen != 1);
U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0;
@ -788,20 +912,22 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore
U32 const basePrice = previousPrice + ZSTD_fullLiteralsCost(inr-litlen, litlen, optStatePtr);
U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, inr, iend, extDict, opt[cur].rep, ll0, matches, minMatch);
U32 matchNb;
if (!nbMatches) continue;
if (!nbMatches) {
DEBUGLOG(7, "rPos:%u : no match found", cur);
continue;
}
{ U32 const maxML = matches[nbMatches-1].len;
DEBUGLOG(7, "rPos:%u, found %u matches, of maxLength=%u",
cur, nbMatches, maxML);
if ( (maxML > sufficient_len)
| (cur + maxML >= ZSTD_OPT_NUM) ) {
|| (cur + maxML >= ZSTD_OPT_NUM) ) {
best_mlen = maxML;
best_off = matches[nbMatches-1].off;
last_pos = cur + 1;
goto _shortestPath;
}
}
} }
/* set prices using matches found at position == cur */
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
@ -814,21 +940,22 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore
DEBUGLOG(7, "testing match %u => offCode=%u, mlen=%u, llen=%u",
matchNb, matches[matchNb].off, lastML, litlen);
for (mlen = lastML; mlen >= startML; mlen--) {
for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
U32 const pos = cur + mlen;
int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
if ((pos > last_pos) || (price < opt[pos].price)) {
DEBUGLOG(7, "rPos:%u => new better price (%u<%u)",
pos, price, opt[pos].price);
while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }
DEBUGLOG(7, "rPos:%u => new better price (%.2f<%.2f)",
pos, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */
opt[pos].mlen = mlen;
opt[pos].off = offset;
opt[pos].litlen = litlen;
opt[pos].price = price;
ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
} else {
if (optLevel==0) break; /* gets ~+10% speed for about -0.01 ratio loss */
if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
}
} } }
} /* for (cur = 1; cur <= last_pos; cur++) */
@ -878,8 +1005,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
if (repCode >= 2) rep[2] = rep[1];
rep[1] = rep[0];
rep[0] = currentOffset;
}
}
} }
ZSTD_updateStats(optStatePtr, llen, anchor, offset, mlen);
ZSTD_storeSeq(seqStore, llen, anchor, offset, mlen-MINMATCH);

View File

@ -965,7 +965,7 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
{
assert(dstSize > 0);
assert(dstSize <= 128 KB);
assert(dstSize <= 128*1024);
/* decoder timing evaluation */
{ U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
U32 const D256 = (U32)(dstSize >> 8);

View File

@ -1882,6 +1882,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
const ZSTD_DDict* ddict)
{
void* const dststart = dst;
int moreThan1Frame = 0;
assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */
if (ddict) {
@ -1890,7 +1891,6 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
}
while (srcSize >= ZSTD_frameHeaderSize_prefix) {
U32 magicNumber;
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
if (ZSTD_isLegacy(src, srcSize)) {
@ -1912,10 +1912,9 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
}
#endif
magicNumber = MEM_readLE32(src);
{ U32 const magicNumber = MEM_readLE32(src);
DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
(U32)magicNumber, (U32)ZSTD_MAGICNUMBER);
if (magicNumber != ZSTD_MAGICNUMBER) {
if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
size_t skippableSize;
if (srcSize < ZSTD_skippableHeaderSize)
@ -1927,9 +1926,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
src = (const BYTE *)src + skippableSize;
srcSize -= skippableSize;
continue;
}
return ERROR(prefix_unknown);
}
} }
if (ddict) {
/* we were called from ZSTD_decompress_usingDDict */
@ -1943,11 +1940,25 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
{ const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
&src, &srcSize);
if ( (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
&& (moreThan1Frame==1) ) {
/* at least one frame successfully completed,
* but following bytes are garbage :
* it's more likely to be a srcSize error,
* specifying more bytes than compressed size of frame(s).
* This error message replaces ERROR(prefix_unknown),
* which would be confusing, as the first header is actually correct.
* Note that one could be unlucky, it might be a corruption error instead,
* happening right at the place where we expect zstd magic bytes.
* But this is _much_ less likely than a srcSize field error. */
return ERROR(srcSize_wrong);
}
if (ZSTD_isError(res)) return res;
/* no need to bound check, ZSTD_decompressFrame already has */
dst = (BYTE*)dst + res;
dstCapacity -= res;
}
moreThan1Frame = 1;
} /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
if (srcSize) return ERROR(srcSize_wrong); /* input not entirely consumed */

View File

@ -375,6 +375,12 @@ static int basicUnitTests(U32 seed, double compressibility)
if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : decompress too large input : ", testNb++);
{ size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, compressedBufferSize);
if (!ZSTD_isError(r)) goto _output_error;
if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3d : check CCtx size after compressing empty input : ", testNb++);
{ ZSTD_CCtx* cctx = ZSTD_createCCtx();
size_t const r = ZSTD_compressCCtx(cctx, compressedBuffer, compressedBufferSize, NULL, 0, 19);