e103d7b4a6
Fixes: Enable RLE blocks for superblock mode. Fix the limitation that the literals block must shrink: instead, when we're within 200 bytes of the next header byte size, we will just use the next one up. That way we should (almost?) always have space for the table. Remove the limitation that the first sub-block MUST have compressed literals and be compressed. Now one sub-block MUST be compressed (otherwise we fall back to a raw block, which is okay, since that is streamable). If no block has compressed literals, that is okay: we will fix up the next Huffman table. Handle the case where the last sub-block is uncompressed (maybe it is very small). Before, it would skip superblock in this case; now we allow the last sub-block to be uncompressed. To do this we need to regenerate the correct repcodes. Respect disableLiteralsCompression in superblock mode. Fix superblock mode to handle a block consisting of only compressed literals. Fix an off-by-one error in superblock mode that disabled it whenever there were last literals. Fix superblock mode with long literals/matches (> 0xFFFF). Allow superblock mode to repeat Huffman tables. Respect ZSTD_minGain(). Tests: Simple check for the condition in #2096. When the simple_round_trip fuzzer enables superblock mode, it checks that the compressed size isn't expanded too much. Remaining limitations: (1) O(targetCBlockSize^2), because we recompute statistics every sequence; (2) unable to split literals of length > targetCBlockSize into multiple sequences; (3) refuses to generate sub-blocks that don't shrink the compressed data, so we could end up with large sub-blocks. We should emit those sections as uncompressed blocks instead. ... Fixes #2096
1198 lines
54 KiB
C
1198 lines
54 KiB
C
/*
|
|
* Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* This source code is licensed under both the BSD-style license (found in the
|
|
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
* in the COPYING file in the root directory of this source tree).
|
|
* You may select, at your option, one of the above-listed licenses.
|
|
*/
|
|
|
|
#include "zstd_compress_internal.h"
|
|
#include "hist.h"
|
|
#include "zstd_opt.h"
|
|
|
|
|
|
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
|
|
#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
|
|
#define ZSTD_MAX_PRICE (1<<30)
|
|
|
|
#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
|
|
|
|
|
|
/*-*************************************
|
|
* Price functions for optimal parser
|
|
***************************************/
|
|
|
|
/* Cost-model configuration.
 * Exactly one branch is active; each one defines :
 *   BITCOST_ACCURACY   : nb of fractional bits carried by a price
 *   BITCOST_MULTIPLIER : price of one full bit (1 << BITCOST_ACCURACY)
 *   WEIGHT(stat,opt)   : weight of statistic `stat`; `opt` selects the
 *                        evaluation method in the default branch.
 * All branches expose the same two-argument WEIGHT(), so call sites compile
 * unchanged whichever branch is enabled. */
#if 0    /* approximation at bit level */
#  define BITCOST_ACCURACY 0
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#  define WEIGHT(stat,opt) ((void)(opt), ZSTD_bitWeight(stat))
#elif 0  /* fractional bit accuracy */
#  define BITCOST_ACCURACY 8
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#  define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
#else    /* opt==approx, ultra==accurate : non-zero `opt` => fractional weight, zero => whole-bit weight */
#  define BITCOST_ACCURACY 8
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#  define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
#endif
|
|
|
|
/* ZSTD_bitWeight() :
 * whole-bit weight of a statistic :
 * ZSTD_highbit32(stat+1), scaled by BITCOST_MULTIPLIER. */
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
{
    U32 const highBit = ZSTD_highbit32(stat + 1);
    return highBit * BITCOST_MULTIPLIER;
}
|
|
|
|
/* ZSTD_fracWeight() :
 * weight of a statistic with fractional-bit resolution.
 * Sum of an integer part (highest set bit of rawStat+1, scaled by
 * BITCOST_MULTIPLIER) and a fractional part ((rawStat+1) normalised by that
 * highest bit, expressed with BITCOST_ACCURACY fractional bits). */
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
{
    U32 const value       = rawStat + 1;
    U32 const highBit     = ZSTD_highbit32(value);
    U32 const integerPart = highBit * BITCOST_MULTIPLIER;
    U32 const fractPart   = (value << BITCOST_ACCURACY) >> highBit;
    assert(highBit + BITCOST_ACCURACY < 31);
    return integerPart + fractPart;
}
|
|
|
|
#if (DEBUGLEVEL>=2)
/* ZSTD_fCost() :
 * debug helper, only compiled when DEBUGLEVEL >= 2.
 * @return : `price` converted into bytes, as a fractional value
 *           (price / (BITCOST_MULTIPLIER * 8)). */
MEM_STATIC double ZSTD_fCost(U32 price)
{
    double const priceOfOneByte = (BITCOST_MULTIPLIER*8);
    return (double)price / priceOfOneByte;
}
#endif
|
|
|
|
/* ZSTD_compressedLiterals() :
 * @return : 1 unless the literal compression mode of `optPtr`
 *           is ZSTD_lcm_uncompressed, in which case 0. */
static int ZSTD_compressedLiterals(optState_t const* const optPtr)
{
    int const literalsStayRaw = (optPtr->literalCompressionMode == ZSTD_lcm_uncompressed);
    return !literalsStayRaw;
}
|
|
|
|
/* ZSTD_setBasePrices() :
 * refresh the cached "base price" of each statistics table,
 * i.e. WEIGHT() of the table's current sum.
 * The literals base price is only refreshed when literals are compressed. */
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
{
    if (ZSTD_compressedLiterals(optPtr)) {
        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
    }
    optPtr->litLengthSumBasePrice   = WEIGHT(optPtr->litLengthSum, optLevel);
    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
    optPtr->offCodeSumBasePrice     = WEIGHT(optPtr->offCodeSum, optLevel);
}
|
|
|
|
|
|
/* ZSTD_downscaleStat() :
|
|
* reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
|
|
* return the resulting sum of elements */
|
|
static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
|
|
{
|
|
U32 s, sum=0;
|
|
DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
|
|
assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
|
|
for (s=0; s<lastEltIndex+1; s++) {
|
|
table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
|
|
sum += table[s];
|
|
}
|
|
return sum;
|
|
}
|
|
|
|
/* ZSTD_rescaleFreqs() :
|
|
* if first block (detected by optPtr->litLengthSum == 0) : init statistics
|
|
* take hints from dictionary if there is one
|
|
* or init from zero, using src for literals stats, or flat 1 for match symbols
|
|
* otherwise downscale existing stats, to be used as seed for next block.
|
|
*/
|
|
static void
|
|
ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|
const BYTE* const src, size_t const srcSize,
|
|
int const optLevel)
|
|
{
|
|
int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
|
|
DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
|
|
optPtr->priceType = zop_dynamic;
|
|
|
|
if (optPtr->litLengthSum == 0) { /* first block : init */
|
|
if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */
|
|
DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
|
|
optPtr->priceType = zop_predef;
|
|
}
|
|
|
|
assert(optPtr->symbolCosts != NULL);
|
|
if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
|
|
/* huffman table presumed generated by dictionary */
|
|
optPtr->priceType = zop_dynamic;
|
|
|
|
if (compressedLiterals) {
|
|
unsigned lit;
|
|
assert(optPtr->litFreq != NULL);
|
|
optPtr->litSum = 0;
|
|
for (lit=0; lit<=MaxLit; lit++) {
|
|
U32 const scaleLog = 11; /* scale to 2K */
|
|
U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
|
|
assert(bitCost <= scaleLog);
|
|
optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
|
|
optPtr->litSum += optPtr->litFreq[lit];
|
|
} }
|
|
|
|
{ unsigned ll;
|
|
FSE_CState_t llstate;
|
|
FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
|
|
optPtr->litLengthSum = 0;
|
|
for (ll=0; ll<=MaxLL; ll++) {
|
|
U32 const scaleLog = 10; /* scale to 1K */
|
|
U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
|
|
assert(bitCost < scaleLog);
|
|
optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
|
|
optPtr->litLengthSum += optPtr->litLengthFreq[ll];
|
|
} }
|
|
|
|
{ unsigned ml;
|
|
FSE_CState_t mlstate;
|
|
FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
|
|
optPtr->matchLengthSum = 0;
|
|
for (ml=0; ml<=MaxML; ml++) {
|
|
U32 const scaleLog = 10;
|
|
U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
|
|
assert(bitCost < scaleLog);
|
|
optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
|
|
optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
|
|
} }
|
|
|
|
{ unsigned of;
|
|
FSE_CState_t ofstate;
|
|
FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
|
|
optPtr->offCodeSum = 0;
|
|
for (of=0; of<=MaxOff; of++) {
|
|
U32 const scaleLog = 10;
|
|
U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
|
|
assert(bitCost < scaleLog);
|
|
optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
|
|
optPtr->offCodeSum += optPtr->offCodeFreq[of];
|
|
} }
|
|
|
|
} else { /* not a dictionary */
|
|
|
|
assert(optPtr->litFreq != NULL);
|
|
if (compressedLiterals) {
|
|
unsigned lit = MaxLit;
|
|
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
|
|
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
|
|
}
|
|
|
|
{ unsigned ll;
|
|
for (ll=0; ll<=MaxLL; ll++)
|
|
optPtr->litLengthFreq[ll] = 1;
|
|
}
|
|
optPtr->litLengthSum = MaxLL+1;
|
|
|
|
{ unsigned ml;
|
|
for (ml=0; ml<=MaxML; ml++)
|
|
optPtr->matchLengthFreq[ml] = 1;
|
|
}
|
|
optPtr->matchLengthSum = MaxML+1;
|
|
|
|
{ unsigned of;
|
|
for (of=0; of<=MaxOff; of++)
|
|
optPtr->offCodeFreq[of] = 1;
|
|
}
|
|
optPtr->offCodeSum = MaxOff+1;
|
|
|
|
}
|
|
|
|
} else { /* new block : re-use previous statistics, scaled down */
|
|
|
|
if (compressedLiterals)
|
|
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
|
|
optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
|
|
optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
|
|
optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
|
|
}
|
|
|
|
ZSTD_setBasePrices(optPtr, optLevel);
|
|
}
|
|
|
|
/* ZSTD_rawLiteralsCost() :
|
|
* price of literals (only) in specified segment (which length can be 0).
|
|
* does not include price of literalLength symbol */
|
|
static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
|
|
const optState_t* const optPtr,
|
|
int optLevel)
|
|
{
|
|
if (litLength == 0) return 0;
|
|
|
|
if (!ZSTD_compressedLiterals(optPtr))
|
|
return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */
|
|
|
|
if (optPtr->priceType == zop_predef)
|
|
return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
|
|
|
|
/* dynamic statistics */
|
|
{ U32 price = litLength * optPtr->litSumBasePrice;
|
|
U32 u;
|
|
for (u=0; u < litLength; u++) {
|
|
assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */
|
|
price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
|
|
}
|
|
return price;
|
|
}
|
|
}
|
|
|
|
/* ZSTD_litLengthPrice() :
|
|
* cost of literalLength symbol */
|
|
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
|
|
{
|
|
if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
|
|
|
|
/* dynamic statistics */
|
|
{ U32 const llCode = ZSTD_LLcode(litLength);
|
|
return (LL_bits[llCode] * BITCOST_MULTIPLIER)
|
|
+ optPtr->litLengthSumBasePrice
|
|
- WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
|
|
}
|
|
}
|
|
|
|
/* ZSTD_getMatchPrice() :
|
|
* Provides the cost of the match part (offset + matchLength) of a sequence
|
|
* Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
|
|
* optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
|
|
FORCE_INLINE_TEMPLATE U32
|
|
ZSTD_getMatchPrice(U32 const offset,
|
|
U32 const matchLength,
|
|
const optState_t* const optPtr,
|
|
int const optLevel)
|
|
{
|
|
U32 price;
|
|
U32 const offCode = ZSTD_highbit32(offset+1);
|
|
U32 const mlBase = matchLength - MINMATCH;
|
|
assert(matchLength >= MINMATCH);
|
|
|
|
if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */
|
|
return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
|
|
|
|
/* dynamic statistics */
|
|
price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
|
|
if ((optLevel<2) /*static*/ && offCode >= 20)
|
|
price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
|
|
|
|
/* match Length */
|
|
{ U32 const mlCode = ZSTD_MLcode(mlBase);
|
|
price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
|
|
}
|
|
|
|
price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
|
|
|
|
DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
|
|
return price;
|
|
}
|
|
|
|
/* ZSTD_updateStats() :
|
|
* assumption : literals + litLengtn <= iend */
|
|
static void ZSTD_updateStats(optState_t* const optPtr,
|
|
U32 litLength, const BYTE* literals,
|
|
U32 offsetCode, U32 matchLength)
|
|
{
|
|
/* literals */
|
|
if (ZSTD_compressedLiterals(optPtr)) {
|
|
U32 u;
|
|
for (u=0; u < litLength; u++)
|
|
optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
|
|
optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
|
|
}
|
|
|
|
/* literal Length */
|
|
{ U32 const llCode = ZSTD_LLcode(litLength);
|
|
optPtr->litLengthFreq[llCode]++;
|
|
optPtr->litLengthSum++;
|
|
}
|
|
|
|
/* match offset code (0-2=>repCode; 3+=>offset+2) */
|
|
{ U32 const offCode = ZSTD_highbit32(offsetCode+1);
|
|
assert(offCode <= MaxOff);
|
|
optPtr->offCodeFreq[offCode]++;
|
|
optPtr->offCodeSum++;
|
|
}
|
|
|
|
/* match Length */
|
|
{ U32 const mlBase = matchLength - MINMATCH;
|
|
U32 const mlCode = ZSTD_MLcode(mlBase);
|
|
optPtr->matchLengthFreq[mlCode]++;
|
|
optPtr->matchLengthSum++;
|
|
}
|
|
}
|
|
|
|
|
|
/* ZSTD_readMINMATCH() :
|
|
* function safe only for comparisons
|
|
* assumption : memPtr must be at least 4 bytes before end of buffer */
|
|
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
|
|
{
|
|
switch (length)
|
|
{
|
|
default :
|
|
case 4 : return MEM_read32(memPtr);
|
|
case 3 : if (MEM_isLittleEndian())
|
|
return MEM_read32(memPtr)<<8;
|
|
else
|
|
return MEM_read32(memPtr)>>8;
|
|
}
|
|
}
|
|
|
|
|
|
/* Update hashTable3 up to ip (excluded)
|
|
Assumption : always within prefix (i.e. not within extDict) */
|
|
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
|
|
U32* nextToUpdate3,
|
|
const BYTE* const ip)
|
|
{
|
|
U32* const hashTable3 = ms->hashTable3;
|
|
U32 const hashLog3 = ms->hashLog3;
|
|
const BYTE* const base = ms->window.base;
|
|
U32 idx = *nextToUpdate3;
|
|
U32 const target = (U32)(ip - base);
|
|
size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
|
|
assert(hashLog3 > 0);
|
|
|
|
while(idx < target) {
|
|
hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
|
|
idx++;
|
|
}
|
|
|
|
*nextToUpdate3 = target;
|
|
return hashTable3[hash3];
|
|
}
|
|
|
|
|
|
/*-*************************************
|
|
* Binary Tree search
|
|
***************************************/
|
|
/** ZSTD_insertBt1() : add one or multiple positions to tree.
|
|
* ip : assumed <= iend-8 .
|
|
* @return : nb of positions added */
|
|
static U32 ZSTD_insertBt1(
|
|
ZSTD_matchState_t* ms,
|
|
const BYTE* const ip, const BYTE* const iend,
|
|
U32 const mls, const int extDict)
|
|
{
|
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
U32* const hashTable = ms->hashTable;
|
|
U32 const hashLog = cParams->hashLog;
|
|
size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
|
|
U32* const bt = ms->chainTable;
|
|
U32 const btLog = cParams->chainLog - 1;
|
|
U32 const btMask = (1 << btLog) - 1;
|
|
U32 matchIndex = hashTable[h];
|
|
size_t commonLengthSmaller=0, commonLengthLarger=0;
|
|
const BYTE* const base = ms->window.base;
|
|
const BYTE* const dictBase = ms->window.dictBase;
|
|
const U32 dictLimit = ms->window.dictLimit;
|
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
|
const BYTE* const prefixStart = base + dictLimit;
|
|
const BYTE* match;
|
|
const U32 current = (U32)(ip-base);
|
|
const U32 btLow = btMask >= current ? 0 : current - btMask;
|
|
U32* smallerPtr = bt + 2*(current&btMask);
|
|
U32* largerPtr = smallerPtr + 1;
|
|
U32 dummy32; /* to be nullified at the end */
|
|
U32 const windowLow = ms->window.lowLimit;
|
|
U32 matchEndIdx = current+8+1;
|
|
size_t bestLength = 8;
|
|
U32 nbCompares = 1U << cParams->searchLog;
|
|
#ifdef ZSTD_C_PREDICT
|
|
U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
|
|
U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
|
|
predictedSmall += (predictedSmall>0);
|
|
predictedLarge += (predictedLarge>0);
|
|
#endif /* ZSTD_C_PREDICT */
|
|
|
|
DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
|
|
|
|
assert(ip <= iend-8); /* required for h calculation */
|
|
hashTable[h] = current; /* Update Hash Table */
|
|
|
|
assert(windowLow > 0);
|
|
while (nbCompares-- && (matchIndex >= windowLow)) {
|
|
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
|
assert(matchIndex < current);
|
|
|
|
#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
|
|
const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
|
|
if (matchIndex == predictedSmall) {
|
|
/* no need to check length, result known */
|
|
*smallerPtr = matchIndex;
|
|
if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
|
|
smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
|
|
matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
|
|
predictedSmall = predictPtr[1] + (predictPtr[1]>0);
|
|
continue;
|
|
}
|
|
if (matchIndex == predictedLarge) {
|
|
*largerPtr = matchIndex;
|
|
if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
|
|
largerPtr = nextPtr;
|
|
matchIndex = nextPtr[0];
|
|
predictedLarge = predictPtr[0] + (predictPtr[0]>0);
|
|
continue;
|
|
}
|
|
#endif
|
|
|
|
if (!extDict || (matchIndex+matchLength >= dictLimit)) {
|
|
assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */
|
|
match = base + matchIndex;
|
|
matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
|
|
} else {
|
|
match = dictBase + matchIndex;
|
|
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
|
|
if (matchIndex+matchLength >= dictLimit)
|
|
match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
|
|
}
|
|
|
|
if (matchLength > bestLength) {
|
|
bestLength = matchLength;
|
|
if (matchLength > matchEndIdx - matchIndex)
|
|
matchEndIdx = matchIndex + (U32)matchLength;
|
|
}
|
|
|
|
if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
|
|
break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
|
|
}
|
|
|
|
if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
|
|
/* match is smaller than current */
|
|
*smallerPtr = matchIndex; /* update smaller idx */
|
|
commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
|
|
if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
|
|
smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
|
|
matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
|
|
} else {
|
|
/* match is larger than current */
|
|
*largerPtr = matchIndex;
|
|
commonLengthLarger = matchLength;
|
|
if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
|
|
largerPtr = nextPtr;
|
|
matchIndex = nextPtr[0];
|
|
} }
|
|
|
|
*smallerPtr = *largerPtr = 0;
|
|
{ U32 positions = 0;
|
|
if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
|
|
assert(matchEndIdx > current + 8);
|
|
return MAX(positions, matchEndIdx - (current + 8));
|
|
}
|
|
}
|
|
|
|
FORCE_INLINE_TEMPLATE
|
|
void ZSTD_updateTree_internal(
|
|
ZSTD_matchState_t* ms,
|
|
const BYTE* const ip, const BYTE* const iend,
|
|
const U32 mls, const ZSTD_dictMode_e dictMode)
|
|
{
|
|
const BYTE* const base = ms->window.base;
|
|
U32 const target = (U32)(ip - base);
|
|
U32 idx = ms->nextToUpdate;
|
|
DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
|
|
idx, target, dictMode);
|
|
|
|
while(idx < target) {
|
|
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
|
|
assert(idx < (U32)(idx + forward));
|
|
idx += forward;
|
|
}
|
|
assert((size_t)(ip - base) <= (size_t)(U32)(-1));
|
|
assert((size_t)(iend - base) <= (size_t)(U32)(-1));
|
|
ms->nextToUpdate = target;
|
|
}
|
|
|
|
/* ZSTD_updateTree() :
 * update the binary tree up to ip (excluded),
 * using the configured minMatch and no dictionary mode. */
void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend)
{
    U32 const mls = ms->cParams.minMatch;
    ZSTD_updateTree_internal(ms, ip, iend, mls, ZSTD_noDict);
}
|
|
|
|
FORCE_INLINE_TEMPLATE
|
|
U32 ZSTD_insertBtAndGetAllMatches (
|
|
ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
|
|
ZSTD_matchState_t* ms,
|
|
U32* nextToUpdate3,
|
|
const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
|
|
const U32 rep[ZSTD_REP_NUM],
|
|
U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
|
|
const U32 lengthToBeat,
|
|
U32 const mls /* template */)
|
|
{
|
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
|
|
const BYTE* const base = ms->window.base;
|
|
U32 const current = (U32)(ip-base);
|
|
U32 const hashLog = cParams->hashLog;
|
|
U32 const minMatch = (mls==3) ? 3 : 4;
|
|
U32* const hashTable = ms->hashTable;
|
|
size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
|
|
U32 matchIndex = hashTable[h];
|
|
U32* const bt = ms->chainTable;
|
|
U32 const btLog = cParams->chainLog - 1;
|
|
U32 const btMask= (1U << btLog) - 1;
|
|
size_t commonLengthSmaller=0, commonLengthLarger=0;
|
|
const BYTE* const dictBase = ms->window.dictBase;
|
|
U32 const dictLimit = ms->window.dictLimit;
|
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
|
const BYTE* const prefixStart = base + dictLimit;
|
|
U32 const btLow = (btMask >= current) ? 0 : current - btMask;
|
|
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
|
|
U32 const matchLow = windowLow ? windowLow : 1;
|
|
U32* smallerPtr = bt + 2*(current&btMask);
|
|
U32* largerPtr = bt + 2*(current&btMask) + 1;
|
|
U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
|
|
U32 dummy32; /* to be nullified at the end */
|
|
U32 mnum = 0;
|
|
U32 nbCompares = 1U << cParams->searchLog;
|
|
|
|
const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
|
|
const ZSTD_compressionParameters* const dmsCParams =
|
|
dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
|
|
const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
|
|
const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
|
|
U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
|
|
U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
|
|
U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
|
|
U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
|
|
U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
|
|
U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
|
|
U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
|
|
|
|
size_t bestLength = lengthToBeat-1;
|
|
DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
|
|
|
|
/* check repCode */
|
|
assert(ll0 <= 1); /* necessarily 1 or 0 */
|
|
{ U32 const lastR = ZSTD_REP_NUM + ll0;
|
|
U32 repCode;
|
|
for (repCode = ll0; repCode < lastR; repCode++) {
|
|
U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
|
|
U32 const repIndex = current - repOffset;
|
|
U32 repLen = 0;
|
|
assert(current >= dictLimit);
|
|
if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */
|
|
if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) {
|
|
repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
|
|
}
|
|
} else { /* repIndex < dictLimit || repIndex >= current */
|
|
const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
|
|
dmsBase + repIndex - dmsIndexDelta :
|
|
dictBase + repIndex;
|
|
assert(current >= windowLow);
|
|
if ( dictMode == ZSTD_extDict
|
|
&& ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */
|
|
& (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
|
|
&& (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
|
|
repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
|
|
}
|
|
if (dictMode == ZSTD_dictMatchState
|
|
&& ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `current > repIndex >= dmsLowLimit` */
|
|
& ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
|
|
&& (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
|
|
repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
|
|
} }
|
|
/* save longer solution */
|
|
if (repLen > bestLength) {
|
|
DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
|
|
repCode, ll0, repOffset, repLen);
|
|
bestLength = repLen;
|
|
matches[mnum].off = repCode - ll0;
|
|
matches[mnum].len = (U32)repLen;
|
|
mnum++;
|
|
if ( (repLen > sufficient_len)
|
|
| (ip+repLen == iLimit) ) { /* best possible */
|
|
return mnum;
|
|
} } } }
|
|
|
|
/* HC3 match finder */
|
|
if ((mls == 3) /*static*/ && (bestLength < mls)) {
|
|
U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
|
|
if ((matchIndex3 >= matchLow)
|
|
& (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
|
|
size_t mlen;
|
|
if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
|
|
const BYTE* const match = base + matchIndex3;
|
|
mlen = ZSTD_count(ip, match, iLimit);
|
|
} else {
|
|
const BYTE* const match = dictBase + matchIndex3;
|
|
mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
|
|
}
|
|
|
|
/* save best solution */
|
|
if (mlen >= mls /* == 3 > bestLength */) {
|
|
DEBUGLOG(8, "found small match with hlog3, of length %u",
|
|
(U32)mlen);
|
|
bestLength = mlen;
|
|
assert(current > matchIndex3);
|
|
assert(mnum==0); /* no prior solution */
|
|
matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
|
|
matches[0].len = (U32)mlen;
|
|
mnum = 1;
|
|
if ( (mlen > sufficient_len) |
|
|
(ip+mlen == iLimit) ) { /* best possible length */
|
|
ms->nextToUpdate = current+1; /* skip insertion */
|
|
return 1;
|
|
} } }
|
|
/* no dictMatchState lookup: dicts don't have a populated HC3 table */
|
|
}
|
|
|
|
hashTable[h] = current; /* Update Hash Table */
|
|
|
|
while (nbCompares-- && (matchIndex >= matchLow)) {
|
|
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
|
const BYTE* match;
|
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
|
assert(current > matchIndex);
|
|
|
|
if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
|
|
assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
|
|
match = base + matchIndex;
|
|
if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
|
|
matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
|
|
} else {
|
|
match = dictBase + matchIndex;
|
|
assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
|
|
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
|
|
if (matchIndex+matchLength >= dictLimit)
|
|
match = base + matchIndex; /* prepare for match[matchLength] read */
|
|
}
|
|
|
|
if (matchLength > bestLength) {
|
|
DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
|
|
(U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
|
|
assert(matchEndIdx > matchIndex);
|
|
if (matchLength > matchEndIdx - matchIndex)
|
|
matchEndIdx = matchIndex + (U32)matchLength;
|
|
bestLength = matchLength;
|
|
matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
|
|
matches[mnum].len = (U32)matchLength;
|
|
mnum++;
|
|
if ( (matchLength > ZSTD_OPT_NUM)
|
|
| (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
|
|
if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
|
|
break; /* drop, to preserve bt consistency (miss a little bit of compression) */
|
|
}
|
|
}
|
|
|
|
if (match[matchLength] < ip[matchLength]) {
|
|
/* match smaller than current */
|
|
*smallerPtr = matchIndex; /* update smaller idx */
|
|
commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
|
|
if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
|
|
smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */
|
|
matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */
|
|
} else {
|
|
*largerPtr = matchIndex;
|
|
commonLengthLarger = matchLength;
|
|
if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
|
|
largerPtr = nextPtr;
|
|
matchIndex = nextPtr[0];
|
|
} }
|
|
|
|
*smallerPtr = *largerPtr = 0;
|
|
|
|
if (dictMode == ZSTD_dictMatchState && nbCompares) {
|
|
size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
|
|
U32 dictMatchIndex = dms->hashTable[dmsH];
|
|
const U32* const dmsBt = dms->chainTable;
|
|
commonLengthSmaller = commonLengthLarger = 0;
|
|
while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
|
|
const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
|
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
|
const BYTE* match = dmsBase + dictMatchIndex;
|
|
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
|
|
if (dictMatchIndex+matchLength >= dmsHighLimit)
|
|
match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */
|
|
|
|
if (matchLength > bestLength) {
|
|
matchIndex = dictMatchIndex + dmsIndexDelta;
|
|
DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
|
|
(U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
|
|
if (matchLength > matchEndIdx - matchIndex)
|
|
matchEndIdx = matchIndex + (U32)matchLength;
|
|
bestLength = matchLength;
|
|
matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
|
|
matches[mnum].len = (U32)matchLength;
|
|
mnum++;
|
|
if ( (matchLength > ZSTD_OPT_NUM)
|
|
| (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
|
|
break; /* drop, to guarantee consistency (miss a little bit of compression) */
|
|
}
|
|
}
|
|
|
|
if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
|
|
if (match[matchLength] < ip[matchLength]) {
|
|
commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
|
|
dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
|
|
} else {
|
|
/* match is larger than current */
|
|
commonLengthLarger = matchLength;
|
|
dictMatchIndex = nextPtr[0];
|
|
}
|
|
}
|
|
}
|
|
|
|
assert(matchEndIdx > current+8);
|
|
ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
|
|
return mnum;
|
|
}
|
|
|
|
|
|
/* ZSTD_BtGetAllMatches() :
 * bring the binary tree up to date up to `ip`, then collect all matches at `ip`.
 * Dispatches on cParams->minMatch so that each call passes a literal `mls` :
 *   3 => 3, 5 => 5, 6 and 7 => 6, anything else (including 4) => 4.
 * matches : store result (match found, increasing size) in this table
 * @return : nb of matches found; 0 when `ip` lies before ms->nextToUpdate (skipped area) */
FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
                        ZSTD_match_t* matches,   /* store result (match found, increasing size) in this table */
                        ZSTD_matchState_t* ms,
                        U32* nextToUpdate3,
                        const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
                        const U32 rep[ZSTD_REP_NUM],
                        U32 const ll0,
                        U32 const lengthToBeat)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const matchLengthSearch = cParams->minMatch;
    DEBUGLOG(8, "ZSTD_BtGetAllMatches");
    if (ip < ms->window.base + ms->nextToUpdate) {
        return 0;   /* skipped area */
    }
    ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
    switch (matchLengthSearch) {
    case 3 :
        return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
    case 5 :
        return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
    case 6 :
    case 7 :
        return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
    case 4 :
    default :
        return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
    }
}
|
|
|
|
|
|
/*-*******************************
*  Optimal parser
*********************************/
|
|
|
|
|
|
/* ZSTD_totalLen() :
 * @return : number of bytes covered by `sol`, i.e. its literals run plus its match */
static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
{
    U32 const coveredBytes = sol.mlen + sol.litlen;
    return coveredBytes;
}
|
|
|
|
#if 0    /* debug */

/* listStats() :
 * debug-only helper (compiled out by the surrounding #if 0) :
 * prints table[0] .. table[lastEltID] through RAWLOG(), followed by an end of line */
static void
listStats(const U32* table, int lastEltID)
{
    int enb = 0;
    while (enb <= lastEltID) {
        (void)table;   /* presumably keeps `table` referenced when RAWLOG() expands to nothing */
        /* alternative output, prefixed with the index :
         * RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */
        RAWLOG(2, "%4i,", table[enb]);
        enb++;
    }
    RAWLOG(2, " \n");
}

#endif
|
|
|
|
FORCE_INLINE_TEMPLATE size_t
|
|
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
seqStore_t* seqStore,
|
|
U32 rep[ZSTD_REP_NUM],
|
|
const void* src, size_t srcSize,
|
|
const int optLevel,
|
|
const ZSTD_dictMode_e dictMode)
|
|
{
|
|
optState_t* const optStatePtr = &ms->opt;
|
|
const BYTE* const istart = (const BYTE*)src;
|
|
const BYTE* ip = istart;
|
|
const BYTE* anchor = istart;
|
|
const BYTE* const iend = istart + srcSize;
|
|
const BYTE* const ilimit = iend - 8;
|
|
const BYTE* const base = ms->window.base;
|
|
const BYTE* const prefixStart = base + ms->window.dictLimit;
|
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
|
|
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
|
|
U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
|
|
U32 nextToUpdate3 = ms->nextToUpdate;
|
|
|
|
ZSTD_optimal_t* const opt = optStatePtr->priceTable;
|
|
ZSTD_match_t* const matches = optStatePtr->matchTable;
|
|
ZSTD_optimal_t lastSequence;
|
|
|
|
/* init */
|
|
DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
|
|
(U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
|
|
assert(optLevel <= 2);
|
|
ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
|
|
ip += (ip==prefixStart);
|
|
|
|
/* Match Loop */
|
|
while (ip < ilimit) {
|
|
U32 cur, last_pos = 0;
|
|
|
|
/* find first match */
|
|
{ U32 const litlen = (U32)(ip - anchor);
|
|
U32 const ll0 = !litlen;
|
|
U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
|
|
if (!nbMatches) { ip++; continue; }
|
|
|
|
/* initialize opt[0] */
|
|
{ U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
|
|
opt[0].mlen = 0; /* means is_a_literal */
|
|
opt[0].litlen = litlen;
|
|
/* We don't need to include the actual price of the literals because
|
|
* it is static for the duration of the forward pass, and is included
|
|
* in every price. We include the literal length to avoid negative
|
|
* prices when we subtract the previous literal length.
|
|
*/
|
|
opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
|
|
|
|
/* large match -> immediate encoding */
|
|
{ U32 const maxML = matches[nbMatches-1].len;
|
|
U32 const maxOffset = matches[nbMatches-1].off;
|
|
DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
|
|
nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
|
|
|
|
if (maxML > sufficient_len) {
|
|
lastSequence.litlen = litlen;
|
|
lastSequence.mlen = maxML;
|
|
lastSequence.off = maxOffset;
|
|
DEBUGLOG(6, "large match (%u>%u), immediate encoding",
|
|
maxML, sufficient_len);
|
|
cur = 0;
|
|
last_pos = ZSTD_totalLen(lastSequence);
|
|
goto _shortestPath;
|
|
} }
|
|
|
|
/* set prices for first matches starting position == 0 */
|
|
{ U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
|
|
U32 pos;
|
|
U32 matchNb;
|
|
for (pos = 1; pos < minMatch; pos++) {
|
|
opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
|
|
}
|
|
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
|
|
U32 const offset = matches[matchNb].off;
|
|
U32 const end = matches[matchNb].len;
|
|
for ( ; pos <= end ; pos++ ) {
|
|
U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
|
|
U32 const sequencePrice = literalsPrice + matchPrice;
|
|
DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
|
|
pos, ZSTD_fCost(sequencePrice));
|
|
opt[pos].mlen = pos;
|
|
opt[pos].off = offset;
|
|
opt[pos].litlen = litlen;
|
|
opt[pos].price = sequencePrice;
|
|
} }
|
|
last_pos = pos-1;
|
|
}
|
|
}
|
|
|
|
/* check further positions */
|
|
for (cur = 1; cur <= last_pos; cur++) {
|
|
const BYTE* const inr = ip + cur;
|
|
assert(cur < ZSTD_OPT_NUM);
|
|
DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
|
|
|
|
/* Fix current position with one literal if cheaper */
|
|
{ U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
|
|
int const price = opt[cur-1].price
|
|
+ ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
|
|
+ ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
|
|
- ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
|
|
assert(price < 1000000000); /* overflow check */
|
|
if (price <= opt[cur].price) {
|
|
DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
|
|
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
|
|
opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
|
|
opt[cur].mlen = 0;
|
|
opt[cur].off = 0;
|
|
opt[cur].litlen = litlen;
|
|
opt[cur].price = price;
|
|
} else {
|
|
DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
|
|
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
|
|
opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
|
|
}
|
|
}
|
|
|
|
/* Set the repcodes of the current position. We must do it here
|
|
* because we rely on the repcodes of the 2nd to last sequence being
|
|
* correct to set the next chunks repcodes during the backward
|
|
* traversal.
|
|
*/
|
|
ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
|
|
assert(cur >= opt[cur].mlen);
|
|
if (opt[cur].mlen != 0) {
|
|
U32 const prev = cur - opt[cur].mlen;
|
|
repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
|
|
memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
|
|
} else {
|
|
memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
|
|
}
|
|
|
|
/* last match must start at a minimum distance of 8 from oend */
|
|
if (inr > ilimit) continue;
|
|
|
|
if (cur == last_pos) break;
|
|
|
|
if ( (optLevel==0) /*static_test*/
|
|
&& (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
|
|
DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
|
|
continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
|
|
}
|
|
|
|
{ U32 const ll0 = (opt[cur].mlen != 0);
|
|
U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
|
|
U32 const previousPrice = opt[cur].price;
|
|
U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
|
|
U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
|
|
U32 matchNb;
|
|
if (!nbMatches) {
|
|
DEBUGLOG(7, "rPos:%u : no match found", cur);
|
|
continue;
|
|
}
|
|
|
|
{ U32 const maxML = matches[nbMatches-1].len;
|
|
DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
|
|
inr-istart, cur, nbMatches, maxML);
|
|
|
|
if ( (maxML > sufficient_len)
|
|
|| (cur + maxML >= ZSTD_OPT_NUM) ) {
|
|
lastSequence.mlen = maxML;
|
|
lastSequence.off = matches[nbMatches-1].off;
|
|
lastSequence.litlen = litlen;
|
|
cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
|
|
last_pos = cur + ZSTD_totalLen(lastSequence);
|
|
if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
|
|
goto _shortestPath;
|
|
} }
|
|
|
|
/* set prices using matches found at position == cur */
|
|
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
|
|
U32 const offset = matches[matchNb].off;
|
|
U32 const lastML = matches[matchNb].len;
|
|
U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
|
|
U32 mlen;
|
|
|
|
DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
|
|
matchNb, matches[matchNb].off, lastML, litlen);
|
|
|
|
for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
|
|
U32 const pos = cur + mlen;
|
|
int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
|
|
|
|
if ((pos > last_pos) || (price < opt[pos].price)) {
|
|
DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
|
|
pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
|
|
while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */
|
|
opt[pos].mlen = mlen;
|
|
opt[pos].off = offset;
|
|
opt[pos].litlen = litlen;
|
|
opt[pos].price = price;
|
|
} else {
|
|
DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
|
|
pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
|
|
if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
|
|
}
|
|
} } }
|
|
} /* for (cur = 1; cur <= last_pos; cur++) */
|
|
|
|
lastSequence = opt[last_pos];
|
|
cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */
|
|
assert(cur < ZSTD_OPT_NUM); /* control overflow*/
|
|
|
|
_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
assert(opt[0].mlen == 0);
|
|
|
|
/* Set the next chunk's repcodes based on the repcodes of the beginning
|
|
* of the last match, and the last sequence. This avoids us having to
|
|
* update them while traversing the sequences.
|
|
*/
|
|
if (lastSequence.mlen != 0) {
|
|
repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
|
|
memcpy(rep, &reps, sizeof(reps));
|
|
} else {
|
|
memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
|
|
}
|
|
|
|
{ U32 const storeEnd = cur + 1;
|
|
U32 storeStart = storeEnd;
|
|
U32 seqPos = cur;
|
|
|
|
DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
|
|
last_pos, cur); (void)last_pos;
|
|
assert(storeEnd < ZSTD_OPT_NUM);
|
|
DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
|
|
storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
|
|
opt[storeEnd] = lastSequence;
|
|
while (seqPos > 0) {
|
|
U32 const backDist = ZSTD_totalLen(opt[seqPos]);
|
|
storeStart--;
|
|
DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
|
|
seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
|
|
opt[storeStart] = opt[seqPos];
|
|
seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
|
|
}
|
|
|
|
/* save sequences */
|
|
DEBUGLOG(6, "sending selected sequences into seqStore")
|
|
{ U32 storePos;
|
|
for (storePos=storeStart; storePos <= storeEnd; storePos++) {
|
|
U32 const llen = opt[storePos].litlen;
|
|
U32 const mlen = opt[storePos].mlen;
|
|
U32 const offCode = opt[storePos].off;
|
|
U32 const advance = llen + mlen;
|
|
DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
|
|
anchor - istart, (unsigned)llen, (unsigned)mlen);
|
|
|
|
if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
|
|
assert(storePos == storeEnd); /* must be last sequence */
|
|
ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */
|
|
continue; /* will finish */
|
|
}
|
|
|
|
assert(anchor + llen <= iend);
|
|
ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
|
|
ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
|
|
anchor += advance;
|
|
ip = anchor;
|
|
} }
|
|
ZSTD_setBasePrices(optStatePtr, optLevel);
|
|
}
|
|
} /* while (ip < ilimit) */
|
|
|
|
/* Return the last literals size */
|
|
return (size_t)(iend - anchor);
|
|
}
|
|
|
|
|
|
size_t ZSTD_compressBlock_btopt(
|
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
const void* src, size_t srcSize)
|
|
{
|
|
DEBUGLOG(5, "ZSTD_compressBlock_btopt");
|
|
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
|
|
}
|
|
|
|
|
|
/* used in 2-pass strategy */
|
|
static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
|
|
{
|
|
U32 s, sum=0;
|
|
assert(ZSTD_FREQ_DIV+bonus >= 0);
|
|
for (s=0; s<lastEltIndex+1; s++) {
|
|
table[s] <<= ZSTD_FREQ_DIV+bonus;
|
|
table[s]--;
|
|
sum += table[s];
|
|
}
|
|
return sum;
|
|
}
|
|
|
|
/* used in 2-pass strategy */
|
|
MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
|
|
{
|
|
if (ZSTD_compressedLiterals(optPtr))
|
|
optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
|
|
optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
|
|
optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
|
|
optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
|
|
}
|
|
|
|
/* ZSTD_initStats_ultra():
|
|
* make a first compression pass, just to seed stats with more accurate starting values.
|
|
* only works on first block, with no dictionary and no ldm.
|
|
* this function cannot error, hence its contract must be respected.
|
|
*/
|
|
static void
|
|
ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
|
seqStore_t* seqStore,
|
|
U32 rep[ZSTD_REP_NUM],
|
|
const void* src, size_t srcSize)
|
|
{
|
|
U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
|
|
memcpy(tmpRep, rep, sizeof(tmpRep));
|
|
|
|
DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
|
|
assert(ms->opt.litLengthSum == 0); /* first block */
|
|
assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */
|
|
assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
|
|
assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
|
|
|
|
ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
|
|
|
|
/* invalidate first scan from history */
|
|
ZSTD_resetSeqStore(seqStore);
|
|
ms->window.base -= srcSize;
|
|
ms->window.dictLimit += (U32)srcSize;
|
|
ms->window.lowLimit = ms->window.dictLimit;
|
|
ms->nextToUpdate = ms->window.dictLimit;
|
|
|
|
/* re-inforce weight of collected statistics */
|
|
ZSTD_upscaleStats(&ms->opt);
|
|
}
|
|
|
|
size_t ZSTD_compressBlock_btultra(
|
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
const void* src, size_t srcSize)
|
|
{
|
|
DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
|
|
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
|
|
}
|
|
|
|
size_t ZSTD_compressBlock_btultra2(
|
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
const void* src, size_t srcSize)
|
|
{
|
|
U32 const current = (U32)((const BYTE*)src - ms->window.base);
|
|
DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
|
|
|
|
/* 2-pass strategy:
|
|
* this strategy makes a first pass over first block to collect statistics
|
|
* and seed next round's statistics with it.
|
|
* After 1st pass, function forgets everything, and starts a new block.
|
|
* Consequently, this can only work if no data has been previously loaded in tables,
|
|
* aka, no dictionary, no prefix, no ldm preprocessing.
|
|
* The compression ratio gain is generally small (~0.5% on first block),
|
|
* the cost is 2x cpu time on first block. */
|
|
assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
|
|
if ( (ms->opt.litLengthSum==0) /* first block */
|
|
&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
|
|
&& (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
|
|
&& (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
|
|
&& (srcSize > ZSTD_PREDEF_THRESHOLD)
|
|
) {
|
|
ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
|
|
}
|
|
|
|
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
|
|
}
|
|
|
|
size_t ZSTD_compressBlock_btopt_dictMatchState(
|
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
const void* src, size_t srcSize)
|
|
{
|
|
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
|
|
}
|
|
|
|
size_t ZSTD_compressBlock_btultra_dictMatchState(
|
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
const void* src, size_t srcSize)
|
|
{
|
|
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
|
|
}
|
|
|
|
size_t ZSTD_compressBlock_btopt_extDict(
|
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
const void* src, size_t srcSize)
|
|
{
|
|
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
|
|
}
|
|
|
|
size_t ZSTD_compressBlock_btultra_extDict(
|
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
const void* src, size_t srcSize)
|
|
{
|
|
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
|
|
}
|
|
|
|
/* note : there is no btultra2 variant for extDict nor dictMatchState,
 * because btultra2 is not meant to work with dictionaries :
 * its extra pass only applies to the first block (no prefix) */
|