Merge pull request #138 from inikep/dev
reduce stack usage with optimal parsing mode
This commit is contained in:
commit
1435335342
@ -174,7 +174,9 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
|
||||
/* reserve table memory */
|
||||
const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog;
|
||||
const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog) + (1 << params.hashLog3)) * sizeof(U32);
|
||||
const size_t neededSpace = tableSpace + (256*sizeof(U32)) + (3*blockSize) + ((1<<MLbits) + (1<<LLbits) + (1<<Offbits) + (1<<Litbits))*sizeof(U32);
|
||||
const size_t neededSpace = tableSpace + (256*sizeof(U32)) + (3*blockSize)
|
||||
+ ((params.strategy == ZSTD_btopt) ? ((1<<MLbits) + (1<<LLbits) + (1<<Offbits) + (1<<Litbits))*sizeof(U32) + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t)) : 0);
|
||||
|
||||
if (zc->workSpaceSize < neededSpace) {
|
||||
free(zc->workSpace);
|
||||
zc->workSpace = malloc(neededSpace);
|
||||
@ -199,20 +201,21 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
|
||||
zc->params = params;
|
||||
zc->blockSize = blockSize;
|
||||
|
||||
zc->seqStore.litFreq = (U32*) (zc->seqStore.buffer);
|
||||
zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
|
||||
zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (1<<LLbits);
|
||||
zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (1<<MLbits);
|
||||
|
||||
zc->seqStore.offsetStart = zc->seqStore.offCodeFreq + (1<<Offbits);
|
||||
zc->seqStore.offCodeStart = (BYTE*) (zc->seqStore.offsetStart + (blockSize>>2));
|
||||
zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer);
|
||||
zc->seqStore.offCodeStart = (BYTE*) (zc->seqStore.offsetStart) + blockSize;
|
||||
zc->seqStore.litStart = zc->seqStore.offCodeStart + (blockSize>>2);
|
||||
zc->seqStore.litLengthStart = zc->seqStore.litStart + blockSize;
|
||||
zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + (blockSize>>2);
|
||||
zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + (blockSize>>2);
|
||||
// zc->seqStore.XXX = zc->seqStore.dumpsStart + (blockSize>>4);
|
||||
zc->seqStore.litLengthSum = 0;
|
||||
|
||||
zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.dumpsStart + (blockSize>>2)));
|
||||
zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
|
||||
zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (1<<LLbits);
|
||||
zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (1<<MLbits);
|
||||
zc->seqStore.matchTable = (ZSTD_match_t*)(zc->seqStore.offCodeFreq + (1<<Offbits));
|
||||
zc->seqStore.priceTable = (ZSTD_optimal_t*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1);
|
||||
|
||||
zc->seqStore.litLengthSum = 0;
|
||||
zc->hbSize = 0;
|
||||
zc->stage = 0;
|
||||
zc->loadedDictEnd = 0;
|
||||
|
@ -64,6 +64,7 @@
|
||||
#define ZSTD_LOG_BLOCK(...)
|
||||
#endif
|
||||
|
||||
#define ZSTD_OPT_NUM (1<<12)
|
||||
#define ZSTD_DICT_MAGIC 0xEC30A435
|
||||
|
||||
#define KB *(1 <<10)
|
||||
@ -165,6 +166,20 @@ MEM_STATIC unsigned ZSTD_highbit(U32 val)
|
||||
/*-*******************************************
|
||||
* Private interfaces
|
||||
*********************************************/
|
||||
/* One entry of the optimal parser's match table.
 * The compression context exposes an array of these as seqStore.matchTable,
 * and the opt block compressors read it through a local `matches` pointer.
 * NOTE(review): field meanings below are inferred from their names only. */
typedef struct {
|
||||
U32 off;   /* presumably the match offset -- TODO confirm against the match finder */
|
||||
U32 len;   /* presumably the match length -- TODO confirm against the match finder */
|
||||
} ZSTD_match_t;
|
||||
|
||||
/* One entry of the optimal parser's price table.
 * The compression context exposes an array of these as seqStore.priceTable,
 * and the opt block compressors read it through a local `opt` pointer.
 * NOTE(review): field meanings below are inferred from their names only;
 * verify against the parser loop before relying on them. */
typedef struct {
|
||||
U32 price;    /* presumably the accumulated cost for this position */
|
||||
U32 off;      /* presumably the offset of the chosen match */
|
||||
U32 mlen;     /* presumably the length of the chosen match */
|
||||
U32 litlen;   /* presumably the literal run length */
|
||||
U32 rep;      /* presumably the first repeat offset (cf. rep_1 in the parser) */
|
||||
U32 rep2;     /* presumably the second repeat offset (cf. rep_2 in the parser) */
|
||||
} ZSTD_optimal_t;
|
||||
|
||||
typedef struct {
|
||||
void* buffer;
|
||||
U32* offsetStart;
|
||||
@ -180,6 +195,8 @@ typedef struct {
|
||||
BYTE* dumpsStart;
|
||||
BYTE* dumps;
|
||||
/* opt */
|
||||
ZSTD_optimal_t* priceTable;
|
||||
ZSTD_match_t* matchTable;
|
||||
U32* matchLengthFreq;
|
||||
U32* litLengthFreq;
|
||||
U32* litFreq;
|
||||
|
@ -269,8 +269,8 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx,
|
||||
const U32 mls = ctx->params.searchLength;
|
||||
const U32 sufficient_len = ctx->params.targetLength;
|
||||
|
||||
ZSTD_optimal_t opt[ZSTD_OPT_NUM+1];
|
||||
ZSTD_match_t matches[ZSTD_OPT_NUM+1];
|
||||
ZSTD_optimal_t* opt = seqStorePtr->priceTable;
|
||||
ZSTD_match_t* matches = seqStorePtr->matchTable;
|
||||
const BYTE* inr;
|
||||
U32 cur, match_num, last_pos, litlen, price;
|
||||
|
||||
@ -280,32 +280,6 @@ void ZSTD_COMPRESSBLOCK_OPT_GENERIC(ZSTD_CCtx* ctx,
|
||||
ZSTD_rescaleFreqs(seqStorePtr);
|
||||
if ((ip-prefixStart) < REPCODE_STARTVALUE) ip = prefixStart + REPCODE_STARTVALUE;
|
||||
|
||||
#if ZSTD_OPT_DEBUG >= 3
|
||||
size_t mostFrequent;
|
||||
unsigned count[256], maxSymbolValue, usedSymbols = 0;
|
||||
maxSymbolValue = 255;
|
||||
mostFrequent = FSE_count(count, &maxSymbolValue, src, srcSize);
|
||||
for (unsigned i=0; i<=maxSymbolValue; i++)
|
||||
if (count[i]) usedSymbols++;
|
||||
|
||||
seqStorePtr->factor = ((usedSymbols <= 18) && (mostFrequent < (1<<14))) ? mostFrequent>>10 : 0; // helps RTF files
|
||||
seqStorePtr->factor2 = (usedSymbols==256) && (mostFrequent > (1<<14));
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
if (seqStorePtr->factor2)
|
||||
printf("FACTOR2 usedSymbols==256;mostFrequent>(1<<14) maxSymbolValue=%d mostFrequent=%d usedSymbols=%d\n", maxSymbolValue, (int)mostFrequent, usedSymbols);
|
||||
if (seqStorePtr->factor) {
|
||||
printf("FACTOR1 usedSymbols<56;mostFrequent<(1<<14) maxSymbolValue=%d mostFrequent=%d usedSymbols=%d\n", maxSymbolValue, (int)mostFrequent, usedSymbols);
|
||||
#if 0
|
||||
for (int i=0; i<256; i++)
|
||||
if (count[i]) printf("%d=%d ", i, count[i]);
|
||||
printf("\n");
|
||||
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
ZSTD_LOG_BLOCK("%d: COMPBLOCK_OPT_GENERIC srcSz=%d maxSrch=%d mls=%d sufLen=%d\n", (int)(ip-base), (int)srcSize, maxSearches, mls, sufficient_len);
|
||||
|
||||
/* Match Loop */
|
||||
@ -619,8 +593,8 @@ void ZSTD_COMPRESSBLOCK_OPT_EXTDICT_GENERIC(ZSTD_CCtx* ctx,
|
||||
const U32 mls = ctx->params.searchLength;
|
||||
const U32 sufficient_len = ctx->params.targetLength;
|
||||
|
||||
ZSTD_optimal_t opt[ZSTD_OPT_NUM+1];
|
||||
ZSTD_match_t matches[ZSTD_OPT_NUM+1];
|
||||
ZSTD_optimal_t* opt = seqStorePtr->priceTable;
|
||||
ZSTD_match_t* matches = seqStorePtr->matchTable;
|
||||
const BYTE* inr;
|
||||
U32 cur, match_num, last_pos, litlen, price;
|
||||
|
||||
@ -913,7 +887,31 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set
|
||||
anchor = ip = ip + mlen;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* check immediate repcode */
|
||||
while ((anchor >= base + lowLimit + rep_2) && (anchor <= ilimit)) {
|
||||
if ((anchor - rep_2) >= prefixStart) {
|
||||
if (MEM_readMINMATCH(anchor) == MEM_readMINMATCH(anchor - rep_2))
|
||||
mlen = (U32)ZSTD_count(anchor+MINMATCHOPT, anchor - rep_2 + MINMATCHOPT, iend) + MINMATCHOPT;
|
||||
else
|
||||
break;
|
||||
} else {
|
||||
const BYTE* repMatch = dictBase + ((anchor-base) - rep_2);
|
||||
if ((repMatch + MINMATCHOPT <= dictEnd) && (MEM_readMINMATCH(anchor) == MEM_readMINMATCH(repMatch)))
|
||||
mlen = (U32)ZSTD_count_2segments(anchor+MINMATCHOPT, repMatch+MINMATCHOPT, iend, dictEnd, prefixStart) + MINMATCHOPT;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
offset = rep_2; rep_2 = rep_1; rep_1 = offset; /* swap offset history */
|
||||
ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep_1, (int)rep_2);
|
||||
ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, mlen-MINMATCHOPT);
|
||||
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, mlen-MINMATCHOPT);
|
||||
anchor += mlen;
|
||||
}
|
||||
#else
|
||||
/* check immediate repcode */
|
||||
/* minimal correctness condition = while ((anchor >= prefixStart + REPCODE_STARTVALUE) && (anchor <= ilimit)) { */
|
||||
while ((anchor >= base + lowLimit + rep_2) && (anchor <= ilimit)) {
|
||||
const U32 repIndex = (U32)((anchor-base) - rep_2);
|
||||
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
||||
@ -932,6 +930,7 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set
|
||||
}
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
if (anchor > ip) ip = anchor;
|
||||
}
|
||||
|
||||
|
@ -40,100 +40,83 @@
|
||||
* The optimal parser
|
||||
*********************************************/
|
||||
/*- Constants -*/
|
||||
#define ZSTD_OPT_NUM (1<<12)
|
||||
#define ZSTD_FREQ_START 1
|
||||
#define ZSTD_FREQ_STEP 1
|
||||
#define ZSTD_FREQ_DIV 5
|
||||
|
||||
|
||||
/* One match-table entry used by the optimal parser (array element type of
 * the `matches` table in the opt block compressors).
 * NOTE(review): field meanings below are inferred from their names only. */
typedef struct {
|
||||
U32 off;   /* presumably the match offset -- TODO confirm */
|
||||
U32 len;   /* presumably the match length -- TODO confirm */
|
||||
} ZSTD_match_t;
|
||||
|
||||
/* One price-table entry used by the optimal parser (array element type of
 * the `opt` table in the opt block compressors).
 * NOTE(review): field meanings below are inferred from their names only. */
typedef struct {
|
||||
U32 price;    /* presumably the accumulated cost for this position */
|
||||
U32 off;      /* presumably the offset of the chosen match */
|
||||
U32 mlen;     /* presumably the length of the chosen match */
|
||||
U32 litlen;   /* presumably the literal run length */
|
||||
U32 rep;      /* presumably the first repeat offset */
|
||||
U32 rep2;     /* presumably the second repeat offset */
|
||||
} ZSTD_optimal_t;
|
||||
|
||||
|
||||
/* ZSTD_rescaleFreqs() :
 * Prepares the symbol statistics held in *ssPtr (litFreq, litLengthFreq,
 * matchLengthFreq, offCodeFreq and their running sums).
 *
 * - If ssPtr->litLengthSum is 0, every frequency slot (indices 0..MaxLit,
 *   0..MaxLL, 0..MaxML and 0..MaxOff, all inclusive) is filled with a start
 *   count and the sums are set to matching start totals.
 * - Otherwise the sums are zeroed, each frequency f is replaced by
 *   base + (f >> ZSTD_FREQ_DIV), and the sums are re-accumulated from the
 *   scaled tables. matchSum additionally accumulates
 *   matchLengthFreq[u] * (u + 3).
 *
 * NOTE(review): this listing carries each changed statement in two forms,
 * the ZSTD_FREQ_START form immediately followed by a literal-constant form.
 * Only one statement of each pair belongs in a compilable file. */
MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr)
|
||||
{
|
||||
unsigned u;
|
||||
|
||||
/* litLengthSum == 0 : presumably the freshly reset state (no statistics yet) -- TODO confirm */
if (ssPtr->litLengthSum == 0) {
|
||||
ssPtr->matchLengthSum = ZSTD_FREQ_START*(1<<MLbits);
|
||||
ssPtr->litLengthSum = ZSTD_FREQ_START*(1<<LLbits);
|
||||
ssPtr->litSum = ZSTD_FREQ_START*(1<<Litbits);
|
||||
ssPtr->offCodeSum = ZSTD_FREQ_START*(1<<Offbits);
|
||||
ssPtr->matchSum = ssPtr->litSum;
|
||||
|
||||
ssPtr->litSum = 2*(1<<Litbits);
|
||||
ssPtr->litLengthSum = 1*(1<<LLbits);
|
||||
ssPtr->matchLengthSum = 1*(1<<MLbits);
|
||||
ssPtr->offCodeSum = 1*(1<<Offbits);
|
||||
ssPtr->matchSum = 2*(1<<Litbits);
|
||||
|
||||
/* seed every table slot with its start count */
for (u=0; u<=MaxLit; u++)
|
||||
ssPtr->litFreq[u] = ZSTD_FREQ_START;
|
||||
ssPtr->litFreq[u] = 2;
|
||||
for (u=0; u<=MaxLL; u++)
|
||||
ssPtr->litLengthFreq[u] = ZSTD_FREQ_START;
|
||||
ssPtr->litLengthFreq[u] = 1;
|
||||
for (u=0; u<=MaxML; u++)
|
||||
ssPtr->matchLengthFreq[u] = ZSTD_FREQ_START;
|
||||
ssPtr->matchLengthFreq[u] = 1;
|
||||
for (u=0; u<=MaxOff; u++)
|
||||
ssPtr->offCodeFreq[u] = ZSTD_FREQ_START;
|
||||
ssPtr->offCodeFreq[u] = 1;
|
||||
} else {
|
||||
/* existing statistics : reset the sums, then rebuild them from the scaled-down tables */
ssPtr->matchLengthSum = 0;
|
||||
ssPtr->litLengthSum = 0;
|
||||
ssPtr->litSum = 0;
|
||||
ssPtr->offCodeSum = 0;
|
||||
ssPtr->matchSum = 0;
|
||||
ssPtr->litSum = 0;
|
||||
|
||||
for (u=0; u<=MaxLit; u++) {
|
||||
ssPtr->litFreq[u] = ZSTD_FREQ_START + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
|
||||
ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
|
||||
ssPtr->litSum += ssPtr->litFreq[u];
|
||||
}
|
||||
for (u=0; u<=MaxLL; u++) {
|
||||
ssPtr->litLengthFreq[u] = ZSTD_FREQ_START + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV);
|
||||
ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV);
|
||||
ssPtr->litLengthSum += ssPtr->litLengthFreq[u];
|
||||
}
|
||||
for (u=0; u<=MaxML; u++) {
|
||||
ssPtr->matchLengthFreq[u] = ZSTD_FREQ_START + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
|
||||
ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
|
||||
ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u];
|
||||
/* matchSum weights each match-length bucket by (u + 3) */
ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3);
|
||||
}
|
||||
for (u=0; u<=MaxOff; u++) {
|
||||
ssPtr->offCodeFreq[u] = ZSTD_FREQ_START + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
|
||||
ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
|
||||
ssPtr->offCodeSum += ssPtr->offCodeFreq[u];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* ZSTD_updatePrice() :
 * Records one sequence in the statistics held in *seqStorePtr.
 *
 * seqStorePtr : statistics tables and sums to update
 * litLength   : number of literal bytes; added to litSum, and one litFreq
 *               slot is bumped per byte read from `literals`
 * literals    : start of the literal bytes (literals[0 .. litLength-1] are read)
 * offset      : mapped to an offset code: 0 when offset is 0, otherwise
 *               ZSTD_highbit(offset+1) + 1; that offCodeFreq slot is bumped
 * matchLength : bumps matchLengthFreq[matchLength], or the MaxML slot when
 *               matchLength >= MaxML
 *
 * The literal-length bucket is clamped the same way (to MaxLL), and each
 * category's running sum is advanced together with its table.
 *
 * NOTE(review): this listing carries each update in two forms, the
 * `+= ZSTD_FREQ_STEP` form immediately followed by the `++` form. Only one
 * statement of each pair belongs in a compilable file. */
MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
|
||||
{
|
||||
U32 u;
|
||||
|
||||
/* literals */
|
||||
seqStorePtr->litSum += litLength * ZSTD_FREQ_STEP;
|
||||
seqStorePtr->litSum += litLength;
|
||||
for (u=0; u < litLength; u++)
|
||||
seqStorePtr->litFreq[literals[u]] += ZSTD_FREQ_STEP;
|
||||
seqStorePtr->litFreq[literals[u]]++;
|
||||
|
||||
/* literal Length */
|
||||
seqStorePtr->litLengthSum += ZSTD_FREQ_STEP;
|
||||
seqStorePtr->litLengthSum++;
|
||||
/* lengths at or above MaxLL share the last bucket */
if (litLength >= MaxLL)
|
||||
seqStorePtr->litLengthFreq[MaxLL] += ZSTD_FREQ_STEP;
|
||||
seqStorePtr->litLengthFreq[MaxLL]++;
|
||||
else
|
||||
seqStorePtr->litLengthFreq[litLength] += ZSTD_FREQ_STEP;
|
||||
seqStorePtr->litLengthFreq[litLength]++;
|
||||
|
||||
/* match offset */
|
||||
seqStorePtr->offCodeSum += ZSTD_FREQ_STEP;
|
||||
seqStorePtr->offCodeSum++;
|
||||
/* offset 0 gets code 0; any other offset is coded from the high bit of offset+1 */
BYTE offCode = offset ? (BYTE)ZSTD_highbit(offset+1) + 1 : 0;
|
||||
seqStorePtr->offCodeFreq[offCode] += ZSTD_FREQ_STEP;
|
||||
seqStorePtr->offCodeFreq[offCode]++;
|
||||
|
||||
/* match Length */
|
||||
seqStorePtr->matchLengthSum += ZSTD_FREQ_STEP;
|
||||
seqStorePtr->matchLengthSum++;
|
||||
/* lengths at or above MaxML share the last bucket */
if (matchLength >= MaxML)
|
||||
seqStorePtr->matchLengthFreq[MaxML] += ZSTD_FREQ_STEP;
|
||||
seqStorePtr->matchLengthFreq[MaxML]++;
|
||||
else
|
||||
seqStorePtr->matchLengthFreq[matchLength] += ZSTD_FREQ_STEP;
|
||||
seqStorePtr->matchLengthFreq[matchLength]++;
|
||||
}
|
||||
|
||||
FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals)
|
||||
|
Loading…
Reference in New Issue
Block a user