Implemented default coding tables for lengths and offsets

Yann Collet 2016-04-07 15:24:29 +02:00
parent 79459da14b
commit 485371600a
4 changed files with 31 additions and 56 deletions
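Context for the diff below: the fallback path previously used a flat table for offset codes (FSE_buildCTable_raw over Offbits) and near-uniform defaults for the length codes. This commit replaces them with tuned predefined distributions (LL_defaultNorm, ML_defaultNorm and a new OF_defaultNorm) that encoder and decoder both compile in, so blocks with too few sequences for custom statistics still get a realistically skewed table without transmitting any table description. The standalone sketch below (not part of the patch) only illustrates what such a distribution means: the counts, with the low-probability marker -1 weighing 1, must sum to exactly 1<<normLog; values are copied from the new OF_defaultNorm in this diff.

    #include <stdio.h>

    typedef short S16;
    typedef unsigned U32;

    #define MaxOff 28
    static const S16 OF_defaultNorm[MaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
                                                  1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
    static const U32 OF_defaultNormLog = 5;

    int main(void)
    {
        int sum = 0;
        for (int s = 0; s <= MaxOff; s++)
            sum += (OF_defaultNorm[s] < 0) ? 1 : OF_defaultNorm[s];  /* -1 = "present, minimal weight" */
        printf("sum=%d expected=%d\n", sum, 1 << OF_defaultNormLog); /* prints sum=32 expected=32 */
        return 0;
    }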

View File

@@ -224,7 +224,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
const size_t tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
/* Check if workSpace is large enough, alloc a new one if needed */
- { size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (1<<Offbits) + (1<<Litbits))*sizeof(U32)
+ { size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
+ (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
+ ((params.cParams.strategy == ZSTD_btopt) ? optSpace : 0);
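For reference, the btopt workspace reserves one U32 counter per literal value, literal-length code, match-length code and offset code; the offset term shrinks from (1<<Offbits) = 32 entries to (MaxOff+1) = 29. A standalone sketch of the counter-area arithmetic (the match/price tables added on the next line of the patch are left out):

    #include <stdio.h>
    #include <stdint.h>

    #define MaxML  52
    #define MaxLL  35
    #define MaxOff 28
    #define Litbits 8

    int main(void)
    {
        size_t const counters = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(uint32_t);
        printf("%zu bytes\n", counters);  /* (53+36+29+256)*4 = 1496, vs 1508 with the old (1<<Offbits) term */
        return 0;
    }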
@@ -258,7 +258,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1);
zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1);
- zc->seqStore.matchTable = (ZSTD_match_t*)((void*)(zc->seqStore.offCodeFreq + (1<<Offbits)));
+ zc->seqStore.matchTable = (ZSTD_match_t*)((void*)(zc->seqStore.offCodeFreq + (MaxOff+1)));
zc->seqStore.priceTable = (ZSTD_optimal_t*)((void*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1));
zc->seqStore.buffer = zc->seqStore.priceTable + ZSTD_OPT_NUM+1;
zc->seqStore.litLengthSum = 0;
@@ -738,8 +738,8 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
Offtype = FSE_ENCODING_RLE;
} else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
Offtype = FSE_ENCODING_STATIC;
- } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (Offbits-1)))) {
- FSE_buildCTable_raw(CTable_OffsetBits, Offbits);
+ } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) {
+ FSE_buildCTable(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog);
Offtype = FSE_ENCODING_RAW;
} else {
size_t nbSeq_1 = nbSeq;
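The hunk above only touches the fallback branch of the offset-table selection: when there are few sequences, or the distribution is not skewed enough to pay for a table header, the encoder now loads the predefined distribution via FSE_buildCTable instead of a flat table via FSE_buildCTable_raw, and still flags the block as FSE_ENCODING_RAW, i.e. no header is written. A self-contained sketch of the four-way selection; the two threshold constants are zstd internals whose values are assumptions here, and the RLE condition is reduced to its gist:

    #include <stdio.h>

    typedef enum { ENC_RLE, ENC_STATIC, ENC_DEFAULT, ENC_DYNAMIC } table_mode;

    #define MIN_SEQ_FOR_DYNAMIC_FSE   64   /* assumed value, for illustration only */
    #define MAX_SEQ_FOR_STATIC_FSE  1000   /* assumed value, for illustration only */

    static table_mode pick_mode(size_t mostFrequent, size_t nbSeq,
                                unsigned defaultNormLog, int hasStaticTables)
    {
        if (mostFrequent == nbSeq)                       /* a single symbol: cheapest as RLE */
            return ENC_RLE;
        if (hasStaticTables && nbSeq < MAX_SEQ_FOR_STATIC_FSE)
            return ENC_STATIC;                           /* reuse tables set up from a dictionary */
        if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE)
            || (mostFrequent < (nbSeq >> (defaultNormLog-1))))
            return ENC_DEFAULT;                          /* predefined norm, no table header */
        return ENC_DYNAMIC;                              /* worth sending a custom table */
    }

    int main(void)
    {
        printf("%d\n", pick_mode(3, 40, 5, 0));      /* few sequences -> ENC_DEFAULT (2) */
        printf("%d\n", pick_mode(900, 5000, 5, 0));  /* many, skewed -> ENC_DYNAMIC (3) */
        return 0;
    }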
@@ -1287,7 +1287,7 @@ U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
U32 idx = zc->nextToUpdate3;
const U32 target = zc->nextToUpdate3 = (U32)(ip - base);
const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3);
while(idx < target) {
hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
idx++;
@@ -1335,9 +1335,9 @@ size_t ZSTD_HcFindBestMatch_generic (
}
/* save best solution */
- if (currentMl > ml) {
- ml = currentMl; *offsetPtr = ZSTD_REP_MOVE + current - matchIndex3;
- if (ip+currentMl == iLimit) return (ml>=MINMATCH) ? ml : 0; /* best possible, and avoid read overflow*/
+ if (currentMl > ml) {
+ ml = currentMl; *offsetPtr = ZSTD_REP_MOVE + current - matchIndex3;
+ if (ip+currentMl == iLimit) return (ml>=MINMATCH) ? ml : 0; /* best possible, and avoid read overflow*/
} }
}
#endif
@@ -1752,7 +1752,7 @@ _storeSequence:
ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
anchor = ip = start + matchLength;
}
/* check immediate repcode */
while ( (ip <= ilimit)
&& (MEM_read32(ip) == MEM_read32(ip - rep[1])) ) {
@@ -1841,7 +1841,7 @@ void ZSTD_compressBlock_greedy_extDict_generic(ZSTD_CCtx* ctx,
const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }
rep[1] = rep[0]; rep[0] = (U32)(offset - ZSTD_REP_MOVE);
}
}
_storeSequence:
/* store sequence */
@@ -2206,7 +2206,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
}
/* store sequence */
- {
+ {
if (offset >= ZSTD_REP_NUM) {
rep[2] = rep[1];
rep[1] = rep[0];
@@ -2527,7 +2527,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* zc,
const void* dict, size_t dictSize,
ZSTD_parameters params, U64 pledgedSrcSize)
{
- U32 hashLog3 = (pledgedSrcSize || pledgedSrcSize >= 8192) ? ZSTD_HASHLOG3_MAX : ((pledgedSrcSize >= 2048) ? ZSTD_HASHLOG3_MIN + 1 : ZSTD_HASHLOG3_MIN);
+ U32 hashLog3 = (pledgedSrcSize || pledgedSrcSize >= 8192) ? ZSTD_HASHLOG3_MAX : ((pledgedSrcSize >= 2048) ? ZSTD_HASHLOG3_MIN + 1 : ZSTD_HASHLOG3_MIN);
zc->hashLog3 = (params.cParams.searchLength==3) ? hashLog3 : 0;
// printf("windowLog=%d hashLog=%d hashLog3=%d \n", params.windowLog, params.hashLog, zc->hashLog3);

View File

@@ -500,34 +500,6 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
@return : nb bytes read from src,
or an error code if it fails, testable with ZSTD_isError()
*/
- FORCE_INLINE size_t ZSTD_buildSeqTableOff(FSE_DTable* DTable, U32 type, U32 rawBits, U32 maxLog,
- const void* src, size_t srcSize)
- {
- U32 max = (1<<rawBits)-1;
- switch(type)
- {
- case FSE_ENCODING_RLE :
- if (!srcSize) return ERROR(srcSize_wrong);
- FSE_buildDTable_rle(DTable, (*(const BYTE*)src) & max); /* if *src > max, data is corrupted */
- return 1;
- case FSE_ENCODING_RAW :
- FSE_buildDTable_raw(DTable, rawBits);
- return 0;
- case FSE_ENCODING_STATIC:
- return 0;
- default : /* impossible */
- case FSE_ENCODING_DYNAMIC :
- { U32 tableLog;
- S16 norm[MaxSeq+1];
- size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
- if (FSE_isError(headerSize)) return ERROR(corruption_detected);
- if (tableLog > maxLog) return ERROR(corruption_detected);
- FSE_buildDTable(DTable, norm, max, tableLog);
- return headerSize;
- } }
- }
FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog,
const void* src, size_t srcSize,
const S16* defaultNorm, U32 defaultLog)
@@ -594,7 +566,7 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeqPtr,
if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
ip += bhSize;
}
- { size_t const bhSize = ZSTD_buildSeqTableOff(DTableOffb, Offtype, Offbits, OffFSELog, ip, iend-ip);
+ { size_t const bhSize = ZSTD_buildSeqTable(DTableOffb, Offtype, MaxOff, OffFSELog, ip, iend-ip, OF_defaultNorm, OF_defaultNormLog);
if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
ip += bhSize;
}
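With the default distribution passed in as a parameter, the dedicated offset builder removed above is no longer needed: the generic ZSTD_buildSeqTable covers the offset path too, and its FSE_ENCODING_RAW case can build the table from OF_defaultNorm rather than from a flat distribution. The net effect is that encoder and decoder derive identical tables from compiled-in constants, so nothing is read from the frame for this mode. A standalone round-trip sketch using the public FSE API (fse.h); OffFSELog's value and the sample data are assumptions for illustration:

    #include <stdio.h>
    #include <string.h>
    #include "fse.h"

    #define MaxOff    28
    #define OffFSELog  8   /* assumed max table log for offset codes */

    static const short OF_defaultNorm[MaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
                                                    1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
    static const unsigned OF_defaultNormLog = 5;

    int main(void)
    {
        unsigned char src[64], out[64], dst[256];
        for (int i = 0; i < 64; i++) src[i] = (unsigned char)(i % 8);   /* small offset codes dominate */

        FSE_CTable ct[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
        FSE_DTable dt[FSE_DTABLE_SIZE_U32(OffFSELog)];
        FSE_buildCTable(ct, OF_defaultNorm, MaxOff, OF_defaultNormLog);  /* encoder side */
        FSE_buildDTable(dt, OF_defaultNorm, MaxOff, OF_defaultNormLog);  /* decoder side, same constants */

        size_t const cSize = FSE_compress_usingCTable(dst, sizeof(dst), src, sizeof(src), ct);
        if (FSE_isError(cSize) || cSize == 0) { printf("compression failed\n"); return 1; }
        size_t const dSize = FSE_decompress_usingDTable(out, sizeof(out), dst, cSize, dt);
        if (FSE_isError(dSize)) { printf("decompression failed\n"); return 1; }
        printf("round-trip %s, %zu -> %zu bytes, no table header transmitted\n",
               (dSize == sizeof(src) && !memcmp(src, out, dSize)) ? "ok" : "mismatch",
               sizeof(src), cSize);
        return 0;
    }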
@@ -650,7 +622,7 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F,
0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF,
0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,
- 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, /*fake*/ 1, 1, 1, 1, 1 };
+ 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, /*fake*/ 1, 1 };
/* sequence */
{ size_t offset;
@@ -660,7 +632,7 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
offset = OF_base[ofCode] + BIT_readBits(&(seqState->DStream), ofBits); /* <= 26 bits */
if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
}
if (offset < ZSTD_REP_NUM) {
if (llCode == 0 && offset <= 1) offset = 1-offset;
@@ -678,7 +650,7 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
} else {
offset -= ZSTD_REP_MOVE;
seqState->prevOffset[2] = seqState->prevOffset[1];
- seqState->prevOffset[1] = seqState->prevOffset[0];
+ seqState->prevOffset[1] = seqState->prevOffset[0];
seqState->prevOffset[0] = offset;
}
seq->offset = offset;
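For context on the repcode logic in these hunks: an offset value below ZSTD_REP_NUM selects one of the recently used offsets, while anything else carries a fresh offset biased by ZSTD_REP_MOVE and pushes the history down. A simplified standalone sketch of the new-offset branch visible above; the macro values are taken from zstd of this period, and the repcode branch is reduced to a plain lookup (the real decoder also special-cases llCode == 0 and reshuffles the history when an older repcode is picked):

    #include <stdio.h>

    #define ZSTD_REP_NUM   3
    #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)

    static unsigned long prevOffset[ZSTD_REP_NUM] = { 1, 4, 8 };   /* arbitrary starting history */

    static unsigned long resolve_offset(unsigned long offsetCode)
    {
        if (offsetCode >= ZSTD_REP_NUM) {                 /* fresh offset: rotate the history */
            unsigned long const offset = offsetCode - ZSTD_REP_MOVE;
            prevOffset[2] = prevOffset[1];
            prevOffset[1] = prevOffset[0];
            prevOffset[0] = offset;
            return offset;
        }
        return prevOffset[offsetCode];                    /* repcode: reuse a recent offset */
    }

    int main(void)
    {
        printf("%lu\n", resolve_offset(0));     /* repcode 0 -> 1 */
        printf("%lu\n", resolve_offset(1000));  /* fresh offset -> 998; history becomes {998,1,4} */
        printf("%lu\n", resolve_offset(1));     /* repcode 1 -> 1 (the previous prevOffset[0]) */
        return 0;
    }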

View File

@@ -104,11 +104,10 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
#define REPCODE_STARTVALUE 1
#define Litbits 8
- #define Offbits 5
#define MaxLit ((1<<Litbits) - 1)
#define MaxML 52
#define MaxLL 35
- #define MaxOff ((1<<Offbits)- 1)
+ #define MaxOff 28
#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
#define MLFSELog 9
#define LLFSELog 9
@@ -122,21 +121,25 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
13,14,15,16 };
- static const S16 LL_defaultNorm[MaxLL+1] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1,
- 1, 1, 1, 1 };
+ static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
+ -1,-1,-1,-1 };
static const U32 LL_defaultNormLog = 6;
static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9,10,11,
- 12,13,14,15,16 };
- static const S16 ML_defaultNorm[MaxML+1] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
+ 12,13,14,15,16 };
+ static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, };
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,
+ -1,-1,-1,-1,-1 };
static const U32 ML_defaultNormLog = 6;
+ static const S16 OF_defaultNorm[MaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
+ static const U32 OF_defaultNormLog = 5;
/*-*******************************************
* Shared functions to include for inlining
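The payoff of the tuned tables above shows up in the per-symbol cost: a code with normalized count c out of 2^normLog costs roughly log2(2^normLog / c) bits. A standalone sketch for the most common literal-length codes, comparing against the previous near-flat default in which these codes all had count 2, i.e. 5 bits each:

    #include <stdio.h>
    #include <math.h>

    #define MaxLL 35
    static const short LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
                                                   2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
                                                  -1,-1,-1,-1 };
    static const unsigned LL_defaultNormLog = 6;

    int main(void)   /* link with -lm */
    {
        for (int s = 0; s <= 3; s++) {   /* the most frequent literal-length codes */
            int const c = (LL_defaultNorm[s] < 0) ? 1 : LL_defaultNorm[s];
            printf("code %d: ~%.2f bits (old flat default: 5.00 bits)\n",
                   s, log2((double)(1 << LL_defaultNormLog) / c));
        }
        return 0;   /* code 0: ~4.00, code 1: ~4.42, codes 2-3: ~5.00 */
    }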

View File

@@ -57,7 +57,7 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr)
ssPtr->litSum = (2<<Litbits);
ssPtr->litLengthSum = MaxLL+1;
ssPtr->matchLengthSum = MaxML+1;
- ssPtr->offCodeSum = (1<<Offbits);
+ ssPtr->offCodeSum = (MaxOff+1);
ssPtr->matchSum = (2<<Litbits);
for (u=0; u<=MaxLit; u++)
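The optimal parser estimates symbol prices from running frequency counters; the rescale above seeds the offset-code statistics so that offCodeSum matches the new alphabet size, MaxOff+1 = 29 codes (presumably one initial count per code). A standalone sketch of what that seeding implies for the initial price estimate, using a continuous log where the parser itself works with integer approximations:

    #include <stdio.h>
    #include <math.h>

    #define MaxOff 28

    int main(void)   /* link with -lm */
    {
        unsigned offCodeFreq[MaxOff+1];
        unsigned offCodeSum = 0;
        for (int s = 0; s <= MaxOff; s++) { offCodeFreq[s] = 1; offCodeSum += offCodeFreq[s]; }
        /* offCodeSum == MaxOff+1 == 29, matching the initialisation in the hunk above */
        printf("initial price of any offset code: ~%.2f bits\n",
               log2((double)offCodeSum / offCodeFreq[0]));   /* ~4.86 */
        return 0;
    }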
@@ -529,7 +529,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
if (opt[cur].off >= ZSTD_REP_NUM) {
opt[cur].rep[2] = opt[cur-mlen].rep[1];
opt[cur].rep[1] = opt[cur-mlen].rep[0];
- opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE;
+ opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE;
ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]);
} else {
opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
@@ -838,7 +838,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
if (opt[cur].off >= ZSTD_REP_NUM) {
opt[cur].rep[2] = opt[cur-mlen].rep[1];
opt[cur].rep[1] = opt[cur-mlen].rep[0];
- opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE;
+ opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE;
ZSTD_LOG_ENCODE("%d: COPYREP_OFF cur=%d mlen=%d rep[0]=%d rep[1]=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep[0], opt[cur].rep[1]);
} else {
opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];