minor variation - DSpeed at 640

This commit is contained in:
Yann Collet 2016-03-23 01:32:41 +01:00
parent be391438ff
commit add08d6f61
5 changed files with 58 additions and 92 deletions

View File

@ -246,8 +246,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
/*! BIT_initDStream() :
* Initialize a BIT_DStream_t.
* `bitD` : a pointer to an already allocated BIT_DStream_t structure.
* `srcBuffer` must point at the beginning of a bitStream.
* `srcSize` must be the exact size of the bitStream, in bytes.
* `srcSize` must be the *exact* size of the bitStream, in bytes.
* @return : size of stream (== srcSize) or an errorCode if a problem is detected
*/
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
@ -293,7 +292,7 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
* On 64-bits, maxNbBits==56.
* @return : value extracted
*/
MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits)
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
{
U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
@ -301,7 +300,7 @@ MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits)
/*! BIT_lookBitsFast() :
* unsafe version; only works only if nbBits >= 1 */
MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits)
MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
{
U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);

View File

@ -267,7 +267,7 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt
BIT_flushBits(bitC);
}
/* decompression */
/*<===== Decompression =====>*/
typedef struct {
U16 tableLog;
@ -290,34 +290,39 @@ MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, con
DStatePtr->table = dt + 1;
}
MEM_STATIC size_t FSE_getStateValue(FSE_DState_t* DStatePtr)
MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr)
{
return DStatePtr->state;
FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
return DInfo.symbol;
}
MEM_STATIC BYTE FSE_peakSymbol(FSE_DState_t* DStatePtr)
MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
{
const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
return DInfo.symbol;
FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
U32 const nbBits = DInfo.nbBits;
size_t const lowBits = BIT_readBits(bitD, nbBits);
DStatePtr->state = DInfo.newState + lowBits;
}
MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
{
const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
const U32 nbBits = DInfo.nbBits;
BYTE symbol = DInfo.symbol;
size_t lowBits = BIT_readBits(bitD, nbBits);
FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
U32 const nbBits = DInfo.nbBits;
BYTE const symbol = DInfo.symbol;
size_t const lowBits = BIT_readBits(bitD, nbBits);
DStatePtr->state = DInfo.newState + lowBits;
return symbol;
}
/*! FSE_decodeSymbolFast() :
unsafe, only works if no symbol has a probability > 50% */
MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
{
const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
const U32 nbBits = DInfo.nbBits;
BYTE symbol = DInfo.symbol;
size_t lowBits = BIT_readBitsFast(bitD, nbBits);
FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
U32 const nbBits = DInfo.nbBits;
BYTE const symbol = DInfo.symbol;
size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
DStatePtr->state = DInfo.newState + lowBits;
return symbol;

View File

@ -611,10 +611,13 @@ static void ZDICT_countEStats(EStats_ress_t esr,
if (*u32Ptr==0) offcode=0;
offsetcodeCount[offcode]++;
}
(void)matchlengthCount; (void)litlengthCount;
/*
for(bytePtr = seqStore.matchLengthStart; bytePtr < seqStore.matchLength; bytePtr++)
matchlengthCount[*bytePtr]++;
for(bytePtr = seqStore.litLengthStart; bytePtr < seqStore.litLength; bytePtr++)
litlengthCount[*bytePtr]++;
*/
}
static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)

View File

@ -648,14 +648,13 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog);
LLtype = FSE_ENCODING_RAW;
} else {
size_t NCountSize;
size_t nbSeq_1 = nbSeq;
const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
if (FSE_isError(NCountSize)) return ERROR(GENERIC);
op += NCountSize;
{ size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
if (FSE_isError(NCountSize)) return ERROR(GENERIC);
op += NCountSize; }
FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
LLtype = FSE_ENCODING_DYNAMIC;
} }
@ -675,14 +674,13 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
FSE_buildCTable_raw(CTable_OffsetBits, Offbits);
Offtype = FSE_ENCODING_RAW;
} else {
size_t NCountSize;
size_t nbSeq_1 = nbSeq;
const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
if (count[offCodeTable[nbSeq-1]]>1) { count[offCodeTable[nbSeq-1]]--; nbSeq_1--; }
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
if (FSE_isError(NCountSize)) return ERROR(GENERIC);
op += NCountSize;
{ size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
if (FSE_isError(NCountSize)) return ERROR(GENERIC);
op += NCountSize; }
FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
Offtype = FSE_ENCODING_DYNAMIC;
} }
@ -744,8 +742,8 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, offCodeTable[nbSeq-1]);
FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]);
BIT_addBits(&blockStream, offsetTable[nbSeq-1], offCodeTable[nbSeq-1] ? (offCodeTable[nbSeq-1]-1) : 0);
BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]);
BIT_addBits(&blockStream, llTable[nbSeq-1], LL_bits[llCodeTable[nbSeq-1]]);
BIT_flushBits(&blockStream);
@ -757,16 +755,12 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
const U32 nbBits = (offCode-1) + (!offCode);
const BYTE LLCode = llCodeTable[n];
/* (7)*/ /* (7)*/
FSE_encodeSymbol(&blockStream, &stateMatchLength, MLCode); /* 17 */ /* 17 */
if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */
FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 25 */ /* 35 */
FSE_encodeSymbol(&blockStream, &stateMatchLength, MLCode); /* 17 */ /* 17 */
FSE_encodeSymbol(&blockStream, &stateLitLength, LLCode); /* 16 */ /* 26 */
if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */
//BIT_flushBits(&blockStream); /* 7 */ /* 7 */
BIT_addBits(&blockStream, mlTable[n], ML_bits[MLCode]);
BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 61 */ /* 24 bits max in 32-bits mode */
BIT_addBits(&blockStream, mlTable[n], ML_bits[MLCode]);
BIT_addBits(&blockStream, llTable[n], LL_bits[LLCode]);
//if (blockStream.bitPos > 63) printf("pb : blockStream.bitPos == %u > 63 \n", blockStream.bitPos);
BIT_flushBits(&blockStream); /* 7 */ /* 7 */
} }

View File

@ -503,7 +503,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
@return : nb bytes read from src,
or an error code if it fails, testable with ZSTD_isError()
*/
FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 rawBits, U32 maxLog,
FORCE_INLINE size_t ZSTD_buildSeqTableOff(FSE_DTable* DTable, U32 type, U32 rawBits, U32 maxLog,
const void* src, size_t srcSize)
{
U32 max = (1<<rawBits)-1;
@ -531,8 +531,9 @@ FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 rawBits
}
FORCE_INLINE size_t ZSTD_buildSeqTableLL(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog,
const void* src, size_t srcSize)
FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog,
const void* src, size_t srcSize,
const S16* defaultNorm, U32 defaultLog)
{
switch(type)
{
@ -542,35 +543,7 @@ FORCE_INLINE size_t ZSTD_buildSeqTableLL(FSE_DTable* DTable, U32 type, U32 max,
FSE_buildDTable_rle(DTable, *(const BYTE*)src); /* if *src > max, data is corrupted */
return 1;
case FSE_ENCODING_RAW :
FSE_buildDTable(DTable, LL_defaultNorm, max, LL_defaultNormLog);
return 0;
case FSE_ENCODING_STATIC:
return 0;
default : /* impossible */
case FSE_ENCODING_DYNAMIC :
{ U32 tableLog;
S16 norm[MaxSeq+1];
size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
if (FSE_isError(headerSize)) return ERROR(corruption_detected);
if (tableLog > maxLog) return ERROR(corruption_detected);
FSE_buildDTable(DTable, norm, max, tableLog);
return headerSize;
} }
}
FORCE_INLINE size_t ZSTD_buildSeqTableML(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog,
const void* src, size_t srcSize)
{
switch(type)
{
case FSE_ENCODING_RLE :
if (!srcSize) return ERROR(srcSize_wrong);
if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
FSE_buildDTable_rle(DTable, *(const BYTE*)src); /* if *src > max, data is corrupted */
return 1;
case FSE_ENCODING_RAW :
FSE_buildDTable(DTable, ML_defaultNorm, max, ML_defaultNormLog);
FSE_buildDTable(DTable, defaultNorm, max, defaultLog);
return 0;
case FSE_ENCODING_STATIC:
return 0;
@ -619,15 +592,15 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq,
if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
/* Build DTables */
{ size_t const bhSize = ZSTD_buildSeqTableLL(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip);
{ size_t const bhSize = ZSTD_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog);
if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
ip += bhSize;
}
{ size_t const bhSize = ZSTD_buildSeqTable(DTableOffb, Offtype, Offbits, OffFSELog, ip, iend-ip);
{ size_t const bhSize = ZSTD_buildSeqTableOff(DTableOffb, Offtype, Offbits, OffFSELog, ip, iend-ip);
if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
ip += bhSize;
}
{ size_t const bhSize = ZSTD_buildSeqTableML(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip);
{ size_t const bhSize = ZSTD_buildSeqTable(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip, ML_defaultNorm, ML_defaultNormLog);
if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
ip += bhSize;
}
@ -654,7 +627,7 @@ typedef struct {
static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls)
{
/* Literal length */
U32 const litCode = FSE_peakSymbol(&(seqState->stateLL));
U32 const litCode = FSE_peekSymbol(&(seqState->stateLL));
{ static const U32 LL_base[MaxLL+1] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
@ -662,13 +635,23 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls)
seq->litLength = LL_base[litCode] + BIT_readBits(&(seqState->DStream), LL_bits[litCode]);
}
/* MatchLength */
{ static const U32 ML_base[MaxML+1] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 0x400, 0x800,
0x1000, 0x2000, 0x4000, 0x8000, 0x10000 };
U32 const mlCode = FSE_peekSymbol(&(seqState->stateML));
seq->matchLength = ML_base[mlCode] + BIT_readBits(&(seqState->DStream), ML_bits[mlCode]) + mls;
}
/* Offset */
{ static const U32 offsetPrefix[MaxOff+1] = {
1 /*fake*/, 1, 2, 4, 8, 0x10, 0x20, 0x40,
0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000,
0x8000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, 0x400000,
0x800000, 0x1000000, 0x2000000, 0x4000000, /*fake*/ 1, 1, 1, 1 };
U32 const offsetCode = FSE_peakSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */
U32 const offsetCode = FSE_peekSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */
U32 const nbBits = offsetCode ? offsetCode-1 : 0;
size_t const offset = offsetCode ? offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits) :
litCode ? seq->offset : seqState->prevOffset;
@ -677,33 +660,15 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls)
if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
}
{ static const U32 ML_base[MaxML+1] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 0x400, 0x800,
0x1000, 0x2000, 0x4000, 0x8000, 0x10000 };
U32 const mlCode = FSE_peakSymbol(&(seqState->stateML));
seq->matchLength = ML_base[mlCode] + BIT_readBits(&(seqState->DStream), ML_bits[mlCode]) + mls;
}
/* ANS update */
FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); /* update */
/* ANS state update */
FSE_updateState(&(seqState->stateLL), &(seqState->DStream));
if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */
FSE_updateState(&(seqState->stateML), &(seqState->DStream));
if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); /* update */
FSE_updateState(&(seqState->stateOffb), &(seqState->DStream));
if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
#if 0 /* debug */
{
static U64 totalDecoded = 0;
printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n",
(U32)(totalDecoded), (U32)litLength, (U32)matchLength, (U32)offset);
totalDecoded += litLength + matchLength;
}
#endif
}