minor variation - DSpeed at 640
This commit is contained in:
parent
be391438ff
commit
add08d6f61
@ -246,8 +246,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
|
||||
/*! BIT_initDStream() :
|
||||
* Initialize a BIT_DStream_t.
|
||||
* `bitD` : a pointer to an already allocated BIT_DStream_t structure.
|
||||
* `srcBuffer` must point at the beginning of a bitStream.
|
||||
* `srcSize` must be the exact size of the bitStream, in bytes.
|
||||
* `srcSize` must be the *exact* size of the bitStream, in bytes.
|
||||
* @return : size of stream (== srcSize) or an errorCode if a problem is detected
|
||||
*/
|
||||
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
|
||||
@ -293,7 +292,7 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
|
||||
* On 64-bits, maxNbBits==56.
|
||||
* @return : value extracted
|
||||
*/
|
||||
MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits)
|
||||
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
|
||||
{
|
||||
U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
|
||||
return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
|
||||
@ -301,7 +300,7 @@ MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits)
|
||||
|
||||
/*! BIT_lookBitsFast() :
|
||||
* unsafe version; only works only if nbBits >= 1 */
|
||||
MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits)
|
||||
MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
|
||||
{
|
||||
U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
|
||||
return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
|
||||
|
@ -267,7 +267,7 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt
|
||||
BIT_flushBits(bitC);
|
||||
}
|
||||
|
||||
/* decompression */
|
||||
/*<===== Decompression =====>*/
|
||||
|
||||
typedef struct {
|
||||
U16 tableLog;
|
||||
@ -290,34 +290,39 @@ MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, con
|
||||
DStatePtr->table = dt + 1;
|
||||
}
|
||||
|
||||
MEM_STATIC size_t FSE_getStateValue(FSE_DState_t* DStatePtr)
|
||||
MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr)
|
||||
{
|
||||
return DStatePtr->state;
|
||||
FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
|
||||
return DInfo.symbol;
|
||||
}
|
||||
|
||||
MEM_STATIC BYTE FSE_peakSymbol(FSE_DState_t* DStatePtr)
|
||||
MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
|
||||
{
|
||||
const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
|
||||
return DInfo.symbol;
|
||||
FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
|
||||
U32 const nbBits = DInfo.nbBits;
|
||||
size_t const lowBits = BIT_readBits(bitD, nbBits);
|
||||
DStatePtr->state = DInfo.newState + lowBits;
|
||||
}
|
||||
|
||||
MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
|
||||
{
|
||||
const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
|
||||
const U32 nbBits = DInfo.nbBits;
|
||||
BYTE symbol = DInfo.symbol;
|
||||
size_t lowBits = BIT_readBits(bitD, nbBits);
|
||||
FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
|
||||
U32 const nbBits = DInfo.nbBits;
|
||||
BYTE const symbol = DInfo.symbol;
|
||||
size_t const lowBits = BIT_readBits(bitD, nbBits);
|
||||
|
||||
DStatePtr->state = DInfo.newState + lowBits;
|
||||
return symbol;
|
||||
}
|
||||
|
||||
/*! FSE_decodeSymbolFast() :
|
||||
unsafe, only works if no symbol has a probability > 50% */
|
||||
MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
|
||||
{
|
||||
const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
|
||||
const U32 nbBits = DInfo.nbBits;
|
||||
BYTE symbol = DInfo.symbol;
|
||||
size_t lowBits = BIT_readBitsFast(bitD, nbBits);
|
||||
FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
|
||||
U32 const nbBits = DInfo.nbBits;
|
||||
BYTE const symbol = DInfo.symbol;
|
||||
size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
|
||||
|
||||
DStatePtr->state = DInfo.newState + lowBits;
|
||||
return symbol;
|
||||
|
@ -611,10 +611,13 @@ static void ZDICT_countEStats(EStats_ress_t esr,
|
||||
if (*u32Ptr==0) offcode=0;
|
||||
offsetcodeCount[offcode]++;
|
||||
}
|
||||
(void)matchlengthCount; (void)litlengthCount;
|
||||
/*
|
||||
for(bytePtr = seqStore.matchLengthStart; bytePtr < seqStore.matchLength; bytePtr++)
|
||||
matchlengthCount[*bytePtr]++;
|
||||
for(bytePtr = seqStore.litLengthStart; bytePtr < seqStore.litLength; bytePtr++)
|
||||
litlengthCount[*bytePtr]++;
|
||||
*/
|
||||
}
|
||||
|
||||
static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
|
||||
|
@ -648,14 +648,13 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
|
||||
FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog);
|
||||
LLtype = FSE_ENCODING_RAW;
|
||||
} else {
|
||||
size_t NCountSize;
|
||||
size_t nbSeq_1 = nbSeq;
|
||||
const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
|
||||
if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
|
||||
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
|
||||
NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
|
||||
{ size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
|
||||
if (FSE_isError(NCountSize)) return ERROR(GENERIC);
|
||||
op += NCountSize;
|
||||
op += NCountSize; }
|
||||
FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
|
||||
LLtype = FSE_ENCODING_DYNAMIC;
|
||||
} }
|
||||
@ -675,14 +674,13 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
|
||||
FSE_buildCTable_raw(CTable_OffsetBits, Offbits);
|
||||
Offtype = FSE_ENCODING_RAW;
|
||||
} else {
|
||||
size_t NCountSize;
|
||||
size_t nbSeq_1 = nbSeq;
|
||||
const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
|
||||
if (count[offCodeTable[nbSeq-1]]>1) { count[offCodeTable[nbSeq-1]]--; nbSeq_1--; }
|
||||
FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
|
||||
NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
|
||||
{ size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
|
||||
if (FSE_isError(NCountSize)) return ERROR(GENERIC);
|
||||
op += NCountSize;
|
||||
op += NCountSize; }
|
||||
FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
|
||||
Offtype = FSE_ENCODING_DYNAMIC;
|
||||
} }
|
||||
@ -744,8 +742,8 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
|
||||
FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
|
||||
FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, offCodeTable[nbSeq-1]);
|
||||
FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
|
||||
BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]);
|
||||
BIT_addBits(&blockStream, offsetTable[nbSeq-1], offCodeTable[nbSeq-1] ? (offCodeTable[nbSeq-1]-1) : 0);
|
||||
BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]);
|
||||
BIT_addBits(&blockStream, llTable[nbSeq-1], LL_bits[llCodeTable[nbSeq-1]]);
|
||||
BIT_flushBits(&blockStream);
|
||||
|
||||
@ -757,16 +755,12 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
|
||||
const U32 nbBits = (offCode-1) + (!offCode);
|
||||
const BYTE LLCode = llCodeTable[n];
|
||||
/* (7)*/ /* (7)*/
|
||||
FSE_encodeSymbol(&blockStream, &stateMatchLength, MLCode); /* 17 */ /* 17 */
|
||||
if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */
|
||||
FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 25 */ /* 35 */
|
||||
FSE_encodeSymbol(&blockStream, &stateMatchLength, MLCode); /* 17 */ /* 17 */
|
||||
FSE_encodeSymbol(&blockStream, &stateLitLength, LLCode); /* 16 */ /* 26 */
|
||||
if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */
|
||||
//BIT_flushBits(&blockStream); /* 7 */ /* 7 */
|
||||
BIT_addBits(&blockStream, mlTable[n], ML_bits[MLCode]);
|
||||
BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 61 */ /* 24 bits max in 32-bits mode */
|
||||
BIT_addBits(&blockStream, mlTable[n], ML_bits[MLCode]);
|
||||
BIT_addBits(&blockStream, llTable[n], LL_bits[LLCode]);
|
||||
//if (blockStream.bitPos > 63) printf("pb : blockStream.bitPos == %u > 63 \n", blockStream.bitPos);
|
||||
BIT_flushBits(&blockStream); /* 7 */ /* 7 */
|
||||
} }
|
||||
|
||||
|
@ -503,7 +503,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
@return : nb bytes read from src,
|
||||
or an error code if it fails, testable with ZSTD_isError()
|
||||
*/
|
||||
FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 rawBits, U32 maxLog,
|
||||
FORCE_INLINE size_t ZSTD_buildSeqTableOff(FSE_DTable* DTable, U32 type, U32 rawBits, U32 maxLog,
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
U32 max = (1<<rawBits)-1;
|
||||
@ -531,8 +531,9 @@ FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 rawBits
|
||||
}
|
||||
|
||||
|
||||
FORCE_INLINE size_t ZSTD_buildSeqTableLL(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog,
|
||||
const void* src, size_t srcSize)
|
||||
FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog,
|
||||
const void* src, size_t srcSize,
|
||||
const S16* defaultNorm, U32 defaultLog)
|
||||
{
|
||||
switch(type)
|
||||
{
|
||||
@ -542,35 +543,7 @@ FORCE_INLINE size_t ZSTD_buildSeqTableLL(FSE_DTable* DTable, U32 type, U32 max,
|
||||
FSE_buildDTable_rle(DTable, *(const BYTE*)src); /* if *src > max, data is corrupted */
|
||||
return 1;
|
||||
case FSE_ENCODING_RAW :
|
||||
FSE_buildDTable(DTable, LL_defaultNorm, max, LL_defaultNormLog);
|
||||
return 0;
|
||||
case FSE_ENCODING_STATIC:
|
||||
return 0;
|
||||
default : /* impossible */
|
||||
case FSE_ENCODING_DYNAMIC :
|
||||
{ U32 tableLog;
|
||||
S16 norm[MaxSeq+1];
|
||||
size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
|
||||
if (FSE_isError(headerSize)) return ERROR(corruption_detected);
|
||||
if (tableLog > maxLog) return ERROR(corruption_detected);
|
||||
FSE_buildDTable(DTable, norm, max, tableLog);
|
||||
return headerSize;
|
||||
} }
|
||||
}
|
||||
|
||||
|
||||
FORCE_INLINE size_t ZSTD_buildSeqTableML(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog,
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
switch(type)
|
||||
{
|
||||
case FSE_ENCODING_RLE :
|
||||
if (!srcSize) return ERROR(srcSize_wrong);
|
||||
if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
|
||||
FSE_buildDTable_rle(DTable, *(const BYTE*)src); /* if *src > max, data is corrupted */
|
||||
return 1;
|
||||
case FSE_ENCODING_RAW :
|
||||
FSE_buildDTable(DTable, ML_defaultNorm, max, ML_defaultNormLog);
|
||||
FSE_buildDTable(DTable, defaultNorm, max, defaultLog);
|
||||
return 0;
|
||||
case FSE_ENCODING_STATIC:
|
||||
return 0;
|
||||
@ -619,15 +592,15 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq,
|
||||
if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
|
||||
|
||||
/* Build DTables */
|
||||
{ size_t const bhSize = ZSTD_buildSeqTableLL(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip);
|
||||
{ size_t const bhSize = ZSTD_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog);
|
||||
if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
|
||||
ip += bhSize;
|
||||
}
|
||||
{ size_t const bhSize = ZSTD_buildSeqTable(DTableOffb, Offtype, Offbits, OffFSELog, ip, iend-ip);
|
||||
{ size_t const bhSize = ZSTD_buildSeqTableOff(DTableOffb, Offtype, Offbits, OffFSELog, ip, iend-ip);
|
||||
if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
|
||||
ip += bhSize;
|
||||
}
|
||||
{ size_t const bhSize = ZSTD_buildSeqTableML(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip);
|
||||
{ size_t const bhSize = ZSTD_buildSeqTable(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip, ML_defaultNorm, ML_defaultNormLog);
|
||||
if (ZSTD_isError(bhSize)) return ERROR(corruption_detected);
|
||||
ip += bhSize;
|
||||
}
|
||||
@ -654,7 +627,7 @@ typedef struct {
|
||||
static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls)
|
||||
{
|
||||
/* Literal length */
|
||||
U32 const litCode = FSE_peakSymbol(&(seqState->stateLL));
|
||||
U32 const litCode = FSE_peekSymbol(&(seqState->stateLL));
|
||||
{ static const U32 LL_base[MaxLL+1] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
|
||||
@ -662,13 +635,23 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls)
|
||||
seq->litLength = LL_base[litCode] + BIT_readBits(&(seqState->DStream), LL_bits[litCode]);
|
||||
}
|
||||
|
||||
/* MatchLength */
|
||||
{ static const U32 ML_base[MaxML+1] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 0x400, 0x800,
|
||||
0x1000, 0x2000, 0x4000, 0x8000, 0x10000 };
|
||||
U32 const mlCode = FSE_peekSymbol(&(seqState->stateML));
|
||||
seq->matchLength = ML_base[mlCode] + BIT_readBits(&(seqState->DStream), ML_bits[mlCode]) + mls;
|
||||
}
|
||||
|
||||
/* Offset */
|
||||
{ static const U32 offsetPrefix[MaxOff+1] = {
|
||||
1 /*fake*/, 1, 2, 4, 8, 0x10, 0x20, 0x40,
|
||||
0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000,
|
||||
0x8000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, 0x400000,
|
||||
0x800000, 0x1000000, 0x2000000, 0x4000000, /*fake*/ 1, 1, 1, 1 };
|
||||
U32 const offsetCode = FSE_peakSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */
|
||||
U32 const offsetCode = FSE_peekSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */
|
||||
U32 const nbBits = offsetCode ? offsetCode-1 : 0;
|
||||
size_t const offset = offsetCode ? offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits) :
|
||||
litCode ? seq->offset : seqState->prevOffset;
|
||||
@ -677,33 +660,15 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls)
|
||||
if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
|
||||
}
|
||||
|
||||
{ static const U32 ML_base[MaxML+1] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 0x400, 0x800,
|
||||
0x1000, 0x2000, 0x4000, 0x8000, 0x10000 };
|
||||
U32 const mlCode = FSE_peakSymbol(&(seqState->stateML));
|
||||
seq->matchLength = ML_base[mlCode] + BIT_readBits(&(seqState->DStream), ML_bits[mlCode]) + mls;
|
||||
}
|
||||
|
||||
/* ANS update */
|
||||
FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); /* update */
|
||||
/* ANS state update */
|
||||
FSE_updateState(&(seqState->stateLL), &(seqState->DStream));
|
||||
if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
|
||||
|
||||
FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */
|
||||
FSE_updateState(&(seqState->stateML), &(seqState->DStream));
|
||||
if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
|
||||
|
||||
FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); /* update */
|
||||
FSE_updateState(&(seqState->stateOffb), &(seqState->DStream));
|
||||
if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream));
|
||||
|
||||
#if 0 /* debug */
|
||||
{
|
||||
static U64 totalDecoded = 0;
|
||||
printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n",
|
||||
(U32)(totalDecoded), (U32)litLength, (U32)matchLength, (U32)offset);
|
||||
totalDecoded += litLength + matchLength;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user