From 597847a2ae9b6c1c0872b000f85e4cfee1627a2d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 20 Mar 2016 19:14:22 +0100 Subject: [PATCH 01/45] first emulation --- lib/zdict.c | 3 +- lib/zstd_compress.c | 97 +++++++++++++++++++++++++++++++++++++++++---- lib/zstd_internal.h | 6 +-- programs/bench.c | 2 +- 4 files changed, 94 insertions(+), 14 deletions(-) diff --git a/lib/zdict.c b/lib/zdict.c index a7c8090a..4c1ffb08 100644 --- a/lib/zdict.c +++ b/lib/zdict.c @@ -284,8 +284,7 @@ static dictItem ZDICT_analyzePos( return solution; } - { - int i; + { int i; U32 searchLength; U32 refinedStart = start; U32 refinedEnd = end; diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index cfb2519a..ba84fde7 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -184,7 +184,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, const size_t blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.windowLog); const U32 divider = (params.searchLength==3) ? 3 : 4; const size_t maxNbSeq = blockSize / divider; - const size_t tokenSpace = blockSize + 8*maxNbSeq; + const size_t tokenSpace = blockSize + 10*maxNbSeq; const size_t contentSize = (params.strategy == ZSTD_fast) ? 0 : (1 << params.contentLog); const size_t hSize = 1 << params.hashLog; const size_t h3Size = (params.searchLength==3) ? 
(1 << HASHLOG3) : 0; @@ -209,7 +209,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->seqStore.buffer = zc->contentTable + contentSize; zc->hufTable = (HUF_CElt*)zc->seqStore.buffer; zc->flagStaticTables = 0; - zc->seqStore.buffer = (U32*)(zc->seqStore.buffer) + 256; + zc->seqStore.buffer = ((U32*)(zc->seqStore.buffer)) + 256; zc->nextToUpdate = 1; zc->nextSrc = NULL; @@ -221,10 +221,11 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->blockSize = blockSize; zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer); - zc->seqStore.offCodeStart = (BYTE*) (zc->seqStore.offsetStart + maxNbSeq); + zc->seqStore.litLengthStart = (U16*) (void*)(zc->seqStore.offsetStart + maxNbSeq); + zc->seqStore.llCodeStart = (BYTE*) (zc->seqStore.litLengthStart + maxNbSeq); + zc->seqStore.offCodeStart = zc->seqStore.llCodeStart + maxNbSeq; zc->seqStore.litStart = zc->seqStore.offCodeStart + maxNbSeq; - zc->seqStore.litLengthStart = zc->seqStore.litStart + blockSize; - zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + maxNbSeq; + zc->seqStore.matchLengthStart = zc->seqStore.litStart + blockSize; zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + maxNbSeq; if (params.strategy == ZSTD_btopt) { zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.dumpsStart + maxNbSeq)); @@ -584,11 +585,12 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_CTable* CTable_OffsetBits = zc->offcodeCTable; FSE_CTable* CTable_MatchLength = zc->matchlengthCTable; U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ - const BYTE* const llTable = seqStorePtr->litLengthStart; - const BYTE* const llPtr = seqStorePtr->litLength; + const U16* const llTable = seqStorePtr->litLengthStart; + const U16* const llPtr = seqStorePtr->litLength; const BYTE* const mlTable = seqStorePtr->matchLengthStart; const U32* const offsetTable = seqStorePtr->offsetStart; BYTE* const offCodeTable = seqStorePtr->offCodeStart; + BYTE* const llCodeTable = seqStorePtr->llCodeStart; BYTE* const ostart 
= (BYTE*)dst; BYTE* const oend = ostart + dstCapacity; BYTE* op = ostart; @@ -633,7 +635,49 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, #define MIN_SEQ_FOR_DYNAMIC_FSE 64 #define MAX_SEQ_FOR_STATIC_FSE 1000 + /* LL codes */ +static const BYTE llCode[64] = { 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 16, 17, 17, 18, 18, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24 }; +static const BYTE deltaCode = 18; + + { size_t i; + for (i=0; i63) ? ZSTD_highbit(ll) + deltaCode : llCode[ll]; + } } + /* CTable for Literal Lengths */ +#if 1 + { U32 max = 36; + size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + *op++ = llCodeTable[0]; + FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); + LLtype = FSE_ENCODING_RLE; + } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + LLtype = FSE_ENCODING_STATIC; + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LLbits-1)))) { + FSE_buildCTable_raw(CTable_LitLength, LLbits); + LLtype = FSE_ENCODING_RAW; + } else { + size_t NCountSize; + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); + if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; + FSE_buildCTable(CTable_LitLength, norm, max, tableLog); + LLtype = FSE_ENCODING_DYNAMIC; + }} +#else { U32 max = MaxLL; size_t const mostFrequent = FSE_countFast(count, &max, llTable, nbSeq); if ((mostFrequent == nbSeq) && (nbSeq > 2)) { @@ -657,6 +701,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_buildCTable(CTable_LitLength, norm, max, 
tableLog); LLtype = FSE_ENCODING_DYNAMIC; }} +#endif // 0 /* Offset codes */ { size_t i; for (i=0; ilit += litLength; /* literal Length */ +#if 1 + *seqStorePtr->litLength++ = (U16)litLength; /* take care of litLength >= 65535 ! */ +#else if (litLength >= MaxLL) { *(seqStorePtr->litLength++) = MaxLL; if (litLength<255 + MaxLL) { @@ -802,6 +882,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B seqStorePtr->dumps += 3; } } } else *(seqStorePtr->litLength++) = (BYTE)litLength; +#endif // 0 /* match offset */ *(seqStorePtr->offset++) = (U32)offsetCode; diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index ba350c4f..7a3f2134 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -184,11 +184,11 @@ typedef struct { U32* offsetStart; U32* offset; BYTE* offCodeStart; - BYTE* offCode; BYTE* litStart; BYTE* lit; - BYTE* litLengthStart; - BYTE* litLength; + U16* litLengthStart; + U16* litLength; + BYTE* llCodeStart; BYTE* matchLengthStart; BYTE* matchLength; BYTE* dumpsStart; diff --git a/programs/bench.c b/programs/bench.c index c74c03df..e5b231d3 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -282,7 +282,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, testNb, displayName, (U32)srcSize, (U32)cSize, ratio, (double)srcSize / 1000000. 
/ (fastestC / CLOCKS_PER_SEC) ); -#if 1 +#if 0 /* Decompression */ memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */ From d64f435f637fc876ac4f9aba4b474856f86cf63c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 21 Mar 2016 00:07:42 +0100 Subject: [PATCH 02/45] handles litLength >= 65535 --- lib/bitstream.h | 4 ++-- lib/zstd_compress.c | 23 +++++++++++++---------- lib/zstd_internal.h | 3 ++- programs/bench.c | 4 ++-- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/lib/bitstream.h b/lib/bitstream.h index e1237930..0fe36eae 100644 --- a/lib/bitstream.h +++ b/lib/bitstream.h @@ -206,7 +206,7 @@ MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBi * unsafe version; does not check buffer overflow */ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) { - size_t nbBytes = bitC->bitPos >> 3; + size_t const nbBytes = bitC->bitPos >> 3; MEM_writeLEST(bitC->ptr, bitC->bitContainer); bitC->ptr += nbBytes; bitC->bitPos &= 7; @@ -218,7 +218,7 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) * note : does not signal buffer overflow. 
This will be revealed later on using BIT_closeCStream() */ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) { - size_t nbBytes = bitC->bitPos >> 3; + size_t const nbBytes = bitC->bitPos >> 3; MEM_writeLEST(bitC->ptr, bitC->bitContainer); bitC->ptr += nbBytes; if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index ba84fde7..937cce85 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -172,9 +172,9 @@ size_t ZSTD_sizeofCCtx(ZSTD_parameters params) /* hidden interface, for parama { ZSTD_CCtx* zc = ZSTD_createCCtx(); ZSTD_compressBegin_advanced(zc, NULL, 0, params); - { size_t const size = sizeof(*zc) + zc->workSpaceSize; + { size_t const ccsize = sizeof(*zc) + zc->workSpaceSize; ZSTD_freeCCtx(zc); - return size; } + return ccsize; } } @@ -291,7 +291,7 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) /*! ZSTD_reduceTable() : -* rescale indexes from a table (indexes are U32) */ +* reduce table indexes by `reducerValue` */ static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue) { U32 u; @@ -586,15 +586,15 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_CTable* CTable_MatchLength = zc->matchlengthCTable; U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ const U16* const llTable = seqStorePtr->litLengthStart; - const U16* const llPtr = seqStorePtr->litLength; const BYTE* const mlTable = seqStorePtr->matchLengthStart; const U32* const offsetTable = seqStorePtr->offsetStart; + const U32* const offsetTableEnd = seqStorePtr->offset; BYTE* const offCodeTable = seqStorePtr->offCodeStart; BYTE* const llCodeTable = seqStorePtr->llCodeStart; BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstCapacity; BYTE* op = ostart; - size_t const nbSeq = llPtr - llTable; + size_t const nbSeq = offsetTableEnd - offsetTable; BYTE* seqHead; /* Compress literals */ @@ -648,7 +648,8 @@ static const BYTE deltaCode = 18; { size_t i; for (i=0; 
ilitLengthLong; llCodeTable[i] = (ll>63) ? ZSTD_highbit(ll) + deltaCode : llCode[ll]; } } @@ -788,10 +789,11 @@ static const U32 llBits[36] = { 0, 0, 0, 0, 0, 0, 0, 0, const BYTE LLCode = llCodeTable[n]; /* (7)*/ /* (7)*/ FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 17 */ /* 17 */ if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ - FSE_encodeSymbol(&blockStream, &stateLitLength, LLCode); /* 17 */ /* 27 */ - FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 26 */ /* 36 */ + FSE_encodeSymbol(&blockStream, &stateLitLength, LLCode); /* 16 */ /* 26 */ + FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 25 */ /* 35 */ if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ - BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 62 */ /* 24 bits max in 32-bits mode */ + //BIT_flushBits(&blockStream); /* 7 */ /* 7 */ + BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 61 */ /* 24 bits max in 32-bits mode */ BIT_addBits(&blockStream, llTable[n], llBits[LLCode]); BIT_flushBits(&blockStream); /* 7 */ /* 7 */ } } @@ -866,7 +868,8 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B /* literal Length */ #if 1 - *seqStorePtr->litLength++ = (U16)litLength; /* take care of litLength >= 65535 ! 
*/ + if (litLength>=65535) { *(seqStorePtr->litLength++) = 65535; seqStorePtr->litLengthLong = (U32)litLength; } + else *seqStorePtr->litLength++ = (U16)litLength; #else if (litLength >= MaxLL) { *(seqStorePtr->litLength++) = MaxLL; diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 7a3f2134..2b830e31 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -110,7 +110,7 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; #define MaxLL ((1< Date: Mon, 21 Mar 2016 00:39:19 +0100 Subject: [PATCH 03/45] support default LL distribution --- lib/fse.c | 113 ++++++++++++++++++++---------------------- lib/zstd_compress.c | 10 +++- lib/zstd_decompress.c | 2 +- programs/bench.c | 1 + 4 files changed, 65 insertions(+), 61 deletions(-) diff --git a/lib/fse.c b/lib/fse.c index 291e6419..dabe2830 100644 --- a/lib/fse.c +++ b/lib/fse.c @@ -145,21 +145,18 @@ static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) { - const unsigned tableSize = 1 << tableLog; - const unsigned tableMask = tableSize - 1; + U32 const tableSize = 1 << tableLog; + U32 const tableMask = tableSize - 1; void* const ptr = ct; U16* const tableU16 = ( (U16*) ptr) + 2; void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? 
tableSize>>1 : 1) ; FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); - const unsigned step = FSE_tableStep(tableSize); - unsigned cumul[FSE_MAX_SYMBOL_VALUE+2]; - U32 position = 0; + U32 const step = FSE_tableStep(tableSize); + U32 cumul[FSE_MAX_SYMBOL_VALUE+2]; FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */ U32 highThreshold = tableSize-1; - unsigned symbol; - unsigned i; - /* header */ + /* CTable header */ tableU16[-2] = (U16) tableLog; tableU16[-1] = (U16) maxSymbolValue; @@ -167,42 +164,44 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ /* symbol start positions */ - cumul[0] = 0; - for (i=1; i<=maxSymbolValue+1; i++) { - if (normalizedCounter[i-1]==-1) { /* Low proba symbol */ - cumul[i] = cumul[i-1] + 1; - tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(i-1); - } else { - cumul[i] = cumul[i-1] + normalizedCounter[i-1]; - } } - cumul[maxSymbolValue+1] = tableSize+1; - - /* Spread symbols */ - for (symbol=0; symbol<=maxSymbolValue; symbol++) { - int nbOccurences; - for (nbOccurences=0; nbOccurences highThreshold) position = (position + step) & tableMask; /* Low proba area */ - } } - - if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */ - - /* Build table */ - for (i=0; i highThreshold) position = (position + step) & tableMask; /* Low proba area */ + } } + if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */ + } + + /* Build table */ + { U32 u; for (u=0; u highThreshold) position = (position + step) & tableMask; /* lowprob area */ - } } - - if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + { U32 position = 0; + for (s=0; s<=maxSymbolValue; s++) { + int i; + for (i=0; i highThreshold) 
position = (position + step) & tableMask; /* lowprob area */ + } } + if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } /* Build Decoding table */ - { - U32 i; - for (i=0; i>= 2; } - { - short const max = (short)((2*threshold-1)-remaining); + { short const max = (short)((2*threshold-1)-remaining); short count; if ((bitStream & (threshold-1)) < (U32)max) { diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 937cce85..c4cd7952 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -655,7 +655,7 @@ static const BYTE deltaCode = 18; /* CTable for Literal Lengths */ #if 1 - { U32 max = 36; + { U32 max = 35; size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq); if ((mostFrequent == nbSeq) && (nbSeq > 2)) { *op++ = llCodeTable[0]; @@ -664,7 +664,13 @@ static const BYTE deltaCode = 18; } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { LLtype = FSE_ENCODING_STATIC; } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LLbits-1)))) { - FSE_buildCTable_raw(CTable_LitLength, LLbits); + static const S16 LL_defaultNorm[36] = { 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 1, 1, 1, 1, + 1, 1, 1, 1 }; + static const U32 LL_defaultNormLog = 6; + FSE_buildCTable(CTable_LitLength, LL_defaultNorm, 35, LL_defaultNormLog); LLtype = FSE_ENCODING_RAW; } else { size_t NCountSize; diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 88734829..3a6887e9 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -325,7 +325,7 @@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* zc, const void* src, size_t srcSize) { - size_t result = ZSTD_getFrameParams(&(zc->fParams), src, srcSize); + size_t const result = 
ZSTD_getFrameParams(&(zc->fParams), src, srcSize); if ((MEM_32bits()) && (zc->fParams.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits); return result; } diff --git a/programs/bench.c b/programs/bench.c index 7acfcca3..2ee3c417 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -282,6 +282,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, testNb, displayName, (U32)srcSize, (U32)cSize, ratio, (double)srcSize / 1000000. / (fastestC / CLOCKS_PER_SEC) ); + (void)crcCheck; (void)fastestD; (void)crcOrig; /* unused when decompression disabled */ #if 0 /* Decompression */ memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */ From 5c71491a4614f6e2045b83863529fe08056daa7d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 21 Mar 2016 02:23:34 +0100 Subject: [PATCH 04/45] first working version with alternate LL codes --- programs/bench.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/bench.c b/programs/bench.c index 2ee3c417..b46dd10c 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -283,7 +283,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, (double)srcSize / 1000000. / (fastestC / CLOCKS_PER_SEC) ); (void)crcCheck; (void)fastestD; (void)crcOrig; /* unused when decompression disabled */ -#if 0 +#if 1 /* Decompression */ memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */ From b0aec17a90210db47ef67a521580e1f23eb656fd Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 21 Mar 2016 13:24:16 +0100 Subject: [PATCH 05/45] code simplification (but reduce decompression speed ...) 
--- lib/fse.c | 49 ++++++++++--------------- lib/zstd_compress.c | 85 ++++++++++++------------------------------- lib/zstd_decompress.c | 80 ++++++++++++++++++++++++++-------------- lib/zstd_internal.h | 11 +++++- lib/zstd_opt.h | 16 ++++---- 5 files changed, 113 insertions(+), 128 deletions(-) diff --git a/lib/fse.c b/lib/fse.c index dabe2830..63898ab1 100644 --- a/lib/fse.c +++ b/lib/fse.c @@ -363,8 +363,7 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, bitStream >>= 16; bitCount -= 16; } } - { - short count = normalizedCounter[charnum++]; + { short count = normalizedCounter[charnum++]; const short max = (short)((2*threshold-1)-remaining); remaining -= FSE_abs(count); if (remaining<1) return ERROR(GENERIC); @@ -506,11 +505,11 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t * Counting histogram ****************************************************************/ /*! FSE_count_simple - This function just counts byte values within @src, - and store the histogram into @count. - This function is unsafe : it doesn't check that all values within @src can fit into @count. - For this reason, prefer using a table @count with 256 elements. - @return : highest count for a single element + This function just counts byte values within `src`, + and store the histogram into table `count`. + This function is unsafe : it doesn't check that all values within `src` can fit into `count`. + For this reason, prefer using a table `count` with 256 elements. 
+ @return : count of most numerous element */ static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize) @@ -519,7 +518,6 @@ static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, const BYTE* const end = ip + srcSize; unsigned maxSymbolValue = *maxSymbolValuePtr; unsigned max=0; - U32 s; memset(count, 0, (maxSymbolValue+1)*sizeof(*count)); if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } @@ -529,7 +527,7 @@ static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, while (!count[maxSymbolValue]) maxSymbolValue--; *maxSymbolValuePtr = maxSymbolValue; - for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; + { U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; } return (size_t)max; } @@ -543,7 +541,6 @@ static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr, const BYTE* const iend = ip+sourceSize; unsigned maxSymbolValue = *maxSymbolValuePtr; unsigned max=0; - U32 s; U32 Counting1[256] = { 0 }; U32 Counting2[256] = { 0 }; @@ -558,8 +555,8 @@ static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr, } if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ - { /* by stripes of 16 bytes */ - U32 cached = MEM_read32(ip); ip += 4; + /* by stripes of 16 bytes */ + { U32 cached = MEM_read32(ip); ip += 4; while (ip < iend-15) { U32 c = cached; cached = MEM_read32(ip); ip += 4; Counting1[(BYTE) c ]++; @@ -589,15 +586,15 @@ static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr, while (ipmaxSymbolValue; s--) { + U32 s; for (s=255; s>maxSymbolValue; s--) { Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); } } - for (s=0; s<=maxSymbolValue; s++) { + { U32 s; for (s=0; s<=maxSymbolValue; s++) { count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; if (count[s] > max) max = count[s]; - } + 
}} while (!count[maxSymbolValue]) maxSymbolValue--; *maxSymbolValuePtr = maxSymbolValue; @@ -631,7 +628,7 @@ size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, `U16 maxSymbolValue;` `U16 nextStateNumber[1 << tableLog];` // This size is variable `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable -Allocation is manual, since C standard does not support variable-size structures. +Allocation is manual (C standard does not support variable-size structures). */ size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog) @@ -727,7 +724,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, /* all values are pretty poor; probably incompressible data (should have already been detected); find max, then give all remaining points to max */ - U32 maxV = 0, maxC =0; + U32 maxV = 0, maxC = 0; for (s=0; s<=maxSymbolValue; s++) if (count[s] > maxC) maxV=s, maxC=count[s]; norm[maxV] += (short)ToDistribute; @@ -765,8 +762,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ - { - U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; + { U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; U64 const scale = 62 - tableLog; U64 const step = ((U64)1<<62) / total; /* <== here, one division ! 
*/ U64 const vStep = 1ULL<<(scale-20); @@ -842,13 +838,11 @@ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits) tableU16[s] = (U16)(tableSize + s); /* Build Symbol Transformation Table */ - { - const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits); + { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits); for (s=0; s<=maxSymbolValue; s++) { symbolTT[s].deltaNbBits = deltaNbBits; symbolTT[s].deltaFindState = s-1; - } - } + } } return 0; } @@ -884,15 +878,13 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize, const BYTE* const istart = (const BYTE*) src; const BYTE* const iend = istart + srcSize; const BYTE* ip=iend; - - size_t errorCode; BIT_CStream_t bitC; FSE_CState_t CState1, CState2; /* init */ if (srcSize <= 2) return 0; - errorCode = BIT_initCStream(&bitC, dst, dstSize); - if (FSE_isError(errorCode)) return 0; + { size_t const errorCode = BIT_initCStream(&bitC, dst, dstSize); + if (FSE_isError(errorCode)) return 0; } #define FSE_FLUSHBITS(s) (fast ? 
BIT_flushBitsFast(s) : BIT_flushBits(s)) @@ -915,8 +907,7 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize, } /* 2 or 4 encoding per loop */ - for ( ; ip>istart ; ) - { + for ( ; ip>istart ; ) { FSE_encodeSymbol(&bitC, &CState2, *--ip); if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index c4cd7952..f098c02a 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -191,7 +191,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, const size_t tableSpace = (contentSize + hSize + h3Size) * sizeof(U32); /* Check if workSpace is large enough, alloc a new one if needed */ - { size_t const optSpace = ((1<seqStore.litFreq = (U32*)((void*)(zc->seqStore.dumpsStart + maxNbSeq)); zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1); zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (1<seqStore.matchTable = (ZSTD_match_t*)((void*)(zc->seqStore.offCodeFreq + (1<seqStore.priceTable = (ZSTD_optimal_t*)((void*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1)); @@ -585,7 +585,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_CTable* CTable_OffsetBits = zc->offcodeCTable; FSE_CTable* CTable_MatchLength = zc->matchlengthCTable; U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ - const U16* const llTable = seqStorePtr->litLengthStart; + U16* const llTable = seqStorePtr->litLengthStart; const BYTE* const mlTable = seqStorePtr->matchLengthStart; const U32* const offsetTable = seqStorePtr->offsetStart; const U32* const offsetTableEnd = seqStorePtr->offset; @@ -636,26 +636,24 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, #define MAX_SEQ_FOR_STATIC_FSE 1000 /* LL codes */ -static const BYTE llCode[64] = { 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 16, 17, 17, 18, 18, 19, 19, - 20, 20, 20, 20, 21, 21, 
21, 21, - 22, 22, 22, 22, 22, 22, 22, 22, - 23, 23, 23, 23, 23, 23, 23, 23, - 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24 }; -static const BYTE deltaCode = 18; - - { size_t i; + { static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 16, 17, 17, 18, 18, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24 }; + const BYTE deltaCode = 19; + size_t i; for (i=0; ilitLengthLong; - llCodeTable[i] = (ll>63) ? ZSTD_highbit(ll) + deltaCode : llCode[ll]; + if (llTable[i] == 65535) { ll = seqStorePtr->litLengthLong; llTable[i] = (U16)ll; } + llCodeTable[i] = (ll>63) ? ZSTD_highbit(ll) + deltaCode : LL_Code[ll]; } } /* CTable for Literal Lengths */ -#if 1 - { U32 max = 35; + { U32 max = MaxLL; size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq); if ((mostFrequent == nbSeq) && (nbSeq > 2)) { *op++ = llCodeTable[0]; @@ -663,14 +661,8 @@ static const BYTE deltaCode = 18; LLtype = FSE_ENCODING_RLE; } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { LLtype = FSE_ENCODING_STATIC; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LLbits-1)))) { - static const S16 LL_defaultNorm[36] = { 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 1, 1, 1, 1, - 1, 1, 1, 1 }; - static const U32 LL_defaultNormLog = 6; - FSE_buildCTable(CTable_LitLength, LL_defaultNorm, 35, LL_defaultNormLog); + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) { + FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog); LLtype = FSE_ENCODING_RAW; } else { size_t NCountSize; @@ -684,31 +676,6 @@ static const BYTE deltaCode = 18; FSE_buildCTable(CTable_LitLength, norm, max, tableLog); LLtype = FSE_ENCODING_DYNAMIC; }} -#else - { U32 max = MaxLL; - size_t 
const mostFrequent = FSE_countFast(count, &max, llTable, nbSeq); - if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = llTable[0]; - FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); - LLtype = FSE_ENCODING_RLE; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - LLtype = FSE_ENCODING_STATIC; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LLbits-1)))) { - FSE_buildCTable_raw(CTable_LitLength, LLbits); - LLtype = FSE_ENCODING_RAW; - } else { - size_t NCountSize; - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); - if (count[llTable[nbSeq-1]]>1) { count[llTable[nbSeq-1]]--; nbSeq_1--; } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return ERROR(GENERIC); - op += NCountSize; - FSE_buildCTable(CTable_LitLength, norm, max, tableLog); - LLtype = FSE_ENCODING_DYNAMIC; - }} -#endif // 0 /* Offset codes */ { size_t i; for (i=0; i 198618400) && (pos < 198618500)) printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", - (U32)(literals - g_start), (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); + pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); #endif #if ZSTD_OPT_DEBUG == 3 if (offsetCode == 0) seqStorePtr->realRepSum++; @@ -2278,7 +2241,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc, ZSTD_validateParams(¶ms); { size_t const errorCode = ZSTD_resetCCtx_advanced(zc, params); - if (ZSTD_isError(errorCode)) return errorCode; } + if (ZSTD_isError(errorCode)) return errorCode; } /* Write Frame Header into ctx headerBuffer */ MEM_writeLE32(zc->headerBuffer, ZSTD_MAGICNUMBER); diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 3a6887e9..2c1acab5 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -531,6 +531,34 @@ FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 
type, U32 rawBits } +FORCE_INLINE size_t ZSTD_buildSeqTableLL(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog, + const void* src, size_t srcSize) +{ + switch(type) + { + case FSE_ENCODING_RLE : + if (!srcSize) return ERROR(srcSize_wrong); + if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected); + FSE_buildDTable_rle(DTable, *(const BYTE*)src); /* if *src > max, data is corrupted */ + return 1; + case FSE_ENCODING_RAW : + FSE_buildDTable(DTable, LL_defaultNorm, max, LL_defaultNormLog); + return 0; + case FSE_ENCODING_STATIC: + return 0; + default : /* impossible */ + case FSE_ENCODING_DYNAMIC : + { U32 tableLog; + S16 norm[MaxSeq+1]; + size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); + if (FSE_isError(headerSize)) return ERROR(corruption_detected); + if (tableLog > maxLog) return ERROR(corruption_detected); + FSE_buildDTable(DTable, norm, max, tableLog); + return headerSize; + } } +} + + size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr, FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, const void* src, size_t srcSize) @@ -576,7 +604,7 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLen if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ /* Build DTables */ - { size_t const bhSize = ZSTD_buildSeqTable(DTableLL, LLtype, LLbits, LLFSELog, ip, iend-ip); + { size_t const bhSize = ZSTD_buildSeqTableLL(DTableLL, LLtype, 35, LLFSELog, ip, iend-ip); if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); ip += bhSize; } @@ -612,21 +640,13 @@ typedef struct { static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) { - const BYTE* dumps = seqState->dumps; - const BYTE* const de = seqState->dumpsEnd; - size_t litLength, offset; - /* Literal length */ - litLength = FSE_peakSymbol(&(seqState->stateLL)); - if (litLength == MaxLL) { - const U32 add = 
*dumps++; - if (add < 255) litLength += add; - else { - litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no risk : dumps is always followed by seq tables > 1 byte */ - if (litLength&1) litLength>>=1, dumps += 3; - else litLength = (U16)(litLength)>>1, dumps += 2; - } - if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ + U32 const litCode = FSE_peakSymbol(&(seqState->stateLL)); + { static const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; + seq->litLength = LL_base[litCode] + BIT_readBits(&(seqState->DStream), LL_bits[litCode]); } /* Offset */ @@ -637,11 +657,12 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) 0x800000, 0x1000000, 0x2000000, 0x4000000, /*fake*/ 1, 1, 1, 1 }; U32 const offsetCode = FSE_peakSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */ U32 const nbBits = offsetCode ? offsetCode-1 : 0; - offset = offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits); + size_t const offset = offsetCode ? offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits) : + litCode ? seq->offset : seqState->prevOffset; + if (offsetCode | !litCode) seqState->prevOffset = seq->offset; /* cmove */ + seq->offset = offset; if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - if (offsetCode==0) offset = litLength ? 
seq->offset : seqState->prevOffset; - if (offsetCode | !litLength) seqState->prevOffset = seq->offset; /* cmove */ - FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */ + FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */ } /* Literal length update */ @@ -650,7 +671,9 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) /* MatchLength */ { size_t matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); + const BYTE* dumps = seqState->dumps; if (matchLength == MaxML) { + const BYTE* const de = seqState->dumpsEnd; const U32 add = *dumps++; if (add < 255) matchLength += add; else { @@ -662,13 +685,9 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) } matchLength += mls; seq->matchLength = matchLength; + seqState->dumps = dumps; } - /* save result */ - seq->litLength = litLength; - seq->offset = offset; - seqState->dumps = dumps; - #if 0 /* debug */ { static U64 totalDecoded = 0; @@ -799,13 +818,18 @@ static size_t ZSTD_decompressSequences( FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb); FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML); - for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { + for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; nbSeq--) { size_t oneSeqSize; - nbSeq--; ZSTD_decodeSequence(&sequence, &seqState, mls); +#if 0 /* for debug */ + { U32 pos = (U32)(op-base); + if ((pos > 198618400) && (pos < 198618500)) + printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", + pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset); + } +#endif oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd); - if (ZSTD_isError(oneSeqSize)) - return oneSeqSize; + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; op += oneSeqSize; } diff --git 
a/lib/zstd_internal.h b/lib/zstd_internal.h index 2b830e31..51784833 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -103,11 +103,10 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; #define Litbits 8 #define MLbits 7 -#define LLbits 6 #define Offbits 5 #define MaxLit ((1<litLengthSum == 0) { ssPtr->litSum = (2<litLengthSum = (1<litLengthSum = MaxLL+1; ssPtr->matchLengthSum = (1<offCodeSum = (1<matchSum = (2<offCodeSum += ssPtr->offCodeFreq[u]; } } - + ZSTD_setLog2Prices(ssPtr); } @@ -243,7 +243,7 @@ static U32 ZSTD_insertBtAndGetAllMatches ( if (minMatch == 3) { /* HC3 match finder */ U32 matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip); - + if (matchIndex3>windowLow && (current - matchIndex3 < (1<<18))) { const BYTE* match; size_t currentMl=0; @@ -408,7 +408,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, const BYTE* const ilimit = iend - 8; const BYTE* const base = ctx->base; const BYTE* const prefixStart = base + ctx->dictLimit; - + U32 rep_2=REPCODE_STARTVALUE, rep_1=REPCODE_STARTVALUE; const U32 maxSearches = 1U << ctx->params.searchLog; const U32 sufficient_len = ctx->params.targetLength; @@ -733,7 +733,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, const BYTE* const dictBase = ctx->dictBase; const BYTE* const dictEnd = dictBase + dictLimit; const U32 lowLimit = ctx->lowLimit; - + U32 rep_2=REPCODE_STARTVALUE, rep_1=REPCODE_STARTVALUE; const U32 maxSearches = 1U << ctx->params.searchLog; const U32 sufficient_len = ctx->params.targetLength; @@ -1044,12 +1044,12 @@ _storeSequence: // cur, last_pos, best_mlen, best_off have to be set break; } else { const BYTE* repMatch = dictBase + ((anchor-base) - rep_2); - if ((repMatch + minMatch <= dictEnd) && (MEM_readMINMATCH(anchor, minMatch) == MEM_readMINMATCH(repMatch, minMatch))) + if ((repMatch + minMatch <= dictEnd) && (MEM_readMINMATCH(anchor, minMatch) == MEM_readMINMATCH(repMatch, minMatch))) mlen = (U32)ZSTD_count_2segments(anchor+minMatch, 
repMatch+minMatch, iend, dictEnd, prefixStart) + minMatch; else break; } - + offset = rep_2; rep_2 = rep_1; rep_1 = offset; /* swap offset history */ ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep_1, (int)rep_2); ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, mlen-minMatch); From be4605d851ba2e9a2be5bb27999c2018f4c0db6b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 21 Mar 2016 14:29:26 +0100 Subject: [PATCH 06/45] improved decompression speed --- lib/zstd_decompress.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 2c1acab5..2c3f560f 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -818,8 +818,9 @@ static size_t ZSTD_decompressSequences( FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb); FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML); - for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; nbSeq--) { + for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { size_t oneSeqSize; + nbSeq--; ZSTD_decodeSequence(&sequence, &seqState, mls); #if 0 /* for debug */ { U32 pos = (U32)(op-base); From fadda6c875f5eb20a16c1f97d8589c8e850e99f2 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 22 Mar 2016 12:14:26 +0100 Subject: [PATCH 07/45] first prototype with ML encoding scheme (but not yet decoding scheme) --- lib/zstd_compress.c | 289 ++++++++++++++++++++------------------------ lib/zstd_internal.h | 19 ++- programs/bench.c | 2 +- 3 files changed, 148 insertions(+), 162 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index f098c02a..d0cd50ea 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -184,7 +184,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, const size_t blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << 
params.windowLog); const U32 divider = (params.searchLength==3) ? 3 : 4; const size_t maxNbSeq = blockSize / divider; - const size_t tokenSpace = blockSize + 10*maxNbSeq; + const size_t tokenSpace = blockSize + 12*maxNbSeq; const size_t contentSize = (params.strategy == ZSTD_fast) ? 0 : (1 << params.contentLog); const size_t hSize = 1 << params.hashLog; const size_t h3Size = (params.searchLength==3) ? (1 << HASHLOG3) : 0; @@ -222,11 +222,12 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer); zc->seqStore.litLengthStart = (U16*) (void*)(zc->seqStore.offsetStart + maxNbSeq); - zc->seqStore.llCodeStart = (BYTE*) (zc->seqStore.litLengthStart + maxNbSeq); - zc->seqStore.offCodeStart = zc->seqStore.llCodeStart + maxNbSeq; + zc->seqStore.matchLengthStart = (U16*) (void*)(zc->seqStore.litLengthStart + maxNbSeq); + zc->seqStore.llCodeStart = (BYTE*) (zc->seqStore.matchLengthStart + maxNbSeq); + zc->seqStore.mlCodeStart = zc->seqStore.llCodeStart + maxNbSeq; + zc->seqStore.offCodeStart = zc->seqStore.mlCodeStart + maxNbSeq; zc->seqStore.litStart = zc->seqStore.offCodeStart + maxNbSeq; - zc->seqStore.matchLengthStart = zc->seqStore.litStart + blockSize; - zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + maxNbSeq; + zc->seqStore.dumpsStart = zc->seqStore.litStart + maxNbSeq; if (params.strategy == ZSTD_btopt) { zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.dumpsStart + maxNbSeq)); zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<matchlengthCTable; U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ U16* const llTable = seqStorePtr->litLengthStart; - const BYTE* const mlTable = seqStorePtr->matchLengthStart; + U16* const mlTable = seqStorePtr->matchLengthStart; const U32* const offsetTable = seqStorePtr->offsetStart; const U32* const offsetTableEnd = seqStorePtr->offset; BYTE* const offCodeTable = seqStorePtr->offCodeStart; BYTE* const llCodeTable = seqStorePtr->llCodeStart; + BYTE* const 
mlCodeTable = seqStorePtr->mlCodeStart; BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstCapacity; BYTE* op = ostart; size_t const nbSeq = offsetTableEnd - offsetTable; BYTE* seqHead; + static U32 blockNb = 0; + blockNb++; + + if (blockNb==79) + blockNb += !nbSeq; + /* Compress literals */ { const BYTE* const literals = seqStorePtr->litStart; size_t const litSize = seqStorePtr->lit - literals; @@ -644,88 +652,106 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24 }; - const BYTE deltaCode = 19; - size_t i; - for (i=0; ilitLengthLong; llTable[i] = (U16)ll; } - llCodeTable[i] = (ll>63) ? ZSTD_highbit(ll) + deltaCode : LL_Code[ll]; + const BYTE LL_deltaCode = 19; + size_t u; + for (u=0; ulongLength; llTable[u] = (U16)ll; } + llCodeTable[u] = (ll>63) ? ZSTD_highbit(ll) + LL_deltaCode : LL_Code[ll]; } } /* CTable for Literal Lengths */ - { U32 max = MaxLL; - size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq); - if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = llCodeTable[0]; - FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); - LLtype = FSE_ENCODING_RLE; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - LLtype = FSE_ENCODING_STATIC; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) { - FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog); - LLtype = FSE_ENCODING_RAW; - } else { - size_t NCountSize; - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); - if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return ERROR(GENERIC); - op += NCountSize; - FSE_buildCTable(CTable_LitLength, norm, max, 
tableLog); - LLtype = FSE_ENCODING_DYNAMIC; - }} + { U32 max = MaxLL; + size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + *op++ = llCodeTable[0]; + FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); + LLtype = FSE_ENCODING_RLE; + } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + LLtype = FSE_ENCODING_STATIC; + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) { + FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog); + LLtype = FSE_ENCODING_RAW; + } else { + size_t NCountSize; + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); + if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; + FSE_buildCTable(CTable_LitLength, norm, max, tableLog); + LLtype = FSE_ENCODING_DYNAMIC; + } } /* Offset codes */ { size_t i; for (i=0; i 2)) { - *op++ = offCodeTable[0]; - FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); - Offtype = FSE_ENCODING_RLE; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - Offtype = FSE_ENCODING_STATIC; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (Offbits-1)))) { - FSE_buildCTable_raw(CTable_OffsetBits, Offbits); - Offtype = FSE_ENCODING_RAW; - } else { - size_t NCountSize; - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); - if (count[offCodeTable[nbSeq-1]]>1) { count[offCodeTable[nbSeq-1]]--; nbSeq_1--; } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return 
ERROR(GENERIC); - op += NCountSize; - FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog); - Offtype = FSE_ENCODING_DYNAMIC; - }} + { U32 max = MaxOff; + size_t const mostFrequent = FSE_countFast(count, &max, offCodeTable, nbSeq); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + *op++ = offCodeTable[0]; + FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); + Offtype = FSE_ENCODING_RLE; + } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + Offtype = FSE_ENCODING_STATIC; + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (Offbits-1)))) { + FSE_buildCTable_raw(CTable_OffsetBits, Offbits); + Offtype = FSE_ENCODING_RAW; + } else { + size_t NCountSize; + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); + if (count[offCodeTable[nbSeq-1]]>1) { count[offCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; + FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog); + Offtype = FSE_ENCODING_DYNAMIC; + } } + + /* ML codes */ + { static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 20, 31, + 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; + const BYTE ML_deltaCode = 36; + size_t u; + for (u=0; ulongLength; mlTable[u] = (U16)ml; } + mlCodeTable[u] = (ml>127) ? 
ZSTD_highbit(ml) + ML_deltaCode : ML_Code[ml]; + } } /* CTable for MatchLengths */ - { U32 max = MaxML; - size_t const mostFrequent = FSE_countFast(count, &max, mlTable, nbSeq); - if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = *mlTable; - FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); - MLtype = FSE_ENCODING_RLE; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - MLtype = FSE_ENCODING_STATIC; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (MLbits-1)))) { - FSE_buildCTable_raw(CTable_MatchLength, MLbits); - MLtype = FSE_ENCODING_RAW; - } else { - size_t NCountSize; - const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); - FSE_normalizeCount(norm, tableLog, count, nbSeq, max); - NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return ERROR(GENERIC); - op += NCountSize; - FSE_buildCTable(CTable_MatchLength, norm, max, tableLog); - MLtype = FSE_ENCODING_DYNAMIC; - }} + { U32 max = MaxML; + size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + *op++ = *mlTable; + FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); + MLtype = FSE_ENCODING_RLE; + } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + MLtype = FSE_ENCODING_STATIC; + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) { + FSE_buildCTable(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog); + MLtype = FSE_ENCODING_RAW; + } else { + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); + if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return 
ERROR(GENERIC); + op += NCountSize; } + FSE_buildCTable(CTable_MatchLength, norm, max, tableLog); + MLtype = FSE_ENCODING_DYNAMIC; + } } seqHead[0] += (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); zc->flagStaticTables = 0; @@ -739,55 +765,34 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, { size_t const errorCode = BIT_initCStream(&blockStream, op, oend-op); if (ERR_isError(errorCode)) return ERROR(dstSize_tooSmall); } /* not enough space remaining */ -#if 1 /* first symbols */ - FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlTable[nbSeq-1]); + FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, offCodeTable[nbSeq-1]); FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); + BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]); BIT_addBits(&blockStream, offsetTable[nbSeq-1], offCodeTable[nbSeq-1] ? (offCodeTable[nbSeq-1]-1) : 0); BIT_addBits(&blockStream, llTable[nbSeq-1], LL_bits[llCodeTable[nbSeq-1]]); BIT_flushBits(&blockStream); - { size_t n; - for (n=nbSeq-2; n=0; i--) { - const BYTE mlCode = mlTable[i]; - const U32 offset = offsetTable[i]; - const BYTE offCode = offCodeTable[i]; /* 32b*/ /* 64b*/ - const U32 nbBits = (offCode-1) + (!offCode); - const BYTE litLength = llTable[i]; /* (7)*/ /* (7)*/ - FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 17 */ /* 17 */ - if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ - FSE_encodeSymbol(&blockStream, &stateLitLength, litLength); /* 17 */ /* 27 */ - FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 26 */ /* 36 */ - if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ - BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 62 */ /* 24 bits max in 32-bits mode */ - BIT_flushBits(&blockStream); /* 7 */ /* 7 */ - }} -#endif // 0 + { size_t n; + for (n=nbSeq-2 ; n= maxCSize) return 0; - } + { size_t const minGain = ZSTD_minGain(srcSize); + size_t const 
maxCSize = srcSize - minGain; + if ((size_t)(op-ostart) >= maxCSize) return 0; } return op - ostart; } @@ -836,44 +840,15 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B seqStorePtr->lit += litLength; /* literal Length */ -#if 1 - if (litLength>=65535) { *(seqStorePtr->litLength++) = 65535; seqStorePtr->litLengthLong = (U32)litLength; } + if (litLength>=65535) { *(seqStorePtr->litLength++) = 65535; seqStorePtr->longLength = (U32)litLength; } else *seqStorePtr->litLength++ = (U16)litLength; -#else - if (litLength >= MaxLL) { - *(seqStorePtr->litLength++) = MaxLL; - if (litLength<255 + MaxLL) { - *(seqStorePtr->dumps++) = (BYTE)(litLength - MaxLL); - } else { - *(seqStorePtr->dumps++) = 255; - if (litLength < (1<<15)) { - MEM_writeLE16(seqStorePtr->dumps, (U16)(litLength<<1)); - seqStorePtr->dumps += 2; - } else { - MEM_writeLE32(seqStorePtr->dumps, (U32)((litLength<<1)+1)); - seqStorePtr->dumps += 3; - } } } - else *(seqStorePtr->litLength++) = (BYTE)litLength; -#endif // 0 /* match offset */ *(seqStorePtr->offset++) = (U32)offsetCode; /* match Length */ - if (matchCode >= MaxML) { - *(seqStorePtr->matchLength++) = MaxML; - if (matchCode < 255+MaxML) { - *(seqStorePtr->dumps++) = (BYTE)(matchCode - MaxML); - } else { - *(seqStorePtr->dumps++) = 255; - if (matchCode < (1<<15)) { - MEM_writeLE16(seqStorePtr->dumps, (U16)(matchCode<<1)); - seqStorePtr->dumps += 2; - } else { - MEM_writeLE32(seqStorePtr->dumps, (U32)((matchCode<<1)+1)); - seqStorePtr->dumps += 3; - } } } - else *(seqStorePtr->matchLength++) = (BYTE)matchCode; + if (matchCode>=65535) { *(seqStorePtr->matchLength++) = 65535; seqStorePtr->longLength = (U32)matchCode; } + else *seqStorePtr->matchLength++ = (U16)matchCode; } diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 51784833..3b83059b 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -105,7 +105,7 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; #define MLbits 7 #define 
Offbits 5 #define MaxLit ((1< Date: Tue, 22 Mar 2016 23:19:28 +0100 Subject: [PATCH 08/45] first working version with both encoder and decode alternate LL + ML coding scheme. decompression speed highly impacted --- lib/zstd_compress.c | 44 ++++------------- lib/zstd_decompress.c | 107 ++++++++++++++++++++++-------------------- lib/zstd_internal.h | 7 +-- lib/zstd_opt.h | 2 +- programs/bench.c | 6 +-- 5 files changed, 71 insertions(+), 95 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index d0cd50ea..81db040c 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -80,7 +80,6 @@ static void ZSTD_resetSeqStore(seqStore_t* ssPtr) ssPtr->lit = ssPtr->litStart; ssPtr->litLength = ssPtr->litLengthStart; ssPtr->matchLength = ssPtr->matchLengthStart; - ssPtr->dumps = ssPtr->dumpsStart; } @@ -184,14 +183,14 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, const size_t blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.windowLog); const U32 divider = (params.searchLength==3) ? 3 : 4; const size_t maxNbSeq = blockSize / divider; - const size_t tokenSpace = blockSize + 12*maxNbSeq; + const size_t tokenSpace = blockSize + 11*maxNbSeq; const size_t contentSize = (params.strategy == ZSTD_fast) ? 0 : (1 << params.contentLog); const size_t hSize = 1 << params.hashLog; const size_t h3Size = (params.searchLength==3) ? 
(1 << HASHLOG3) : 0; const size_t tableSpace = (contentSize + hSize + h3Size) * sizeof(U32); /* Check if workSpace is large enough, alloc a new one if needed */ - { size_t const optSpace = ((1<seqStore.mlCodeStart = zc->seqStore.llCodeStart + maxNbSeq; zc->seqStore.offCodeStart = zc->seqStore.mlCodeStart + maxNbSeq; zc->seqStore.litStart = zc->seqStore.offCodeStart + maxNbSeq; - zc->seqStore.dumpsStart = zc->seqStore.litStart + maxNbSeq; if (params.strategy == ZSTD_btopt) { - zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.dumpsStart + maxNbSeq)); + zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.litStart + blockSize)); zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1); - zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (1<seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1); zc->seqStore.matchTable = (ZSTD_match_t*)((void*)(zc->seqStore.offCodeFreq + (1<seqStore.priceTable = (ZSTD_optimal_t*)((void*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1)); zc->seqStore.litLengthSum = 0; @@ -599,12 +597,6 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, size_t const nbSeq = offsetTableEnd - offsetTable; BYTE* seqHead; - static U32 blockNb = 0; - blockNb++; - - if (blockNb==79) - blockNb += !nbSeq; - /* Compress literals */ { const BYTE* const literals = seqStorePtr->litStart; size_t const litSize = seqStorePtr->lit - literals; @@ -620,25 +612,8 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; if (nbSeq==0) goto _check_compressibility; - /* dumps : contains rests of large lengths */ - if ((oend-op) < 3 /* dumps */ + 1 /*seqHead*/) - return ERROR(dstSize_tooSmall); - seqHead = op; - { size_t const dumpsLength = seqStorePtr->dumps - seqStorePtr->dumpsStart; - if (dumpsLength < 512) { - op[0] = (BYTE)(dumpsLength >> 8); - op[1] = (BYTE)(dumpsLength); - op += 2; - } else { - op[0] = 2; - op[1] = (BYTE)(dumpsLength>>8); 
- op[2] = (BYTE)(dumpsLength); - op += 3; - } - if ((size_t)(oend-op) < dumpsLength+6) return ERROR(dstSize_tooSmall); - memcpy(op, seqStorePtr->dumpsStart, dumpsLength); - op += dumpsLength; - } + /* seqHead : flags for FSE encoding type */ + seqHead = op++; #define MIN_SEQ_FOR_DYNAMIC_FSE 64 #define MAX_SEQ_FOR_STATIC_FSE 1000 @@ -714,7 +689,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, /* ML codes */ { static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 20, 31, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, @@ -753,7 +728,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, MLtype = FSE_ENCODING_DYNAMIC; } } - seqHead[0] += (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); zc->flagStaticTables = 0; /* Encoding Sequences */ @@ -791,6 +766,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, BIT_addBits(&blockStream, mlTable[n], ML_bits[MLCode]); BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 61 */ /* 24 bits max in 32-bits mode */ BIT_addBits(&blockStream, llTable[n], LL_bits[LLCode]); + //if (blockStream.bitPos > 63) printf("pb : blockStream.bitPos == %u > 63 \n", blockStream.bitPos); BIT_flushBits(&blockStream); /* 7 */ /* 7 */ } } @@ -824,7 +800,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B static const BYTE* g_start = NULL; const U32 pos = (U32)(literals - g_start); if (g_start==NULL) g_start = literals; - if ((pos > 198618400) && (pos < 198618500)) + if ((pos > 10354000) && (pos < 10355000)) printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); #endif diff --git 
a/lib/zstd_decompress.c b/lib/zstd_decompress.c index b1f51561..96b8846c 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -559,9 +559,37 @@ FORCE_INLINE size_t ZSTD_buildSeqTableLL(FSE_DTable* DTable, U32 type, U32 max, } -size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr, - FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, - const void* src, size_t srcSize) +FORCE_INLINE size_t ZSTD_buildSeqTableML(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog, + const void* src, size_t srcSize) +{ + switch(type) + { + case FSE_ENCODING_RLE : + if (!srcSize) return ERROR(srcSize_wrong); + if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected); + FSE_buildDTable_rle(DTable, *(const BYTE*)src); /* if *src > max, data is corrupted */ + return 1; + case FSE_ENCODING_RAW : + FSE_buildDTable(DTable, ML_defaultNorm, max, ML_defaultNormLog); + return 0; + case FSE_ENCODING_STATIC: + return 0; + default : /* impossible */ + case FSE_ENCODING_DYNAMIC : + { U32 tableLog; + S16 norm[MaxSeq+1]; + size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); + if (FSE_isError(headerSize)) return ERROR(corruption_detected); + if (tableLog > maxLog) return ERROR(corruption_detected); + FSE_buildDTable(DTable, norm, max, tableLog); + return headerSize; + } } +} + + +size_t ZSTD_decodeSeqHeaders(int* nbSeq, + FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, + const void* src, size_t srcSize) { const BYTE* const istart = (const BYTE* const)src; const BYTE* ip = istart; @@ -585,26 +613,13 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLen LLtype = *ip >> 6; Offtype = (*ip >> 4) & 3; MLtype = (*ip >> 2) & 3; - { size_t dumpsLength; - if (*ip & 2) { - dumpsLength = ip[2]; - dumpsLength += ip[1] << 8; - ip += 3; - } else { - dumpsLength = ip[1]; - dumpsLength += (ip[0] & 1) << 8; - ip += 2; - } - *dumpsPtr = ip; - ip += dumpsLength; - *dumpsLengthPtr 
= dumpsLength; - } + ip++; /* check */ if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ /* Build DTables */ - { size_t const bhSize = ZSTD_buildSeqTableLL(DTableLL, LLtype, 35, LLFSELog, ip, iend-ip); + { size_t const bhSize = ZSTD_buildSeqTableLL(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip); if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); ip += bhSize; } @@ -612,7 +627,7 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLen if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); ip += bhSize; } - { size_t const bhSize = ZSTD_buildSeqTable(DTableML, MLtype, MLbits, MLFSELog, ip, iend-ip); + { size_t const bhSize = ZSTD_buildSeqTableML(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip); if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); ip += bhSize; } @@ -633,8 +648,6 @@ typedef struct { FSE_DState_t stateOffb; FSE_DState_t stateML; size_t prevOffset; - const BYTE* dumps; - const BYTE* dumpsEnd; } seqState_t; @@ -662,31 +675,26 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) if (offsetCode | !litCode) seqState->prevOffset = seq->offset; /* cmove */ seq->offset = offset; if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */ } - /* Literal length update */ + { static const U32 ML_base[MaxML+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 0x400, 0x800, + 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 }; + U32 const mlCode = FSE_peakSymbol(&(seqState->stateML)); + seq->matchLength = ML_base[mlCode] + BIT_readBits(&(seqState->DStream), ML_bits[mlCode]) + mls; + } + + /* ANS update */ FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); /* update */ if (MEM_32bits()) 
BIT_reloadDStream(&(seqState->DStream)); - /* MatchLength */ - { size_t matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); - const BYTE* dumps = seqState->dumps; - if (matchLength == MaxML) { - const BYTE* const de = seqState->dumpsEnd; - const U32 add = *dumps++; - if (add < 255) matchLength += add; - else { - matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */ - if (matchLength&1) matchLength>>=1, dumps += 3; - else matchLength = (U16)(matchLength)>>1, dumps += 2; - } - if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ - } - matchLength += mls; - seq->matchLength = matchLength; - seqState->dumps = dumps; - } + FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */ + if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); + + FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); /* update */ + if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); #if 0 /* debug */ { @@ -781,12 +789,10 @@ static size_t ZSTD_decompressSequences( BYTE* const ostart = (BYTE* const)dst; BYTE* op = ostart; BYTE* const oend = ostart + maxDstSize; - size_t dumpsLength; const BYTE* litPtr = dctx->litPtr; const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8; const BYTE* const litEnd = litPtr + dctx->litSize; int nbSeq; - const BYTE* dumps; U32* DTableLL = dctx->LLTable; U32* DTableML = dctx->MLTable; U32* DTableOffb = dctx->OffTable; @@ -796,7 +802,7 @@ static size_t ZSTD_decompressSequences( const U32 mls = dctx->fParams.mml; /* Build Decoding Tables */ - { size_t const errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength, + { size_t const errorCode = ZSTD_decodeSeqHeaders(&nbSeq, DTableLL, DTableML, DTableOffb, ip, seqSize); if (ZSTD_isError(errorCode)) return errorCode; @@ -810,8 +816,6 @@ static size_t ZSTD_decompressSequences( memset(&sequence, 0, sizeof(sequence)); sequence.offset = 
REPCODE_STARTVALUE; - seqState.dumps = dumps; - seqState.dumpsEnd = dumps + dumpsLength; seqState.prevOffset = REPCODE_STARTVALUE; { size_t const errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip); if (ERR_isError(errorCode)) return ERROR(corruption_detected); } @@ -825,7 +829,7 @@ static size_t ZSTD_decompressSequences( ZSTD_decodeSequence(&sequence, &seqState, mls); #if 0 /* for debug */ { U32 pos = (U32)(op-base); - if ((pos > 198618400) && (pos < 198618500)) + if ((pos > 10354000) && (pos < 10355000)) printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset); } @@ -867,17 +871,16 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, const void* src, size_t srcSize) { /* blockType == blockCompressed */ const BYTE* ip = (const BYTE*)src; - size_t litCSize; if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); ZSTD_LOG_BLOCK("%p: ZSTD_decompressBlock_internal searchLength=%d\n", dctx->base, dctx->params.searchLength); /* Decode literals sub-block */ - litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); - if (ZSTD_isError(litCSize)) return litCSize; - ip += litCSize; - srcSize -= litCSize; + { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); + if (ZSTD_isError(litCSize)) return litCSize; + ip += litCSize; + srcSize -= litCSize; } return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize); } diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 3b83059b..fa6c93ca 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -102,16 +102,15 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; #define HASHLOG3 17 #define Litbits 8 -#define MLbits 7 #define Offbits 5 #define MaxLit ((1<litLengthSum == 0) { ssPtr->litSum = (2<litLengthSum = MaxLL+1; - ssPtr->matchLengthSum = (1<matchLengthSum = MaxML+1; ssPtr->offCodeSum = (1<matchSum = (2< /* fprintf, fopen, ftello64 */ #include /* stat64 
*/ #include /* stat64 */ -#include /* clock_t, clock, CLOCKS_PER_SEC */ +#include /* clock_t, clock, CLOCKS_PER_SEC */ /* sleep : posix - windows - others */ #if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__))) @@ -65,7 +65,7 @@ #include "mem.h" #include "zstd_static.h" #include "xxhash.h" -#include "datagen.h" /* RDG_genBuffer */ +#include "datagen.h" /* RDG_genBuffer */ /* ************************************* @@ -283,7 +283,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, (double)srcSize / 1000000. / (fastestC / CLOCKS_PER_SEC) ); (void)crcCheck; (void)fastestD; (void)crcOrig; /* unused when decompression disabled */ -#if 0 +#if 1 /* Decompression */ memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */ From add08d6f61c69a5ea27ff4ced268585d2baca654 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 23 Mar 2016 01:32:41 +0100 Subject: [PATCH 09/45] minor variation - DSpeed at 640 --- lib/bitstream.h | 7 ++-- lib/fse_static.h | 33 +++++++++-------- lib/zdict.c | 3 ++ lib/zstd_compress.c | 24 +++++-------- lib/zstd_decompress.c | 83 +++++++++++++------------------------------ 5 files changed, 58 insertions(+), 92 deletions(-) diff --git a/lib/bitstream.h b/lib/bitstream.h index 0fe36eae..d90c9b24 100644 --- a/lib/bitstream.h +++ b/lib/bitstream.h @@ -246,8 +246,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) /*! BIT_initDStream() : * Initialize a BIT_DStream_t. * `bitD` : a pointer to an already allocated BIT_DStream_t structure. -* `srcBuffer` must point at the beginning of a bitStream. -* `srcSize` must be the exact size of the bitStream, in bytes. +* `srcSize` must be the *exact* size of the bitStream, in bytes. 
* @return : size of stream (== srcSize) or an errorCode if a problem is detected */ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) @@ -293,7 +292,7 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si * On 64-bits, maxNbBits==56. * @return : value extracted */ -MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits) +MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) { U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1; return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); @@ -301,7 +300,7 @@ MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits) /*! BIT_lookBitsFast() : * unsafe version; only works only if nbBits >= 1 */ -MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits) +MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) { U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1; return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); diff --git a/lib/fse_static.h b/lib/fse_static.h index ca303db8..f3c3d44e 100644 --- a/lib/fse_static.h +++ b/lib/fse_static.h @@ -267,7 +267,7 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt BIT_flushBits(bitC); } -/* decompression */ +/*<===== Decompression =====>*/ typedef struct { U16 tableLog; @@ -290,34 +290,39 @@ MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, con DStatePtr->table = dt + 1; } -MEM_STATIC size_t FSE_getStateValue(FSE_DState_t* DStatePtr) +MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) { - return DStatePtr->state; + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + return DInfo.symbol; } -MEM_STATIC BYTE FSE_peakSymbol(FSE_DState_t* DStatePtr) +MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) { - const FSE_decode_t DInfo = 
((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - return DInfo.symbol; + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.newState + lowBits; } MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) { - const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - const U32 nbBits = DInfo.nbBits; - BYTE symbol = DInfo.symbol; - size_t lowBits = BIT_readBits(bitD, nbBits); + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBits(bitD, nbBits); DStatePtr->state = DInfo.newState + lowBits; return symbol; } +/*! FSE_decodeSymbolFast() : + unsafe, only works if no symbol has a probability > 50% */ MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) { - const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - const U32 nbBits = DInfo.nbBits; - BYTE symbol = DInfo.symbol; - size_t lowBits = BIT_readBitsFast(bitD, nbBits); + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBitsFast(bitD, nbBits); DStatePtr->state = DInfo.newState + lowBits; return symbol; diff --git a/lib/zdict.c b/lib/zdict.c index 4c1ffb08..c99cabe1 100644 --- a/lib/zdict.c +++ b/lib/zdict.c @@ -611,10 +611,13 @@ static void ZDICT_countEStats(EStats_ress_t esr, if (*u32Ptr==0) offcode=0; offsetcodeCount[offcode]++; } + (void)matchlengthCount; (void)litlengthCount; + /* for(bytePtr = seqStore.matchLengthStart; bytePtr < seqStore.matchLength; bytePtr++) matchlengthCount[*bytePtr]++; for(bytePtr = seqStore.litLengthStart; bytePtr < 
seqStore.litLength; bytePtr++) litlengthCount[*bytePtr]++; + */ } static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 81db040c..5ae6d37d 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -648,14 +648,13 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog); LLtype = FSE_ENCODING_RAW; } else { - size_t NCountSize; size_t nbSeq_1 = nbSeq; const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return ERROR(GENERIC); - op += NCountSize; + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } FSE_buildCTable(CTable_LitLength, norm, max, tableLog); LLtype = FSE_ENCODING_DYNAMIC; } } @@ -675,14 +674,13 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_buildCTable_raw(CTable_OffsetBits, Offbits); Offtype = FSE_ENCODING_RAW; } else { - size_t NCountSize; size_t nbSeq_1 = nbSeq; const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); if (count[offCodeTable[nbSeq-1]]>1) { count[offCodeTable[nbSeq-1]]--; nbSeq_1--; } FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) return ERROR(GENERIC); - op += NCountSize; + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog); Offtype = FSE_ENCODING_DYNAMIC; } } @@ -744,8 
+742,8 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, offCodeTable[nbSeq-1]); FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); - BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]); BIT_addBits(&blockStream, offsetTable[nbSeq-1], offCodeTable[nbSeq-1] ? (offCodeTable[nbSeq-1]-1) : 0); + BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]); BIT_addBits(&blockStream, llTable[nbSeq-1], LL_bits[llCodeTable[nbSeq-1]]); BIT_flushBits(&blockStream); @@ -757,16 +755,12 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, const U32 nbBits = (offCode-1) + (!offCode); const BYTE LLCode = llCodeTable[n]; /* (7)*/ /* (7)*/ - FSE_encodeSymbol(&blockStream, &stateMatchLength, MLCode); /* 17 */ /* 17 */ - if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 25 */ /* 35 */ + FSE_encodeSymbol(&blockStream, &stateMatchLength, MLCode); /* 17 */ /* 17 */ FSE_encodeSymbol(&blockStream, &stateLitLength, LLCode); /* 16 */ /* 26 */ - if (MEM_32bits()) BIT_flushBits(&blockStream); /* 7 */ - //BIT_flushBits(&blockStream); /* 7 */ /* 7 */ - BIT_addBits(&blockStream, mlTable[n], ML_bits[MLCode]); BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 61 */ /* 24 bits max in 32-bits mode */ + BIT_addBits(&blockStream, mlTable[n], ML_bits[MLCode]); BIT_addBits(&blockStream, llTable[n], LL_bits[LLCode]); - //if (blockStream.bitPos > 63) printf("pb : blockStream.bitPos == %u > 63 \n", blockStream.bitPos); BIT_flushBits(&blockStream); /* 7 */ /* 7 */ } } diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 96b8846c..ee57b853 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -503,7 +503,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, @return : nb bytes read from src, or an error code if it fails, testable with 
ZSTD_isError() */ -FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 rawBits, U32 maxLog, +FORCE_INLINE size_t ZSTD_buildSeqTableOff(FSE_DTable* DTable, U32 type, U32 rawBits, U32 maxLog, const void* src, size_t srcSize) { U32 max = (1< max, data is corrupted */ return 1; case FSE_ENCODING_RAW : - FSE_buildDTable(DTable, LL_defaultNorm, max, LL_defaultNormLog); - return 0; - case FSE_ENCODING_STATIC: - return 0; - default : /* impossible */ - case FSE_ENCODING_DYNAMIC : - { U32 tableLog; - S16 norm[MaxSeq+1]; - size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); - if (FSE_isError(headerSize)) return ERROR(corruption_detected); - if (tableLog > maxLog) return ERROR(corruption_detected); - FSE_buildDTable(DTable, norm, max, tableLog); - return headerSize; - } } -} - - -FORCE_INLINE size_t ZSTD_buildSeqTableML(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog, - const void* src, size_t srcSize) -{ - switch(type) - { - case FSE_ENCODING_RLE : - if (!srcSize) return ERROR(srcSize_wrong); - if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected); - FSE_buildDTable_rle(DTable, *(const BYTE*)src); /* if *src > max, data is corrupted */ - return 1; - case FSE_ENCODING_RAW : - FSE_buildDTable(DTable, ML_defaultNorm, max, ML_defaultNormLog); + FSE_buildDTable(DTable, defaultNorm, max, defaultLog); return 0; case FSE_ENCODING_STATIC: return 0; @@ -619,15 +592,15 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ /* Build DTables */ - { size_t const bhSize = ZSTD_buildSeqTableLL(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip); + { size_t const bhSize = ZSTD_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog); if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); ip += bhSize; } - { size_t const bhSize = ZSTD_buildSeqTable(DTableOffb, Offtype, Offbits, 
OffFSELog, ip, iend-ip); + { size_t const bhSize = ZSTD_buildSeqTableOff(DTableOffb, Offtype, Offbits, OffFSELog, ip, iend-ip); if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); ip += bhSize; } - { size_t const bhSize = ZSTD_buildSeqTableML(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip); + { size_t const bhSize = ZSTD_buildSeqTable(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip, ML_defaultNorm, ML_defaultNormLog); if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); ip += bhSize; } @@ -654,7 +627,7 @@ typedef struct { static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) { /* Literal length */ - U32 const litCode = FSE_peakSymbol(&(seqState->stateLL)); + U32 const litCode = FSE_peekSymbol(&(seqState->stateLL)); { static const U32 LL_base[MaxLL+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, @@ -662,13 +635,23 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) seq->litLength = LL_base[litCode] + BIT_readBits(&(seqState->DStream), LL_bits[litCode]); } + /* MatchLength */ + { static const U32 ML_base[MaxML+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 0x400, 0x800, + 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 }; + U32 const mlCode = FSE_peekSymbol(&(seqState->stateML)); + seq->matchLength = ML_base[mlCode] + BIT_readBits(&(seqState->DStream), ML_bits[mlCode]) + mls; + } + /* Offset */ { static const U32 offsetPrefix[MaxOff+1] = { 1 /*fake*/, 1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, 0x400000, 0x800000, 0x1000000, 0x2000000, 0x4000000, /*fake*/ 1, 1, 1, 1 }; - U32 const offsetCode = FSE_peakSymbol(&(seqState->stateOffb)); /* <= maxOff, by table 
construction */ + U32 const offsetCode = FSE_peekSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */ U32 const nbBits = offsetCode ? offsetCode-1 : 0; size_t const offset = offsetCode ? offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits) : litCode ? seq->offset : seqState->prevOffset; @@ -677,33 +660,15 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); } - { static const U32 ML_base[MaxML+1] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 0x400, 0x800, - 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 }; - U32 const mlCode = FSE_peakSymbol(&(seqState->stateML)); - seq->matchLength = ML_base[mlCode] + BIT_readBits(&(seqState->DStream), ML_bits[mlCode]) + mls; - } - - /* ANS update */ - FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); /* update */ + /* ANS state update */ + FSE_updateState(&(seqState->stateLL), &(seqState->DStream)); if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */ + FSE_updateState(&(seqState->stateML), &(seqState->DStream)); if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); /* update */ + FSE_updateState(&(seqState->stateOffb), &(seqState->DStream)); if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - -#if 0 /* debug */ - { - static U64 totalDecoded = 0; - printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", - (U32)(totalDecoded), (U32)litLength, (U32)matchLength, (U32)offset); - totalDecoded += litLength + matchLength; - } -#endif } From 9c34df93b686531449df97ed78a8eb2b39e2c251 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 23 Mar 2016 01:54:25 +0100 Subject: [PATCH 10/45] new 
decodeSequence, merging parts --- lib/zstd_decompress.c | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index ee57b853..a854aaf7 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -627,48 +627,45 @@ typedef struct { static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) { /* Literal length */ - U32 const litCode = FSE_peekSymbol(&(seqState->stateLL)); - { static const U32 LL_base[MaxLL+1] = { + U32 const llCode = FSE_peekSymbol(&(seqState->stateLL)); + U32 const mlCode = FSE_peekSymbol(&(seqState->stateML)); + U32 const ofCode = FSE_peekSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */ + + U32 const llBits = LL_bits[llCode]; + U32 const mlBits = ML_bits[mlCode]; + U32 const ofBits = ofCode ? ofCode-1 : 0; + + static const U32 LL_base[MaxLL+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 }; - seq->litLength = LL_base[litCode] + BIT_readBits(&(seqState->DStream), LL_bits[litCode]); - } - /* MatchLength */ - { static const U32 ML_base[MaxML+1] = { + static const U32 ML_base[MaxML+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 }; - U32 const mlCode = FSE_peekSymbol(&(seqState->stateML)); - seq->matchLength = ML_base[mlCode] + BIT_readBits(&(seqState->DStream), ML_bits[mlCode]) + mls; - } - /* Offset */ - { static const U32 offsetPrefix[MaxOff+1] = { + static const U32 OF_base[MaxOff+1] = { 1 /*fake*/, 1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, 0x400000, 0x800000, 0x1000000, 
0x2000000, 0x4000000, /*fake*/ 1, 1, 1, 1 }; - U32 const offsetCode = FSE_peekSymbol(&(seqState->stateOffb)); /* <= maxOff, by table construction */ - U32 const nbBits = offsetCode ? offsetCode-1 : 0; - size_t const offset = offsetCode ? offsetPrefix[offsetCode] + BIT_readBits(&(seqState->DStream), nbBits) : - litCode ? seq->offset : seqState->prevOffset; - if (offsetCode | !litCode) seqState->prevOffset = seq->offset; /* cmove */ + + seq->litLength = LL_base[llCode] + BIT_readBits(&(seqState->DStream), llBits); + seq->matchLength = ML_base[mlCode] + BIT_readBits(&(seqState->DStream), mlBits) + mls; + + /* Offset */ + { size_t const offset = ofCode ? OF_base[ofCode] + BIT_readBits(&(seqState->DStream), ofBits) : + llCode ? seq->offset : seqState->prevOffset; + if (ofCode | !llCode) seqState->prevOffset = seq->offset; /* cmove */ seq->offset = offset; - if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); } /* ANS state update */ FSE_updateState(&(seqState->stateLL), &(seqState->DStream)); - if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - FSE_updateState(&(seqState->stateML), &(seqState->DStream)); - if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - FSE_updateState(&(seqState->stateOffb), &(seqState->DStream)); - if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); } From fd6922508695c64d850aafe676e2f94ca12f1501 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 23 Mar 2016 02:47:33 +0100 Subject: [PATCH 11/45] bench : added HIGH_PRIORTY --- programs/bench.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/programs/bench.c b/programs/bench.c index bc28e460..905f84ed 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -53,13 +53,17 @@ /* sleep : posix - windows - others */ #if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__))) -# include +# include /* sleep */ +# include /* setpriority */ # define BMK_sleep(s) sleep(s) +# define HIGH_PRIORITY 
setpriority(PRIO_PROCESS, 0, -20) #elif defined(_WIN32) # include # define BMK_sleep(s) Sleep(1000*s) +# define HIGH_PRIORITY SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS); #else # define BMK_sleep(s) /* disabled */ +# define HIGH_PRIORITY #endif #include "mem.h" @@ -207,6 +211,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, /* init */ if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* can only display 17 characters */ + HIGH_PRIORITY; /* Init blockTable data */ { const char* srcPtr = (const char*)srcBuffer; From afab02098ac3c25ed7c689336dbd2b5d2faeaf97 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 23 Mar 2016 13:57:49 +0100 Subject: [PATCH 12/45] improved decoding speed (660) --- lib/bitstream.h | 12 ++++++++++-- lib/zstd_decompress.c | 8 +++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/lib/bitstream.h b/lib/bitstream.h index d90c9b24..af9151ad 100644 --- a/lib/bitstream.h +++ b/lib/bitstream.h @@ -285,6 +285,14 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si return srcSize; } +MEM_STATIC size_t BIT_consumeFirstBits(size_t* bitDPtr, U32 const nbBits) +{ + static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ + size_t const result = *bitDPtr & mask[nbBits]; + *bitDPtr >>= nbBits; + return result; +} + /*! BIT_lookBits() : * Provides next n bits from local register. * local register is not modified (bits are still present for next read/look). 
@@ -318,7 +326,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) */ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) { - size_t value = BIT_lookBits(bitD, nbBits); + size_t const value = BIT_lookBits(bitD, nbBits); BIT_skipBits(bitD, nbBits); return value; } @@ -327,7 +335,7 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) * unsafe version; only works only if nbBits >= 1 */ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) { - size_t value = BIT_lookBitsFast(bitD, nbBits); + size_t const value = BIT_lookBitsFast(bitD, nbBits); BIT_skipBits(bitD, nbBits); return value; } diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index a854aaf7..92af2b35 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -652,16 +652,18 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) 0x8000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, 0x400000, 0x800000, 0x1000000, 0x2000000, 0x4000000, /*fake*/ 1, 1, 1, 1 }; - seq->litLength = LL_base[llCode] + BIT_readBits(&(seqState->DStream), llBits); - seq->matchLength = ML_base[mlCode] + BIT_readBits(&(seqState->DStream), mlBits) + mls; + size_t allBits = BIT_readBits(&(seqState->DStream), llBits+mlBits+ofBits); /* Offset */ - { size_t const offset = ofCode ? OF_base[ofCode] + BIT_readBits(&(seqState->DStream), ofBits) : + { size_t const offset = ofCode ? OF_base[ofCode] + BIT_consumeFirstBits(&allBits, ofBits) : llCode ? 
seq->offset : seqState->prevOffset; if (ofCode | !llCode) seqState->prevOffset = seq->offset; /* cmove */ seq->offset = offset; } + seq->matchLength = ML_base[mlCode] + BIT_consumeFirstBits(&allBits, mlBits) + mls; + seq->litLength = LL_base[llCode] + BIT_consumeFirstBits(&allBits, llBits); + /* ANS state update */ FSE_updateState(&(seqState->stateLL), &(seqState->DStream)); FSE_updateState(&(seqState->stateML), &(seqState->DStream)); From 2512597576d4a27608d1135bad2d49da122b3141 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 23 Mar 2016 14:00:09 +0100 Subject: [PATCH 13/45] minor reordering (DSpeed 665) --- lib/zstd_compress.c | 8 ++++---- lib/zstd_decompress.c | 11 +++++------ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 5ae6d37d..e2d42faf 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -742,9 +742,9 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, offCodeTable[nbSeq-1]); FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); - BIT_addBits(&blockStream, offsetTable[nbSeq-1], offCodeTable[nbSeq-1] ? (offCodeTable[nbSeq-1]-1) : 0); - BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]); BIT_addBits(&blockStream, llTable[nbSeq-1], LL_bits[llCodeTable[nbSeq-1]]); + BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]); + BIT_addBits(&blockStream, offsetTable[nbSeq-1], offCodeTable[nbSeq-1] ? 
(offCodeTable[nbSeq-1]-1) : 0); BIT_flushBits(&blockStream); { size_t n; @@ -758,9 +758,9 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 25 */ /* 35 */ FSE_encodeSymbol(&blockStream, &stateMatchLength, MLCode); /* 17 */ /* 17 */ FSE_encodeSymbol(&blockStream, &stateLitLength, LLCode); /* 16 */ /* 26 */ - BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 61 */ /* 24 bits max in 32-bits mode */ - BIT_addBits(&blockStream, mlTable[n], ML_bits[MLCode]); BIT_addBits(&blockStream, llTable[n], LL_bits[LLCode]); + BIT_addBits(&blockStream, mlTable[n], ML_bits[MLCode]); + BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 61 */ /* 24 bits max in 32-bits mode */ BIT_flushBits(&blockStream); /* 7 */ /* 7 */ } } diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 92af2b35..1de3da1e 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -635,6 +635,8 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) U32 const mlBits = ML_bits[mlCode]; U32 const ofBits = ofCode ? ofCode-1 : 0; + size_t allBits = BIT_readBits(&(seqState->DStream), llBits+mlBits+ofBits); + static const U32 LL_base[MaxLL+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, @@ -652,18 +654,15 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) 0x8000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, 0x400000, 0x800000, 0x1000000, 0x2000000, 0x4000000, /*fake*/ 1, 1, 1, 1 }; - size_t allBits = BIT_readBits(&(seqState->DStream), llBits+mlBits+ofBits); - - /* Offset */ + /* sequence */ + seq->litLength = LL_base[llCode] + BIT_consumeFirstBits(&allBits, llBits); + seq->matchLength = ML_base[mlCode] + BIT_consumeFirstBits(&allBits, mlBits) + mls; { size_t const offset = ofCode ? OF_base[ofCode] + BIT_consumeFirstBits(&allBits, ofBits) : llCode ? 
seq->offset : seqState->prevOffset; if (ofCode | !llCode) seqState->prevOffset = seq->offset; /* cmove */ seq->offset = offset; } - seq->matchLength = ML_base[mlCode] + BIT_consumeFirstBits(&allBits, mlBits) + mls; - seq->litLength = LL_base[llCode] + BIT_consumeFirstBits(&allBits, llBits); - /* ANS state update */ FSE_updateState(&(seqState->stateLL), &(seqState->DStream)); FSE_updateState(&(seqState->stateML), &(seqState->DStream)); From 3c017867decd1dcedfc9d8699bb6daa89e4d1434 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 23 Mar 2016 14:09:51 +0100 Subject: [PATCH 14/45] minor optimization (DSpeed 665) --- lib/bitstream.h | 11 +++++++++++ lib/zstd_decompress.c | 8 ++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/lib/bitstream.h b/lib/bitstream.h index af9151ad..f2ed51bc 100644 --- a/lib/bitstream.h +++ b/lib/bitstream.h @@ -285,6 +285,17 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si return srcSize; } +MEM_STATIC size_t BIT_getUpperBits(size_t bitD, U32 const start) +{ + return bitD >> start; +} + +MEM_STATIC size_t BIT_getNBits(size_t bitD, U32 const nbBits, U32 const start) +{ + static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ + return (bitD >> start) & mask[nbBits]; +} + MEM_STATIC size_t BIT_consumeFirstBits(size_t* bitDPtr, U32 const nbBits) { static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 1de3da1e..3abd6bbc 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -635,7 +635,7 @@ static void 
ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) U32 const mlBits = ML_bits[mlCode]; U32 const ofBits = ofCode ? ofCode-1 : 0; - size_t allBits = BIT_readBits(&(seqState->DStream), llBits+mlBits+ofBits); + size_t const allBits = BIT_readBits(&(seqState->DStream), llBits+mlBits+ofBits); static const U32 LL_base[MaxLL+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, @@ -655,9 +655,9 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) 0x800000, 0x1000000, 0x2000000, 0x4000000, /*fake*/ 1, 1, 1, 1 }; /* sequence */ - seq->litLength = LL_base[llCode] + BIT_consumeFirstBits(&allBits, llBits); - seq->matchLength = ML_base[mlCode] + BIT_consumeFirstBits(&allBits, mlBits) + mls; - { size_t const offset = ofCode ? OF_base[ofCode] + BIT_consumeFirstBits(&allBits, ofBits) : + seq->litLength = LL_base[llCode] + BIT_getNBits(allBits, llBits, 0); + seq->matchLength = ML_base[mlCode] + BIT_getNBits(allBits, mlBits, llBits) + mls; + { size_t const offset = ofCode ? OF_base[ofCode] + BIT_getUpperBits(allBits, llBits+mlBits) : llCode ? 
seq->offset : seqState->prevOffset; if (ofCode | !llCode) seqState->prevOffset = seq->offset; /* cmove */ seq->offset = offset; From 6cf45dac81a7e1c2168d6effdfab7bcc7995c1bb Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 23 Mar 2016 14:18:37 +0100 Subject: [PATCH 15/45] shared const BIT_mask --- lib/bitstream.h | 20 +++++++++----------- lib/zstd_decompress.c | 4 ++-- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/lib/bitstream.h b/lib/bitstream.h index f2ed51bc..62d2cb2b 100644 --- a/lib/bitstream.h +++ b/lib/bitstream.h @@ -141,7 +141,7 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); /*-************************************************************** -* Helper functions +* Internal functions ****************************************************************/ MEM_STATIC unsigned BIT_highbit32 (register U32 val) { @@ -165,6 +165,9 @@ MEM_STATIC unsigned BIT_highbit32 (register U32 val) # endif } +/*===== Local Constants =====*/ +static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ + /*-************************************************************** * bitStream encoding @@ -189,8 +192,7 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t ds Does not check for register overflow ! 
*/ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits) { - static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ - bitC->bitContainer |= (value & mask[nbBits]) << bitC->bitPos; + bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; bitC->bitPos += nbBits; } @@ -290,18 +292,14 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitD, U32 const start) return bitD >> start; } -MEM_STATIC size_t BIT_getNBits(size_t bitD, U32 const nbBits, U32 const start) +MEM_STATIC size_t BIT_getMiddleBits(size_t bitD, U32 const nbBits, U32 const start) { - static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ - return (bitD >> start) & mask[nbBits]; + return (bitD >> start) & BIT_mask[nbBits]; } -MEM_STATIC size_t BIT_consumeFirstBits(size_t* bitDPtr, U32 const nbBits) +MEM_STATIC size_t BIT_getLowerBits(size_t bitD, U32 const nbBits) { - static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ - size_t const result = *bitDPtr & mask[nbBits]; - *bitDPtr >>= nbBits; - return result; + return bitD & BIT_mask[nbBits]; } /*! 
BIT_lookBits() : diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 3abd6bbc..7749ffd3 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -655,8 +655,8 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) 0x800000, 0x1000000, 0x2000000, 0x4000000, /*fake*/ 1, 1, 1, 1 }; /* sequence */ - seq->litLength = LL_base[llCode] + BIT_getNBits(allBits, llBits, 0); - seq->matchLength = ML_base[mlCode] + BIT_getNBits(allBits, mlBits, llBits) + mls; + seq->litLength = LL_base[llCode] + BIT_getLowerBits(allBits, llBits); + seq->matchLength = ML_base[mlCode] + mls + BIT_getMiddleBits(allBits, mlBits, llBits); { size_t const offset = ofCode ? OF_base[ofCode] + BIT_getUpperBits(allBits, llBits+mlBits) : llCode ? seq->offset : seqState->prevOffset; if (ofCode | !llCode) seqState->prevOffset = seq->offset; /* cmove */ From 862a85976f84f0d6b2ef0f4ae21e0975d2ca3af4 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 23 Mar 2016 18:45:23 +0100 Subject: [PATCH 16/45] Added BMI instructions --- lib/bitstream.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/bitstream.h b/lib/bitstream.h index 62d2cb2b..749dc02f 100644 --- a/lib/bitstream.h +++ b/lib/bitstream.h @@ -292,9 +292,14 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitD, U32 const start) return bitD >> start; } +#include MEM_STATIC size_t BIT_getMiddleBits(size_t bitD, U32 const nbBits, U32 const start) { +#if defined(__BMI__) && defined(__GNUC__) + return __builtin_ia32_bextr_u64(bitD, (nbBits<<8) | start ); +#else return (bitD >> start) & BIT_mask[nbBits]; +#endif } MEM_STATIC size_t BIT_getLowerBits(size_t bitD, U32 const nbBits) @@ -309,10 +314,15 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitD, U32 const nbBits) * On 64-bits, maxNbBits==56. 
* @return : value extracted */ -MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) + MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) { +#if defined(__BMI__) && defined(__GNUC__) + return __builtin_ia32_bextr_u64(bitD->bitContainer, (nbBits<<8) | (64 - bitD->bitsConsumed - nbBits) ); +#else U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1; return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); + //return (bitD->bitContainer >> (64 - bitD->bitsConsumed - nbBits)) & BIT_mask[nbBits]; +#endif } /*! BIT_lookBitsFast() : From 72d706a020a3d3a1ed1ad214783e732ccad00ed9 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 23 Mar 2016 20:44:12 +0100 Subject: [PATCH 17/45] fixed crash at -O3 with customized block size (wrong alignment) --- lib/zstd_compress.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index e2d42faf..121a9fcd 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -219,6 +219,16 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->params = params; zc->blockSize = blockSize; + if (params.strategy == ZSTD_btopt) { + zc->seqStore.litFreq = (U32*)(zc->seqStore.buffer); + zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1); + zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1); + zc->seqStore.matchTable = (ZSTD_match_t*)((void*)(zc->seqStore.offCodeFreq + (1<seqStore.priceTable = (ZSTD_optimal_t*)((void*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1)); + zc->seqStore.buffer = zc->seqStore.priceTable + ZSTD_OPT_NUM+1; + zc->seqStore.litLengthSum = 0; + } zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer); zc->seqStore.litLengthStart = (U16*) (void*)(zc->seqStore.offsetStart + maxNbSeq); zc->seqStore.matchLengthStart = (U16*) (void*)(zc->seqStore.litLengthStart + maxNbSeq); @@ -226,15 
+236,6 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->seqStore.mlCodeStart = zc->seqStore.llCodeStart + maxNbSeq; zc->seqStore.offCodeStart = zc->seqStore.mlCodeStart + maxNbSeq; zc->seqStore.litStart = zc->seqStore.offCodeStart + maxNbSeq; - if (params.strategy == ZSTD_btopt) { - zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.litStart + blockSize)); - zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1); - zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1); - zc->seqStore.matchTable = (ZSTD_match_t*)((void*)(zc->seqStore.offCodeFreq + (1<seqStore.priceTable = (ZSTD_optimal_t*)((void*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1)); - zc->seqStore.litLengthSum = 0; - } zc->hbSize = 0; zc->stage = 0; @@ -632,7 +633,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, for (u=0; ulongLength; llTable[u] = (U16)ll; } - llCodeTable[u] = (ll>63) ? ZSTD_highbit(ll) + LL_deltaCode : LL_Code[ll]; + llCodeTable[u] = (ll>63) ? (BYTE)ZSTD_highbit(ll) + LL_deltaCode : LL_Code[ll]; } } /* CTable for Literal Lengths */ @@ -699,14 +700,14 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, for (u=0; ulongLength; mlTable[u] = (U16)ml; } - mlCodeTable[u] = (ml>127) ? ZSTD_highbit(ml) + ML_deltaCode : ML_Code[ml]; + mlCodeTable[u] = (ml>127) ? 
(BYTE)ZSTD_highbit(ml) + ML_deltaCode : ML_Code[ml]; } } /* CTable for MatchLengths */ { U32 max = MaxML; size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq); if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = *mlTable; + *op++ = *mlCodeTable; FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); MLtype = FSE_ENCODING_RLE; } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { From 7cbe79ab25cb3381fb2e249de8d6422d8b62565f Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 23 Mar 2016 22:31:57 +0100 Subject: [PATCH 18/45] fixed write overflow found by fuzzer --- lib/zstd_compress.c | 67 +++++++++++++++++++++++---------------------- programs/fuzzer.c | 4 +-- 2 files changed, 37 insertions(+), 34 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 121a9fcd..8d2eff0b 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -589,7 +589,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, U16* const mlTable = seqStorePtr->matchLengthStart; const U32* const offsetTable = seqStorePtr->offsetStart; const U32* const offsetTableEnd = seqStorePtr->offset; - BYTE* const offCodeTable = seqStorePtr->offCodeStart; + BYTE* const ofCodeTable = seqStorePtr->offCodeStart; BYTE* const llCodeTable = seqStorePtr->llCodeStart; BYTE* const mlCodeTable = seqStorePtr->mlCodeStart; BYTE* const ostart = (BYTE*)dst; @@ -607,7 +607,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, } /* Sequences Header */ - if ((oend-op) < MIN_SEQUENCES_SIZE) return ERROR(dstSize_tooSmall); + if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall); if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq; else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; @@ -661,12 +661,12 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, } } /* Offset codes */ - { size_t i; for (i=0; i 2)) { - *op++ = offCodeTable[0]; + *op++ = 
ofCodeTable[0]; FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); Offtype = FSE_ENCODING_RLE; } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { @@ -677,7 +677,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, } else { size_t nbSeq_1 = nbSeq; const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); - if (count[offCodeTable[nbSeq-1]]>1) { count[offCodeTable[nbSeq-1]]--; nbSeq_1--; } + if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; } FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ if (FSE_isError(NCountSize)) return ERROR(GENERIC); @@ -741,27 +741,30 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, /* first symbols */ FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); - FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, offCodeTable[nbSeq-1]); + FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); BIT_addBits(&blockStream, llTable[nbSeq-1], LL_bits[llCodeTable[nbSeq-1]]); BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]); - BIT_addBits(&blockStream, offsetTable[nbSeq-1], offCodeTable[nbSeq-1] ? (offCodeTable[nbSeq-1]-1) : 0); + BIT_addBits(&blockStream, offsetTable[nbSeq-1], ofCodeTable[nbSeq-1] ? 
(ofCodeTable[nbSeq-1]-1) : 0); BIT_flushBits(&blockStream); { size_t n; for (n=nbSeq-2 ; n 64 - 7 - 27) + BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, llTable[n], llBits); + BIT_addBits(&blockStream, mlTable[n], mlBits); + BIT_addBits(&blockStream, offsetTable[n], ofBits); /* 31 */ /* 61 */ /* 24 bits max in 32-bits mode */ BIT_flushBits(&blockStream); /* 7 */ /* 7 */ } } @@ -1997,7 +2000,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* zc, static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc, - void* dst, size_t dstSize, + void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 frame) { @@ -2006,10 +2009,10 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc, if (frame && (zc->stage==0)) { hbSize = zc->hbSize; - if (dstSize <= hbSize) return ERROR(dstSize_tooSmall); + if (dstCapacity <= hbSize) return ERROR(dstSize_tooSmall); zc->stage = 1; memcpy(dst, zc->headerBuffer, hbSize); - dstSize -= hbSize; + dstCapacity -= hbSize; dst = (char*)dst + hbSize; } @@ -2048,8 +2051,8 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc, zc->nextSrc = ip + srcSize; { size_t const cSize = frame ? 
- ZSTD_compress_generic (zc, dst, dstSize, src, srcSize) : - ZSTD_compressBlock_internal (zc, dst, dstSize, src, srcSize); + ZSTD_compress_generic (zc, dst, dstCapacity, src, srcSize) : + ZSTD_compressBlock_internal (zc, dst, dstCapacity, src, srcSize); if (ZSTD_isError(cSize)) return cSize; return cSize + hbSize; } @@ -2057,10 +2060,10 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc, size_t ZSTD_compressContinue (ZSTD_CCtx* zc, - void* dst, size_t dstSize, + void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - return ZSTD_compressContinue_internal(zc, dst, dstSize, src, srcSize, 1); + return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 1); } @@ -2283,18 +2286,18 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, /* Init */ { size_t const errorCode = ZSTD_compressBegin_advanced(ctx, dict, dictSize, params); - if(ZSTD_isError(errorCode)) return errorCode; } + if(ZSTD_isError(errorCode)) return errorCode; } /* body (compression) */ { size_t const oSize = ZSTD_compressContinue (ctx, op, dstCapacity, src, srcSize); - if(ZSTD_isError(oSize)) return oSize; - op += oSize; - dstCapacity -= oSize; } + if(ZSTD_isError(oSize)) return oSize; + op += oSize; + dstCapacity -= oSize; } /* Close frame */ { size_t const oSize = ZSTD_compressEnd(ctx, op, dstCapacity); - if(ZSTD_isError(oSize)) return oSize; - op += oSize; } + if(ZSTD_isError(oSize)) return oSize; + op += oSize; } return (op - ostart); } diff --git a/programs/fuzzer.c b/programs/fuzzer.c index d53586c1..29bf4861 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -488,12 +488,12 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 maxDuration, doub if (cSize > 3) { const size_t missing = (FUZ_rand(&lseed) % (cSize-2)) + 1; /* no problem, as cSize > 4 (frameHeaderSizer) */ const size_t tooSmallSize = cSize - missing; - static const U32 endMark = 0x4DC2B1A9; + const U32 endMark = 0x4DC2B1A9; memcpy(dstBuffer+tooSmallSize, &endMark, 4); errorCode = 
ZSTD_compressCCtx(ctx, dstBuffer, tooSmallSize, sampleBuffer, sampleSize, cLevel); CHECK(!ZSTD_isError(errorCode), "ZSTD_compressCCtx should have failed ! (buffer too small : %u < %u)", (U32)tooSmallSize, (U32)cSize); { U32 endCheck; memcpy(&endCheck, dstBuffer+tooSmallSize, 4); - CHECK(endCheck != endMark, "ZSTD_compressCCtx : dst buffer overflow"); } + CHECK(endCheck != endMark, "ZSTD_compressCCtx : dst buffer overflow"); } } /* frame header decompression test */ From b21ce15efe3175564ddda8bbdf287fd73474645c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 24 Mar 2016 01:27:55 +0100 Subject: [PATCH 19/45] minor variables isolation --- lib/bitstream.h | 25 +++++------- lib/zstd_decompress.c | 95 ++++++++++++++++++++----------------------- 2 files changed, 56 insertions(+), 64 deletions(-) diff --git a/lib/bitstream.h b/lib/bitstream.h index 749dc02f..40400680 100644 --- a/lib/bitstream.h +++ b/lib/bitstream.h @@ -122,7 +122,7 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. * A reload guarantee a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished. * Otherwise, it can be less than that, so proceed accordingly. -* Checking if DStream has reached its end can be performed with BIT_endOfDStream() +* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). 
*/ @@ -256,15 +256,13 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } if (srcSize >= sizeof(size_t)) { /* normal case */ - U32 contain32; bitD->start = (const char*)srcBuffer; bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t); bitD->bitContainer = MEM_readLEST(bitD->ptr); - contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; - if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */ - bitD->bitsConsumed = 8 - BIT_highbit32(contain32); + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ + bitD->bitsConsumed = 8 - BIT_highbit32(lastByte); } } else { - U32 contain32; bitD->start = (const char*)srcBuffer; bitD->ptr = bitD->start; bitD->bitContainer = *(const BYTE*)(bitD->start); @@ -278,9 +276,9 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8; default:; } - contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; - if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */ - bitD->bitsConsumed = 8 - BIT_highbit32(contain32); + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ + bitD->bitsConsumed = 8 - BIT_highbit32(lastByte); } bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8; } @@ -295,7 +293,7 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitD, U32 const start) #include MEM_STATIC size_t BIT_getMiddleBits(size_t bitD, U32 const nbBits, U32 const start) { -#if defined(__BMI__) && defined(__GNUC__) +#if defined(__BMI__) && defined(__GNUC__) /* experimental */ return __builtin_ia32_bextr_u64(bitD, (nbBits<<8) | start ); #else return (bitD >> start) & BIT_mask[nbBits]; @@ -316,12 +314,11 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitD, U32 const nbBits) 
*/ MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) { -#if defined(__BMI__) && defined(__GNUC__) +#if defined(__BMI__) && defined(__GNUC__) /* experimental */ return __builtin_ia32_bextr_u64(bitD->bitContainer, (nbBits<<8) | (64 - bitD->bitsConsumed - nbBits) ); #else U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1; return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); - //return (bitD->bitContainer >> (64 - bitD->bitsConsumed - nbBits)) & BIT_mask[nbBits]; #endif } @@ -339,8 +336,8 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) } /*! BIT_readBits() : - * Read next n bits from local register. - * pay attention to not read more than nbBits contained into local register. + * Read (consume) next n bits from local register and update. + * Pay attention to not read more than nbBits contained into local register. * @return : extracted value. */ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 7749ffd3..b4a2184e 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -560,50 +560,51 @@ FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 max, U3 } -size_t ZSTD_decodeSeqHeaders(int* nbSeq, +size_t ZSTD_decodeSeqHeaders(int* nbSeqPtr, FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, const void* src, size_t srcSize) { const BYTE* const istart = (const BYTE* const)src; - const BYTE* ip = istart; const BYTE* const iend = istart + srcSize; - U32 LLtype, Offtype, MLtype; + const BYTE* ip = istart; /* check */ if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong); /* SeqHead */ - *nbSeq = *ip++; - if (*nbSeq==0) return 1; - if (*nbSeq >= 0x7F) { - if (*nbSeq == 0xFF) - *nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; - else - *nbSeq = ((nbSeq[0]-0x80)<<8) + *ip++; + { int nbSeq = *ip++; + if (!nbSeq) { *nbSeqPtr=0; return 1; } + if (nbSeq >= 0x7F) { + if 
(nbSeq == 0xFF) + nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; + else + nbSeq = ((nbSeq-0x80)<<8) + *ip++; + } + *nbSeqPtr = nbSeq; } /* FSE table descriptors */ - LLtype = *ip >> 6; - Offtype = (*ip >> 4) & 3; - MLtype = (*ip >> 2) & 3; - ip++; + { U32 const LLtype = *ip >> 6; + U32 const Offtype = (*ip >> 4) & 3; + U32 const MLtype = (*ip >> 2) & 3; + ip++; - /* check */ - if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ + /* check */ + if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ - /* Build DTables */ - { size_t const bhSize = ZSTD_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog); - if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); - ip += bhSize; - } - { size_t const bhSize = ZSTD_buildSeqTableOff(DTableOffb, Offtype, Offbits, OffFSELog, ip, iend-ip); - if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); - ip += bhSize; - } - { size_t const bhSize = ZSTD_buildSeqTable(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip, ML_defaultNorm, ML_defaultNormLog); - if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); - ip += bhSize; - } + /* Build DTables */ + { size_t const bhSize = ZSTD_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog); + if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); + ip += bhSize; + } + { size_t const bhSize = ZSTD_buildSeqTableOff(DTableOffb, Offtype, Offbits, OffFSELog, ip, iend-ip); + if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); + ip += bhSize; + } + { size_t const bhSize = ZSTD_buildSeqTable(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip, ML_defaultNorm, ML_defaultNormLog); + if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); + ip += bhSize; + } } return ip-istart; } @@ -675,10 +676,8 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, 
const BYTE** litPtr, const BYTE* const litLimit_8, const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) { - static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ - static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */ BYTE* const oLitEnd = op + sequence.litLength; - const size_t sequenceLength = sequence.litLength + sequence.matchLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ BYTE* const oend_8 = oend-8; const BYTE* const litEnd = *litPtr + sequence.litLength; @@ -687,7 +686,7 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, /* check */ if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */ if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */ - if (litEnd > litLimit_8) return ERROR(corruption_detected); /* risk read beyond lit buffer */ + if (litEnd > litLimit_8) return ERROR(corruption_detected); /* over-read beyond lit buffer */ /* copy Literals */ ZSTD_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */ @@ -697,8 +696,7 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, /* copy Match */ if (sequence.offset > (size_t)(oLitEnd - base)) { /* offset beyond prefix */ - if (sequence.offset > (size_t)(oLitEnd - vBase)) - return ERROR(corruption_detected); + if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected); match = dictEnd - (base-match); if (match + sequence.matchLength <= dictEnd) { memmove(oLitEnd, match, sequence.matchLength); @@ -715,7 +713,9 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, /* match within prefix */ if (sequence.offset < 8) { /* close range match, overlap */ - const int sub2 = dec64table[sequence.offset]; + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* 
added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */ + int const sub2 = dec64table[sequence.offset]; op[0] = match[0]; op[1] = match[1]; op[2] = match[2]; @@ -892,7 +892,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, /* Loop on each block */ while (1) { size_t decodedSize=0; - size_t cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties); + size_t const cBlockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties); if (ZSTD_isError(cBlockSize)) return cBlockSize; ip += ZSTD_blockHeaderSize; @@ -992,7 +992,6 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, co { case ZSTDds_getFrameHeaderSize : { - /* get frame header size */ if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong); /* impossible */ dctx->headerSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_min); if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; @@ -1006,7 +1005,6 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, co } case ZSTDds_decodeFrameHeader: { - /* get frame header */ size_t result; memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_min, src, dctx->expected); result = ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize); @@ -1017,16 +1015,14 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, co } case ZSTDds_decodeBlockHeader: { - /* Decode block header */ blockProperties_t bp; - size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); - if (ZSTD_isError(blockSize)) return blockSize; + size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); + if (ZSTD_isError(cBlockSize)) return cBlockSize; if (bp.blockType == bt_end) { dctx->expected = 0; dctx->stage = ZSTDds_getFrameHeaderSize; - } - else { - dctx->expected = blockSize; + } else { + dctx->expected = cBlockSize; dctx->bType = bp.blockType; dctx->stage = ZSTDds_decompressBlock; } @@ -1113,7 +1109,7 @@ static size_t 
ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* dict, size_t dictSiz static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { size_t eSize; - U32 magic = MEM_readLE32(dict); + U32 const magic = MEM_readLE32(dict); if (magic != ZSTD_DICT_MAGIC) { /* pure content mode */ ZSTD_refDictContent(dctx, dict, dictSize); @@ -1136,12 +1132,11 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { - size_t errorCode; - errorCode = ZSTD_decompressBegin(dctx); - if (ZSTD_isError(errorCode)) return errorCode; + { size_t const errorCode = ZSTD_decompressBegin(dctx); + if (ZSTD_isError(errorCode)) return errorCode; } if (dict && dictSize) { - errorCode = ZSTD_decompress_insertDictionary(dctx, dict, dictSize); + size_t const errorCode = ZSTD_decompress_insertDictionary(dctx, dict, dictSize); if (ZSTD_isError(errorCode)) return ERROR(dictionary_corrupted); } From 646693e3be39ffeb40451499b7689edbd2ff8d1d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 24 Mar 2016 02:31:27 +0100 Subject: [PATCH 20/45] support for alternate offset (fusion) --- lib/zstd_compress.c | 14 +++++++------- lib/zstd_decompress.c | 19 +++++++++++-------- lib/zstd_internal.h | 2 +- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 8d2eff0b..f79109a1 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -661,7 +661,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, } } /* Offset codes */ - { size_t i; for (i=0; i 64 - 7 - 27) + if (ofBits + mlBits + llBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) BIT_flushBits(&blockStream); BIT_addBits(&blockStream, llTable[n], llBits); BIT_addBits(&blockStream, mlTable[n], mlBits); @@ -798,8 +798,8 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B static const BYTE* g_start = NULL; const U32 pos = 
(U32)(literals - g_start); if (g_start==NULL) g_start = literals; - if ((pos > 10354000) && (pos < 10355000)) - printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", + if ((pos > 23945000) && (pos < 23946800)) + printf("Cpos %6u :%4u literals & match %3u bytes at distance %6u \n", pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); #endif #if ZSTD_OPT_DEBUG == 3 @@ -818,7 +818,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B else *seqStorePtr->litLength++ = (U16)litLength; /* match offset */ - *(seqStorePtr->offset++) = (U32)offsetCode; + *(seqStorePtr->offset++) = (U32)offsetCode + 1; /* match Length */ if (matchCode>=65535) { *(seqStorePtr->matchLength++) = 65535; seqStorePtr->longLength = (U32)matchCode; } diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index b4a2184e..48614c35 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -634,9 +634,12 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) U32 const llBits = LL_bits[llCode]; U32 const mlBits = ML_bits[mlCode]; - U32 const ofBits = ofCode ? 
ofCode-1 : 0; + U32 const ofBits = ofCode; + U32 const totalBits = llBits+mlBits+ofBits; - size_t const allBits = BIT_readBits(&(seqState->DStream), llBits+mlBits+ofBits); + size_t const allBits = BIT_readBits(&(seqState->DStream), totalBits); + + if (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) BIT_reloadDStream(&(seqState->DStream)); static const U32 LL_base[MaxLL+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, @@ -650,10 +653,10 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 }; static const U32 OF_base[MaxOff+1] = { - 1 /*fake*/, 1, 2, 4, 8, 0x10, 0x20, 0x40, - 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, - 0x8000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, 0x400000, - 0x800000, 0x1000000, 0x2000000, 0x4000000, /*fake*/ 1, 1, 1, 1 }; + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, + 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, + 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, + 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, /*fake*/ 1, 1, 1, 1, 1 }; /* sequence */ seq->litLength = LL_base[llCode] + BIT_getLowerBits(allBits, llBits); @@ -792,8 +795,8 @@ static size_t ZSTD_decompressSequences( ZSTD_decodeSequence(&sequence, &seqState, mls); #if 0 /* for debug */ { U32 pos = (U32)(op-base); - if ((pos > 10354000) && (pos < 10355000)) - printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", + if ((pos > 23945280) && (pos < 23946797)) + printf("Dpos %6u : %3u literals & match %3u bytes at distance %6u \n", pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset); } #endif diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index fa6c93ca..4ce44399 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -110,7 +110,7 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; #define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */ #define MLFSELog 9 #define 
LLFSELog 9 -#define OffFSELog 9 +#define OffFSELog 8 #define FSE_ENCODING_RAW 0 #define FSE_ENCODING_RLE 1 From 433a5cce7ec75f421da8bbb677d07e36dd55e467 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 25 Mar 2016 11:43:48 +0100 Subject: [PATCH 21/45] improved decompression speed (680) --- lib/zstd_compress.c | 6 +++--- lib/zstd_decompress.c | 14 +++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index f79109a1..1c4d83c9 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -798,9 +798,9 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B static const BYTE* g_start = NULL; const U32 pos = (U32)(literals - g_start); if (g_start==NULL) g_start = literals; - if ((pos > 23945000) && (pos < 23946800)) - printf("Cpos %6u :%4u literals & match %3u bytes at distance %6u \n", - pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); + if ((pos > 15181500) && (pos < 15183150)) + printf("Cpos %6u :%4u literals & match %3u bytes at distance %6u \n", + pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); #endif #if ZSTD_OPT_DEBUG == 3 if (offsetCode == 0) seqStorePtr->realRepSum++; diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index 48614c35..f0b695a5 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -637,9 +637,7 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) U32 const ofBits = ofCode; U32 const totalBits = llBits+mlBits+ofBits; - size_t const allBits = BIT_readBits(&(seqState->DStream), totalBits); - - if (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) BIT_reloadDStream(&(seqState->DStream)); + //size_t const allBits = BIT_readBits(&(seqState->DStream), totalBits); static const U32 LL_base[MaxLL+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, @@ -659,13 +657,15 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) 0xFFFFFF, 
0x1FFFFFF, 0x3FFFFFF, /*fake*/ 1, 1, 1, 1, 1 }; /* sequence */ - seq->litLength = LL_base[llCode] + BIT_getLowerBits(allBits, llBits); - seq->matchLength = ML_base[mlCode] + mls + BIT_getMiddleBits(allBits, mlBits, llBits); - { size_t const offset = ofCode ? OF_base[ofCode] + BIT_getUpperBits(allBits, llBits+mlBits) : + { size_t const offset = ofCode ? OF_base[ofCode] + BIT_readBits(&(seqState->DStream), ofBits) : llCode ? seq->offset : seqState->prevOffset; if (ofCode | !llCode) seqState->prevOffset = seq->offset; /* cmove */ seq->offset = offset; } + seq->matchLength = ML_base[mlCode] + mls + ((mlCode>31) ? BIT_readBits(&(seqState->DStream), mlBits) : 0); + seq->litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBits(&(seqState->DStream), llBits) : 0); + + if (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) BIT_reloadDStream(&(seqState->DStream)); /* ANS state update */ FSE_updateState(&(seqState->stateLL), &(seqState->DStream)); @@ -795,7 +795,7 @@ static size_t ZSTD_decompressSequences( ZSTD_decodeSequence(&sequence, &seqState, mls); #if 0 /* for debug */ { U32 pos = (U32)(op-base); - if ((pos > 23945280) && (pos < 23946797)) + if ((pos > 15181500) && (pos < 15183150)) printf("Dpos %6u : %3u literals & match %3u bytes at distance %6u \n", pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset); } From b58c685cf783268f34e010d36e138a41f4466aec Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 25 Mar 2016 20:29:35 +0100 Subject: [PATCH 22/45] fixed : fullbench link to deprecate function (wrong prototype) --- programs/fullbench.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/programs/fullbench.c b/programs/fullbench.c index b4afcf11..c5c56496 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -119,7 +119,7 @@ static clock_t BMK_clockSpan( clock_t clockStart ) static size_t BMK_findMaxMem(U64 requiredMem) { - const size_t step = 64 MB; + size_t const step = 64 MB; void* testmem = NULL; 
requiredMem = (((requiredMem >> 26) + 1) << 26); @@ -183,15 +183,13 @@ size_t local_ZSTD_decodeLiteralsBlock(void* dst, size_t dstSize, void* buff2, co } extern size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr); -extern size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr, FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, const void* src, size_t srcSize); +extern size_t ZSTD_decodeSeqHeaders(int* nbSeq, FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, const void* src, size_t srcSize); size_t local_ZSTD_decodeSeqHeaders(void* dst, size_t dstSize, void* buff2, const void* src, size_t srcSize) { U32 DTableML[FSE_DTABLE_SIZE_U32(10)], DTableLL[FSE_DTABLE_SIZE_U32(10)], DTableOffb[FSE_DTABLE_SIZE_U32(9)]; /* MLFSELog, LLFSELog and OffFSELog are not public values */ - const BYTE* dumps; - size_t length; int nbSeq; (void)src; (void)srcSize; (void)dst; (void)dstSize; - return ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &length, DTableLL, DTableML, DTableOffb, buff2, g_cSize); + return ZSTD_decodeSeqHeaders(&nbSeq, DTableLL, DTableML, DTableOffb, buff2, g_cSize); } From a5b66e34c7c9601a56c2db127946db7da39fa700 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 26 Mar 2016 01:48:27 +0100 Subject: [PATCH 23/45] minor variable isolation and remove a goto --- programs/bench.c | 130 ++++++++++++++++++++++++----------------------- 1 file changed, 66 insertions(+), 64 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index 905f84ed..2bde4c03 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -239,17 +239,15 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1); /* Bench */ - { size_t cSize = 0; - double fastestC = 100000000., fastestD = 100000000.; - double ratio = 0.; - U64 crcCheck = 0; + { double fastestC = 100000000., fastestD = 100000000.; clock_t coolTime = clock(); U32 testNb; 
DISPLAY("\r%79s\r", ""); for (testNb = 1; testNb <= (g_nbIterations + !g_nbIterations); testNb++) { - int nbLoops; - clock_t clockStart, clockSpan; + size_t cSize; + double ratio = 0.; + clock_t clockStart; clock_t const clockLoop = g_nbIterations ? TIMELOOP_S * CLOCKS_PER_SEC : 10; /* overheat protection */ @@ -266,20 +264,21 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, clockStart = clock(); while (clock() == clockStart); clockStart = clock(); - - for (nbLoops = 0 ; BMK_clockSpan(clockStart) < clockLoop ; nbLoops++) { - U32 blockNb; - ZSTD_compressBegin_usingDict(refCtx, dictBuffer, dictBufferSize, cLevel); - for (blockNb=0; blockNb%10u (%5.3f),%6.1f MB/s ,%6.1f MB/s\r", testNb, displayName, (U32)srcSize, (U32)cSize, ratio, (double)srcSize / 1000000. / (fastestC / CLOCKS_PER_SEC), (double)srcSize / 1000000. / (fastestD / CLOCKS_PER_SEC) ); /* CRC Checking */ -_findError: - crcCheck = XXH64(resultBuffer, srcSize, 0); - if (crcOrig!=crcCheck) { - size_t u; - DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); - for (u=0; u u) break; - bacc += blockTable[segNb].srcSize; + { U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); + if (crcOrig!=crcCheck) { + size_t u; + DISPLAY("!!! WARNING !!! 
%14s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); + for (u=0; u u) break; + bacc += blockTable[segNb].srcSize; + } + pos = (U32)(u - bacc); + bNb = pos / (128 KB); + DISPLAY("(block %u, sub %u, pos %u) \n", segNb, bNb, pos); + break; } - pos = (U32)(u - bacc); - bNb = pos / (128 KB); - printf("(block %u, sub %u, pos %u) \n", segNb, bNb, pos); - break; - } - if (u==srcSize-1) { /* should never happen */ - printf("no difference detected\n"); - } } - break; - } /* if (crcOrig!=crcCheck) */ + if (u==srcSize-1) { /* should never happen */ + DISPLAY("no difference detected\n"); + } } + break; + } } /* CRC Checking */ #endif } /* for (testNb = 1; testNb <= (g_nbIterations + !g_nbIterations); testNb++) */ DISPLAY("%2i#\n", cLevel); @@ -407,23 +407,25 @@ static U64 BMK_getTotalFileSize(const char** fileNamesTable, unsigned nbFiles) return total; } +/*! BMK_loadFiles() : + Loads `buffer` with content of files listed within `fileNamesTable`. + At most, fills `buffer` entirely */ static void BMK_loadFiles(void* buffer, size_t bufferSize, size_t* fileSizes, - const char** fileNamesTable, unsigned const nbFiles) + const char** fileNamesTable, unsigned nbFiles) { size_t pos = 0; unsigned n; for (n=0; n bufferSize-pos) fileSize = bufferSize-pos; - readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f); - if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]); - pos += readSize; + if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n; /* buffer too small - stop after this file */ + { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f); + if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]); + pos += readSize; } fileSizes[n] = (size_t)fileSize; fclose(f); } From b9151406dece85691c52b09ce49b22e8477ee2a9 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 26 Mar 2016 17:18:11 +0100 Subject: [PATCH 24/45] fixed 32-bits 
compatibility --- lib/zstd_compress.c | 35 ++++++++++++++++++++--------------- lib/zstd_decompress.c | 25 ++++++++++++++----------- programs/bench.c | 5 +++-- 3 files changed, 37 insertions(+), 28 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 1c4d83c9..90857a82 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -744,38 +744,43 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); BIT_addBits(&blockStream, llTable[nbSeq-1], LL_bits[llCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); BIT_addBits(&blockStream, mlTable[nbSeq-1], ML_bits[mlCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); BIT_addBits(&blockStream, offsetTable[nbSeq-1], ofCodeTable[nbSeq-1]); BIT_flushBits(&blockStream); { size_t n; for (n=nbSeq-2 ; n 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) - BIT_flushBits(&blockStream); + FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ + FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */ + if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ + FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */ + if (MEM_32bits() || (ofBits+mlBits+llBits > 64-7-(LLFSELog+MLFSELog+OffFSELog))) + BIT_flushBits(&blockStream); /* (7)*/ BIT_addBits(&blockStream, llTable[n], llBits); + if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); BIT_addBits(&blockStream, mlTable[n], mlBits); - BIT_addBits(&blockStream, offsetTable[n], ofBits); /* 31 */ /* 61 */ /* 24 bits max in 32-bits mode */ - BIT_flushBits(&blockStream); /* 7 */ /* 7 */ + if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, offsetTable[n], ofBits); /* 31 */ + BIT_flushBits(&blockStream); /* (7)*/ } } FSE_flushCState(&blockStream, &stateMatchLength); FSE_flushCState(&blockStream, 
&stateOffsetBits); FSE_flushCState(&blockStream, &stateLitLength); - { size_t const streamSize = BIT_closeCStream(&blockStream); - if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ - op += streamSize; } - } + { size_t const streamSize = BIT_closeCStream(&blockStream); + if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ + op += streamSize; + } } /* check compressibility */ _check_compressibility: @@ -798,8 +803,8 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B static const BYTE* g_start = NULL; const U32 pos = (U32)(literals - g_start); if (g_start==NULL) g_start = literals; - if ((pos > 15181500) && (pos < 15183150)) - printf("Cpos %6u :%4u literals & match %3u bytes at distance %6u \n", + if ((pos > 200000000) && (pos < 200900000)) + printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n", pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); #endif #if ZSTD_OPT_DEBUG == 3 diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index f0b695a5..6457e4c8 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -637,8 +637,6 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) U32 const ofBits = ofCode; U32 const totalBits = llBits+mlBits+ofBits; - //size_t const allBits = BIT_readBits(&(seqState->DStream), totalBits); - static const U32 LL_base[MaxLL+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, @@ -657,20 +655,25 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, /*fake*/ 1, 1, 1, 1, 1 }; /* sequence */ - { size_t const offset = ofCode ? OF_base[ofCode] + BIT_readBits(&(seqState->DStream), ofBits) : + { size_t const offset = ofCode ? OF_base[ofCode] + BIT_readBits(&(seqState->DStream), ofBits) : /* <= 26 bits */ llCode ? 
seq->offset : seqState->prevOffset; + if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); if (ofCode | !llCode) seqState->prevOffset = seq->offset; /* cmove */ seq->offset = offset; } - seq->matchLength = ML_base[mlCode] + mls + ((mlCode>31) ? BIT_readBits(&(seqState->DStream), mlBits) : 0); - seq->litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBits(&(seqState->DStream), llBits) : 0); - if (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) BIT_reloadDStream(&(seqState->DStream)); + seq->matchLength = ML_base[mlCode] + mls + ((mlCode>31) ? BIT_readBits(&(seqState->DStream), mlBits) : 0); /* <= 16 bits */ + if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&(seqState->DStream)); + + seq->litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBits(&(seqState->DStream), llBits) : 0); /* <= 16 bits */ + if (MEM_32bits() || + (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&(seqState->DStream)); /* ANS state update */ - FSE_updateState(&(seqState->stateLL), &(seqState->DStream)); - FSE_updateState(&(seqState->stateML), &(seqState->DStream)); - FSE_updateState(&(seqState->stateOffb), &(seqState->DStream)); + FSE_updateState(&(seqState->stateLL), &(seqState->DStream)); /* <= 9 bits */ + FSE_updateState(&(seqState->stateML), &(seqState->DStream)); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); /* <= 18 bits */ + FSE_updateState(&(seqState->stateOffb), &(seqState->DStream)); /* <= 8 bits */ } @@ -795,8 +798,8 @@ static size_t ZSTD_decompressSequences( ZSTD_decodeSequence(&sequence, &seqState, mls); #if 0 /* for debug */ { U32 pos = (U32)(op-base); - if ((pos > 15181500) && (pos < 15183150)) - printf("Dpos %6u : %3u literals & match %3u bytes at distance %6u \n", + if ((pos > 200802300) && (pos < 200802400)) + printf("Dpos %6u :%5u literals & match %3u bytes at distance %6u \n", pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset); } #endif diff --git a/programs/bench.c 
b/programs/bench.c index 2bde4c03..0d18ea64 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -195,14 +195,13 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, size_t const blockSize = (g_blockSize ? g_blockSize : srcSize) + (!srcSize); /* avoid div by 0 */ U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t)); - const size_t maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ + size_t const maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ void* const compressedBuffer = malloc(maxCompressedSize); void* const resultBuffer = malloc(srcSize); ZSTD_CCtx* refCtx = ZSTD_createCCtx(); ZSTD_CCtx* ctx = ZSTD_createCCtx(); ZSTD_DCtx* refDCtx = ZSTD_createDCtx(); ZSTD_DCtx* dctx = ZSTD_createDCtx(); - U64 const crcOrig = XXH64(srcBuffer, srcSize, 0); U32 nbBlocks; /* checks */ @@ -240,6 +239,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, /* Bench */ { double fastestC = 100000000., fastestD = 100000000.; + U64 const crcOrig = XXH64(srcBuffer, srcSize, 0); clock_t coolTime = clock(); U32 testNb; @@ -306,6 +306,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, if (ZSTD_isError(regenSize)) { DISPLAY("ZSTD_decompress_usingPreparedDCtx() failed on block %u : %s \n", blockNb, ZSTD_getErrorName(regenSize)); + clockStart -= clockLoop+1; /* force immediate test end */ break; } blockTable[blockNb].resSize = regenSize; From 74bd11954b922d2b32f6840b371c15959ba5b6ea Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 26 Mar 2016 17:50:26 +0100 Subject: [PATCH 25/45] fixed ARM compatibility --- Makefile | 5 ++--- lib/bitstream.h | 9 ++++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 93d5e052..494f59dc 100644 --- a/Makefile +++ b/Makefile @@ -89,9 +89,8 @@ 
gpptest: clean $(MAKE) all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror" armtest: clean -# $(MAKE) -C $(ZSTDDIR) all CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror" $(MAKE) -C $(PRGDIR) datagen # use native, faster - $(MAKE) -C $(PRGDIR) test CC=arm-linux-gnueabi-gcc ZSTDRTTEST= MOREFLAGS=-static # MOREFLAGS="-Werror -static" + $(MAKE) -C $(PRGDIR) test CC=arm-linux-gnueabi-gcc ZSTDRTTEST= MOREFLAGS="-Werror -static" # for Travis CI arminstall: clean @@ -105,7 +104,7 @@ armtest-w-install: clean arminstall armtest ppctest: clean $(MAKE) -C $(PRGDIR) datagen # use native, faster - $(MAKE) -C $(PRGDIR) test CC=powerpc-linux-gnu-gcc ZSTDRTTEST= MOREFLAGS=-static # MOREFLAGS="-Werror -static" + $(MAKE) -C $(PRGDIR) test CC=powerpc-linux-gnu-gcc ZSTDRTTEST= MOREFLAGS="-Werror -static" # for Travis CI ppcinstall: clean diff --git a/lib/bitstream.h b/lib/bitstream.h index 40400680..fd114e55 100644 --- a/lib/bitstream.h +++ b/lib/bitstream.h @@ -53,6 +53,14 @@ extern "C" { #include "error_private.h" /* error codes and messages */ +/*========================================= +* Target specific +=========================================*/ +#if defined(__BMI__) && defined(__GNUC__) +# include /* support for bextr (experimental) */ +#endif + + /*-****************************************** * bitStream encoding API (write forward) ********************************************/ @@ -290,7 +298,6 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitD, U32 const start) return bitD >> start; } -#include MEM_STATIC size_t BIT_getMiddleBits(size_t bitD, U32 const nbBits, U32 const start) { #if defined(__BMI__) && defined(__GNUC__) /* experimental */ From b44be742447f258b4ecec2753abc9f8c373ba55c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 26 Mar 2016 20:52:14 +0100 Subject: [PATCH 26/45] Fixed zdict more dictionary compression tests --- lib/zdict.c | 96 ++++++++++++++++++++++++------------------- lib/zstd_compress.c | 93 
+++++++++++++++++++++++------------------ lib/zstd_internal.h | 3 +- programs/fileio.c | 27 ++++++------ programs/playTests.sh | 7 +++- 5 files changed, 130 insertions(+), 96 deletions(-) diff --git a/lib/zdict.c b/lib/zdict.c index c99cabe1..a643f4f5 100644 --- a/lib/zdict.c +++ b/lib/zdict.c @@ -574,7 +574,6 @@ static void ZDICT_fillNoise(void* buffer, size_t length) { unsigned acc = PRIME1; size_t p=0;; - for (p=0; p> 21); @@ -594,30 +593,37 @@ static void ZDICT_countEStats(EStats_ress_t esr, U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, const void* src, size_t srcSize) { - const BYTE* bytePtr; - const U32* u32Ptr; - seqStore_t seqStore; + const seqStore_t* seqStorePtr; if (srcSize > ZSTD_BLOCKSIZE_MAX) srcSize = ZSTD_BLOCKSIZE_MAX; /* protection vs large samples */ ZSTD_copyCCtx(esr.zc, esr.ref); ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize); - seqStore = ZSTD_copySeqStore(esr.zc); + seqStorePtr = ZSTD_getSeqStore(esr.zc); - /* count stats */ - for(bytePtr = seqStore.litStart; bytePtr < seqStore.lit; bytePtr++) - countLit[*bytePtr]++; - for(u32Ptr = seqStore.offsetStart; u32Ptr < seqStore.offset; u32Ptr++) { - BYTE offcode = (BYTE)ZSTD_highbit(*u32Ptr) + 1; - if (*u32Ptr==0) offcode=0; - offsetcodeCount[offcode]++; + /* literals stats */ + { const BYTE* bytePtr; + for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++) + countLit[*bytePtr]++; } - (void)matchlengthCount; (void)litlengthCount; - /* - for(bytePtr = seqStore.matchLengthStart; bytePtr < seqStore.matchLength; bytePtr++) - matchlengthCount[*bytePtr]++; - for(bytePtr = seqStore.litLengthStart; bytePtr < seqStore.litLength; bytePtr++) - litlengthCount[*bytePtr]++; - */ + + /* seqStats */ + { size_t const nbSeq = (size_t)(seqStorePtr->offset - seqStorePtr->offsetStart); + ZSTD_seqToCodes(seqStorePtr, nbSeq); + + { const BYTE* codePtr = seqStorePtr->offCodeStart; + size_t u; + for (u=0; umlCodeStart; + size_t u; + for 
(u=0; ullCodeStart; + size_t u; + for (u=0; u= 3) { - const U32 nb = 25; + U32 const nb = 25; + U32 const dictContentSize = ZDICT_dictSize(dictList); U32 u; - U32 dictContentSize = ZDICT_dictSize(dictList); DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize); DISPLAYLEVEL(3, "list %u best segments \n", nb); for (u=1; u<=nb; u++) { @@ -850,8 +862,7 @@ size_t ZDICT_trainFromBuffer_unsafe( } } } /* create dictionary */ - { - U32 dictContentSize = ZDICT_dictSize(dictList); + { U32 dictContentSize = ZDICT_dictSize(dictList); size_t hSize; BYTE* ptr; U32 u; @@ -896,31 +907,32 @@ size_t ZDICT_trainFromBuffer_unsafe( } +/* issue : samplesBuffer need to be followed by a noisy guard band. +* work around : duplicate the buffer, and add the noise */ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, ZDICT_params_t params) { - size_t sBuffSize; void* newBuff; - size_t result; + size_t sBuffSize; { unsigned u; for (u=0, sBuffSize=0; u no dictionary */ newBuff = malloc(sBuffSize + NOISELENGTH); if (!newBuff) return ERROR(memory_allocation); memcpy(newBuff, samplesBuffer, sBuffSize); ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */ - result = ZDICT_trainFromBuffer_unsafe(dictBuffer, dictBufferCapacity, + { size_t const result = ZDICT_trainFromBuffer_unsafe( + dictBuffer, dictBufferCapacity, newBuff, samplesSizes, nbSamples, params); - free(newBuff); - return result; + free(newBuff); + return result; } } -/* issue : samplesBuffer need to be followed by a noisy guard band. -* work around : duplicate the buffer, and add the noise ? 
*/ size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples) { diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 90857a82..9f2a28f5 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -127,9 +127,9 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) return 0; /* reserved as a potential error code in the future */ } -seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx) /* hidden interface */ +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */ { - return ctx->seqStore; + return &(ctx->seqStore); } @@ -569,11 +569,59 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc, ostart[4] = (BYTE)(cLitSize); break; } - return lhSize+cLitSize; } +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq) +{ + /* LL codes */ + { static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 16, 17, 17, 18, 18, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24 }; + const BYTE LL_deltaCode = 19; + U16* const llTable = seqStorePtr->litLengthStart; + BYTE* const llCodeTable = seqStorePtr->llCodeStart; + size_t u; + for (u=0; ulongLength; llTable[u] = (U16)ll; } + llCodeTable[u] = (ll>63) ? (BYTE)ZSTD_highbit(ll) + LL_deltaCode : LL_Code[ll]; + } } + + /* Offset codes */ + { const U32* const offsetTable = seqStorePtr->offsetStart; + BYTE* const ofCodeTable = seqStorePtr->offCodeStart; + size_t u; + for (u=0; umatchLengthStart; + BYTE* const mlCodeTable = seqStorePtr->mlCodeStart; + size_t u; + for (u=0; ulongLength; mlTable[u] = (U16)ml; } + mlCodeTable[u] = (ml>127) ? 
(BYTE)ZSTD_highbit(ml) + ML_deltaCode : ML_Code[ml]; + } } +} + + size_t ZSTD_compressSequences(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, size_t srcSize) @@ -619,22 +667,8 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, #define MIN_SEQ_FOR_DYNAMIC_FSE 64 #define MAX_SEQ_FOR_STATIC_FSE 1000 - /* LL codes */ - { static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 16, 17, 17, 18, 18, 19, 19, - 20, 20, 20, 20, 21, 21, 21, 21, - 22, 22, 22, 22, 22, 22, 22, 22, - 23, 23, 23, 23, 23, 23, 23, 23, - 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24 }; - const BYTE LL_deltaCode = 19; - size_t u; - for (u=0; ulongLength; llTable[u] = (U16)ll; } - llCodeTable[u] = (ll>63) ? (BYTE)ZSTD_highbit(ll) + LL_deltaCode : LL_Code[ll]; - } } + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr, nbSeq); /* CTable for Literal Lengths */ { U32 max = MaxLL; @@ -660,9 +694,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, LLtype = FSE_ENCODING_DYNAMIC; } } - /* Offset codes */ - { size_t i; for (i=0; i 2)) { @@ -686,23 +718,6 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, Offtype = FSE_ENCODING_DYNAMIC; } } - /* ML codes */ - { static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, - 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, - 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, - 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; - const BYTE ML_deltaCode = 36; - size_t u; - for (u=0; ulongLength; mlTable[u] = (U16)ml; } - mlCodeTable[u] = (ml>127) ? 
(BYTE)ZSTD_highbit(ml) + ML_deltaCode : ML_Code[ml]; - } } - /* CTable for MatchLengths */ { U32 max = MaxML; size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq); diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 4ce44399..ff271340 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -236,7 +236,8 @@ typedef struct { #endif } seqStore_t; -seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx); +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq); #endif /* ZSTD_CCOMMON_H_MODULE */ diff --git a/programs/fileio.c b/programs/fileio.c index 907d990a..ff77a8a9 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -333,28 +333,30 @@ static int FIO_compressFilename_internal(cRess_t ress, { FILE* srcFile = ress.srcFile; FILE* dstFile = ress.dstFile; - U64 filesize = 0; + U64 readsize = 0; U64 compressedfilesize = 0; size_t dictSize = ress.dictBufferSize; size_t sizeCheck, errorCode; ZSTD_parameters params; /* init */ - filesize = MAX(FIO_getFileSize(srcFileName),dictSize); - params = ZSTD_getParams(cLevel, filesize); - params.srcSize = filesize; + { U64 const filesize = FIO_getFileSize(srcFileName); + U64 const levelsize = MAX(FIO_getFileSize(srcFileName), dictSize); + params = ZSTD_getParams(cLevel, levelsize); + params.srcSize = filesize; + } if (g_maxWLog) if (params.windowLog > g_maxWLog) params.windowLog = g_maxWLog; errorCode = ZBUFF_compressInit_advanced(ress.ctx, ress.dictBuffer, ress.dictBufferSize, params); if (ZBUFF_isError(errorCode)) EXM_THROW(21, "Error initializing compression : %s", ZBUFF_getErrorName(errorCode)); /* Main compression loop */ - filesize = 0; + readsize = 0; while (1) { /* Fill input Buffer */ - size_t inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile); + size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile); if (inSize==0) break; - filesize += inSize; - DISPLAYUPDATE(2, 
"\rRead : %u MB ", (U32)(filesize>>20)); + readsize += inSize; + DISPLAYUPDATE(2, "\rRead : %u MB ", (U32)(readsize>>20)); { /* Compress using buffered streaming */ size_t usedInSize = inSize; @@ -371,13 +373,12 @@ static int FIO_compressFilename_internal(cRess_t ress, if (sizeCheck!=cSize) EXM_THROW(25, "Write error : cannot write compressed block into %s", dstFileName); compressedfilesize += cSize; } - DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", (U32)(filesize>>20), (double)compressedfilesize/filesize*100); + DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", (U32)(readsize>>20), (double)compressedfilesize/readsize*100); } /* End of Frame */ - { - size_t cSize = ress.dstBufferSize; - size_t result = ZBUFF_compressEnd(ress.ctx, ress.dstBuffer, &cSize); + { size_t cSize = ress.dstBufferSize; + size_t const result = ZBUFF_compressEnd(ress.ctx, ress.dstBuffer, &cSize); if (result!=0) EXM_THROW(26, "Compression error : cannot create frame end"); sizeCheck = fwrite(ress.dstBuffer, 1, cSize, dstFile); @@ -388,7 +389,7 @@ static int FIO_compressFilename_internal(cRess_t ress, /* Status */ DISPLAYLEVEL(2, "\r%79s\r", ""); DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n", - (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100); + (unsigned long long)readsize, (unsigned long long) compressedfilesize, (double)compressedfilesize/readsize*100); return 0; } diff --git a/programs/playTests.sh b/programs/playTests.sh index 444d91eb..aa0ffc3a 100755 --- a/programs/playTests.sh +++ b/programs/playTests.sh @@ -25,7 +25,7 @@ roundTripTest() { echo "\n**** simple tests **** " ./datagen > tmp -$ZSTD tmp +$ZSTD -f tmp $ZSTD -99 tmp && die "too large compression level undetected" $ZSTD tmp -c > tmpCompressed $ZSTD tmp --stdout > tmpCompressed @@ -71,6 +71,11 @@ echo "\n**** dictionary tests **** " ./datagen -g1M | md5sum > tmp1 ./datagen -g1M | $ZSTD -D tmpDict | $ZSTD -D tmpDict -dvq | md5sum > tmp2 diff -q 
tmp1 tmp2 +$ZSTD --train *.c *.h -o tmpDict +$ZSTD xxhash.c -D tmpDict -of tmp +$ZSTD -d tmp -D tmpDict -of result +diff xxhash.c result + echo "\n**** multiple files tests **** " From 21588e370be06d5a3e2292e75a8fb940d72e69e2 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 30 Mar 2016 16:50:44 +0200 Subject: [PATCH 27/45] changed validateParams() into checkParams() + adjustParams() --- lib/error_private.h | 1 + lib/error_public.h | 1 + lib/zbuff.c | 5 ++- lib/zstd_compress.c | 93 ++++++++++++++++++++++++++++++--------------- lib/zstd_static.h | 11 ++++-- 5 files changed, 77 insertions(+), 34 deletions(-) diff --git a/lib/error_private.h b/lib/error_private.h index ff0b829f..7bd03065 100644 --- a/lib/error_private.h +++ b/lib/error_private.h @@ -95,6 +95,7 @@ ERR_STATIC const char* ERR_getErrorName(size_t code) case PREFIX(prefix_unknown): return "Unknown frame descriptor"; case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode"; + case PREFIX(compressionParameter_unsupported): return "Compression parameter is out of bound"; case PREFIX(init_missing): return "Context should be init first"; case PREFIX(memory_allocation): return "Allocation error : not enough memory"; case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; diff --git a/lib/error_public.h b/lib/error_public.h index 073b8c6a..6fcf802e 100644 --- a/lib/error_public.h +++ b/lib/error_public.h @@ -47,6 +47,7 @@ typedef enum { ZSTD_error_prefix_unknown, ZSTD_error_frameParameter_unsupported, ZSTD_error_frameParameter_unsupportedBy32bits, + ZSTD_error_compressionParameter_unsupported, ZSTD_error_init_missing, ZSTD_error_memory_allocation, ZSTD_error_stage_wrong, diff --git a/lib/zbuff.c b/lib/zbuff.c index 386b47d5..6c06b543 100644 --- a/lib/zbuff.c +++ b/lib/zbuff.c @@ -128,7 +128,9 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, const 
void* dict, size_t dic { size_t neededInBuffSize; - ZSTD_validateParams(&params); + { size_t const errorCode = ZSTD_checkParams(params); + if (ZSTD_isError(errorCode)) return errorCode; } + ZSTD_adjustParams(&params, 0, dictSize); neededInBuffSize = (size_t)1 << params.windowLog; /* allocate buffers */ @@ -157,6 +159,7 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, const void* dict, size_t dic return 0; /* ready to go */ } + size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel) { return ZBUFF_compressInit_advanced(zbc, NULL, 0, ZSTD_getParams(compressionLevel, 0)); diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 9f2a28f5..9cac406d 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -133,35 +133,51 @@ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface * } +#define CLAMP(val,min,max) { if (val<min) val=min; else if (val>max) val=max; } +#define CLAMPCHECK(val,min,max) { if ((val<min) || (val>max)) return ERROR(compressionParameter_unsupported); } + +/** ZSTD_checkParams() : + ensure param values remain within authorized range. + @return : 0, or an error code if one value is beyond authorized range */ +size_t ZSTD_checkParams(ZSTD_parameters params) +{ + { U32 const windowLog_max = MEM_32bits() ? 25 : ZSTD_WINDOWLOG_MAX; /* 32 bits mode cannot flush > 24 bits */ + CLAMPCHECK(params.windowLog, ZSTD_WINDOWLOG_MIN, windowLog_max); } + CLAMPCHECK(params.contentLog, ZSTD_CONTENTLOG_MIN, ZSTD_CONTENTLOG_MAX); + CLAMPCHECK(params.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); + CLAMPCHECK(params.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); + { U32 const searchLengthMin = (params.strategy == ZSTD_btopt) ? ZSTD_SEARCHLENGTH_MIN : ZSTD_SEARCHLENGTH_MIN+1; + U32 const searchLengthMax = (params.strategy == ZSTD_fast) ? 
ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1; + CLAMPCHECK(params.searchLength, searchLengthMin, searchLengthMax); } + CLAMPCHECK(params.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); + CLAMPCHECK((U32)(params.strategy), 0, (U32)ZSTD_btopt); + return 0; +} + + static unsigned ZSTD_highbit(U32 val); -#define CLAMP(val,min,max) { if (val<min) val=min; else if (val>max) val=max; } - -/** ZSTD_validateParams() : - correct params value to remain within authorized range, - optimize for `srcSize` if srcSize > 0 */ -void ZSTD_validateParams(ZSTD_parameters* params) +/** ZSTD_adjustParams() : + optimize params for a given input (`srcSize` and `dictSize`). + mostly downsizing to reduce memory consumption and initialization. + Both `srcSize` and `dictSize` are optional (use 0 if unknown), + but if both are 0, no optimization can be done. + Note : params is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */ +void ZSTD_adjustParams(ZSTD_parameters* params, size_t srcSize, size_t dictSize) { - /* validate params */ - if (MEM_32bits()) if (params->windowLog > 25) params->windowLog = 25; /* 32 bits mode cannot flush > 24 bits */ - CLAMP(params->windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); - CLAMP(params->contentLog, ZSTD_CONTENTLOG_MIN, ZSTD_CONTENTLOG_MAX); - CLAMP(params->hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); - CLAMP(params->searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); - { U32 const searchLengthMin = (params->strategy == ZSTD_btopt) ? ZSTD_SEARCHLENGTH_MIN : ZSTD_SEARCHLENGTH_MIN+1; - U32 const searchLengthMax = (params->strategy == ZSTD_fast) ? 
ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1; - CLAMP(params->searchLength, searchLengthMin, searchLengthMax); } - CLAMP(params->targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); - if ((U32)params->strategy>(U32)ZSTD_btopt) params->strategy = ZSTD_btopt; + if (srcSize+dictSize == 0) return; /* no size information available : no adjustment */ /* resize params, to use less memory when necessary */ - if ((params->srcSize > 0) && (params->srcSize < (1<<ZSTD_WINDOWLOG_MAX))) { - U32 const srcLog = ZSTD_highbit((U32)(params->srcSize)-1) + 1; - if (params->windowLog > srcLog) params->windowLog = srcLog; - } + { size_t const minSrcSize = (srcSize==0) ? 500 : 0; + size_t const rSize = srcSize + dictSize + minSrcSize; + if (rSize < (1<<ZSTD_WINDOWLOG_MAX)) { + U32 const srcLog = ZSTD_highbit((U32)(rSize)-1) + 1; + if (params->windowLog > srcLog) params->windowLog = srcLog; + } } if (params->hashLog > params->windowLog) params->hashLog = params->windowLog; - { U32 const btPlus = (params->strategy == ZSTD_btlazy2) || (params->strategy == ZSTD_btopt); - if (params->contentLog > params->windowLog+btPlus) params->contentLog = params->windowLog+btPlus; } /* <= ZSTD_CONTENTLOG_MAX */ + { U32 const btPlus = (params->strategy == ZSTD_btlazy2) || (params->strategy == ZSTD_btopt); + U32 const maxContentLog = params->windowLog+btPlus; + if (params->contentLog > maxContentLog) params->contentLog = maxContentLog; } /* <= ZSTD_CONTENTLOG_MAX */ if (params->windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) params->windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ } @@ -2194,9 +2210,9 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, si if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize); /* known magic number : dict is parsed for entropy stats and content */ - { size_t const eSize = ZSTD_loadDictEntropyStats(zc, (const char*)dict+4 /* skip magic */, dictSize-4) + 4; - if (ZSTD_isError(eSize)) return eSize; - return ZSTD_loadDictionaryContent(zc, (const char*)dict+eSize, dictSize-eSize); + { size_t const eSize = ZSTD_loadDictEntropyStats(zc, (const 
char*)dict+4 /* skip magic */, dictSize-4) + 4; + if (ZSTD_isError(eSize)) return eSize; + return ZSTD_loadDictionaryContent(zc, (const char*)dict+eSize, dictSize-eSize); } } @@ -2207,7 +2223,11 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc, const void* dict, size_t dictSize, ZSTD_parameters params) { - ZSTD_validateParams(&params); + /* compression parameters verification and optimization */ + { size_t const errorCode = ZSTD_checkParams(params); + if (ZSTD_isError(errorCode)) return errorCode; } + + ZSTD_adjustParams(&params, 0, dictSize); { size_t const errorCode = ZSTD_resetCCtx_advanced(zc, params); if (ZSTD_isError(errorCode)) return errorCode; } @@ -2295,7 +2315,7 @@ size_t ZSTD_compress_usingPreparedCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* prepare } -size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, +static size_t ZSTD_compress_internal (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict,size_t dictSize, @@ -2322,16 +2342,29 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, return (op - ostart); } +size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params) +{ + size_t const errorCode = ZSTD_checkParams(params); + if (ZSTD_isError(errorCode)) return errorCode; + return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); +} + size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel) { + ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize+dictSize); ZSTD_LOG_BLOCK("%p: ZSTD_compress_usingDict srcSize=%d dictSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, (int)dictSize, compressionLevel); - return ZSTD_compress_advanced(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, ZSTD_getParams(compressionLevel, srcSize)); + ZSTD_adjustParams(&params, 
srcSize, dictSize); + return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); } size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) { ZSTD_LOG_BLOCK("%p: ZSTD_compressCCtx srcSize=%d compressionLevel=%d\n", ctx->base, (int)srcSize, compressionLevel); - return ZSTD_compress_advanced(ctx, dst, dstCapacity, src, srcSize, NULL, 0, ZSTD_getParams(compressionLevel, srcSize)); + return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); } size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) diff --git a/lib/zstd_static.h b/lib/zstd_static.h index 4ae771fd..db962405 100644 --- a/lib/zstd_static.h +++ b/lib/zstd_static.h @@ -95,9 +95,14 @@ ZSTDLIB_API unsigned ZSTD_maxCLevel (void); * `srcSize` value is optional, select 0 if not known */ ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSize); -/*! ZSTD_validateParams() : -* correct params value to remain within authorized range */ -ZSTDLIB_API void ZSTD_validateParams(ZSTD_parameters* params); +/*! ZSTD_checkParams() : +* Ensure param values remain within authorized range */ +ZSTDLIB_API size_t ZSTD_checkParams(ZSTD_parameters params); + +/*! ZSTD_adjustParams() : +* optimize params for a given `srcSize` and `dictSize`. +* both values are optional, select `0` if unknown. */ +ZSTDLIB_API void ZSTD_adjustParams(ZSTD_parameters* params, size_t srcSize, size_t dictSize); /*! 
ZSTD_compress_advanced() : * Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter */ From 3b71925c2db586b18cacc0f77aa0e9ab76603be2 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 30 Mar 2016 19:48:05 +0200 Subject: [PATCH 28/45] separate params into compressionParams and frameParams --- lib/zbuff.c | 28 ++-- lib/zbuff_static.h | 4 +- lib/zdict.c | 19 ++- lib/zstd_compress.c | 355 ++++++++++++++++++++++-------------------- lib/zstd_opt.h | 20 +-- lib/zstd_static.h | 22 ++- programs/fileio.c | 16 +- programs/playTests.sh | 6 + 8 files changed, 257 insertions(+), 213 deletions(-) diff --git a/lib/zbuff.c b/lib/zbuff.c index 6c06b543..e26072d8 100644 --- a/lib/zbuff.c +++ b/lib/zbuff.c @@ -124,14 +124,14 @@ size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc) /* *** Initialization *** */ -size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, ZSTD_parameters params) +size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, + const void* dict, size_t dictSize, + ZSTD_parameters params, U64 pledgedSrcSize) { size_t neededInBuffSize; - { size_t const errorCode = ZSTD_checkParams(params); - if (ZSTD_isError(errorCode)) return errorCode; } - ZSTD_adjustParams(&params, 0, dictSize); - neededInBuffSize = (size_t)1 << params.windowLog; + ZSTD_adjustCParams(&params.cParams, pledgedSrcSize, dictSize); + neededInBuffSize = (size_t)1 << params.cParams.windowLog; /* allocate buffers */ if (zbc->inBuffSize < neededInBuffSize) { @@ -148,7 +148,7 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, const void* dict, size_t dic if (zbc->outBuff == NULL) return ERROR(memory_allocation); } - zbc->outBuffContentSize = ZSTD_compressBegin_advanced(zbc->zc, dict, dictSize, params); + zbc->outBuffContentSize = ZSTD_compressBegin_advanced(zbc->zc, dict, dictSize, params, pledgedSrcSize); if (ZSTD_isError(zbc->outBuffContentSize)) return zbc->outBuffContentSize; zbc->inToCompress = 0; @@ -160,15 +160,17 @@ size_t 
ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, const void* dict, size_t dic } -size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel) -{ - return ZBUFF_compressInit_advanced(zbc, NULL, 0, ZSTD_getParams(compressionLevel, 0)); -} - - ZSTDLIB_API size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel) { - return ZBUFF_compressInit_advanced(zbc, dict, dictSize, ZSTD_getParams(compressionLevel, 0)); + ZSTD_parameters params; + params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + params.fParams.contentSizeFlag = 0; + return ZBUFF_compressInit_advanced(zbc, dict, dictSize, params, 0); +} + +size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel) +{ + return ZBUFF_compressInitDictionary(zbc, NULL, 0, compressionLevel); } diff --git a/lib/zbuff_static.h b/lib/zbuff_static.h index 40550890..9fb522e5 100644 --- a/lib/zbuff_static.h +++ b/lib/zbuff_static.h @@ -51,7 +51,9 @@ extern "C" { /* ************************************* * Advanced Streaming functions ***************************************/ -ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params); +ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_parameters params, U64 pledgedSrcSize); #if defined (__cplusplus) diff --git a/lib/zdict.c b/lib/zdict.c index a643f4f5..122ac8cb 100644 --- a/lib/zdict.c +++ b/lib/zdict.c @@ -626,6 +626,7 @@ static void ZDICT_countEStats(EStats_ress_t esr, } } } +/* static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles) { unsigned u; @@ -634,6 +635,15 @@ static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles) if (max < fileSizes[u]) max = fileSizes[u]; return max; } +*/ + +static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles) +{ + size_t total; + unsigned u; + for (u=0, total=0; u