Merge branch 'dev' into flexibleLevel
commit 9945e60ac4
@@ -801,10 +801,12 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
 </b>/* These parameters are only useful if multi-threading is enabled (ZSTD_MULTITHREAD).<b>
  * They return an error otherwise. */
 ZSTD_p_nbWorkers=400,    </b>/* Select how many threads will be spawned to compress in parallel.<b>
-                          * Triggers asynchronous mode, even with nbWorkers = 1.
-                          * Can only be set to a value >= 1 if ZSTD_MULTITHREAD is enabled.
-                          * More threads improve speed, but also increase memory usage.
-                          * Default value is `0`, aka "blocking mode" : no worker is spawned, compression is performed inside Caller's thread */
+                          * When nbWorkers >= 1, triggers asynchronous mode :
+                          * ZSTD_compress_generic() consumes some input, flushes some output if possible, and immediately gives back control to the caller,
+                          * while compression work is performed in parallel, within worker threads.
+                          * (note : a strong exception to this rule is when first invocation sets ZSTD_e_end : it becomes a blocking call).
+                          * More workers improve speed, but also increase memory usage.
+                          * Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */
 ZSTD_p_jobSize,          </b>/* Size of a compression job. This value is only enforced in streaming (non-blocking) mode.<b>
                           * Each compression job is completed in parallel, so indirectly controls the nb of active threads.
                           * 0 means default, which is dynamically determined based on compression parameters.
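
The asynchronous behavior this hunk documents can be exercised with a short driver. Below is a hedged sketch against the v1.3.x advanced API (ZSTD_compress_generic and ZSTD_p_nbWorkers, both present in this branch); it assumes a ZSTD_MULTITHREAD build and a large enough destination buffer, the helper name and the fixed worker count are illustrative, and error handling is trimmed:

#include <zstd.h>   /* ZSTD_createCCtx, ZSTD_CCtx_setParameter, ZSTD_compress_generic */

/* compress_mt() : hypothetical helper, compresses src in one shot with 2 workers */
size_t compress_mt(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    ZSTD_CCtx_setParameter(cctx, ZSTD_p_nbWorkers, 2);   /* 0 would keep single-threaded mode */
    {   ZSTD_inBuffer input = { src, srcSize, 0 };
        ZSTD_outBuffer output = { dst, dstCapacity, 0 };
        size_t remaining;
        do {   /* first call with ZSTD_e_end is the noted exception : it blocks until done */
            remaining = ZSTD_compress_generic(cctx, &output, &input, ZSTD_e_end);
        } while (remaining != 0 && !ZSTD_isError(remaining));
        ZSTD_freeCCtx(cctx);
        return ZSTD_isError(remaining) ? remaining : output.pos;
    }
}
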
@@ -854,8 +856,10 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
 </b></pre><BR>
 <pre><b>size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value);
 </b><p>  Set one compression parameter, selected by enum ZSTD_cParameter.
+  Setting a parameter is generally only possible during frame initialization (before starting compression),
+  except for a few exceptions which can be updated during compression: compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy.
   Note : when `value` is an enum, cast it to unsigned for proper type checking.
-  @result : informational value (typically, the one being set, possibly corrected),
+  @result : informational value (typically, value being set, clamped correctly),
   or an error code (which can be tested with ZSTD_isError()).
 </p></pre><BR>
 
@@ -1025,9 +1029,10 @@ size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t
 <pre><b>size_t ZSTD_CCtx_setParametersUsingCCtxParams(
         ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
 </b><p>  Apply a set of ZSTD_CCtx_params to the compression context.
-  This must be done before the dictionary is loaded.
-  The pledgedSrcSize is treated as unknown.
-  Multithreading parameters are applied only if nbWorkers >= 1.
+  This can be done even after compression is started,
+  if nbWorkers==0, this will have no impact until a new compression is started.
+  if nbWorkers>=1, new parameters will be picked up at next job,
+  with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated).
 
 </p></pre><BR>
 
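
A hedged sketch of the two-step flow this hunk documents, using the v1.3.x experimental API (ZSTD_createCCtxParams, ZSTD_CCtxParam_setParameter, ZSTD_CCtx_setParametersUsingCCtxParams, ZSTD_freeCCtxParams); the parameter choices are illustrative:

#include <zstd.h>

/* Build a reusable parameter set once, then apply it to a live context. */
static size_t apply_params(ZSTD_CCtx* cctx)
{
    ZSTD_CCtx_params* const params = ZSTD_createCCtxParams();
    size_t err;
    ZSTD_CCtxParam_setParameter(params, ZSTD_p_compressionLevel, 19);
    ZSTD_CCtxParam_setParameter(params, ZSTD_p_nbWorkers, 2);   /* picked up at next job if already running */
    err = ZSTD_CCtx_setParametersUsingCCtxParams(cctx, params);
    ZSTD_freeCCtxParams(params);
    return err;   /* test with ZSTD_isError() */
}
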
@@ -139,8 +139,8 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
     {   U32 u;
         for (u=0; u<tableSize; u++) {
             FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
-            U16 nextState = symbolNext[symbol]++;
-            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+            U32 const nextState = symbolNext[symbol]++;
+            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
             tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
     }   }
 
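
Outside the diff, the arithmetic in this loop is easy to check in isolation: a symbol whose counter has reached nextState decodes with tableLog - highbit(nextState) bits, so low states consume more bits than high ones. A minimal, self-contained sketch (highbit32 is a stand-in for BIT_highbit32; values are illustrative):

#include <stdio.h>

static unsigned highbit32(unsigned v)   /* position of highest set bit, like BIT_highbit32() */
{
    unsigned r = 0;
    while (v >>= 1) r++;
    return r;
}

int main(void)
{
    unsigned const tableLog  = 6;              /* tableSize = 64 */
    unsigned const tableSize = 1u << tableLog;
    unsigned nextState;
    for (nextState = 32; nextState <= 64; nextState += 16) {
        unsigned const nbBits   = tableLog - highbit32(nextState);
        unsigned const newState = (nextState << nbBits) - tableSize;   /* as in the loop above */
        printf("nextState=%2u -> nbBits=%u newState=%2u\n", nextState, nbBits, newState);
    }
    return 0;   /* prints : 32->1,0  48->1,32  64->0,0 */
}
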
@@ -1247,32 +1247,44 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long
 
 
 #define ZSTD_ROWSIZE 16
-/*! ZSTD_reduceTable_internal() :
- *  reduce table indexes by `reducerValue`
- *  presume table size is a multiple of ZSTD_ROWSIZE.
- *  Helps auto-vectorization */
-static void ZSTD_reduceTable_internal (U32* const table, int const nbRows, U32 const reducerValue)
+/*! ZSTD_reduceTable() :
+ *  reduce table indexes by `reducerValue`, or squash to zero.
+ *  `preserveMark` preserves the "unsorted mark" for the btlazy2 strategy.
+ *  It must be set to a literal 0/1 value, so the branch is removed during inlining.
+ *  Presume table size is a multiple of ZSTD_ROWSIZE
+ *  to help auto-vectorization. */
+FORCE_INLINE_TEMPLATE void
+ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
 {
+    int const nbRows = (int)size / ZSTD_ROWSIZE;
     int cellNb = 0;
     int rowNb;
+    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
+    assert(size < (1U<<31));                 /* can be cast to int */
     for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
         int column;
         for (column=0; column<ZSTD_ROWSIZE; column++) {
+            if (preserveMark) {
+                U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
+                table[cellNb] += adder;
+            }
             if (table[cellNb] < reducerValue) table[cellNb] = 0;
             else table[cellNb] -= reducerValue;
             cellNb++;
     }   }
 }
 
-/*! ZSTD_reduceTable() :
- *  reduce table indexes by `reducerValue` */
-static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue)
+static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
 {
-    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
-    assert(size < (1U<<31));                 /* can be casted to int */
-    ZSTD_reduceTable_internal(table, size/ZSTD_ROWSIZE, reducerValue);
+    ZSTD_reduceTable_internal(table, size, reducerValue, 0);
 }
 
+static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
+{
+    ZSTD_reduceTable_internal(table, size, reducerValue, 1);
+}
+
 
 /*! ZSTD_ldm_reduceTable() :
  *  reduce table indexes by `reducerValue` */
 static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
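
The preserveMark mechanics folded in here (previously a separate ZSTD_preserveUnsortedMark() pass, see the zstd_lazy.c hunk below) reduce to a simple rule: indexes below reducerValue squash to zero, larger ones shift down, and with preserveMark set the unsorted mark is pre-bumped so the combined operation maps it back onto itself. A hedged, self-contained toy model (names are illustrative, not zstd's):

#include <stdio.h>

#define UNSORTED_MARK 1   /* stand-in for ZSTD_DUBT_UNSORTED_MARK */

static unsigned reduce1(unsigned idx, unsigned reducer, int preserveMark)
{
    if (preserveMark && (idx == UNSORTED_MARK))
        idx += reducer;                      /* pre-bump : the mark survives the subtraction */
    return (idx < reducer) ? 0 : idx - reducer;
}

int main(void)
{
    unsigned const reducer = 4096;
    printf("%u\n", reduce1(1, reducer, 0));      /* 0 : mark squashed */
    printf("%u\n", reduce1(1, reducer, 1));      /* 1 : mark preserved */
    printf("%u\n", reduce1(5000, reducer, 1));   /* 904 : regular index, shifted down */
    return 0;
}
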
@@ -1297,8 +1309,9 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
     if (zc->appliedParams.cParams.strategy != ZSTD_fast) {
         U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog;
         if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2)
-            ZSTD_preserveUnsortedMark(ms->chainTable, chainSize, reducerValue);
-        ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
+            ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
+        else
+            ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
     }
 
     if (ms->hashLog3) {
@@ -33,6 +33,12 @@ extern "C" {
 #define kSearchStrength      8
 #define HASH_READ_SIZE       8
 #define ZSTD_CLEVEL_CUSTOM 999
+#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index 1 now means "unsorted".
+                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
+                                       It's not a big deal though : the candidate will just be sorted again.
+                                       Additionally, candidate position 1 will be lost.
+                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
+                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. */
 
 
 /*-*************************************
@@ -15,35 +15,6 @@
 /*-*************************************
 *  Binary Tree search
 ***************************************/
-#define ZSTD_DUBT_UNSORTED_MARK 1   /* note : index 1 will now be confused with "unsorted" if sorted as larger than its predecessor.
-                                       It's not a big deal though : the candidate will just be considered unsorted, and be sorted again.
-                                       Additionnally, candidate position 1 will be lost.
-                                       But candidate 1 cannot hide a large tree of candidates, so it's a moderate loss.
-                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be misdhandled by a table re-use using a different strategy */
-
-/*! ZSTD_preserveUnsortedMark() :
- *  pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK before ZSTD_reduceTable()
- *  so that combined operation preserves its value.
- *  Without it, ZSTD_DUBT_UNSORTED_MARK==1 would be squashed to 0.
- *  As a consequence, the list of unsorted elements would stop on the first element,
- *  removing candidates, resulting in a negligible loss to compression ratio
- *  (since overflow protection with ZSTD_reduceTable() is relatively rare).
- *  Another potential risk is that a position will be promoted from *unsorted*
- *  to *sorted=>smaller:0*, meaning the next candidate will be considered smaller.
- *  This could be wrong, and result in data corruption.
- *  On second thought, this corruption might be impossible,
- *  because unsorted elements are always at the beginning of the list,
- *  and squashing to zero reduce the list to a single element,
- *  which needs to be sorted anyway.
- *  I haven't spent much thoughts into this possible scenario,
- *  and just felt it was safer to implement ZSTD_preserveUnsortedMark() */
-void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue)
-{
-    U32 u;
-    for (u=0; u<size; u++)
-        if (table[u] == ZSTD_DUBT_UNSORTED_MARK)
-            table[u] = ZSTD_DUBT_UNSORTED_MARK + reducerValue;
-}
-
 void ZSTD_updateDUBT(
         ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
@@ -23,7 +23,7 @@
 
 /*!
 *  LEGACY_SUPPORT :
-*  if set to 1, ZSTD_decompress() can decode older formats (v0.1+)
+*  if set to 1+, ZSTD_decompress() can decode older formats (v0.1+)
*/
 #ifndef ZSTD_LEGACY_SUPPORT
 #  define ZSTD_LEGACY_SUPPORT 0
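
The "1+" wording reflects that the macro is numeric rather than boolean. A hedged build-time illustration; the assumption that a value N enables decoding of formats v0.N and newer comes from this comment plus the surrounding default, and should be verified against lib/legacy:

/* e.g. on the compiler command line : -DZSTD_LEGACY_SUPPORT=4 */
#ifndef ZSTD_LEGACY_SUPPORT
#  define ZSTD_LEGACY_SUPPORT 4   /* assumption : enables legacy formats v0.4 and newer */
#endif
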
@@ -235,8 +235,8 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
 
 
 /*-*************************************************************
-*   Decompression section
-***************************************************************/
+*   Frame header decoding
+***************************************************************/
 
 /*! ZSTD_isFrame() :
  *  Tells if the content of `buffer` starts with a valid Frame Identifier.
@@ -258,7 +258,7 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size)
 
 /** ZSTD_frameHeaderSize_internal() :
  *  srcSize must be large enough to reach header size fields.
- *  note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
+ *  note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
 * @return : size of the Frame Header
 *           or an error code, which can be tested with ZSTD_isError() */
 static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
@@ -481,6 +481,10 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he
 }
 
 
+/*-*************************************************************
+ *   Block decoding
+ ***************************************************************/
+
 /*! ZSTD_getcBlockSize() :
  *  Provides the size of compressed block from block header `src` */
 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
@@ -651,6 +655,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
 
 typedef union {
     FSE_decode_t realData;
+    FSE_DTable dtable;
     U32 alignedBy4;
 } FSE_decode_t4;
 
@@ -729,7 +734,6 @@ static size_t ZSTD_buildSeqTable(FSE_DTable* DTableSpace, const FSE_DTable** DTa
                                  const void* src, size_t srcSize,
                                  const FSE_decode_t4* defaultTable, U32 flagRepeatTable)
 {
-    const void* const tmpPtr = defaultTable;   /* bypass strict aliasing */
     switch(type)
     {
     case set_rle :
@@ -739,7 +743,7 @@ static size_t ZSTD_buildSeqTable(FSE_DTable* DTableSpace, const FSE_DTable** DTa
         *DTablePtr = DTableSpace;
         return 1;
     case set_basic :
-        *DTablePtr = (const FSE_DTable*)tmpPtr;
+        *DTablePtr = &defaultTable->dtable;
         return 0;
     case set_repeat:
         if (!flagRepeatTable) return ERROR(corruption_detected);
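
The last three hunks trade a cast through const void* for a read of a dedicated union member, which keeps the compiler informed that both views share the same storage. A hedged sketch of the pattern, with look-alike types (the *_like names are illustrative, not zstd's):

typedef unsigned FSE_DTable_like;                  /* FSE_DTable is typedef'd to unsigned */
typedef struct { unsigned short newState;
                 unsigned char  symbol;
                 unsigned char  nbBits; } FSE_decode_like;

typedef union {
    FSE_decode_like realData;     /* how the static default tables are authored */
    FSE_DTable_like dtable;       /* how the decoder wants to read them */
    unsigned        alignedBy4;   /* documents the 4-byte alignment requirement */
} FSE_decode_t4_like;

static const FSE_decode_t4_like defaultTable[1] = { { { 0, 0, 5 } } };

const FSE_DTable_like* get_default_table(void)
{
    return &defaultTable->dtable;   /* union member access : no void* detour */
}
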
@@ -936,13 +940,14 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l
                 ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
                 ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
                 assert(ofBits <= MaxOff);
-                if (MEM_32bits() && longOffsets) {
-                    U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
+                if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
+                    U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
                     offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
-                    if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
+                    BIT_reloadDStream(&seqState->DStream);
                     if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
+                    assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
                 } else {
-                    offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits);   /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
+                    offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
                     if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
                 }
             }
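
For context on the new condition: on 32-bit targets the bit container only guarantees STREAM_ACCUMULATOR_MIN_32 valid bits between reloads, so an offset field wider than that is read in two parts, with extraBits now sized from the bits actually remaining (32 - bitsConsumed). A toy model of the arithmetic (illustrative numbers, not zstd's bitstream):

#include <stdio.h>

int main(void)
{
    unsigned const ofBits   = 30;   /* offset field width */
    unsigned const bitsLeft = 20;   /* 32 - bitsConsumed : bits still valid in the container */
    unsigned const extraBits = ofBits - (ofBits < bitsLeft ? ofBits : bitsLeft);
    printf("first read  : %u bits, shifted left by %u\n", ofBits - extraBits, extraBits);  /* 20, 10 */
    printf("after reload, second read : %u bits\n", extraBits);                            /* 10 */
    return 0;
}
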
@@ -955,7 +960,7 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l
                 if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
                 seqState->prevOffset[1] = seqState->prevOffset[0];
                 seqState->prevOffset[0] = offset = temp;
-            } else {
+            } else {   /* offset == 0 */
                 offset = seqState->prevOffset[0];
             }
         } else {
@@ -967,16 +972,16 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l
     }
 
     seq.matchLength = ML_base[mlCode]
-                    + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0);   /* <= 16 bits */
+                    + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0);   /* <= 16 bits */
     if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
         BIT_reloadDStream(&seqState->DStream);
     if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
         BIT_reloadDStream(&seqState->DStream);
-    /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
+    /* Ensure there are enough bits to read the rest of the data in 64-bit mode. */
     ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
 
     seq.litLength = LL_base[llCode]
-                  + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0);   /* <= 16 bits */
+                  + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0);   /* <= 16 bits */
     if (MEM_32bits())
         BIT_reloadDStream(&seqState->DStream);
 
@@ -1364,13 +1369,13 @@ static size_t ZSTD_decompressSequencesLong(
         FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
 
         /* prepare in advance */
-        for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb<seqAdvance; seqNb++) {
+        for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
             sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
         }
         if (seqNb<seqAdvance) return ERROR(corruption_detected);
 
         /* decode and decompress */
-        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb<nbSeq ; seqNb++) {
+        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
             seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
             size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
             if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
@@ -1411,13 +1416,9 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
     /* isLongOffset must be true if there are long offsets.
      * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
      * We don't expect that to be the case in 64-bit mode.
-     * If we are in block mode we don't know the window size, so we have to be
-     * conservative.
+     * In block mode, window size is not known, so we have to be conservative. (note : but it could be evaluated from current-lowLimit)
      */
     ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)));
-    /* windowSize could be any value at this point, since it is only validated
-     * in the streaming API.
-     */
     DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
 
     if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
@@ -1429,7 +1430,9 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
         ip += litCSize;
         srcSize -= litCSize;
     }
-    if (frame && dctx->fParams.windowSize > (1<<23))
+    if ( frame /* windowSize exists */
+      && (dctx->fParams.windowSize > (1<<24))
+      && MEM_64bits() /* x86 benefits less from long mode than x64 */ )
         return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, isLongOffset);
     return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, isLongOffset);
 }
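
The dispatch condition gains two guards: the frame header must exist (so windowSize is known at all) and the raised 16 MB threshold applies only on 64-bit targets. A hedged restatement as a standalone predicate (the helper name is illustrative, constants mirror the hunk):

static int prefer_long_mode(int frame, unsigned long long windowSize, int is64bit)
{
    return frame                          /* windowSize exists */
        && (windowSize > (1ULL << 24))    /* window above 16 MB */
        && is64bit;                       /* x86 benefits less from long mode than x64 */
}
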