diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 652ddd19..68298772 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -1026,17 +1026,18 @@ ZSTD_decompressSequencesLong_body( /* Regen sequences */ if (nbSeq) { #define STORED_SEQS 4 -#define STOSEQ_MASK (STORED_SEQS-1) +#define STORED_SEQS_MASK (STORED_SEQS-1) #define ADVANCED_SEQS 4 seq_t sequences[STORED_SEQS]; int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); seqState_t seqState; int seqNb; dctx->fseEntropy = 1; - { U32 i; for (i=0; ientropy.rep[i]; } + { int i; for (i=0; ientropy.rep[i]; } seqState.prefixStart = prefixStart; seqState.pos = (size_t)(op-prefixStart); seqState.dictEnd = dictEnd; + assert(iend > ip); CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); @@ -1051,10 +1052,10 @@ ZSTD_decompressSequencesLong_body( /* decode and decompress */ for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNbentropy.rep[i] = (U32)(seqState.prevOffset[i]); } #undef STORED_SEQS -#undef STOSEQ_MASK +#undef STORED_SEQS_MASK #undef ADVANCED_SEQS } @@ -1118,9 +1119,10 @@ ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, #endif typedef size_t (*ZSTD_decompressSequences_t)( - ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, - const void *seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset); + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset); static size_t ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, @@ -1136,10 +1138,17 @@ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, 
isLongOffset); } -static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + +/* ZSTD_decompressSequencesLong() : + * decompression function triggered when a minimum share of offsets is considered "long", + * aka out of cache. + * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance". + * This function will try to mitigate main memory latency through the use of prefetching */ +static size_t +ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) { DEBUGLOG(5, "ZSTD_decompressSequencesLong"); #if DYNAMIC_BMI2 @@ -1150,6 +1159,8 @@ static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } + + /* ZSTD_getLongOffsetsShare() : * condition : offTable must be valid * @return : "share" of long offsets (arbitrarily defined as > (1<<23)) @@ -1188,7 +1199,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, * In block mode, window size is not known, so we have to be conservative. 
* (note: but it could be evaluated from current-lowLimit) */ - ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))); + ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); @@ -1208,7 +1219,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, ip += seqHSize; srcSize -= seqHSize; - if ( (!frame || dctx->fParams.windowSize > (1<<24)) + if ( (!frame || (dctx->fParams.windowSize > (1<<24))) && (nbSeq>0) ) { /* could probably use a larger nbSeq limit */ U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ diff --git a/programs/bench.c b/programs/bench.c index 326c1c1c..caa80357 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -522,22 +522,24 @@ static BMK_benchOutcome_t BMK_benchOutcome_setValidResult(BMK_benchResult_t resu /* benchMem with no allocation */ -static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc( - const void** srcPtrs, size_t* srcSizes, - void** cPtrs, size_t* cCapacities, size_t* cSizes, - void** resPtrs, size_t* resSizes, - void** resultBufferPtr, void* compressedBuffer, - size_t maxCompressedSize, - BMK_timedFnState_t* timeStateCompress, - BMK_timedFnState_t* timeStateDecompress, +static BMK_benchOutcome_t +BMK_benchMemAdvancedNoAlloc( + const void** srcPtrs, size_t* srcSizes, + void** cPtrs, size_t* cCapacities, size_t* cSizes, + void** resPtrs, size_t* resSizes, + void** resultBufferPtr, void* compressedBuffer, + size_t maxCompressedSize, + BMK_timedFnState_t* timeStateCompress, + BMK_timedFnState_t* timeStateDecompress, - const void* srcBuffer, size_t srcSize, - const size_t* fileSizes, unsigned nbFiles, - 
const int cLevel, const ZSTD_compressionParameters* comprParams, - const void* dictBuffer, size_t dictBufferSize, - ZSTD_CCtx* cctx, ZSTD_DCtx* dctx, - int displayLevel, const char* displayName, - const BMK_advancedParams_t* adv) + const void* srcBuffer, size_t srcSize, + const size_t* fileSizes, unsigned nbFiles, + const int cLevel, + const ZSTD_compressionParameters* comprParams, + const void* dictBuffer, size_t dictBufferSize, + ZSTD_CCtx* cctx, ZSTD_DCtx* dctx, + int displayLevel, const char* displayName, + const BMK_advancedParams_t* adv) { size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize); /* avoid div by 0 */ BMK_benchResult_t benchResult; @@ -599,6 +601,7 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc( cPtr += cCapacities[nbBlocks]; resPtr += thisBlockSize; remaining -= thisBlockSize; + if (BMK_decodeOnly) { assert(nbBlocks==0); cSizes[nbBlocks] = thisBlockSize; } } } } @@ -633,7 +636,6 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc( DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize); while (!(compressionCompleted && decompressionCompleted)) { - if (!compressionCompleted) { BMK_runOutcome_t const cOutcome = BMK_benchTimedFn( timeStateCompress, @@ -659,7 +661,6 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc( } } { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - markNb = (markNb+1) % NB_MARKS; DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", marks[markNb], displayName, (U32)srcSize, (U32)cSize, @@ -690,7 +691,6 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc( } { int const ratioAccuracy = (ratio < 10.) ? 
3 : 2; - markNb = (markNb+1) % NB_MARKS; DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", marks[markNb], displayName, (U32)srcSize, (U32)benchResult.cSize, @@ -700,6 +700,7 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc( } decompressionCompleted = BMK_isCompleted_TimedFn(timeStateDecompress); } + markNb = (markNb+1) % NB_MARKS; } /* while (!(compressionCompleted && decompressionCompleted)) */ /* CRC Checking */ @@ -707,7 +708,8 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc( U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) { size_t u; - DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); + DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n", + displayName, (unsigned)crcOrig, (unsigned)crcCheck); for (u=0; u