fixed decompression-only benchmark
This commit is contained in:
parent
f6eb12084d
commit
8bed4012bd
@ -1026,17 +1026,18 @@ ZSTD_decompressSequencesLong_body(
|
|||||||
/* Regen sequences */
|
/* Regen sequences */
|
||||||
if (nbSeq) {
|
if (nbSeq) {
|
||||||
#define STORED_SEQS 4
|
#define STORED_SEQS 4
|
||||||
#define STOSEQ_MASK (STORED_SEQS-1)
|
#define STORED_SEQS_MASK (STORED_SEQS-1)
|
||||||
#define ADVANCED_SEQS 4
|
#define ADVANCED_SEQS 4
|
||||||
seq_t sequences[STORED_SEQS];
|
seq_t sequences[STORED_SEQS];
|
||||||
int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
|
int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
|
||||||
seqState_t seqState;
|
seqState_t seqState;
|
||||||
int seqNb;
|
int seqNb;
|
||||||
dctx->fseEntropy = 1;
|
dctx->fseEntropy = 1;
|
||||||
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
|
||||||
seqState.prefixStart = prefixStart;
|
seqState.prefixStart = prefixStart;
|
||||||
seqState.pos = (size_t)(op-prefixStart);
|
seqState.pos = (size_t)(op-prefixStart);
|
||||||
seqState.dictEnd = dictEnd;
|
seqState.dictEnd = dictEnd;
|
||||||
|
assert(iend > ip);
|
||||||
CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
|
CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
|
||||||
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
||||||
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
||||||
@ -1051,10 +1052,10 @@ ZSTD_decompressSequencesLong_body(
|
|||||||
/* decode and decompress */
|
/* decode and decompress */
|
||||||
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
|
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
|
||||||
seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
|
seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
|
||||||
size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
|
size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
|
||||||
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
||||||
PREFETCH(sequence.match); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
|
PREFETCH(sequence.match); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
|
||||||
sequences[seqNb&STOSEQ_MASK] = sequence;
|
sequences[seqNb & STORED_SEQS_MASK] = sequence;
|
||||||
op += oneSeqSize;
|
op += oneSeqSize;
|
||||||
}
|
}
|
||||||
if (seqNb<nbSeq) return ERROR(corruption_detected);
|
if (seqNb<nbSeq) return ERROR(corruption_detected);
|
||||||
@ -1062,7 +1063,7 @@ ZSTD_decompressSequencesLong_body(
|
|||||||
/* finish queue */
|
/* finish queue */
|
||||||
seqNb -= seqAdvance;
|
seqNb -= seqAdvance;
|
||||||
for ( ; seqNb<nbSeq ; seqNb++) {
|
for ( ; seqNb<nbSeq ; seqNb++) {
|
||||||
size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STOSEQ_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
|
size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
|
||||||
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
||||||
op += oneSeqSize;
|
op += oneSeqSize;
|
||||||
}
|
}
|
||||||
@ -1070,7 +1071,7 @@ ZSTD_decompressSequencesLong_body(
|
|||||||
/* save reps for next block */
|
/* save reps for next block */
|
||||||
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
|
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
|
||||||
#undef STORED_SEQS
|
#undef STORED_SEQS
|
||||||
#undef STOSEQ_MASK
|
#undef STORED_SEQS_MASK
|
||||||
#undef ADVANCED_SEQS
|
#undef ADVANCED_SEQS
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1118,8 +1119,9 @@ ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef size_t (*ZSTD_decompressSequences_t)(
|
typedef size_t (*ZSTD_decompressSequences_t)(
|
||||||
ZSTD_DCtx *dctx, void *dst, size_t maxDstSize,
|
ZSTD_DCtx* dctx,
|
||||||
const void *seqStart, size_t seqSize, int nbSeq,
|
void* dst, size_t maxDstSize,
|
||||||
|
const void* seqStart, size_t seqSize, int nbSeq,
|
||||||
const ZSTD_longOffset_e isLongOffset);
|
const ZSTD_longOffset_e isLongOffset);
|
||||||
|
|
||||||
static size_t
|
static size_t
|
||||||
@ -1136,7 +1138,14 @@ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
|
|||||||
return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
|
|
||||||
|
/* ZSTD_decompressSequencesLong() :
|
||||||
|
* decompression function triggered when a minimum share of offsets is considered "long",
|
||||||
|
* aka out of cache.
|
||||||
|
* note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
|
||||||
|
* This function will try to mitigate main memory latency through the use of prefetching */
|
||||||
|
static size_t
|
||||||
|
ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
|
||||||
void* dst, size_t maxDstSize,
|
void* dst, size_t maxDstSize,
|
||||||
const void* seqStart, size_t seqSize, int nbSeq,
|
const void* seqStart, size_t seqSize, int nbSeq,
|
||||||
const ZSTD_longOffset_e isLongOffset)
|
const ZSTD_longOffset_e isLongOffset)
|
||||||
@ -1150,6 +1159,8 @@ static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
|
|||||||
return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* ZSTD_getLongOffsetsShare() :
|
/* ZSTD_getLongOffsetsShare() :
|
||||||
* condition : offTable must be valid
|
* condition : offTable must be valid
|
||||||
* @return : "share" of long offsets (arbitrarily defined as > (1<<23))
|
* @return : "share" of long offsets (arbitrarily defined as > (1<<23))
|
||||||
@ -1188,7 +1199,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|||||||
* In block mode, window size is not known, so we have to be conservative.
|
* In block mode, window size is not known, so we have to be conservative.
|
||||||
* (note: but it could be evaluated from current-lowLimit)
|
* (note: but it could be evaluated from current-lowLimit)
|
||||||
*/
|
*/
|
||||||
ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)));
|
ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
|
||||||
DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
|
DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
|
||||||
|
|
||||||
if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
|
if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
|
||||||
@ -1208,7 +1219,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|||||||
ip += seqHSize;
|
ip += seqHSize;
|
||||||
srcSize -= seqHSize;
|
srcSize -= seqHSize;
|
||||||
|
|
||||||
if ( (!frame || dctx->fParams.windowSize > (1<<24))
|
if ( (!frame || (dctx->fParams.windowSize > (1<<24)))
|
||||||
&& (nbSeq>0) ) { /* could probably use a larger nbSeq limit */
|
&& (nbSeq>0) ) { /* could probably use a larger nbSeq limit */
|
||||||
U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
|
U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
|
||||||
U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
|
U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
|
||||||
|
@ -522,7 +522,8 @@ static BMK_benchOutcome_t BMK_benchOutcome_setValidResult(BMK_benchResult_t resu
|
|||||||
|
|
||||||
|
|
||||||
/* benchMem with no allocation */
|
/* benchMem with no allocation */
|
||||||
static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
|
static BMK_benchOutcome_t
|
||||||
|
BMK_benchMemAdvancedNoAlloc(
|
||||||
const void** srcPtrs, size_t* srcSizes,
|
const void** srcPtrs, size_t* srcSizes,
|
||||||
void** cPtrs, size_t* cCapacities, size_t* cSizes,
|
void** cPtrs, size_t* cCapacities, size_t* cSizes,
|
||||||
void** resPtrs, size_t* resSizes,
|
void** resPtrs, size_t* resSizes,
|
||||||
@ -533,7 +534,8 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
|
|||||||
|
|
||||||
const void* srcBuffer, size_t srcSize,
|
const void* srcBuffer, size_t srcSize,
|
||||||
const size_t* fileSizes, unsigned nbFiles,
|
const size_t* fileSizes, unsigned nbFiles,
|
||||||
const int cLevel, const ZSTD_compressionParameters* comprParams,
|
const int cLevel,
|
||||||
|
const ZSTD_compressionParameters* comprParams,
|
||||||
const void* dictBuffer, size_t dictBufferSize,
|
const void* dictBuffer, size_t dictBufferSize,
|
||||||
ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
|
ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
|
||||||
int displayLevel, const char* displayName,
|
int displayLevel, const char* displayName,
|
||||||
@ -599,6 +601,7 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
|
|||||||
cPtr += cCapacities[nbBlocks];
|
cPtr += cCapacities[nbBlocks];
|
||||||
resPtr += thisBlockSize;
|
resPtr += thisBlockSize;
|
||||||
remaining -= thisBlockSize;
|
remaining -= thisBlockSize;
|
||||||
|
if (BMK_decodeOnly) { assert(nbBlocks==0); cSizes[nbBlocks] = thisBlockSize; }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -633,7 +636,6 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
|
|||||||
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize);
|
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize);
|
||||||
|
|
||||||
while (!(compressionCompleted && decompressionCompleted)) {
|
while (!(compressionCompleted && decompressionCompleted)) {
|
||||||
|
|
||||||
if (!compressionCompleted) {
|
if (!compressionCompleted) {
|
||||||
BMK_runOutcome_t const cOutcome =
|
BMK_runOutcome_t const cOutcome =
|
||||||
BMK_benchTimedFn( timeStateCompress,
|
BMK_benchTimedFn( timeStateCompress,
|
||||||
@ -659,7 +661,6 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
|
|||||||
} }
|
} }
|
||||||
|
|
||||||
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
||||||
markNb = (markNb+1) % NB_MARKS;
|
|
||||||
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r",
|
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r",
|
||||||
marks[markNb], displayName,
|
marks[markNb], displayName,
|
||||||
(U32)srcSize, (U32)cSize,
|
(U32)srcSize, (U32)cSize,
|
||||||
@ -690,7 +691,6 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
|
|||||||
}
|
}
|
||||||
|
|
||||||
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
||||||
markNb = (markNb+1) % NB_MARKS;
|
|
||||||
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
|
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
|
||||||
marks[markNb], displayName,
|
marks[markNb], displayName,
|
||||||
(U32)srcSize, (U32)benchResult.cSize,
|
(U32)srcSize, (U32)benchResult.cSize,
|
||||||
@ -700,6 +700,7 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
|
|||||||
}
|
}
|
||||||
decompressionCompleted = BMK_isCompleted_TimedFn(timeStateDecompress);
|
decompressionCompleted = BMK_isCompleted_TimedFn(timeStateDecompress);
|
||||||
}
|
}
|
||||||
|
markNb = (markNb+1) % NB_MARKS;
|
||||||
} /* while (!(compressionCompleted && decompressionCompleted)) */
|
} /* while (!(compressionCompleted && decompressionCompleted)) */
|
||||||
|
|
||||||
/* CRC Checking */
|
/* CRC Checking */
|
||||||
@ -707,7 +708,8 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
|
|||||||
U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
|
U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
|
||||||
if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) {
|
if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) {
|
||||||
size_t u;
|
size_t u;
|
||||||
DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck);
|
DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n",
|
||||||
|
displayName, (unsigned)crcOrig, (unsigned)crcCheck);
|
||||||
for (u=0; u<srcSize; u++) {
|
for (u=0; u<srcSize; u++) {
|
||||||
if (((const BYTE*)srcBuffer)[u] != resultBuffer[u]) {
|
if (((const BYTE*)srcBuffer)[u] != resultBuffer[u]) {
|
||||||
U32 segNb, bNb, pos;
|
U32 segNb, bNb, pos;
|
||||||
|
Loading…
Reference in New Issue
Block a user