fixed zstd -b speed result
The benchmark was displaying the speed of the last run instead of the best speed across all runs so far.
This commit is contained in:
parent
b0e1f3982d
commit
4da5bdf482
@ -315,7 +315,6 @@ BMK_runOutcome_t BMK_benchFunction(
|
||||
unsigned nbLoops)
|
||||
{
|
||||
size_t dstSize = 0;
|
||||
U64 totalTime;
|
||||
|
||||
if(!nbLoops) {
|
||||
RETURN_QUIET_ERROR(1, BMK_runOutcome_t, "nbLoops must be nonzero ");
|
||||
@ -335,7 +334,7 @@ BMK_runOutcome_t BMK_benchFunction(
|
||||
#endif
|
||||
}
|
||||
|
||||
/* benchmark loop */
|
||||
/* benchmark */
|
||||
{ UTIL_time_t const clockStart = UTIL_getTime();
|
||||
unsigned loopNb, blockNb;
|
||||
if (initFn != NULL) initFn(initPayload);
|
||||
@ -351,17 +350,15 @@ BMK_runOutcome_t BMK_benchFunction(
|
||||
} else if (loopNb == 0) {
|
||||
dstSize += res;
|
||||
if (blockResults != NULL) blockResults[blockNb] = res;
|
||||
dstSize += res;
|
||||
} }
|
||||
} /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
|
||||
totalTime = UTIL_clockSpanNano(clockStart);
|
||||
}
|
||||
|
||||
{ BMK_runTime_t rt;
|
||||
rt.nanoSecPerRun = totalTime / nbLoops;
|
||||
rt.sumOfReturn = dstSize;
|
||||
return BMK_setValid_runTime(rt);
|
||||
}
|
||||
{ U64 const totalTime = UTIL_clockSpanNano(clockStart);
|
||||
BMK_runTime_t rt;
|
||||
rt.nanoSecPerRun = totalTime / nbLoops;
|
||||
rt.sumOfReturn = dstSize;
|
||||
return BMK_setValid_runTime(rt);
|
||||
} }
|
||||
}
|
||||
|
||||
|
||||
@ -545,7 +542,7 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
|
||||
int displayLevel, const char* displayName,
|
||||
const BMK_advancedParams_t* adv)
|
||||
{
|
||||
size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize); /* avoid div by 0 */
|
||||
size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize); /* avoid div by 0 */
|
||||
BMK_benchResult_t benchResult;
|
||||
size_t const loadedCompressedSize = srcSize;
|
||||
size_t cSize = 0;
|
||||
@ -555,6 +552,7 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
|
||||
assert(cctx != NULL); assert(dctx != NULL);
|
||||
|
||||
/* init */
|
||||
memset(&benchResult, 0, sizeof(benchResult));
|
||||
if (strlen(displayName)>17) displayName += strlen(displayName) - 17; /* display last 17 characters */
|
||||
if (adv->mode == BMK_decodeOnly) { /* benchmark only decompression : source must be already compressed */
|
||||
const char* srcPtr = (const char*)srcBuffer;
|
||||
@ -640,66 +638,72 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
|
||||
while (!(compressionCompleted && decompressionCompleted)) {
|
||||
|
||||
if (!compressionCompleted) {
|
||||
BMK_runTime_t cResult;
|
||||
|
||||
BMK_timedFnOutcome_t const cOutcome =
|
||||
BMK_benchFunctionTimed(timeStateCompress,
|
||||
&local_defaultCompress, (void*)cctx,
|
||||
&local_initCCtx, (void*)&cctxprep,
|
||||
nbBlocks,
|
||||
srcPtrs, srcSizes,
|
||||
cPtrs, cCapacities,
|
||||
cSizes);
|
||||
BMK_benchFunctionTimed( timeStateCompress,
|
||||
&local_defaultCompress, cctx,
|
||||
&local_initCCtx, &cctxprep,
|
||||
nbBlocks,
|
||||
srcPtrs, srcSizes,
|
||||
cPtrs, cCapacities,
|
||||
cSizes);
|
||||
|
||||
if (!BMK_isSuccessful_timedFnOutcome(cOutcome)) {
|
||||
return BMK_benchOutcome_error();
|
||||
}
|
||||
|
||||
cResult = BMK_extract_timedFnResult(cOutcome);
|
||||
ratio = (double)(srcSize / cResult.sumOfReturn);
|
||||
{ BMK_runTime_t const cResult = BMK_extract_timedFnResult(cOutcome);
|
||||
cSize = cResult.sumOfReturn;
|
||||
ratio = (double)srcSize / cSize;
|
||||
{ BMK_benchResult_t newResult;
|
||||
newResult.cSpeed = ((U64)srcSize * TIMELOOP_NANOSEC / cResult.nanoSecPerRun);
|
||||
benchResult.cSize = cSize;
|
||||
if (newResult.cSpeed > benchResult.cSpeed)
|
||||
benchResult.cSpeed = newResult.cSpeed;
|
||||
} }
|
||||
|
||||
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
||||
cSize = cResult.sumOfReturn;
|
||||
benchResult.cSpeed = (srcSize * TIMELOOP_NANOSEC / cResult.nanoSecPerRun);
|
||||
benchResult.cSize = cSize;
|
||||
ratio = (double)srcSize / cSize;
|
||||
markNb = (markNb+1) % NB_MARKS;
|
||||
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r",
|
||||
marks[markNb], displayName, (U32)srcSize, (U32)cSize,
|
||||
marks[markNb], displayName,
|
||||
(U32)srcSize, (U32)cSize,
|
||||
ratioAccuracy, ratio,
|
||||
benchResult.cSpeed < (10 MB) ? 2 : 1, (double)benchResult.cSpeed / MB_UNIT);
|
||||
}
|
||||
compressionCompleted = BMK_isCompleted_timedFnOutcome(cOutcome);
|
||||
}
|
||||
|
||||
if(!decompressionCompleted) {
|
||||
BMK_runTime_t dResult;
|
||||
|
||||
BMK_timedFnOutcome_t const dOutcome =
|
||||
BMK_benchFunctionTimed(timeStateDecompress,
|
||||
&local_defaultDecompress, (void*)(dctx),
|
||||
&local_initDCtx, (void*)&dctxprep,
|
||||
nbBlocks,
|
||||
(const void* const*)cPtrs, cSizes,
|
||||
resPtrs, resSizes,
|
||||
NULL);
|
||||
BMK_benchFunctionTimed(timeStateDecompress,
|
||||
&local_defaultDecompress, dctx,
|
||||
&local_initDCtx, &dctxprep,
|
||||
nbBlocks,
|
||||
(const void *const *)cPtrs, cSizes,
|
||||
resPtrs, resSizes,
|
||||
NULL);
|
||||
|
||||
if(!BMK_isSuccessful_timedFnOutcome(dOutcome)) {
|
||||
return BMK_benchOutcome_error();
|
||||
}
|
||||
|
||||
dResult = BMK_extract_timedFnResult(dOutcome);
|
||||
{ BMK_runTime_t const dResult = BMK_extract_timedFnResult(dOutcome);
|
||||
U64 const newDSpeed = (srcSize * TIMELOOP_NANOSEC / dResult.nanoSecPerRun);
|
||||
if (newDSpeed > benchResult.dSpeed)
|
||||
benchResult.dSpeed = newDSpeed;
|
||||
}
|
||||
|
||||
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
||||
benchResult.dSpeed = (srcSize * TIMELOOP_NANOSEC / dResult.nanoSecPerRun);
|
||||
markNb = (markNb+1) % NB_MARKS;
|
||||
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
|
||||
marks[markNb], displayName, (U32)srcSize, (U32)benchResult.cSize,
|
||||
marks[markNb], displayName,
|
||||
(U32)srcSize, (U32)benchResult.cSize,
|
||||
ratioAccuracy, ratio,
|
||||
benchResult.cSpeed < (10 MB) ? 2 : 1, (double)benchResult.cSpeed / MB_UNIT,
|
||||
(double)benchResult.dSpeed / (1 MB));
|
||||
(double)benchResult.dSpeed / MB_UNIT);
|
||||
}
|
||||
decompressionCompleted = BMK_isCompleted_timedFnOutcome(dOutcome);
|
||||
}
|
||||
}
|
||||
} /* while (!(compressionCompleted && decompressionCompleted)) */
|
||||
|
||||
/* CRC Checking */
|
||||
{ const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr);
|
||||
|
@ -597,7 +597,7 @@ static int benchFiles(U32 benchNb,
|
||||
if ((U64)benchedSize > inFileSize)
|
||||
benchedSize = (size_t)inFileSize;
|
||||
if ((U64)benchedSize < inFileSize) {
|
||||
DISPLAY("Not enough memory for '%s' full size; testing %u MB only...\n",
|
||||
DISPLAY("Not enough memory for '%s' full size; testing %u MB only... \n",
|
||||
inFileName, (U32)(benchedSize>>20));
|
||||
} }
|
||||
|
||||
|
@ -27,7 +27,8 @@
|
||||
#include "util.h"
|
||||
#include "bench.h"
|
||||
#include "zstd_errors.h"
|
||||
#include "zstd_internal.h"
|
||||
#include "zstd_internal.h" /* should not be needed */
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Constants
|
||||
@ -46,6 +47,7 @@ static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t
|
||||
static const U64 g_maxVariationTime = 60 * SEC_TO_MICRO;
|
||||
static const int g_maxNbVariations = 64;
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Macros
|
||||
**************************************/
|
||||
@ -90,9 +92,9 @@ static const char* g_stratName[ZSTD_btultra+1] = {
|
||||
"ZSTD_greedy ", "ZSTD_lazy ", "ZSTD_lazy2 ",
|
||||
"ZSTD_btlazy2 ", "ZSTD_btopt ", "ZSTD_btultra "};
|
||||
|
||||
|
||||
static const U32 tlen_table[TLEN_RANGE] = { 0, 1, 2, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 256, 512, 999 };
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Setup for Adding new params
|
||||
**************************************/
|
||||
@ -212,6 +214,7 @@ static void displayParamVal(FILE* f, varInds_t param, U32 value, int width) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Benchmark Parameters/Global Variables
|
||||
**************************************/
|
||||
@ -284,6 +287,7 @@ static winner_ll_node* g_winners; /* linked list sorted ascending by cSize & cSp
|
||||
* g_clockGranularity
|
||||
*/
|
||||
|
||||
|
||||
/*-*******************************************************
|
||||
* General Util Functions
|
||||
*********************************************************/
|
||||
@ -653,7 +657,7 @@ static void BMK_displayOneResult(FILE* f, winnerInfo_t res, const size_t srcSize
|
||||
}
|
||||
|
||||
fprintf(f, " }, /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */\n",
|
||||
(double)srcSize / res.result.cSize, (double)res.result.cSpeed / (1 MB), (double)res.result.dSpeed / (1 MB));
|
||||
(double)srcSize / res.result.cSize, (double)res.result.cSpeed / MB_UNIT, (double)res.result.dSpeed / MB_UNIT);
|
||||
}
|
||||
|
||||
/* Writes to f the results of a parameter benchmark */
|
||||
@ -836,7 +840,7 @@ static void BMK_printWinnerOpt(FILE* f, const U32 cLevel, const BMK_benchResult_
|
||||
}
|
||||
fprintf(f, "================================\n");
|
||||
fprintf(f, "Level Bounds: R: > %.3f AND C: < %.1f MB/s \n\n",
|
||||
(double)srcSize / g_lvltarget.cSize, (double)g_lvltarget.cSpeed / (1 MB));
|
||||
(double)srcSize / g_lvltarget.cSize, (double)g_lvltarget.cSpeed / MB_UNIT);
|
||||
|
||||
|
||||
fprintf(f, "Overall Winner: \n");
|
||||
@ -1718,16 +1722,16 @@ static int BMK_seed(winnerInfo_t* winners, const paramValues_t params,
|
||||
/* too large compression speed difference for the compression benefit */
|
||||
if (W_ratio > O_ratio)
|
||||
DISPLAY ("Compression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n",
|
||||
W_ratio, (double)testResult.cSpeed / (1 MB),
|
||||
O_ratio, (double)winners[cLevel].result.cSpeed / (1 MB), cLevel);
|
||||
W_ratio, (double)testResult.cSpeed / MB_UNIT,
|
||||
O_ratio, (double)winners[cLevel].result.cSpeed / MB_UNIT, cLevel);
|
||||
continue;
|
||||
}
|
||||
if (W_DSpeed_note < O_DSpeed_note ) {
|
||||
/* too large decompression speed difference for the compression benefit */
|
||||
if (W_ratio > O_ratio)
|
||||
DISPLAY ("Decompression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n",
|
||||
W_ratio, (double)testResult.dSpeed / (1 MB),
|
||||
O_ratio, (double)winners[cLevel].result.dSpeed / (1 MB), cLevel);
|
||||
W_ratio, (double)testResult.dSpeed / MB_UNIT,
|
||||
O_ratio, (double)winners[cLevel].result.dSpeed / MB_UNIT, cLevel);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1817,7 +1821,7 @@ static void BMK_benchFullTable(const buffers_t buf, const contexts_t ctx)
|
||||
if (f==NULL) { DISPLAY("error opening %s \n", rfName); exit(1); }
|
||||
|
||||
if (g_target) {
|
||||
BMK_init_level_constraints(g_target * (1 MB));
|
||||
BMK_init_level_constraints(g_target * MB_UNIT);
|
||||
} else {
|
||||
/* baseline config for level 1 */
|
||||
paramValues_t const l1params = cParamsToPVals(ZSTD_getCParams(1, buf.maxBlockSize, ctx.dictSize));
|
||||
|
Loading…
Reference in New Issue
Block a user