fixed zstd -b speed result

The benchmark was displaying the speed of the last run
instead of the best of all previous runs.
This commit is contained in:
Yann Collet 2018-08-23 18:04:50 -07:00
parent b0e1f3982d
commit 4da5bdf482
3 changed files with 59 additions and 51 deletions

View File

@ -315,7 +315,6 @@ BMK_runOutcome_t BMK_benchFunction(
unsigned nbLoops)
{
size_t dstSize = 0;
U64 totalTime;
if(!nbLoops) {
RETURN_QUIET_ERROR(1, BMK_runOutcome_t, "nbLoops must be nonzero ");
@ -335,7 +334,7 @@ BMK_runOutcome_t BMK_benchFunction(
#endif
}
/* benchmark loop */
/* benchmark */
{ UTIL_time_t const clockStart = UTIL_getTime();
unsigned loopNb, blockNb;
if (initFn != NULL) initFn(initPayload);
@ -351,17 +350,15 @@ BMK_runOutcome_t BMK_benchFunction(
} else if (loopNb == 0) {
dstSize += res;
if (blockResults != NULL) blockResults[blockNb] = res;
dstSize += res;
} }
} /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
totalTime = UTIL_clockSpanNano(clockStart);
}
{ BMK_runTime_t rt;
rt.nanoSecPerRun = totalTime / nbLoops;
rt.sumOfReturn = dstSize;
return BMK_setValid_runTime(rt);
}
{ U64 const totalTime = UTIL_clockSpanNano(clockStart);
BMK_runTime_t rt;
rt.nanoSecPerRun = totalTime / nbLoops;
rt.sumOfReturn = dstSize;
return BMK_setValid_runTime(rt);
} }
}
@ -545,7 +542,7 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
int displayLevel, const char* displayName,
const BMK_advancedParams_t* adv)
{
size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize); /* avoid div by 0 */
size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize); /* avoid div by 0 */
BMK_benchResult_t benchResult;
size_t const loadedCompressedSize = srcSize;
size_t cSize = 0;
@ -555,6 +552,7 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
assert(cctx != NULL); assert(dctx != NULL);
/* init */
memset(&benchResult, 0, sizeof(benchResult));
if (strlen(displayName)>17) displayName += strlen(displayName) - 17; /* display last 17 characters */
if (adv->mode == BMK_decodeOnly) { /* benchmark only decompression : source must be already compressed */
const char* srcPtr = (const char*)srcBuffer;
@ -640,66 +638,72 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
while (!(compressionCompleted && decompressionCompleted)) {
if (!compressionCompleted) {
BMK_runTime_t cResult;
BMK_timedFnOutcome_t const cOutcome =
BMK_benchFunctionTimed(timeStateCompress,
&local_defaultCompress, (void*)cctx,
&local_initCCtx, (void*)&cctxprep,
nbBlocks,
srcPtrs, srcSizes,
cPtrs, cCapacities,
cSizes);
BMK_benchFunctionTimed( timeStateCompress,
&local_defaultCompress, cctx,
&local_initCCtx, &cctxprep,
nbBlocks,
srcPtrs, srcSizes,
cPtrs, cCapacities,
cSizes);
if (!BMK_isSuccessful_timedFnOutcome(cOutcome)) {
return BMK_benchOutcome_error();
}
cResult = BMK_extract_timedFnResult(cOutcome);
ratio = (double)(srcSize / cResult.sumOfReturn);
{ BMK_runTime_t const cResult = BMK_extract_timedFnResult(cOutcome);
cSize = cResult.sumOfReturn;
ratio = (double)srcSize / cSize;
{ BMK_benchResult_t newResult;
newResult.cSpeed = ((U64)srcSize * TIMELOOP_NANOSEC / cResult.nanoSecPerRun);
benchResult.cSize = cSize;
if (newResult.cSpeed > benchResult.cSpeed)
benchResult.cSpeed = newResult.cSpeed;
} }
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
cSize = cResult.sumOfReturn;
benchResult.cSpeed = (srcSize * TIMELOOP_NANOSEC / cResult.nanoSecPerRun);
benchResult.cSize = cSize;
ratio = (double)srcSize / cSize;
markNb = (markNb+1) % NB_MARKS;
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r",
marks[markNb], displayName, (U32)srcSize, (U32)cSize,
marks[markNb], displayName,
(U32)srcSize, (U32)cSize,
ratioAccuracy, ratio,
benchResult.cSpeed < (10 MB) ? 2 : 1, (double)benchResult.cSpeed / MB_UNIT);
}
compressionCompleted = BMK_isCompleted_timedFnOutcome(cOutcome);
}
if(!decompressionCompleted) {
BMK_runTime_t dResult;
BMK_timedFnOutcome_t const dOutcome =
BMK_benchFunctionTimed(timeStateDecompress,
&local_defaultDecompress, (void*)(dctx),
&local_initDCtx, (void*)&dctxprep,
nbBlocks,
(const void* const*)cPtrs, cSizes,
resPtrs, resSizes,
NULL);
BMK_benchFunctionTimed(timeStateDecompress,
&local_defaultDecompress, dctx,
&local_initDCtx, &dctxprep,
nbBlocks,
(const void *const *)cPtrs, cSizes,
resPtrs, resSizes,
NULL);
if(!BMK_isSuccessful_timedFnOutcome(dOutcome)) {
return BMK_benchOutcome_error();
}
dResult = BMK_extract_timedFnResult(dOutcome);
{ BMK_runTime_t const dResult = BMK_extract_timedFnResult(dOutcome);
U64 const newDSpeed = (srcSize * TIMELOOP_NANOSEC / dResult.nanoSecPerRun);
if (newDSpeed > benchResult.dSpeed)
benchResult.dSpeed = newDSpeed;
}
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
benchResult.dSpeed = (srcSize * TIMELOOP_NANOSEC / dResult.nanoSecPerRun);
markNb = (markNb+1) % NB_MARKS;
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
marks[markNb], displayName, (U32)srcSize, (U32)benchResult.cSize,
marks[markNb], displayName,
(U32)srcSize, (U32)benchResult.cSize,
ratioAccuracy, ratio,
benchResult.cSpeed < (10 MB) ? 2 : 1, (double)benchResult.cSpeed / MB_UNIT,
(double)benchResult.dSpeed / (1 MB));
(double)benchResult.dSpeed / MB_UNIT);
}
decompressionCompleted = BMK_isCompleted_timedFnOutcome(dOutcome);
}
}
} /* while (!(compressionCompleted && decompressionCompleted)) */
/* CRC Checking */
{ const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr);

View File

@ -597,7 +597,7 @@ static int benchFiles(U32 benchNb,
if ((U64)benchedSize > inFileSize)
benchedSize = (size_t)inFileSize;
if ((U64)benchedSize < inFileSize) {
DISPLAY("Not enough memory for '%s' full size; testing %u MB only...\n",
DISPLAY("Not enough memory for '%s' full size; testing %u MB only... \n",
inFileName, (U32)(benchedSize>>20));
} }

View File

@ -27,7 +27,8 @@
#include "util.h"
#include "bench.h"
#include "zstd_errors.h"
#include "zstd_internal.h"
#include "zstd_internal.h" /* should not be needed */
/*-************************************
* Constants
@ -46,6 +47,7 @@ static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t
static const U64 g_maxVariationTime = 60 * SEC_TO_MICRO;
static const int g_maxNbVariations = 64;
/*-************************************
* Macros
**************************************/
@ -90,9 +92,9 @@ static const char* g_stratName[ZSTD_btultra+1] = {
"ZSTD_greedy ", "ZSTD_lazy ", "ZSTD_lazy2 ",
"ZSTD_btlazy2 ", "ZSTD_btopt ", "ZSTD_btultra "};
static const U32 tlen_table[TLEN_RANGE] = { 0, 1, 2, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 256, 512, 999 };
/*-************************************
* Setup for Adding new params
**************************************/
@ -212,6 +214,7 @@ static void displayParamVal(FILE* f, varInds_t param, U32 value, int width) {
}
}
/*-************************************
* Benchmark Parameters/Global Variables
**************************************/
@ -284,6 +287,7 @@ static winner_ll_node* g_winners; /* linked list sorted ascending by cSize & cSp
* g_clockGranularity
*/
/*-*******************************************************
* General Util Functions
*********************************************************/
@ -653,7 +657,7 @@ static void BMK_displayOneResult(FILE* f, winnerInfo_t res, const size_t srcSize
}
fprintf(f, " }, /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */\n",
(double)srcSize / res.result.cSize, (double)res.result.cSpeed / (1 MB), (double)res.result.dSpeed / (1 MB));
(double)srcSize / res.result.cSize, (double)res.result.cSpeed / MB_UNIT, (double)res.result.dSpeed / MB_UNIT);
}
/* Writes to f the results of a parameter benchmark */
@ -836,7 +840,7 @@ static void BMK_printWinnerOpt(FILE* f, const U32 cLevel, const BMK_benchResult_
}
fprintf(f, "================================\n");
fprintf(f, "Level Bounds: R: > %.3f AND C: < %.1f MB/s \n\n",
(double)srcSize / g_lvltarget.cSize, (double)g_lvltarget.cSpeed / (1 MB));
(double)srcSize / g_lvltarget.cSize, (double)g_lvltarget.cSpeed / MB_UNIT);
fprintf(f, "Overall Winner: \n");
@ -1718,16 +1722,16 @@ static int BMK_seed(winnerInfo_t* winners, const paramValues_t params,
/* too large compression speed difference for the compression benefit */
if (W_ratio > O_ratio)
DISPLAY ("Compression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n",
W_ratio, (double)testResult.cSpeed / (1 MB),
O_ratio, (double)winners[cLevel].result.cSpeed / (1 MB), cLevel);
W_ratio, (double)testResult.cSpeed / MB_UNIT,
O_ratio, (double)winners[cLevel].result.cSpeed / MB_UNIT, cLevel);
continue;
}
if (W_DSpeed_note < O_DSpeed_note ) {
/* too large decompression speed difference for the compression benefit */
if (W_ratio > O_ratio)
DISPLAY ("Decompression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n",
W_ratio, (double)testResult.dSpeed / (1 MB),
O_ratio, (double)winners[cLevel].result.dSpeed / (1 MB), cLevel);
W_ratio, (double)testResult.dSpeed / MB_UNIT,
O_ratio, (double)winners[cLevel].result.dSpeed / MB_UNIT, cLevel);
continue;
}
@ -1817,7 +1821,7 @@ static void BMK_benchFullTable(const buffers_t buf, const contexts_t ctx)
if (f==NULL) { DISPLAY("error opening %s \n", rfName); exit(1); }
if (g_target) {
BMK_init_level_constraints(g_target * (1 MB));
BMK_init_level_constraints(g_target * MB_UNIT);
} else {
/* baseline config for level 1 */
paramValues_t const l1params = cParamsToPVals(ZSTD_getCParams(1, buf.maxBlockSize, ctx.dictSize));