From 3d230db85391aa5810abc6301035e9741c986765 Mon Sep 17 00:00:00 2001 From: George Lu Date: Mon, 6 Aug 2018 17:13:36 -0700 Subject: [PATCH] Change speed representation from floating point to integral --- programs/bench.c | 43 +++++++++++++++++++------------------------ programs/bench.h | 4 ++-- tests/paramgrill.c | 46 +++++++++++++++++----------------------------- 3 files changed, 38 insertions(+), 55 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index b496caf2..49b31787 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -555,9 +555,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( ratio = (double)(srcSize / intermediateResultCompress.result.result.sumOfReturn); { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - double const compressionSpeed = ((double)srcSize / intermediateResultCompress.result.result.nanoSecPerRun) * 1000; - int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; - results.result.cSpeed = compressionSpeed * 1000000; + results.result.cSpeed = (srcSize * TIMELOOP_NANOSEC / intermediateResultCompress.result.result.nanoSecPerRun); cSize = intermediateResultCompress.result.result.sumOfReturn; results.result.cSize = cSize; ratio = (double)srcSize / results.result.cSize; @@ -565,7 +563,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, ratioAccuracy, ratio, - cSpeedAccuracy, compressionSpeed); + results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB)); } } @@ -579,16 +577,13 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - double const compressionSpeed = results.result.cSpeed / 1000000; - int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; - double const decompressionSpeed = ((double)srcSize / intermediateResultDecompress.result.result.nanoSecPerRun) * 1000; - results.result.dSpeed = decompressionSpeed * 1000000; + results.result.dSpeed = (srcSize * TIMELOOP_NANOSEC/ intermediateResultDecompress.result.result.nanoSecPerRun); markNb = (markNb+1) % NB_MARKS; DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, ratioAccuracy, ratio, - cSpeedAccuracy, compressionSpeed, - decompressionSpeed); + results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB), + (double)results.result.dSpeed / (1 MB)); } } } @@ -605,19 +600,20 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( if(compressionResults.result.nanoSecPerRun == 0) { results.result.cSpeed = 0; } else { - results.result.cSpeed = (double)srcSize / compressionResults.result.nanoSecPerRun * TIMELOOP_NANOSEC; + results.result.cSpeed = srcSize * TIMELOOP_NANOSEC / compressionResults.result.nanoSecPerRun; } results.result.cSize = compressionResults.result.sumOfReturn; { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - double const compressionSpeed = results.result.cSpeed / 1000000; - int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; + results.result.cSpeed = (srcSize * TIMELOOP_NANOSEC / compressionResults.result.nanoSecPerRun); + cSize = compressionResults.result.sumOfReturn; + results.result.cSize = cSize; ratio = (double)srcSize / results.result.cSize; markNb = (markNb+1) % NB_MARKS; DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r", marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, ratioAccuracy, ratio, - cSpeedAccuracy, compressionSpeed); + results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB)); } } if(adv->mode != BMK_compressOnly) { @@ -633,19 +629,18 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( if(decompressionResults.result.nanoSecPerRun == 0) { results.result.dSpeed = 0; } else { - results.result.dSpeed = (double)srcSize / decompressionResults.result.nanoSecPerRun * TIMELOOP_NANOSEC; + results.result.dSpeed = srcSize * TIMELOOP_NANOSEC / decompressionResults.result.nanoSecPerRun; } - { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; - double const compressionSpeed = results.result.cSpeed / 1000000; - int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1; - double const decompressionSpeed = ((double)srcSize / decompressionResults.result.nanoSecPerRun) * 1000; - results.result.dSpeed = decompressionSpeed * 1000000; + + { + int const ratioAccuracy = (ratio < 10.) ? 3 : 2; + results.result.dSpeed = (srcSize * TIMELOOP_NANOSEC/ decompressionResults.result.nanoSecPerRun); markNb = (markNb+1) % NB_MARKS; DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r", marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize, ratioAccuracy, ratio, - cSpeedAccuracy, compressionSpeed, - decompressionSpeed); + results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB), + (double)results.result.dSpeed / (1 MB)); } } } @@ -693,8 +688,8 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc( } /* CRC Checking */ if (displayLevel == 1) { /* hidden display mode -q, used by python speed benchmark */ - double const cSpeed = results.result.cSpeed / 1000000; - double const dSpeed = results.result.dSpeed / 1000000; + double const cSpeed = (double)results.result.cSpeed / (1 MB); + double const dSpeed = (double)results.result.dSpeed / (1 MB); if (adv->additionalParam) { DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, adv->additionalParam); } else { diff --git a/programs/bench.h b/programs/bench.h index 8baf33a0..6247fa59 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -32,8 +32,8 @@ extern "C" { typedef struct { size_t cSize; - double cSpeed; /* bytes / sec */ - double dSpeed; + U64 cSpeed; /* bytes / sec */ + U64 dSpeed; size_t cMem; } BMK_result_t; diff --git a/tests/paramgrill.c b/tests/paramgrill.c index b8396c72..5af3d88f 100644 --- a/tests/paramgrill.c +++ b/tests/paramgrill.c @@ -249,18 +249,6 @@ static int feasible(const BMK_result_t results, const constraint_t target) { return (results.cSpeed >= target.cSpeed) && (results.dSpeed >= target.dSpeed) && (results.cMem <= target.cMem); } -#define EPSILON 0.001 -static int epsilonEqual(const double c1, const double c2) { - return MAX(c1/c2,c2/c1) < 1 + EPSILON; -} - -/* checks exact equivalence to 0, to stop compiler complaining fpeq */ -static int eqZero(const double c1) { - const double z1 = 0.0; - const double z2 = -0.0; - return !(memcmp(&c1, &z1, sizeof(double))) || !(memcmp(&c1, &z2, sizeof(double))); -} - /* hill climbing value for part 1 */ static double resultScore(const BMK_result_t res, const size_t srcSize, const constraint_t target) { double cs = 0., ds = 0., rt, cm = 0.; @@ -280,7 +268,7 @@ static double resultScore(const BMK_result_t res, const size_t srcSize, const co static int compareResultLT(const BMK_result_t result1, const BMK_result_t result2, const constraint_t target, size_t srcSize) { if(feasible(result1, target) && feasible(result2, target)) { return (result1.cSize > result2.cSize) || (result1.cSize == result2.cSize && result2.cSpeed > result1.cSpeed) - || (result1.cSize == result2.cSize && epsilonEqual(result2.cSpeed, result1.cSpeed) && result2.dSpeed > result1.dSpeed); + || (result1.cSize == result2.cSize && result2.cSpeed == result1.cSpeed && result2.dSpeed > result1.dSpeed); } return feasible(result2, target) || (!feasible(result1, target) && (resultScore(result1, srcSize, target) < resultScore(result2, srcSize, target))); @@ -661,7 +649,7 @@ static void BMK_printWinner(FILE* f, const U32 cLevel, const BMK_result_t result fprintf(f, "/* %s */ /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */", - lvlstr, (double)srcSize / result.cSize, result.cSpeed / (1 << 20), result.dSpeed / (1 << 20)); + lvlstr, (double)srcSize / result.cSize, (double)result.cSpeed / (1 << 20), (double)result.dSpeed / (1 << 20)); if(TIMED) { fprintf(f, " - %1lu:%2lu:%05.2f", (unsigned long) minutes / 60,(unsigned long) minutes % 60, (double)(time - minutes * TIMELOOP_NANOSEC * 60ULL)/TIMELOOP_NANOSEC); } fprintf(f, "\n"); @@ -696,8 +684,8 @@ static void BMK_printWinners(FILE* f, const winnerInfo_t* winners, size_t srcSiz typedef struct { - double cSpeed_min; - double dSpeed_min; + U64 cSpeed_min; + U64 dSpeed_min; U32 windowLog_max; ZSTD_strategy strategy_max; } level_constraints_t; @@ -794,16 +782,16 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_compressionParameters para /* too large compression speed difference for the compression benefit */ if (W_ratio > O_ratio) DISPLAY ("Compression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n", - W_ratio, testResult.cSpeed / 1000000, - O_ratio, winners[cLevel].result.cSpeed / 1000000., cLevel); + W_ratio, (double)testResult.cSpeed / 1000000, + O_ratio, (double)winners[cLevel].result.cSpeed / 1000000., cLevel); continue; } if (W_DSpeed_note < O_DSpeed_note ) { /* too large decompression speed difference for the compression benefit */ if (W_ratio > O_ratio) DISPLAY ("Decompression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n", - W_ratio, testResult.dSpeed / 1000000., - O_ratio, winners[cLevel].result.dSpeed / 1000000., cLevel); + W_ratio, (double)testResult.dSpeed / 1000000., + O_ratio, (double)winners[cLevel].result.dSpeed / 1000000., cLevel); continue; } @@ -1173,7 +1161,7 @@ static void BMK_benchOnce(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx, const void* srcBuffe g_params = ZSTD_adjustCParams(g_params, srcSize, 0); BMK_benchParam1(&testResult, srcBuffer, srcSize, cctx, dctx, g_params); DISPLAY("Compression Ratio: %.3f Compress Speed: %.1f MB/s Decompress Speed: %.1f MB/s\n", (double)srcSize / testResult.cSize, - testResult.cSpeed / 1000000, testResult.dSpeed / 1000000); + (double)testResult.cSpeed / 1000000, (double)testResult.dSpeed / 1000000); return; } @@ -1355,20 +1343,20 @@ static int allBench(BMK_result_t* resultPtr, *resultPtr = benchres.result; /* calculate uncertainty in compression / decompression runs */ - if(eqZero(benchres.result.cSpeed)) { + if(benchres.result.cSpeed) { + loopDurationC = ((buf.srcSize * TIMELOOP_NANOSEC) / benchres.result.cSpeed); + uncertaintyConstantC = ((loopDurationC + (double)(2 * g_clockGranularity))/loopDurationC) * VARIANCE; + } else { loopDurationC = 0; uncertaintyConstantC = 3; - } else { - loopDurationC = (U64)((double)(buf.srcSize * TIMELOOP_NANOSEC) / benchres.result.cSpeed); - uncertaintyConstantC = ((loopDurationC + (double)(2 * g_clockGranularity))/loopDurationC) * VARIANCE; } - if(eqZero(benchres.result.dSpeed)) { + if(benchres.result.dSpeed) { + loopDurationD = ((buf.srcSize * TIMELOOP_NANOSEC) / benchres.result.dSpeed); + uncertaintyConstantD = ((loopDurationD + (double)(2 * g_clockGranularity))/loopDurationD) * VARIANCE; + } else { loopDurationD = 0; uncertaintyConstantD = 3; - } else { - loopDurationD = (U64)((double)(buf.srcSize * TIMELOOP_NANOSEC) / benchres.result.dSpeed); - uncertaintyConstantD = ((loopDurationD + (double)(2 * g_clockGranularity))/loopDurationD) * VARIANCE; } /* anything with worse ratio in feas is definitely worse, discard */