Change speed representation from floating point to integral

This commit is contained in:
George Lu 2018-08-06 17:13:36 -07:00
parent 8faeb41679
commit 3d230db853
3 changed files with 38 additions and 55 deletions

View File

@ -555,9 +555,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
ratio = (double)(srcSize / intermediateResultCompress.result.result.sumOfReturn);
{
int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
double const compressionSpeed = ((double)srcSize / intermediateResultCompress.result.result.nanoSecPerRun) * 1000;
int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
results.result.cSpeed = compressionSpeed * 1000000;
results.result.cSpeed = (srcSize * TIMELOOP_NANOSEC / intermediateResultCompress.result.result.nanoSecPerRun);
cSize = intermediateResultCompress.result.result.sumOfReturn;
results.result.cSize = cSize;
ratio = (double)srcSize / results.result.cSize;
@ -565,7 +563,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r",
marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize,
ratioAccuracy, ratio,
cSpeedAccuracy, compressionSpeed);
results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB));
}
}
@ -579,16 +577,13 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
{
int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
double const compressionSpeed = results.result.cSpeed / 1000000;
int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
double const decompressionSpeed = ((double)srcSize / intermediateResultDecompress.result.result.nanoSecPerRun) * 1000;
results.result.dSpeed = decompressionSpeed * 1000000;
results.result.dSpeed = (srcSize * TIMELOOP_NANOSEC/ intermediateResultDecompress.result.result.nanoSecPerRun);
markNb = (markNb+1) % NB_MARKS;
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize,
ratioAccuracy, ratio,
cSpeedAccuracy, compressionSpeed,
decompressionSpeed);
results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB),
(double)results.result.dSpeed / (1 MB));
}
}
}
@ -605,19 +600,20 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
if(compressionResults.result.nanoSecPerRun == 0) {
results.result.cSpeed = 0;
} else {
results.result.cSpeed = (double)srcSize / compressionResults.result.nanoSecPerRun * TIMELOOP_NANOSEC;
results.result.cSpeed = srcSize * TIMELOOP_NANOSEC / compressionResults.result.nanoSecPerRun;
}
results.result.cSize = compressionResults.result.sumOfReturn;
{
int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
double const compressionSpeed = results.result.cSpeed / 1000000;
int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
results.result.cSpeed = (srcSize * TIMELOOP_NANOSEC / compressionResults.result.nanoSecPerRun);
cSize = compressionResults.result.sumOfReturn;
results.result.cSize = cSize;
ratio = (double)srcSize / results.result.cSize;
markNb = (markNb+1) % NB_MARKS;
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r",
marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize,
ratioAccuracy, ratio,
cSpeedAccuracy, compressionSpeed);
results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB));
}
}
if(adv->mode != BMK_compressOnly) {
@ -633,19 +629,18 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
if(decompressionResults.result.nanoSecPerRun == 0) {
results.result.dSpeed = 0;
} else {
results.result.dSpeed = (double)srcSize / decompressionResults.result.nanoSecPerRun * TIMELOOP_NANOSEC;
results.result.dSpeed = srcSize * TIMELOOP_NANOSEC / decompressionResults.result.nanoSecPerRun;
}
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
double const compressionSpeed = results.result.cSpeed / 1000000;
int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
double const decompressionSpeed = ((double)srcSize / decompressionResults.result.nanoSecPerRun) * 1000;
results.result.dSpeed = decompressionSpeed * 1000000;
{
int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
results.result.dSpeed = (srcSize * TIMELOOP_NANOSEC/ decompressionResults.result.nanoSecPerRun);
markNb = (markNb+1) % NB_MARKS;
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize,
ratioAccuracy, ratio,
cSpeedAccuracy, compressionSpeed,
decompressionSpeed);
results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB),
(double)results.result.dSpeed / (1 MB));
}
}
}
@ -693,8 +688,8 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
} /* CRC Checking */
if (displayLevel == 1) { /* hidden display mode -q, used by python speed benchmark */
double const cSpeed = results.result.cSpeed / 1000000;
double const dSpeed = results.result.dSpeed / 1000000;
double const cSpeed = (double)results.result.cSpeed / (1 MB);
double const dSpeed = (double)results.result.dSpeed / (1 MB);
if (adv->additionalParam) {
DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, adv->additionalParam);
} else {

View File

@ -32,8 +32,8 @@ extern "C" {
typedef struct {
size_t cSize;
double cSpeed; /* bytes / sec */
double dSpeed;
U64 cSpeed; /* bytes / sec */
U64 dSpeed;
size_t cMem;
} BMK_result_t;

View File

@ -249,18 +249,6 @@ static int feasible(const BMK_result_t results, const constraint_t target) {
return (results.cSpeed >= target.cSpeed) && (results.dSpeed >= target.dSpeed) && (results.cMem <= target.cMem);
}
#define EPSILON 0.001
/* epsilonEqual() :
 * Relative comparison of two values.
 * @return : 1 when c1 and c2 are equal, or when the larger of the two
 *           quotients c1/c2 and c2/c1 is strictly below (1 + EPSILON);
 *           0 otherwise.
 * Two exactly equal inputs (notably 0 and 0) are reported as equal:
 * the quotient test alone would evaluate 0/0, which is NaN under IEEE-754
 * arithmetic and therefore never compares below the threshold. */
static int epsilonEqual(const double c1, const double c2) {
    /* equality expressed through ordered comparisons : false whenever an
     * input is NaN, and does not rely on the == operator for doubles */
    if (c1 <= c2 && c2 <= c1) return 1;
    {   double const forward  = c1 / c2;
        double const backward = c2 / c1;
        double const largest  = (forward > backward) ? forward : backward;
        return largest < 1 + EPSILON;
    }
}
/* eqZero() :
 * @return : 1 when the object representation of c1 matches that of
 *           positive zero or negative zero, 0 otherwise.
 * The comparison is done on raw bytes with memcmp() instead of the ==
 * operator, so that compilers warning on floating-point equality stay quiet. */
static int eqZero(const double c1) {
    const double positiveZero = 0.0;
    const double negativeZero = -0.0;
    if (memcmp(&c1, &positiveZero, sizeof(c1)) == 0) return 1;
    return memcmp(&c1, &negativeZero, sizeof(c1)) == 0;
}
/* hill climbing value for part 1 */
static double resultScore(const BMK_result_t res, const size_t srcSize, const constraint_t target) {
double cs = 0., ds = 0., rt, cm = 0.;
@ -280,7 +268,7 @@ static double resultScore(const BMK_result_t res, const size_t srcSize, const co
static int compareResultLT(const BMK_result_t result1, const BMK_result_t result2, const constraint_t target, size_t srcSize) {
if(feasible(result1, target) && feasible(result2, target)) {
return (result1.cSize > result2.cSize) || (result1.cSize == result2.cSize && result2.cSpeed > result1.cSpeed)
|| (result1.cSize == result2.cSize && epsilonEqual(result2.cSpeed, result1.cSpeed) && result2.dSpeed > result1.dSpeed);
|| (result1.cSize == result2.cSize && result2.cSpeed == result1.cSpeed && result2.dSpeed > result1.dSpeed);
}
return feasible(result2, target) || (!feasible(result1, target) && (resultScore(result1, srcSize, target) < resultScore(result2, srcSize, target)));
@ -661,7 +649,7 @@ static void BMK_printWinner(FILE* f, const U32 cLevel, const BMK_result_t result
fprintf(f,
"/* %s */ /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */",
lvlstr, (double)srcSize / result.cSize, result.cSpeed / (1 << 20), result.dSpeed / (1 << 20));
lvlstr, (double)srcSize / result.cSize, (double)result.cSpeed / (1 << 20), (double)result.dSpeed / (1 << 20));
if(TIMED) { fprintf(f, " - %1lu:%2lu:%05.2f", (unsigned long) minutes / 60,(unsigned long) minutes % 60, (double)(time - minutes * TIMELOOP_NANOSEC * 60ULL)/TIMELOOP_NANOSEC); }
fprintf(f, "\n");
@ -696,8 +684,8 @@ static void BMK_printWinners(FILE* f, const winnerInfo_t* winners, size_t srcSiz
typedef struct {
double cSpeed_min;
double dSpeed_min;
U64 cSpeed_min;
U64 dSpeed_min;
U32 windowLog_max;
ZSTD_strategy strategy_max;
} level_constraints_t;
@ -794,16 +782,16 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_compressionParameters para
/* too large compression speed difference for the compression benefit */
if (W_ratio > O_ratio)
DISPLAY ("Compression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n",
W_ratio, testResult.cSpeed / 1000000,
O_ratio, winners[cLevel].result.cSpeed / 1000000., cLevel);
W_ratio, (double)testResult.cSpeed / 1000000,
O_ratio, (double)winners[cLevel].result.cSpeed / 1000000., cLevel);
continue;
}
if (W_DSpeed_note < O_DSpeed_note ) {
/* too large decompression speed difference for the compression benefit */
if (W_ratio > O_ratio)
DISPLAY ("Decompression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n",
W_ratio, testResult.dSpeed / 1000000.,
O_ratio, winners[cLevel].result.dSpeed / 1000000., cLevel);
W_ratio, (double)testResult.dSpeed / 1000000.,
O_ratio, (double)winners[cLevel].result.dSpeed / 1000000., cLevel);
continue;
}
@ -1173,7 +1161,7 @@ static void BMK_benchOnce(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx, const void* srcBuffe
g_params = ZSTD_adjustCParams(g_params, srcSize, 0);
BMK_benchParam1(&testResult, srcBuffer, srcSize, cctx, dctx, g_params);
DISPLAY("Compression Ratio: %.3f Compress Speed: %.1f MB/s Decompress Speed: %.1f MB/s\n", (double)srcSize / testResult.cSize,
testResult.cSpeed / 1000000, testResult.dSpeed / 1000000);
(double)testResult.cSpeed / 1000000, (double)testResult.dSpeed / 1000000);
return;
}
@ -1355,20 +1343,20 @@ static int allBench(BMK_result_t* resultPtr,
*resultPtr = benchres.result;
/* calculate uncertainty in compression / decompression runs */
if(eqZero(benchres.result.cSpeed)) {
if(benchres.result.cSpeed) {
loopDurationC = ((buf.srcSize * TIMELOOP_NANOSEC) / benchres.result.cSpeed);
uncertaintyConstantC = ((loopDurationC + (double)(2 * g_clockGranularity))/loopDurationC) * VARIANCE;
} else {
loopDurationC = 0;
uncertaintyConstantC = 3;
} else {
loopDurationC = (U64)((double)(buf.srcSize * TIMELOOP_NANOSEC) / benchres.result.cSpeed);
uncertaintyConstantC = ((loopDurationC + (double)(2 * g_clockGranularity))/loopDurationC) * VARIANCE;
}
if(eqZero(benchres.result.dSpeed)) {
if(benchres.result.dSpeed) {
loopDurationD = ((buf.srcSize * TIMELOOP_NANOSEC) / benchres.result.dSpeed);
uncertaintyConstantD = ((loopDurationD + (double)(2 * g_clockGranularity))/loopDurationD) * VARIANCE;
} else {
loopDurationD = 0;
uncertaintyConstantD = 3;
} else {
loopDurationD = (U64)((double)(buf.srcSize * TIMELOOP_NANOSEC) / benchres.result.dSpeed);
uncertaintyConstantD = ((loopDurationD + (double)(2 * g_clockGranularity))/loopDurationD) * VARIANCE;
}
/* anything with worse ratio in feas is definitely worse, discard */