Change speed representation from floating point to integral
This commit is contained in:
parent
8faeb41679
commit
3d230db853
@ -555,9 +555,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
|
||||
ratio = (double)(srcSize / intermediateResultCompress.result.result.sumOfReturn);
|
||||
{
|
||||
int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
||||
double const compressionSpeed = ((double)srcSize / intermediateResultCompress.result.result.nanoSecPerRun) * 1000;
|
||||
int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
|
||||
results.result.cSpeed = compressionSpeed * 1000000;
|
||||
results.result.cSpeed = (srcSize * TIMELOOP_NANOSEC / intermediateResultCompress.result.result.nanoSecPerRun);
|
||||
cSize = intermediateResultCompress.result.result.sumOfReturn;
|
||||
results.result.cSize = cSize;
|
||||
ratio = (double)srcSize / results.result.cSize;
|
||||
@ -565,7 +563,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
|
||||
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r",
|
||||
marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize,
|
||||
ratioAccuracy, ratio,
|
||||
cSpeedAccuracy, compressionSpeed);
|
||||
results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB));
|
||||
}
|
||||
}
|
||||
|
||||
@ -579,16 +577,13 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
|
||||
|
||||
{
|
||||
int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
||||
double const compressionSpeed = results.result.cSpeed / 1000000;
|
||||
int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
|
||||
double const decompressionSpeed = ((double)srcSize / intermediateResultDecompress.result.result.nanoSecPerRun) * 1000;
|
||||
results.result.dSpeed = decompressionSpeed * 1000000;
|
||||
results.result.dSpeed = (srcSize * TIMELOOP_NANOSEC/ intermediateResultDecompress.result.result.nanoSecPerRun);
|
||||
markNb = (markNb+1) % NB_MARKS;
|
||||
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
|
||||
marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize,
|
||||
ratioAccuracy, ratio,
|
||||
cSpeedAccuracy, compressionSpeed,
|
||||
decompressionSpeed);
|
||||
results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB),
|
||||
(double)results.result.dSpeed / (1 MB));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -605,19 +600,20 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
|
||||
if(compressionResults.result.nanoSecPerRun == 0) {
|
||||
results.result.cSpeed = 0;
|
||||
} else {
|
||||
results.result.cSpeed = (double)srcSize / compressionResults.result.nanoSecPerRun * TIMELOOP_NANOSEC;
|
||||
results.result.cSpeed = srcSize * TIMELOOP_NANOSEC / compressionResults.result.nanoSecPerRun;
|
||||
}
|
||||
results.result.cSize = compressionResults.result.sumOfReturn;
|
||||
{
|
||||
int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
||||
double const compressionSpeed = results.result.cSpeed / 1000000;
|
||||
int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
|
||||
results.result.cSpeed = (srcSize * TIMELOOP_NANOSEC / compressionResults.result.nanoSecPerRun);
|
||||
cSize = compressionResults.result.sumOfReturn;
|
||||
results.result.cSize = cSize;
|
||||
ratio = (double)srcSize / results.result.cSize;
|
||||
markNb = (markNb+1) % NB_MARKS;
|
||||
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r",
|
||||
marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize,
|
||||
ratioAccuracy, ratio,
|
||||
cSpeedAccuracy, compressionSpeed);
|
||||
results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB));
|
||||
}
|
||||
}
|
||||
if(adv->mode != BMK_compressOnly) {
|
||||
@ -633,19 +629,18 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
|
||||
if(decompressionResults.result.nanoSecPerRun == 0) {
|
||||
results.result.dSpeed = 0;
|
||||
} else {
|
||||
results.result.dSpeed = (double)srcSize / decompressionResults.result.nanoSecPerRun * TIMELOOP_NANOSEC;
|
||||
results.result.dSpeed = srcSize * TIMELOOP_NANOSEC / decompressionResults.result.nanoSecPerRun;
|
||||
}
|
||||
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
||||
double const compressionSpeed = results.result.cSpeed / 1000000;
|
||||
int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
|
||||
double const decompressionSpeed = ((double)srcSize / decompressionResults.result.nanoSecPerRun) * 1000;
|
||||
results.result.dSpeed = decompressionSpeed * 1000000;
|
||||
|
||||
{
|
||||
int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
||||
results.result.dSpeed = (srcSize * TIMELOOP_NANOSEC/ decompressionResults.result.nanoSecPerRun);
|
||||
markNb = (markNb+1) % NB_MARKS;
|
||||
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
|
||||
marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize,
|
||||
ratioAccuracy, ratio,
|
||||
cSpeedAccuracy, compressionSpeed,
|
||||
decompressionSpeed);
|
||||
results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB),
|
||||
(double)results.result.dSpeed / (1 MB));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -693,8 +688,8 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
|
||||
} /* CRC Checking */
|
||||
|
||||
if (displayLevel == 1) { /* hidden display mode -q, used by python speed benchmark */
|
||||
double const cSpeed = results.result.cSpeed / 1000000;
|
||||
double const dSpeed = results.result.dSpeed / 1000000;
|
||||
double const cSpeed = (double)results.result.cSpeed / (1 MB);
|
||||
double const dSpeed = (double)results.result.dSpeed / (1 MB);
|
||||
if (adv->additionalParam) {
|
||||
DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, adv->additionalParam);
|
||||
} else {
|
||||
|
@ -32,8 +32,8 @@ extern "C" {
|
||||
|
||||
typedef struct {
|
||||
size_t cSize;
|
||||
double cSpeed; /* bytes / sec */
|
||||
double dSpeed;
|
||||
U64 cSpeed; /* bytes / sec */
|
||||
U64 dSpeed;
|
||||
size_t cMem;
|
||||
} BMK_result_t;
|
||||
|
||||
|
@ -249,18 +249,6 @@ static int feasible(const BMK_result_t results, const constraint_t target) {
|
||||
return (results.cSpeed >= target.cSpeed) && (results.dSpeed >= target.dSpeed) && (results.cMem <= target.cMem);
|
||||
}
|
||||
|
||||
#define EPSILON 0.001
|
||||
static int epsilonEqual(const double c1, const double c2) {
|
||||
return MAX(c1/c2,c2/c1) < 1 + EPSILON;
|
||||
}
|
||||
|
||||
/* checks exact equivalence to 0, to stop compiler complaining fpeq */
|
||||
static int eqZero(const double c1) {
|
||||
const double z1 = 0.0;
|
||||
const double z2 = -0.0;
|
||||
return !(memcmp(&c1, &z1, sizeof(double))) || !(memcmp(&c1, &z2, sizeof(double)));
|
||||
}
|
||||
|
||||
/* hill climbing value for part 1 */
|
||||
static double resultScore(const BMK_result_t res, const size_t srcSize, const constraint_t target) {
|
||||
double cs = 0., ds = 0., rt, cm = 0.;
|
||||
@ -280,7 +268,7 @@ static double resultScore(const BMK_result_t res, const size_t srcSize, const co
|
||||
static int compareResultLT(const BMK_result_t result1, const BMK_result_t result2, const constraint_t target, size_t srcSize) {
|
||||
if(feasible(result1, target) && feasible(result2, target)) {
|
||||
return (result1.cSize > result2.cSize) || (result1.cSize == result2.cSize && result2.cSpeed > result1.cSpeed)
|
||||
|| (result1.cSize == result2.cSize && epsilonEqual(result2.cSpeed, result1.cSpeed) && result2.dSpeed > result1.dSpeed);
|
||||
|| (result1.cSize == result2.cSize && result2.cSpeed == result1.cSpeed && result2.dSpeed > result1.dSpeed);
|
||||
}
|
||||
return feasible(result2, target) || (!feasible(result1, target) && (resultScore(result1, srcSize, target) < resultScore(result2, srcSize, target)));
|
||||
|
||||
@ -661,7 +649,7 @@ static void BMK_printWinner(FILE* f, const U32 cLevel, const BMK_result_t result
|
||||
|
||||
fprintf(f,
|
||||
"/* %s */ /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */",
|
||||
lvlstr, (double)srcSize / result.cSize, result.cSpeed / (1 << 20), result.dSpeed / (1 << 20));
|
||||
lvlstr, (double)srcSize / result.cSize, (double)result.cSpeed / (1 << 20), (double)result.dSpeed / (1 << 20));
|
||||
|
||||
if(TIMED) { fprintf(f, " - %1lu:%2lu:%05.2f", (unsigned long) minutes / 60,(unsigned long) minutes % 60, (double)(time - minutes * TIMELOOP_NANOSEC * 60ULL)/TIMELOOP_NANOSEC); }
|
||||
fprintf(f, "\n");
|
||||
@ -696,8 +684,8 @@ static void BMK_printWinners(FILE* f, const winnerInfo_t* winners, size_t srcSiz
|
||||
|
||||
|
||||
typedef struct {
|
||||
double cSpeed_min;
|
||||
double dSpeed_min;
|
||||
U64 cSpeed_min;
|
||||
U64 dSpeed_min;
|
||||
U32 windowLog_max;
|
||||
ZSTD_strategy strategy_max;
|
||||
} level_constraints_t;
|
||||
@ -794,16 +782,16 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_compressionParameters para
|
||||
/* too large compression speed difference for the compression benefit */
|
||||
if (W_ratio > O_ratio)
|
||||
DISPLAY ("Compression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n",
|
||||
W_ratio, testResult.cSpeed / 1000000,
|
||||
O_ratio, winners[cLevel].result.cSpeed / 1000000., cLevel);
|
||||
W_ratio, (double)testResult.cSpeed / 1000000,
|
||||
O_ratio, (double)winners[cLevel].result.cSpeed / 1000000., cLevel);
|
||||
continue;
|
||||
}
|
||||
if (W_DSpeed_note < O_DSpeed_note ) {
|
||||
/* too large decompression speed difference for the compression benefit */
|
||||
if (W_ratio > O_ratio)
|
||||
DISPLAY ("Decompression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n",
|
||||
W_ratio, testResult.dSpeed / 1000000.,
|
||||
O_ratio, winners[cLevel].result.dSpeed / 1000000., cLevel);
|
||||
W_ratio, (double)testResult.dSpeed / 1000000.,
|
||||
O_ratio, (double)winners[cLevel].result.dSpeed / 1000000., cLevel);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1173,7 +1161,7 @@ static void BMK_benchOnce(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx, const void* srcBuffe
|
||||
g_params = ZSTD_adjustCParams(g_params, srcSize, 0);
|
||||
BMK_benchParam1(&testResult, srcBuffer, srcSize, cctx, dctx, g_params);
|
||||
DISPLAY("Compression Ratio: %.3f Compress Speed: %.1f MB/s Decompress Speed: %.1f MB/s\n", (double)srcSize / testResult.cSize,
|
||||
testResult.cSpeed / 1000000, testResult.dSpeed / 1000000);
|
||||
(double)testResult.cSpeed / 1000000, (double)testResult.dSpeed / 1000000);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1355,20 +1343,20 @@ static int allBench(BMK_result_t* resultPtr,
|
||||
*resultPtr = benchres.result;
|
||||
|
||||
/* calculate uncertainty in compression / decompression runs */
|
||||
if(eqZero(benchres.result.cSpeed)) {
|
||||
if(benchres.result.cSpeed) {
|
||||
loopDurationC = ((buf.srcSize * TIMELOOP_NANOSEC) / benchres.result.cSpeed);
|
||||
uncertaintyConstantC = ((loopDurationC + (double)(2 * g_clockGranularity))/loopDurationC) * VARIANCE;
|
||||
} else {
|
||||
loopDurationC = 0;
|
||||
uncertaintyConstantC = 3;
|
||||
} else {
|
||||
loopDurationC = (U64)((double)(buf.srcSize * TIMELOOP_NANOSEC) / benchres.result.cSpeed);
|
||||
uncertaintyConstantC = ((loopDurationC + (double)(2 * g_clockGranularity))/loopDurationC) * VARIANCE;
|
||||
}
|
||||
|
||||
if(eqZero(benchres.result.dSpeed)) {
|
||||
if(benchres.result.dSpeed) {
|
||||
loopDurationD = ((buf.srcSize * TIMELOOP_NANOSEC) / benchres.result.dSpeed);
|
||||
uncertaintyConstantD = ((loopDurationD + (double)(2 * g_clockGranularity))/loopDurationD) * VARIANCE;
|
||||
} else {
|
||||
loopDurationD = 0;
|
||||
uncertaintyConstantD = 3;
|
||||
} else {
|
||||
loopDurationD = (U64)((double)(buf.srcSize * TIMELOOP_NANOSEC) / benchres.result.dSpeed);
|
||||
uncertaintyConstantD = ((loopDurationD + (double)(2 * g_clockGranularity))/loopDurationD) * VARIANCE;
|
||||
}
|
||||
|
||||
/* anything with worse ratio in feas is definitely worse, discard */
|
||||
|
Loading…
Reference in New Issue
Block a user