59a7116cc2
benchfn used to rely on mem.h and util, which in turn relied on platform.h. Using benchfn outside of zstd required bringing in all these dependencies. Now, the dependency is reduced to timefn only. This required creating a separate timefn from util, and rewriting benchfn and timefn so that they no longer need mem.h. Separating timefn from util has a wide effect across the code base, as usage of time functions is widespread. A lot of build scripts had to be updated to also include timefn.
255 lines
9.5 KiB
C
255 lines
9.5 KiB
C
/*
|
|
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* This source code is licensed under both the BSD-style license (found in the
|
|
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
* in the COPYING file in the root directory of this source tree).
|
|
* You may select, at your option, one of the above-listed licenses.
|
|
*/
|
|
|
|
|
|
|
|
/* *************************************
|
|
* Includes
|
|
***************************************/
|
|
#include <stdlib.h> /* malloc, free */
|
|
#include <string.h> /* memset */
|
|
#undef NDEBUG /* assert must not be disabled */
|
|
#include <assert.h> /* assert */
|
|
|
|
#include "timefn.h" /* UTIL_time_t, UTIL_getTime */
|
|
#include "benchfn.h"
|
|
|
|
|
|
/* *************************************
|
|
* Constants
|
|
***************************************/
|
|
/* One second, expressed in the two units used for time budgets. */
#define TIMELOOP_MICROSEC   SEC_TO_MICRO          /* 1 second */
#define TIMELOOP_NANOSEC    (1 * 1000000000ULL)   /* 1 second */

/* Postfix size multipliers : `4 KB` expands to `4 *(1 <<10)`. */
#define KB   *(1  << 10)
#define MB   *(1  << 20)
#define GB   *(1U << 30)
|
|
|
|
|
|
/* *************************************
|
|
* Debug errors
|
|
***************************************/
|
|
/* DEBUGOUTPUT() prints to stderr only when DEBUG is defined and >= 1;
 * otherwise it expands to nothing and its arguments are not evaluated. */
#if defined(DEBUG) && (DEBUG >= 1)
#  include <stdio.h>   /* fprintf */
#  define DISPLAY(...)       fprintf(stderr, __VA_ARGS__)
   /* wrapped in do { } while (0) so that it behaves as a single statement */
#  define DEBUGOUTPUT(...)   do { if (DEBUG) DISPLAY(__VA_ARGS__); } while (0)
#else
#  define DEBUGOUTPUT(...)
#endif

/* RETURN_QUIET_ERROR() :
 * returns `retValue` from the enclosing function.
 * The error message (printf-style arguments) is only displayed in debug mode.
 * Wrapped in do { } while (0) : the invocation must be followed by `;`,
 * and can then safely be used as the body of an `if` that has an `else`. */
#define RETURN_QUIET_ERROR(retValue, ...) do {      \
    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);  \
    DEBUGOUTPUT("Error : ");                        \
    DEBUGOUTPUT(__VA_ARGS__);                       \
    DEBUGOUTPUT(" \n");                             \
    return retValue;                                \
} while (0)
|
|
|
|
|
|
/* *************************************
|
|
* Benchmarking an arbitrary function
|
|
***************************************/
|
|
|
|
/* BMK_isSuccessful_runOutcome() :
 * @return : 1 when `outcome` has its error tag cleared, 0 otherwise. */
int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
{
    int const hasError = (outcome.error_tag_never_ever_use_directly != 0);
    return !hasError;
}
|
|
|
|
/* warning : this function will stop program execution if outcome is invalid !
|
|
* check outcome validity first, using BMK_isValid_runResult() */
|
|
BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
|
|
{
|
|
assert(outcome.error_tag_never_ever_use_directly == 0);
|
|
return outcome.internal_never_ever_use_directly;
|
|
}
|
|
|
|
/* BMK_extract_errorResult() :
 * @return : the error value stored inside a failed `outcome`.
 * warning : stops program execution (assert) if `outcome` is not an error. */
size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
{
    size_t const errorResult = outcome.error_result_never_ever_use_directly;
    assert(outcome.error_tag_never_ever_use_directly != 0);
    return errorResult;
}
|
|
|
|
static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult)
|
|
{
|
|
BMK_runOutcome_t b;
|
|
memset(&b, 0, sizeof(b));
|
|
b.error_tag_never_ever_use_directly = 1;
|
|
b.error_result_never_ever_use_directly = errorResult;
|
|
return b;
|
|
}
|
|
|
|
/* BMK_setValid_runTime() :
 * builds a successful outcome carrying `runTime`.
 * The structure is zeroed first, like in BMK_runOutcome_error(),
 * so that no field (notably the error result) is returned uninitialized. */
static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
{
    BMK_runOutcome_t outcome;
    memset(&outcome, 0, sizeof(outcome));
    outcome.error_tag_never_ever_use_directly = 0;
    outcome.internal_never_ever_use_directly = runTime;
    return outcome;
}
|
|
|
|
|
|
/* initFn will be measured once, benchFn will be measured `nbLoops` times */
|
|
/* initFn is optional, provide NULL if none */
|
|
/* benchFn must return a size_t value that errorFn can interpret */
|
|
/* takes # of blocks and list of size & stuff for each. */
|
|
/* can report result of benchFn for each block into blockResult. */
|
|
/* blockResult is optional, provide NULL if this information is not required */
|
|
/* note : time per loop can be reported as zero if run time < timer resolution */
|
|
BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
|
|
unsigned nbLoops)
|
|
{
|
|
size_t dstSize = 0;
|
|
nbLoops += !nbLoops; /* minimum nbLoops is 1 */
|
|
|
|
/* init */
|
|
{ size_t i;
|
|
for(i = 0; i < p.blockCount; i++) {
|
|
memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */
|
|
} }
|
|
|
|
/* benchmark */
|
|
{ UTIL_time_t const clockStart = UTIL_getTime();
|
|
unsigned loopNb, blockNb;
|
|
if (p.initFn != NULL) p.initFn(p.initPayload);
|
|
for (loopNb = 0; loopNb < nbLoops; loopNb++) {
|
|
for (blockNb = 0; blockNb < p.blockCount; blockNb++) {
|
|
size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb],
|
|
p.dstBuffers[blockNb], p.dstCapacities[blockNb],
|
|
p.benchPayload);
|
|
if (loopNb == 0) {
|
|
if (p.blockResults != NULL) p.blockResults[blockNb] = res;
|
|
if ((p.errorFn != NULL) && (p.errorFn(res))) {
|
|
RETURN_QUIET_ERROR(BMK_runOutcome_error(res),
|
|
"Function benchmark failed on block %u (of size %u) with error %i",
|
|
blockNb, (unsigned)p.srcSizes[blockNb], (int)res);
|
|
}
|
|
dstSize += res;
|
|
} }
|
|
} /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
|
|
|
|
{ PTime const totalTime = UTIL_clockSpanNano(clockStart);
|
|
BMK_runTime_t rt;
|
|
rt.nanoSecPerRun = (double)totalTime / nbLoops;
|
|
rt.sumOfReturn = dstSize;
|
|
return BMK_setValid_runTime(rt);
|
|
} }
|
|
}
|
|
|
|
|
|
/* ==== Benchmarking any function, providing intermediate results ==== */
|
|
|
|
struct BMK_timedFnState_s {
|
|
PTime timeSpent_ns;
|
|
PTime timeBudget_ns;
|
|
PTime runBudget_ns;
|
|
BMK_runTime_t fastestRun;
|
|
unsigned nbLoops;
|
|
UTIL_time_t coolTime;
|
|
}; /* typedef'd to BMK_timedFnState_t within bench.h */
|
|
|
|
BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
|
|
{
|
|
BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
|
|
if (r == NULL) return NULL; /* malloc() error */
|
|
BMK_resetTimedFnState(r, total_ms, run_ms);
|
|
return r;
|
|
}
|
|
|
|
/* BMK_freeTimedFnState() :
 * releases `state` with free(); a NULL `state` is accepted. */
void BMK_freeTimedFnState(BMK_timedFnState_t* state)
{
    free(state);
}
|
|
|
|
BMK_timedFnState_t* BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)
|
|
{
|
|
enum { timedFnState_staticSize_isLargeEnough=(1/(sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s))) }; /* static assert */
|
|
typedef struct { char c; long long ll; } ll_align; /* this will force ll to be aligned at its next best position */
|
|
size_t const ll_alignment = offsetof(ll_align, ll); /* provides the minimal alignment restriction for long long */
|
|
BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer;
|
|
if (size < sizeof(struct BMK_timedFnState_s)) return NULL;
|
|
if ((size_t)buffer % ll_alignment) return NULL; /* must be aligned to satisfy `long long` alignment requirement */
|
|
BMK_resetTimedFnState(r, total_ms, run_ms);
|
|
return r;
|
|
}
|
|
|
|
/* BMK_resetTimedFnState() :
 * (re)initializes `timedFnState` for a new measurement session.
 * @total_ms : total time budget, in milliseconds; 0 is replaced by 1.
 * @run_ms   : target duration of each run, in milliseconds; 0 is replaced by 1,
 *             and the value is capped to the total budget. */
void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
{
    unsigned const totalBudget_ms = total_ms ? total_ms : 1;
    unsigned runBudget_ms = run_ms ? run_ms : 1;
    if (runBudget_ms > totalBudget_ms) runBudget_ms = totalBudget_ms;

    /* budgets are stored in nanoseconds */
    timedFnState->timeBudget_ns = (PTime)totalBudget_ms * TIMELOOP_NANOSEC / 1000;
    timedFnState->runBudget_ns = (PTime)runBudget_ms * TIMELOOP_NANOSEC / 1000;
    timedFnState->timeSpent_ns = 0;

    /* hopefully large enough : must be larger than any potential measurement */
    timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000;
    timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);

    timedFnState->nbLoops = 1;
    timedFnState->coolTime = UTIL_getTime();
}
|
|
|
|
/* Tells if nb of seconds set in timedFnState for all runs is spent.
|
|
* note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
|
|
int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
|
|
{
|
|
return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
|
|
}
|
|
|
|
|
|
/* local MIN : any previous definition is dropped first.
 * warning : arguments may be evaluated twice, do not pass expressions with side effects */
#undef MIN
#define MIN(a,b)   ( ((a) < (b)) ? (a) : (b) )

#define MINUSABLETIME   (TIMELOOP_NANOSEC / 2)   /* 0.5 seconds */
|
|
|
|
BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
|
|
BMK_benchParams_t p)
|
|
{
|
|
PTime const runBudget_ns = cont->runBudget_ns;
|
|
PTime const runTimeMin_ns = runBudget_ns / 2;
|
|
int completed = 0;
|
|
BMK_runTime_t bestRunTime = cont->fastestRun;
|
|
|
|
while (!completed) {
|
|
BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops);
|
|
|
|
if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
|
|
return runResult;
|
|
}
|
|
|
|
{ BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
|
|
double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
|
|
|
|
cont->timeSpent_ns += (unsigned long long)loopDuration_ns;
|
|
|
|
/* estimate nbLoops for next run to last approximately 1 second */
|
|
if (loopDuration_ns > (runBudget_ns / 50)) {
|
|
double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
|
|
cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1;
|
|
} else {
|
|
/* previous run was too short : blindly increase workload by x multiplier */
|
|
const unsigned multiplier = 10;
|
|
assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */
|
|
cont->nbLoops *= multiplier;
|
|
}
|
|
|
|
if(loopDuration_ns < runTimeMin_ns) {
|
|
/* don't report results for which benchmark run time was too small : increased risks of rounding errors */
|
|
assert(completed == 0);
|
|
continue;
|
|
} else {
|
|
if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
|
|
bestRunTime = newRunTime;
|
|
}
|
|
completed = 1;
|
|
}
|
|
}
|
|
} /* while (!completed) */
|
|
|
|
return BMK_setValid_runTime(bestRunTime);
|
|
}
|