mirror of https://sourceware.org/git/glibc.git
synced 2024-11-24 22:10:13 +00:00
Add math benchmark latency test
This patch further improves math function benchmarking by adding a latency
test in addition to throughput.  This enables more accurate comparisons of
the math functions.  The latency test works by creating a dependency on the
previous iteration: func_res = F (func_res * zero + input[i]).  The multiply
by zero avoids changing the input.

It reports reciprocal throughput and latency in nanoseconds (depending on
the timing header used) and max/min throughput in iterations per second:

  "workload-spec2006.wrf": {
   "reciprocal-throughput": 100,
   "latency": 200,
   "max-throughput": 1.0e+07,
   "min-throughput": 5.0e+06
  }

	* benchtests/bench-skeleton.c (main): Add support for
	latency benchmarking.
	* benchtests/scripts/bench.py: Add support for latency benchmarking.
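For illustration, a minimal standalone sketch of the two measurement modes
described above -- not the benchtests harness itself: sin, the sample count,
and the clock_gettime timer are stand-ins chosen here, while ret, zero and
func_res mirror the roles the generated code gives them (build with -lm):

#include <math.h>
#include <stdio.h>
#include <time.h>

#define N 100000
static double input[N];
static double volatile ret;
static double zero __attribute__ ((used)) = 0.0;

static double
now_ns (void)
{
  struct timespec ts;
  clock_gettime (CLOCK_MONOTONIC, &ts);
  return ts.tv_sec * 1e9 + ts.tv_nsec;
}

int
main (void)
{
  double func_res = 0.0;
  for (int i = 0; i < N; i++)
    input[i] = 1.0 + i * 1e-6;

  /* Throughput: iterations are independent, so the CPU can keep
     several calls in flight at once.  */
  double t0 = now_ns ();
  for (int i = 0; i < N; i++)
    ret = sin (input[i]);
  double tput = (now_ns () - t0) / N;

  /* Latency: each call depends on the previous result.  Multiplying
     by zero keeps the numerical input unchanged while preserving the
     dependency chain.  */
  t0 = now_ns ();
  for (int i = 0; i < N; i++)
    ret = func_res = sin (func_res * zero + input[i]);
  double lat = (now_ns () - t0) / N;

  printf ("reciprocal-throughput: %.2f ns, latency: %.2f ns\n", tput, lat);
  return 0;
}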
This commit is contained in:
parent 34d6a3cbf2
commit d4505b895f
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2017-08-17  Wilco Dijkstra  <wdijkstr@arm.com>
+
+	* benchtests/bench-skeleton.c (main): Add support for
+	latency benchmarking.
+	* benchtests/scripts/bench.py: Add support for latency benchmarking.
+
 2017-08-17  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* Makeconfig (+link-pie-before-libc): Add CRT-* hook to override
--- a/benchtests/bench-skeleton.c
+++ b/benchtests/bench-skeleton.c
@@ -71,8 +71,10 @@ main (int argc, char **argv)
       bool is_bench = strncmp (VARIANT (v), "workload-", 9) == 0;
       double d_total_i = 0;
       timing_t total = 0, max = 0, min = 0x7fffffffffffffff;
+      timing_t throughput = 0, latency = 0;
       int64_t c = 0;
       uint64_t cur;
+      BENCH_VARS;
       while (1)
         {
           if (is_bench)
@@ -86,7 +88,16 @@ main (int argc, char **argv)
                   BENCH_FUNC (v, i);
               TIMING_NOW (end);
               TIMING_DIFF (cur, start, end);
-              TIMING_ACCUM (total, cur);
+              TIMING_ACCUM (throughput, cur);
+
+              TIMING_NOW (start);
+              for (k = 0; k < iters; k++)
+                for (i = 0; i < NUM_SAMPLES (v); i++)
+                  BENCH_FUNC_LAT (v, i);
+              TIMING_NOW (end);
+              TIMING_DIFF (cur, start, end);
+              TIMING_ACCUM (latency, cur);
+
               d_total_i += iters * NUM_SAMPLES (v);
             }
           else
@@ -131,12 +142,20 @@ main (int argc, char **argv)
       /* Begin variant.  */
       json_attr_object_begin (&json_ctx, VARIANT (v));
 
-      json_attr_double (&json_ctx, "duration", d_total_s);
-      json_attr_double (&json_ctx, "iterations", d_total_i);
       if (is_bench)
-        json_attr_double (&json_ctx, "throughput", d_total_s / d_total_i);
+        {
+          json_attr_double (&json_ctx, "reciprocal-throughput",
+                            throughput / d_total_i);
+          json_attr_double (&json_ctx, "latency", latency / d_total_i);
+          json_attr_double (&json_ctx, "max-throughput",
+                            d_total_i / throughput * 1000000000.0);
+          json_attr_double (&json_ctx, "min-throughput",
+                            d_total_i / latency * 1000000000.0);
+        }
       else
         {
+          json_attr_double (&json_ctx, "duration", d_total_s);
+          json_attr_double (&json_ctx, "iterations", d_total_i);
           json_attr_double (&json_ctx, "max", max / d_iters);
           json_attr_double (&json_ctx, "min", min / d_iters);
           json_attr_double (&json_ctx, "mean", d_total_s / d_total_i);
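As a concrete check of the arithmetic in the hunk above, a small sketch that
plugs in the numbers from the JSON sample in the commit message (assuming
the timing header counts in nanoseconds):

#include <stdio.h>

int
main (void)
{
  /* Hypothetical accumulated totals; values chosen to reproduce the
     commit message's sample output.  */
  double d_total_i = 1.0e6;    /* total function calls executed       */
  double throughput = 1.0e8;   /* ns accumulated by independent calls */
  double latency = 2.0e8;      /* ns accumulated by dependent calls   */

  printf ("reciprocal-throughput: %g ns\n", throughput / d_total_i); /* 100 */
  printf ("latency: %g ns\n", latency / d_total_i);                  /* 200 */
  printf ("max-throughput: %g/s\n", d_total_i / throughput * 1e9);   /* 1e+07 */
  printf ("min-throughput: %g/s\n", d_total_i / latency * 1e9);      /* 5e+06 */
  return 0;
}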
--- a/benchtests/scripts/bench.py
+++ b/benchtests/scripts/bench.py
@@ -45,7 +45,7 @@ DEFINES_TEMPLATE = '''
 # variant is represented by the _VARIANT structure.  The ARGS structure
 # represents a single set of arguments.
 STRUCT_TEMPLATE = '''
-#define CALL_BENCH_FUNC(v, i) %(func)s (%(func_args)s)
+#define CALL_BENCH_FUNC(v, i, x) %(func)s (x %(func_args)s)
 
 struct args
 {
@@ -84,7 +84,9 @@ EPILOGUE = '''
 #define RESULT(__v, __i) (variants[(__v)].in[(__i)].timing)
 #define RESULT_ACCUM(r, v, i, old, new) \\
 	((RESULT ((v), (i))) = (RESULT ((v), (i)) * (old) + (r)) / ((new) + 1))
-#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j);})
+#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j, );})
+#define BENCH_FUNC_LAT(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j, %(latarg)s);})
+#define BENCH_VARS %(defvar)s
 #define FUNCNAME "%(func)s"
 #include "bench-skeleton.c"'''
 
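To see what the template changes produce, here is roughly the generated
header for a hypothetical benchmark of double exp (double); exp and the
arg0 field name are illustrative stand-ins, not the generator's literal
output:

/* CALL_BENCH_FUNC now takes an extra parameter x that is pasted in
   front of the argument list.  */
#define CALL_BENCH_FUNC(v, i, x) exp (x variants[v].in[i].arg0)

/* Throughput variant: x is empty, so this is plain
   exp (variants[v].in[i].arg0) and iterations stay independent.  */
#define BENCH_FUNC(i, j) ({ ret = func_res = CALL_BENCH_FUNC (i, j, ); })

/* Latency variant: x is 'func_res * zero +', giving
   exp (func_res * zero + variants[v].in[i].arg0), so each call depends
   on the previous result while the input value is unchanged.  */
#define BENCH_FUNC_LAT(i, j) \
  ({ ret = func_res = CALL_BENCH_FUNC (i, j, func_res * zero +); })

/* BENCH_VARS declares the dependency-chain variable used above.  */
#define BENCH_VARS double func_res = 0;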
@@ -122,17 +124,23 @@ def gen_source(func, directives, all_vals):
     # If we have a return value from the function, make sure it is
     # assigned to prevent the compiler from optimizing out the
     # call.
+    getret = ''
+    latarg = ''
+    defvar = ''
+
     if directives['ret']:
         print('static %s volatile ret;' % directives['ret'])
-        getret = 'ret = '
-    else:
-        getret = ''
+        print('static %s zero __attribute__((used)) = 0;' % directives['ret'])
+        getret = 'ret = func_res = '
+        # Note this may not work if argument and result type are incompatible.
+        latarg = 'func_res * zero +'
+        defvar = '%s func_res = 0;' % directives['ret']
 
     # Test initialization.
     if directives['init']:
         print('#define BENCH_INIT %s' % directives['init'])
 
-    print(EPILOGUE % {'getret': getret, 'func': func})
+    print(EPILOGUE % {'getret': getret, 'func': func, 'latarg': latarg, 'defvar': defvar })
 
 
 def _print_arg_data(func, directives, all_vals):