draft gpu support in nanobench

R=bsalomon@google.com, mtklein@google.com

Author: mtklein@chromium.org

Review URL: https://codereview.chromium.org/359473004
This commit is contained in:
mtklein 2014-07-01 08:43:42 -07:00 committed by Commit bot
parent 60b08a0adf
commit bb6a028239
2 changed files with 183 additions and 89 deletions

View File

@ -18,8 +18,7 @@ enum { kNumInputs = 5 };
class ImageFilterDAGBench : public Benchmark {
ImageFilterDAGBench() {
ImageFilterDAGBench() {}
virtual const char* onGetName() SK_OVERRIDE {
@ -27,16 +26,18 @@ protected:
virtual void onDraw(const int loops, SkCanvas* canvas) SK_OVERRIDE {
SkAutoTUnref<SkImageFilter> blur(SkBlurImageFilter::Create(20.0f, 20.0f));
SkImageFilter* inputs[kNumInputs];
for (int i = 0; i < kNumInputs; ++i) {
inputs[i] = blur.get();
for (int j = 0; j < loops; j++) {
SkAutoTUnref<SkImageFilter> blur(SkBlurImageFilter::Create(20.0f, 20.0f));
SkImageFilter* inputs[kNumInputs];
for (int i = 0; i < kNumInputs; ++i) {
inputs[i] = blur.get();
SkAutoTUnref<SkImageFilter> merge(SkMergeImageFilter::Create(inputs, kNumInputs));
SkPaint paint;
SkRect rect = SkRect::Make(SkIRect::MakeWH(400, 400));
canvas->drawRect(rect, paint);
SkAutoTUnref<SkImageFilter> merge(SkMergeImageFilter::Create(inputs, kNumInputs));
SkPaint paint;
SkRect rect = SkRect::Make(SkIRect::MakeWH(400, 400));
canvas->drawRect(rect, paint);

View File

@ -5,6 +5,8 @@
* found in the LICENSE file.
#include <ctype.h>
#include "Benchmark.h"
#include "CrashHandler.h"
#include "Stats.h"
@ -17,6 +19,11 @@
#include "SkString.h"
#include "SkSurface.h"
#include "GrContextFactory.h"
GrContextFactory gGrFactory;
DEFINE_int32(samples, 10, "Number of samples to measure for each bench.");
@ -26,10 +33,11 @@ DEFINE_double(overheadGoal, 0.0001,
DEFINE_string(match, "", "The usual filters on file names of benchmarks to measure.");
DEFINE_bool2(quiet, q, false, "Print only bench name and minimum sample.");
DEFINE_bool2(verbose, v, false, "Print all samples.");
DEFINE_string(config, "8888 nonrendering",
"Configs to measure. Options: 565 8888 nonrendering");
DEFINE_string(config, "nonrendering 8888 gpu", "Configs to measure. Options: "
"565 8888 gpu nonrendering debug nullgpu msaa4 msaa16 nvprmsaa4 nvprmsaa16 angle");
// Target wall time for one timed GPU frame; gpu_bench grows its loop count until a frame reaches it.
DEFINE_double(gpuMs, 5, "Target bench time in milliseconds for GPU.");
// Number of extra rounds gpu_bench runs before trusting a measurement, so it is not still timing earlier frames.
DEFINE_int32(gpuFrameLag, 5, "Overestimate of maximum number of frames GPU allows to lag.");
// TODO: GPU benches
static SkString humanize(double ms) {
if (ms > 1e+3) return SkStringPrintf("%.3gs", ms/1e3);
@ -38,86 +46,175 @@ static SkString humanize(double ms) {
return SkStringPrintf("%.3gms", ms);
static double time(int loops, Benchmark* bench, SkCanvas* canvas, SkGLContextHelper* gl) {
WallTimer timer;
if (bench) {
bench->draw(loops, canvas);
if (canvas) {
if (gl) {
SK_GL(*gl, Flush());
return timer.fWall;
static double estimate_timer_overhead() {
double overhead = 0;
WallTimer timer;
for (int i = 0; i < FLAGS_overheadLoops; i++) {
overhead += timer.fWall;
overhead += time(1, NULL, NULL, NULL);
return overhead / FLAGS_overheadLoops;
static void safe_flush(SkCanvas* canvas) {
if (canvas) {
static int guess_loops(double overhead, Benchmark* bench, SkCanvas* canvas) {
WallTimer timer;
// Measure timer overhead and bench time together.
static int cpu_bench(const double overhead, Benchmark* bench, SkCanvas* canvas, double* samples) {
// First figure out approximately how many loops of bench it takes to make overhead negligible.
double bench_plus_overhead;
do {
bench->draw(1, canvas);
} while (timer.fWall < overhead); // Shouldn't normally happen.
bench_plus_overhead = time(1, bench, canvas, NULL);
} while (bench_plus_overhead < overhead); // Shouldn't normally happen.
// Later we'll just start and stop the timer once, but loop N times.
// Later we'll just start and stop the timer once but loop N times.
// We'll pick N to make timer overhead negligible:
// Timer Overhead
// ------------------------------- < FLAGS_overheadGoal
// Timer Overhead + N * Bench Time
// overhead
// ------------------------- < FLAGS_overheadGoal
// overhead + N * Bench Time
// where timer.fWall ≈ Timer Overhead + Bench Time.
// where bench_plus_overhead ≈ overhead + Bench Time.
// Doing some math, we get:
// (Timer Overhead / FLAGS_overheadGoal) - Timer Overhead
// ----------------------------------------------------- < N
// (timer.fWall - Timer Overhead)
// (overhead / FLAGS_overheadGoal) - overhead
// ------------------------------------------ < N
// (bench_plus_overhead - overhead)
// Luckily, this also works well in practice. :)
const double numer = overhead / FLAGS_overheadGoal - overhead;
const double denom = timer.fWall - overhead;
return (int)ceil(numer / denom);
const double denom = bench_plus_overhead - overhead;
const int loops = (int)ceil(numer / denom);
for (int i = 0; i < FLAGS_samples; i++) {
samples[i] = time(loops, bench, canvas, NULL) / loops;
return loops;
static bool push_config_if_enabled(const char* config, SkTDArray<const char*>* configs) {
if (FLAGS_config.contains(config)) {
return true;
static int gpu_bench(SkGLContextHelper* gl,
Benchmark* bench,
SkCanvas* canvas,
double* samples) {
// Make sure we're done with whatever came before.
SK_GL(*gl, Finish);
// First, figure out how many loops it'll take to get a frame up to FLAGS_gpuMs.
int loops = 1;
double elapsed = 0;
do {
loops *= 2;
// If the GPU lets frames lag at all, we need to make sure we're timing
// _this_ round, not still timing last round. We force this by looping
// more times than any reasonable GPU will allow frames to lag.
for (int i = 0; i < FLAGS_gpuFrameLag; i++) {
elapsed = time(loops, bench, canvas, gl);
} while (elapsed < FLAGS_gpuMs);
// We've overshot at least a little. Scale back linearly.
loops = (int)ceil(loops * FLAGS_gpuMs / elapsed);
// Might as well make sure we're not still timing our calibration.
SK_GL(*gl, Finish);
// Pretty much the same deal as the calibration: do some warmup to make
// sure we're timing steady-state pipelined frames.
for (int i = 0; i < FLAGS_gpuFrameLag; i++) {
time(loops, bench, canvas, gl);
return false;
// Now, actually do the timing!
for (int i = 0; i < FLAGS_samples; i++) {
samples[i] = time(loops, bench, canvas, gl) / loops;
return loops;
static SkString to_lower(const char* str) {
SkString lower(str);
for (size_t i = 0; i < lower.size(); i++) {
lower[i] = tolower(lower[i]);
return lower;
static void create_surfaces(Benchmark* bench,
SkTDArray<SkSurface*>* surfaces,
SkTDArray<const char*>* configs) {
struct Target {
const char* config;
Benchmark::Backend backend;
SkAutoTDelete<SkSurface> surface;
SkGLContextHelper* gl;
if (bench->isSuitableFor(Benchmark::kNonRendering_Backend)
&& push_config_if_enabled("nonrendering", configs)) {
// If bench is enabled for backend/config, returns a Target* for them, otherwise NULL.
static Target* is_enabled(Benchmark* bench, Benchmark::Backend backend, const char* config) {
if (!bench->isSuitableFor(backend)) {
return NULL;
if (bench->isSuitableFor(Benchmark::kRaster_Backend)) {
const int w = bench->getSize().fX,
h = bench->getSize().fY;
if (push_config_if_enabled("8888", configs)) {
const SkImageInfo info = { w, h, kN32_SkColorType, kPremul_SkAlphaType };
if (push_config_if_enabled("565", configs)) {
const SkImageInfo info = { w, h, kRGB_565_SkColorType, kOpaque_SkAlphaType };
for (int i = 0; i < FLAGS_config.count(); i++) {
if (to_lower(FLAGS_config[i]).equals(config)) {
Target* target = new Target;
target->config = config;
target->backend = backend;
return target;
return NULL;
// Append all targets that are suitable for bench.
static void create_targets(Benchmark* bench, SkTDArray<Target*>* targets) {
const int w = bench->getSize().fX,
h = bench->getSize().fY;
const SkImageInfo _8888 = { w, h, kN32_SkColorType, kPremul_SkAlphaType },
_565 = { w, h, kRGB_565_SkColorType, kOpaque_SkAlphaType };
#define CPU_TARGET(config, backend, code) \
if (Target* t = is_enabled(bench, Benchmark::backend, #config)) { \
t->surface.reset(code); \
targets->push(t); \
CPU_TARGET(nonrendering, kNonRendering_Backend, NULL)
CPU_TARGET(8888, kRaster_Backend, SkSurface::NewRaster(_8888))
CPU_TARGET(565, kRaster_Backend, SkSurface::NewRaster(_565))
#define GPU_TARGET(config, ctxType, info, samples) \
if (Target* t = is_enabled(bench, Benchmark::kGPU_Backend, #config)) { \
t->surface.reset(SkSurface::NewRenderTarget(gGrFactory.get(ctxType), info, samples)); \
t->gl = gGrFactory.getGLContext(ctxType); \
targets->push(t); \
GPU_TARGET(gpu, GrContextFactory::kNative_GLContextType, _8888, 0)
GPU_TARGET(msaa4, GrContextFactory::kNative_GLContextType, _8888, 4)
GPU_TARGET(msaa16, GrContextFactory::kNative_GLContextType, _8888, 16)
GPU_TARGET(nvprmsaa4, GrContextFactory::kNVPR_GLContextType, _8888, 4)
GPU_TARGET(nvprmsaa16, GrContextFactory::kNVPR_GLContextType, _8888, 16)
GPU_TARGET(debug, GrContextFactory::kDebug_GLContextType, _8888, 0)
GPU_TARGET(nullgpu, GrContextFactory::kNull_GLContextType, _8888, 0)
GPU_TARGET(angle, GrContextFactory::kANGLE_GLContextType, _8888, 0)
int tool_main(int argc, char** argv);
@ -127,13 +224,16 @@ int tool_main(int argc, char** argv) {
SkCommandLineFlags::Parse(argc, argv);
const double overhead = estimate_timer_overhead();
SkAutoTMalloc<double> samples(FLAGS_samples);
// TODO: add the median to the display, and use it in --quiet mode
if (FLAGS_verbose) {
// No header.
} else if (FLAGS_quiet) {
} else {
for (const BenchRegistry* r = BenchRegistry::Head(); r != NULL; r = r->next()) {
@ -142,38 +242,31 @@ int tool_main(int argc, char** argv) {
SkTDArray<SkSurface*> surfaces;
SkTDArray<const char*> configs;
create_surfaces(bench.get(), &surfaces, &configs);
SkTDArray<Target*> targets;
create_targets(bench.get(), &targets);
for (int j = 0; j < surfaces.count(); j++) {
SkCanvas* canvas = surfaces[j] ? surfaces[j]->getCanvas() : NULL;
const char* config = configs[j];
for (int j = 0; j < targets.count(); j++) {
SkCanvas* canvas = targets[j]->surface.get() ? targets[j]->surface->getCanvas() : NULL;
bench->draw(1, canvas); // Just paranoid warmup.
const int loops = guess_loops(overhead, bench.get(), canvas);
SkAutoTMalloc<double> samples(FLAGS_samples);
WallTimer timer;
for (int i = 0; i < FLAGS_samples; i++) {
bench->draw(loops, canvas);
samples[i] = timer.fWall / loops;
const int loops =
Benchmark::kGPU_Backend == targets[j]->backend
? gpu_bench(targets[j]->gl, bench.get(), canvas, samples.get())
cpu_bench( overhead, bench.get(), canvas, samples.get());
Stats stats(samples.get(), FLAGS_samples);
const char* config = targets[j]->config;
if (FLAGS_verbose) {
for (int i = 0; i < FLAGS_samples; i++) {
SkDebugf("%s ", humanize(samples[i]).c_str());
SkDebugf("%s\n", bench->getName());
} else if (FLAGS_quiet) {
if (configs.count() == 1) {
if (targets.count() == 1) {
config = ""; // Only print the config if we run the same bench on more than one.
SkDebugf("%s\t%s\t%s\n", humanize(stats.min).c_str(), bench->getName(), config);
@ -185,12 +278,12 @@ int tool_main(int argc, char** argv) {
, humanize(stats.mean).c_str()
, humanize(stats.max).c_str()
, stddev_percent
, bench->getName()
, config
, bench->getName()
return 0;