Refine bench_record and bench_playback:
- use high-precision wall timer only - warm caches once before measuring - measure independent samples, calculating statistics - add --verbose to control how much data we output Also removed some unloved features from bench_record. BUG=skia: R=jcgregorio@google.com, mtklein@google.com Author: mtklein@chromium.org Review URL: https://codereview.chromium.org/338203002
This commit is contained in:
parent
ad2ab61b4e
commit
90c471e73f
@ -76,3 +76,19 @@ void BenchTimer::truncatedEnd() {
|
||||
fTruncatedCpu = fTruncatedSysTimer->endCpu() * fDurationScale;
|
||||
fTruncatedWall = fTruncatedSysTimer->endWall() * fDurationScale;
|
||||
}
|
||||
|
||||
WallTimer::WallTimer() : fWall(-1.0), fSysTimer(new BenchSysTimer) {}
|
||||
|
||||
WallTimer::~WallTimer() {
|
||||
delete fSysTimer;
|
||||
}
|
||||
|
||||
void WallTimer::start(double durationScale) {
|
||||
fDurationScale = durationScale;
|
||||
fSysTimer->startWall();
|
||||
}
|
||||
|
||||
void WallTimer::end() {
|
||||
fWall = fSysTimer->endWall() * fDurationScale;
|
||||
}
|
||||
|
||||
|
@ -39,12 +39,29 @@ public:
|
||||
double fGpu;
|
||||
|
||||
private:
|
||||
BenchSysTimer *fSysTimer;
|
||||
BenchSysTimer *fTruncatedSysTimer;
|
||||
BenchSysTimer* fSysTimer;
|
||||
BenchSysTimer* fTruncatedSysTimer;
|
||||
#if SK_SUPPORT_GPU
|
||||
BenchGpuTimer *fGpuTimer;
|
||||
BenchGpuTimer* fGpuTimer;
|
||||
#endif
|
||||
double fDurationScale; // for this start/end session
|
||||
};
|
||||
|
||||
// Same as BenchTimer above, supporting only fWall but with much lower overhead.
|
||||
// (Typically, ~30ns instead of BenchTimer's ~1us.)
|
||||
class WallTimer {
|
||||
public:
|
||||
WallTimer();
|
||||
~WallTimer();
|
||||
|
||||
void start(double durationScale = 1);
|
||||
void end();
|
||||
|
||||
double fWall;
|
||||
|
||||
private:
|
||||
BenchSysTimer* fSysTimer;
|
||||
double fDurationScale;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
32
tools/Stats.h
Normal file
32
tools/Stats.h
Normal file
@ -0,0 +1,32 @@
|
||||
#ifndef Stats_DEFINED
|
||||
#define Stats_DEFINED
|
||||
|
||||
struct Stats {
|
||||
Stats(const double samples[], int n) {
|
||||
min = samples[0];
|
||||
max = samples[0];
|
||||
for (int i = 0; i < n; i++) {
|
||||
if (samples[i] < min) { min = samples[i]; }
|
||||
if (samples[i] > max) { max = samples[i]; }
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
for (int i = 0 ; i < n; i++) {
|
||||
sum += samples[i];
|
||||
}
|
||||
mean = sum / n;
|
||||
|
||||
double err = 0.0;
|
||||
for (int i = 0 ; i < n; i++) {
|
||||
err += (samples[i] - mean) * (samples[i] - mean);
|
||||
}
|
||||
var = err / (n-1);
|
||||
}
|
||||
|
||||
double min;
|
||||
double max;
|
||||
double mean; // Estimate of population mean.
|
||||
double var; // Estimate of population variance.
|
||||
};
|
||||
|
||||
#endif//Stats_DEFINED
|
@ -5,7 +5,6 @@
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#include "BenchTimer.h"
|
||||
#include "SkCommandLineFlags.h"
|
||||
#include "SkForceLinking.h"
|
||||
#include "SkGraphics.h"
|
||||
@ -16,19 +15,27 @@
|
||||
#include "SkStream.h"
|
||||
#include "SkString.h"
|
||||
|
||||
#include "BenchTimer.h"
|
||||
#include "Stats.h"
|
||||
|
||||
typedef WallTimer Timer;
|
||||
|
||||
__SK_FORCE_IMAGE_DECODER_LINKING;
|
||||
|
||||
DEFINE_string2(skps, r, "skps", "Directory containing SKPs to read and re-record.");
|
||||
DEFINE_int32(loops, 10, "Number of times to play back each SKP.");
|
||||
DEFINE_string2(skps, r, "skps", "Directory containing SKPs to playback.");
|
||||
DEFINE_int32(samples, 10, "Gather this many samples of each picture playback.");
|
||||
DEFINE_bool(skr, false, "Play via SkRecord instead of SkPicture.");
|
||||
DEFINE_int32(tile, 1000000000, "Simulated tile size.");
|
||||
DEFINE_string(match, "", "The usual filters on file names of SKPs to bench.");
|
||||
DEFINE_string(timescale, "ms", "Print times in ms, us, or ns");
|
||||
DEFINE_int32(verbose, 0, "0: print min sample; "
|
||||
"1: print min, mean, max and noise indication "
|
||||
"2: print all samples");
|
||||
|
||||
static double scale_time(double ms) {
|
||||
if (FLAGS_timescale.contains("us")) ms *= 1000;
|
||||
if (FLAGS_timescale.contains("ns")) ms *= 1000000;
|
||||
return ms;
|
||||
static double timescale() {
|
||||
if (FLAGS_timescale.contains("us")) return 1000;
|
||||
if (FLAGS_timescale.contains("ns")) return 1000000;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static SkPicture* rerecord_with_tilegrid(SkPicture& src) {
|
||||
@ -49,6 +56,14 @@ static EXPERIMENTAL::SkPlayback* rerecord_with_skr(SkPicture& src) {
|
||||
return recording.releasePlayback();
|
||||
}
|
||||
|
||||
static void draw(const EXPERIMENTAL::SkPlayback& skr, const SkPicture& skp, SkCanvas* canvas) {
|
||||
if (FLAGS_skr) {
|
||||
skr.draw(canvas);
|
||||
} else {
|
||||
skp.draw(canvas);
|
||||
}
|
||||
}
|
||||
|
||||
static void bench(SkPMColor* scratch, SkPicture& src, const char* name) {
|
||||
SkAutoTUnref<SkPicture> picture(rerecord_with_tilegrid(src));
|
||||
SkAutoTDelete<EXPERIMENTAL::SkPlayback> record(rerecord_with_skr(src));
|
||||
@ -59,19 +74,34 @@ static void bench(SkPMColor* scratch, SkPicture& src, const char* name) {
|
||||
src.width() * sizeof(SkPMColor)));
|
||||
canvas->clipRect(SkRect::MakeWH(SkIntToScalar(FLAGS_tile), SkIntToScalar(FLAGS_tile)));
|
||||
|
||||
BenchTimer timer;
|
||||
timer.start();
|
||||
for (int i = 0; i < FLAGS_loops; i++) {
|
||||
if (FLAGS_skr) {
|
||||
record->draw(canvas.get());
|
||||
} else {
|
||||
picture->draw(canvas.get());
|
||||
}
|
||||
}
|
||||
timer.end();
|
||||
// Draw once to warm any caches. The first sample otherwise can be very noisy.
|
||||
draw(*record, *picture, canvas.get());
|
||||
|
||||
const double msPerLoop = timer.fCpu / (double)FLAGS_loops;
|
||||
printf("%f\t%s\n", scale_time(msPerLoop), name);
|
||||
Timer timer;
|
||||
SkAutoTMalloc<double> samples(FLAGS_samples);
|
||||
for (int i = 0; i < FLAGS_samples; i++) {
|
||||
// We assume timer overhead (typically, ~30ns) is insignificant
|
||||
// compared to draw runtime (at least ~100us, usually several ms).
|
||||
timer.start(timescale());
|
||||
draw(*record, *picture, canvas.get());
|
||||
timer.end();
|
||||
samples[i] = timer.fWall;
|
||||
}
|
||||
|
||||
Stats stats(samples.get(), FLAGS_samples);
|
||||
if (FLAGS_verbose == 0) {
|
||||
printf("%g\t%s\n", stats.min, name);
|
||||
} else if (FLAGS_verbose == 1) {
|
||||
// Get a rough idea of how noisy the measurements were.
|
||||
const double noisePercent = 100 * sqrt(stats.var) / stats.mean;
|
||||
printf("%g\t%g\t%g\t±%.0f%%\t%s\n", stats.min, stats.mean, stats.max, noisePercent, name);
|
||||
} else if (FLAGS_verbose == 2) {
|
||||
printf("%s", name);
|
||||
for (int i = 0; i < FLAGS_samples; i++) {
|
||||
printf("\t%g", samples[i]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
int tool_main(int argc, char** argv);
|
||||
|
@ -5,7 +5,6 @@
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#include "BenchTimer.h"
|
||||
#include "SkCommandLineFlags.h"
|
||||
#include "SkForceLinking.h"
|
||||
#include "SkGraphics.h"
|
||||
@ -15,28 +14,32 @@
|
||||
#include "SkRecording.h"
|
||||
#include "SkStream.h"
|
||||
#include "SkString.h"
|
||||
|
||||
#include "BenchTimer.h"
|
||||
#include "LazyDecodeBitmap.h"
|
||||
#include "Stats.h"
|
||||
|
||||
typedef WallTimer Timer;
|
||||
|
||||
__SK_FORCE_IMAGE_DECODER_LINKING;
|
||||
|
||||
// Just reading all the SKPs takes about 2 seconds for me, which is the same as about 100 loops of
|
||||
// rerecording all the SKPs. So we default to --loops=900, which makes ~90% of our time spent in
|
||||
// recording, and this should take ~20 seconds to run.
|
||||
|
||||
DEFINE_string2(skps, r, "skps", "Directory containing SKPs to read and re-record.");
|
||||
DEFINE_int32(loops, 900, "Number of times to re-record each SKP.");
|
||||
DEFINE_bool(endRecording, true, "If false, don't time SkPicture::endRecording()");
|
||||
DEFINE_int32(nullSize, 1000, "Pretend dimension of null source picture.");
|
||||
DEFINE_int32(samples, 10, "Number of times to re-record each SKP.");
|
||||
DEFINE_int32(tileGridSize, 512, "Set the tile grid size. Has no effect if bbh is not set to tilegrid.");
|
||||
DEFINE_string(bbh, "", "Turn on the bbh and select the type, one of rtree, tilegrid, quadtree");
|
||||
DEFINE_bool(skr, false, "Record SKR instead of SKP.");
|
||||
DEFINE_string(match, "", "The usual filters on file names of SKPs to bench.");
|
||||
DEFINE_string(timescale, "us", "Print times in ms, us, or ns");
|
||||
DEFINE_double(overheadGoal, 0.0001,
|
||||
"Try to make timer overhead at most this fraction of our sample measurements.");
|
||||
DEFINE_int32(verbose, 0, "0: print min sample; "
|
||||
"1: print min, mean, max and noise indication "
|
||||
"2: print all samples");
|
||||
|
||||
static double scale_time(double ms) {
|
||||
if (FLAGS_timescale.contains("us")) ms *= 1000;
|
||||
if (FLAGS_timescale.contains("ns")) ms *= 1000000;
|
||||
return ms;
|
||||
static double timescale() {
|
||||
if (FLAGS_timescale.contains("us")) return 1000;
|
||||
if (FLAGS_timescale.contains("ns")) return 1000000;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static SkBBHFactory* parse_FLAGS_bbh() {
|
||||
@ -61,35 +64,62 @@ static SkBBHFactory* parse_FLAGS_bbh() {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void bench_record(SkPicture* src, const char* name, SkBBHFactory* bbhFactory) {
|
||||
BenchTimer timer;
|
||||
timer.start();
|
||||
const int width = src ? src->width() : FLAGS_nullSize;
|
||||
const int height = src ? src->height() : FLAGS_nullSize;
|
||||
|
||||
for (int i = 0; i < FLAGS_loops; i++) {
|
||||
static void rerecord(const SkPicture& src, SkBBHFactory* bbhFactory) {
|
||||
if (FLAGS_skr) {
|
||||
EXPERIMENTAL::SkRecording recording(width, height);
|
||||
if (NULL != src) {
|
||||
src->draw(recording.canvas());
|
||||
}
|
||||
EXPERIMENTAL::SkRecording recording(src.width(), src.height());
|
||||
src.draw(recording.canvas());
|
||||
// Release and delete the SkPlayback so that recording optimizes its SkRecord.
|
||||
SkDELETE(recording.releasePlayback());
|
||||
} else {
|
||||
SkPictureRecorder recorder;
|
||||
SkCanvas* canvas = recorder.beginRecording(width, height, bbhFactory);
|
||||
if (NULL != src) {
|
||||
src->draw(canvas);
|
||||
}
|
||||
if (FLAGS_endRecording) {
|
||||
src.draw(recorder.beginRecording(src.width(), src.height(), bbhFactory));
|
||||
SkAutoTUnref<SkPicture> dst(recorder.endRecording());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void bench_record(const SkPicture& src,
|
||||
const double timerOverhead,
|
||||
const char* name,
|
||||
SkBBHFactory* bbhFactory) {
|
||||
// Rerecord once to warm up any caches. Otherwise the first sample can be very noisy.
|
||||
rerecord(src, bbhFactory);
|
||||
|
||||
// Rerecord once to see how many times we should loop to make timer overhead insignificant.
|
||||
Timer timer;
|
||||
do {
|
||||
timer.start(timescale());
|
||||
rerecord(src, bbhFactory);
|
||||
timer.end();
|
||||
} while (timer.fWall < timerOverhead); // Loop just in case something bizarre happens.
|
||||
|
||||
// We want (timer overhead / measurement) to be less than FLAGS_overheadGoal.
|
||||
// So in each sample, we'll loop enough times to have made that true for our first measurement.
|
||||
const int loops = (int)ceil(timerOverhead / timer.fWall / FLAGS_overheadGoal);
|
||||
|
||||
SkAutoTMalloc<double> samples(FLAGS_samples);
|
||||
for (int i = 0; i < FLAGS_samples; i++) {
|
||||
timer.start(timescale());
|
||||
for (int j = 0; j < loops; j++) {
|
||||
rerecord(src, bbhFactory);
|
||||
}
|
||||
timer.end();
|
||||
samples[i] = timer.fWall / loops;
|
||||
}
|
||||
|
||||
const double msPerLoop = timer.fCpu / (double)FLAGS_loops;
|
||||
printf("%f\t%s\n", scale_time(msPerLoop), name);
|
||||
Stats stats(samples.get(), FLAGS_samples);
|
||||
if (FLAGS_verbose == 0) {
|
||||
printf("%g\t%s\n", stats.min, name);
|
||||
} else if (FLAGS_verbose == 1) {
|
||||
// Get a rough idea of how noisy the measurements were.
|
||||
const double noisePercent = 100 * sqrt(stats.var) / stats.mean;
|
||||
printf("%g\t%g\t%g\t±%.0f%%\t%s\n", stats.min, stats.mean, stats.max, noisePercent, name);
|
||||
} else if (FLAGS_verbose == 2) {
|
||||
printf("%s", name);
|
||||
for (int i = 0; i < FLAGS_samples; i++) {
|
||||
printf("\t%g", samples[i]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
int tool_main(int argc, char** argv);
|
||||
@ -103,10 +133,17 @@ int tool_main(int argc, char** argv) {
|
||||
}
|
||||
|
||||
SkAutoTDelete<SkBBHFactory> bbhFactory(parse_FLAGS_bbh());
|
||||
bench_record(NULL, "NULL", bbhFactory.get());
|
||||
if (FLAGS_skps.isEmpty()) {
|
||||
return 0;
|
||||
|
||||
// Each run will use this timer overhead estimate to guess how many times it should run.
|
||||
static const int kOverheadLoops = 10000000;
|
||||
Timer timer;
|
||||
double overheadEstimate = 0.0;
|
||||
for (int i = 0; i < kOverheadLoops; i++) {
|
||||
timer.start(timescale());
|
||||
timer.end();
|
||||
overheadEstimate += timer.fWall;
|
||||
}
|
||||
overheadEstimate /= kOverheadLoops;
|
||||
|
||||
SkOSFile::Iter it(FLAGS_skps[0], ".skp");
|
||||
SkString filename;
|
||||
@ -131,7 +168,7 @@ int tool_main(int argc, char** argv) {
|
||||
failed = true;
|
||||
continue;
|
||||
}
|
||||
bench_record(src, filename.c_str(), bbhFactory.get());
|
||||
bench_record(*src, overheadEstimate, filename.c_str(), bbhFactory.get());
|
||||
}
|
||||
return failed ? 1 : 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user